Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CBRD-25376] revise error message that may occur when loading ha_node_list and ha_replica_list #5231

Merged
merged 6 commits into from
Jun 30, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 52 additions & 18 deletions src/executables/master_heartbeat.c
Original file line number Diff line number Diff line change
Expand Up @@ -2658,13 +2658,14 @@ hb_cluster_load_group_and_node_list (char *ha_node_list, char *ha_replica_list)
{
int priority, num_nodes;
char tmp_string[LINE_MAX];
char err_string[LINE_MAX];
char *p, *savep;
HB_NODE_ENTRY *node;

if (ha_node_list == NULL)
if (ha_node_list == NULL || ha_node_list[0] == '\0')
mhoh3963 marked this conversation as resolved.
Show resolved Hide resolved
{
MASTER_ER_LOG_DEBUG (ARG_FILE_LINE, "invalid ha_node_list. (ha_node_list:NULL).\n");
return ER_FAILED;
sprintf (err_string, "%s is empty", prm_get_name (PRM_ID_HA_NODE_LIST));
hornetmj marked this conversation as resolved.
Show resolved Hide resolved
goto error;
}

hb_Cluster->myself = NULL;
Expand All @@ -2687,7 +2688,16 @@ hb_cluster_load_group_and_node_list (char *ha_node_list, char *ha_replica_list)
{
if (are_hostnames_equal (node->host_name, hb_Cluster->host_name))
{
hb_Cluster->myself = node;
if (hb_Cluster->state == HB_NSTATE_REPLICA)
{
sprintf (err_string, "In replica mode, (%s) must not be specified in the %s",
hb_Cluster->host_name, prm_get_name (PRM_ID_HA_NODE_LIST));
goto error;
}
else
{
hb_Cluster->myself = node;
}
#if defined (HB_VERBOSE_DEBUG)
MASTER_ER_LOG_DEBUG (ARG_FILE_LINE, "find myself node. (myself:%p, priority:%d). \n",
hb_Cluster->myself, hb_Cluster->myself->priority);
Expand All @@ -2697,11 +2707,12 @@ hb_cluster_load_group_and_node_list (char *ha_node_list, char *ha_replica_list)
}
}

if (hb_Cluster->state == HB_NSTATE_REPLICA && hb_Cluster->myself != NULL)
if (hb_Cluster->state != HB_NSTATE_REPLICA && hb_Cluster->myself == NULL)
{
MASTER_ER_LOG_DEBUG (ARG_FILE_LINE, "myself should be in the ha_replica_list. \n");
return ER_FAILED;
sprintf (err_string, "cannot find (%s) in the %s", hb_Cluster->host_name, prm_get_name (PRM_ID_HA_NODE_LIST));
goto error;
}

num_nodes = priority;

if (ha_replica_list)
Expand All @@ -2712,15 +2723,23 @@ hb_cluster_load_group_and_node_list (char *ha_node_list, char *ha_replica_list)
{
tmp_string[0] = '\0';
}

if (hb_Cluster->state == HB_NSTATE_REPLICA && tmp_string[0] == '\0')
{
sprintf (err_string, "%s is empty", prm_get_name (PRM_ID_HA_REPLICA_LIST));
goto error;
}

for (priority = 0, p = strtok_r (tmp_string, "@", &savep); p; priority++, p = strtok_r (NULL, " ,:", &savep))
{

if (priority == 0)
{
if (strcmp (hb_Cluster->group_id, p) != 0)
{
MASTER_ER_LOG_DEBUG (ARG_FILE_LINE, "different group id ('ha_node_list', 'ha_replica_list') \n");
return ER_FAILED;
sprintf (err_string, "group id of (%s, %s) is different", prm_get_name (PRM_ID_HA_NODE_LIST),
prm_get_name (PRM_ID_HA_REPLICA_LIST));
goto error;
}
}
else
Expand All @@ -2730,20 +2749,34 @@ hb_cluster_load_group_and_node_list (char *ha_node_list, char *ha_replica_list)
{
if (are_hostnames_equal (node->host_name, hb_Cluster->host_name))
{
hb_Cluster->myself = node;
hb_Cluster->state = HB_NSTATE_REPLICA;
if (hb_Cluster->state != HB_NSTATE_REPLICA)
{
sprintf (err_string, "In not replica mode, (%s) must not be specified in the %s",
hb_Cluster->host_name, prm_get_name (PRM_ID_HA_REPLICA_LIST));
goto error;
}
else
{
hb_Cluster->myself = node;
}
}
}
}
}

if (hb_Cluster->myself == NULL)
if (hb_Cluster->state == HB_NSTATE_REPLICA && hb_Cluster->myself == NULL)
{
MASTER_ER_LOG_DEBUG (ARG_FILE_LINE, "cannot find myself. \n");
return ER_FAILED;
sprintf (err_string, "In replica mode, (%s) must be specified in the %s", hb_Cluster->host_name,
prm_get_name (PRM_ID_HA_REPLICA_LIST));
goto error;
}

return num_nodes + priority;

error:

MASTER_ER_SET (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, err_string);
return ER_FAILED;
}


Expand Down Expand Up @@ -4836,9 +4869,12 @@ hb_cluster_initialize (const char *nodes, const char *replicas)
struct sockaddr_in udp_saddr;
char host_name[CUB_MAXHOSTNAMELEN];

if (nodes == NULL)
if (nodes == NULL || nodes[0] == '\0')
hornetmj marked this conversation as resolved.
Show resolved Hide resolved
{
MASTER_ER_SET (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, prm_get_name (PRM_ID_HA_NODE_LIST));
char err_string[LINE_MAX];

sprintf (err_string, "%s is empty", prm_get_name (PRM_ID_HA_NODE_LIST));
MASTER_ER_SET (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, err_string);

return ER_PRM_BAD_VALUE;
}
Expand Down Expand Up @@ -4893,7 +4929,6 @@ hb_cluster_initialize (const char *nodes, const char *replicas)
hb_Cluster->num_nodes);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This debug log does not seem useful. How about removing it as part of this change as well? Below are the master error logs that remain in my tests.

Time: 05/30/24 11:37:38.472 - ERROR *** file ../src/executables/master_heartbeat.c, line 2779 ERROR CODE = -839, Tran = -1, EID = 2
"ha_node_list is empty.": Unknown system parameter or bad value.

Time: 05/30/24 11:37:38.472 - DEBUG *** file ../src/executables/master_heartbeat.c, line 4920
hb_Cluster->num_nodes is smaller than '1'. (num_nodes=-1).

Time: 05/30/24 11:37:38.472 - DEBUG *** file ../src/executables/master_heartbeat.c, line 5185
hb_cluster_initialize failed. (error=-839).
Time: 05/30/24 11:42:45.109 - ERROR *** file ../src/executables/master_heartbeat.c, line 2779 ERROR CODE = -839, Tran = -1, EID = 2
"cannot find (hornetmj1) in the ha_node_list.": Unknown system parameter or bad value.

Time: 05/30/24 11:42:45.109 - DEBUG *** file ../src/executables/master_heartbeat.c, line 4920
hb_Cluster->num_nodes is smaller than '1'. (num_nodes=-1).

Time: 05/30/24 11:42:45.109 - DEBUG *** file ../src/executables/master_heartbeat.c, line 5185
hb_cluster_initialize failed. (error=-839).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. I will remove the debug log "hb_Cluster->num_nodes is smaller than '1'. (num_nodes=-1)".

pthread_mutex_unlock (&hb_Cluster->lock);

MASTER_ER_SET (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, prm_get_name (PRM_ID_HA_NODE_LIST));
return ER_PRM_BAD_VALUE;
}

Expand Down Expand Up @@ -5536,7 +5571,6 @@ hb_reload_config (void)
if (hb_Cluster->num_nodes < 1
|| (hb_Cluster->master && hb_return_node_by_name (hb_Cluster->master->host_name) == NULL))
{
MASTER_ER_SET (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_PRM_BAD_VALUE, 1, prm_get_name (PRM_ID_HA_NODE_LIST));
error = ER_PRM_BAD_VALUE;
goto reconfig_error;
}
Expand Down
Loading