Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:sschapiro:openstack:upstream
cluster
fenced_use_cpg_ringid.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File fenced_use_cpg_ringid.patch of Package cluster
commit f300d76b20ffc02fdda22ac205eb3b1c8258d544 Author: David Teigland <teigland@redhat.com> Date: Fri Mar 19 11:10:13 2010 -0500 fenced: use cpg ringid bz 584140 Use the new totem cpg callback to synchronize cman events and cpg events. Without this fix, the following two test cases cause fenced to become confused and stuck. test 1 ------ - nodes 1,2,3,4 - configure no fencing for all, or force fencing to fail - service cman start on all - use iptables to add network partition: 1 | 2,3,4 - remove iptables partition resulting in merge: 1,2,3,4 - nodes 2,3,4 should kill corosync on node 1 automatically - reboot node 1 if any dlm or gfs were being used (otherwise just verify all cluster daemons have exited) - service cman start on node 1 - fence_tool ls on 1,2,3,4 should all show normal state (4 members, 0 victims, wait state none, members 1,2,3,4) fence domain member count 4 victim count 0 victim now 0 master nodeid 2 wait state none members 1 2 3 4 test 2 ------ - nodes 1,2,3,4 - configure no fencing for all, or force fencing to fail - service cman start on all - use iptables to add network partition: 1,2 | 3,4 - remove iptables partition resulting in merge: 1,2,3,4 - reboot nodes 1 and 2 if any dlm or gfs were being used (otherwise just verify all cluster daemons have exited) (a variation of this test reboots nodes 3 and 4 instead) - service cman start on nodes 1 and 2 - fence_tool ls on 1,2,3,4 should all show normal state (4 members, 0 victims, wait state none, members 1,2,3,4) fence domain member count 4 victim count 0 victim now 0 master nodeid 2 wait state none members 1 2 3 4 Signed-off-by: David Teigland <teigland@redhat.com> diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c index c9d86f3..a8629b9 100644 --- a/fence/fenced/cpg.c +++ b/fence/fenced/cpg.c @@ -73,7 +73,11 @@ struct id_info { }; static cpg_handle_t cpg_handle_daemon; +static cpg_handle_t cpg_handle_domain; +static struct cpg_name group_name_daemon; +static struct cpg_name group_name_domain; static int cpg_fd_daemon; +static int cpg_fd_domain; static struct protocol our_protocol; static struct list_head daemon_nodes; static struct cpg_address daemon_member[MAX_NODES]; @@ -132,6 +136,38 @@ static void log_config(const struct cpg_name *group_name, m_buf, j_buf, l_buf); } +static void log_ringid(cpg_handle_t handle, + struct cpg_ring_id *ringid, + const uint32_t *member_list, + size_t member_list_entries) +{ + char m_buf[128]; + size_t i, len, pos; + int ret; + const char *name = "unknown"; + + if (handle == cpg_handle_domain) + name = group_name_domain.value; + else if (handle == cpg_handle_daemon) + name = group_name_daemon.value; + + memset(m_buf, 0, sizeof(m_buf)); + + len = sizeof(m_buf); + pos = 0; + for (i = 0; i < member_list_entries; i++) { + ret = snprintf(m_buf + pos, len - pos, " %u", + member_list[i]); + if (ret >= len - pos) + break; + pos += ret; + } + + log_debug("%s ring %u:%llu %zu memb%s", + name, ringid->nodeid, (unsigned long long)ringid->seq, + member_list_entries, m_buf); +} + static void fd_info_in(struct fd_info *fi) { fi->fd_info_size = le32_to_cpu(fi->fd_info_size); @@ -372,7 +408,7 @@ static void node_history_start(struct fd *fd, int nodeid) node->add_time = time(NULL); } -static void node_history_left(struct fd *fd, int nodeid) +static void node_history_left(struct fd *fd, int nodeid, uint32_t seq) { struct node_history *node; @@ -383,9 +419,10 @@ static void node_history_left(struct fd *fd, int nodeid) } node->left_time = time(NULL); + node->left_seq = seq; } -static void node_history_fail(struct fd *fd, int nodeid) +static void node_history_fail(struct fd *fd, int nodeid, uint32_t seq) { struct node_history *node; @@ -396,6 +433,7 @@ static void node_history_fail(struct fd *fd, int nodeid) } node->fail_time = time(NULL); + node->fail_seq = seq; node->check_quorum = 1; } @@ -646,36 +684,11 @@ static void receive_victim_done(struct fd *fd, struct fd_header *hd, int len) free(node); } +/* we know that the quorum value here is consistent with the cpg events + because the ringid's are in sync per the previous check_ringid_done */ + static int check_quorum_done(struct fd *fd) { - struct node_history *node; - int wait_count = 0; - - /* We don't want to trust the cluster_quorate value until we know - that cman has seen the same nodes fail that we have. So, we - first make sure that all nodes we've seen fail are also - failed in cman, then we can just check cluster_quorate. This - assumes that we'll get to this function to do all the checks - before any of the failed nodes can actually rejoin and become - cman members again (if that assumption doesn't hold, perhaps - do something with timestamps of join/fail). */ - - list_for_each_entry(node, &fd->node_history, list) { - if (!node->check_quorum) - continue; - - if (!is_cluster_member_reread(node->nodeid)) { - node->check_quorum = 0; - } else { - log_debug("check_quorum %d is_cluster_member", - node->nodeid); - wait_count++; - } - } - - if (wait_count) - return 0; - if (!cluster_quorate) { log_debug("check_quorum not quorate"); return 0; @@ -685,8 +698,28 @@ static int check_quorum_done(struct fd *fd) return 1; } +/* wait for cman ringid and cpg ringid to be the same so we know our + information from each service is based on the same node state */ + +static int check_ringid_done(struct fd *fd) +{ + if (cluster_ringid_seq != (uint32_t)fd->cpg_ringid.seq) { + log_debug("check_ringid cluster %u cpg %u:%llu", + cluster_ringid_seq, fd->cpg_ringid.nodeid, + (unsigned long long)fd->cpg_ringid.seq); + return 0; + } + + log_debug("check_ringid done cluster %u cpg %u:%llu", + cluster_ringid_seq, fd->cpg_ringid.nodeid, + (unsigned long long)fd->cpg_ringid.seq); + return 1; +} + static int wait_conditions_done(struct fd *fd) { + if (!check_ringid_done(fd)) + return 0; if (!check_quorum_done(fd)) return 0; return 1; @@ -829,6 +862,25 @@ static int match_change(struct fd *fd, struct change *cg, struct fd_header *hd, return 0; } + /* this start message couldn't have been sent for a cg preceding + a confchg when the sending node failed or left */ + + if ((node->fail_seq > cg->seq) || (node->left_seq > cg->seq)) { + log_debug("match_change %d:%u skip cg %u fail cg %u left cg %u", + hd->nodeid, seq, cg->seq, + node->fail_seq, node->left_seq); + return 0; + } + + /* if we matched the last start message from this node against our + cg N, then don't match this stsart message against an earlier cg */ + + if (node->last_match_seq > cg->seq) { + log_debug("match_change %d:%u skip cg %u last matched cg %u", + hd->nodeid, seq, cg->seq, node->last_match_seq); + return 0; + } + /* verify this is the right change by matching the counts and the nodeids of the current members */ @@ -863,6 +915,8 @@ static int match_change(struct fd *fd, struct change *cg, struct fd_header *hd, if (members_mismatch) return 0; + node->last_match_seq = cg->seq; + log_debug("match_change %d:%u matches cg %u", hd->nodeid, seq, cg->seq); return 1; } @@ -917,19 +971,45 @@ static int match_change(struct fd *fd, struct change *cg, struct fd_header *hd, is > cpg ringid, then return 0 for conditions_done so we won't send start and will wait until the most recent cpg confchg (matching the current cman one) to send a start. Waits for cpg to catch up with cman. + + Final solution is the patch adding check_ringid_done() that waits for + cman and cpg to both be on the same ringid before going ahead to check + quorum and send starts. */ static struct change *find_change(struct fd *fd, struct fd_header *hd, struct fd_info *fi, struct id_info *ids) { struct change *cg; + struct change *cg1 = NULL, *cg2 = NULL; list_for_each_entry_reverse(cg, &fd->changes, list) { if (!match_change(fd, cg, hd, fi, ids)) continue; - return cg; + + if (!(hd->flags & FD_MFLG_DUPLICATE_CG)) + return cg; + + /* this start message is for the second of two matching cg's */ + + if (!cg1) { + cg1 = cg; + log_debug("find_change %d:%u match1 %u look for dup", + hd->nodeid, hd->msgdata, cg1->seq); + continue; + } else { + cg2 = cg; + log_debug("find_change %d:%u match1 %u match2 %u", + hd->nodeid, hd->msgdata, cg1->seq, cg2->seq); + break; + } } + if (cg1 && cg2) + return cg2; + if (cg1) + return cg1; + log_debug("find_change %d:%u no match", hd->nodeid, hd->msgdata); return NULL; } @@ -1062,19 +1142,17 @@ static int count_ids(struct fd *fd) return count; } -static void send_info(struct fd *fd, int type) +static void send_info(struct fd *fd, struct change *cg, int type, + uint32_t flags) { - struct change *cg; struct fd_header *hd; struct fd_info *fi; struct id_info *id; struct node_history *node; char *buf; - uint32_t flags; + uint32_t idflags; int len, id_count; - cg = list_first_entry(&fd->changes, struct change, list); - id_count = count_ids(fd); len = sizeof(struct fd_header) + sizeof(struct fd_info) + @@ -1095,6 +1173,8 @@ static void send_info(struct fd *fd, int type) hd->type = type; hd->msgdata = cg->seq; + hd->flags = flags; + if (cg->we_joined) hd->flags |= FD_MFLG_JOINING; if (fd->init_complete || fd->local_init_complete) @@ -1114,11 +1194,11 @@ static void send_info(struct fd *fd, int type) /* fill in id_info entries */ list_for_each_entry(node, &fd->node_history, list) { - flags = 0; + idflags = 0; if (find_memb(cg, node->nodeid)) - flags = IDI_NODEID_IS_MEMBER; + idflags = IDI_NODEID_IS_MEMBER; - id->flags = cpu_to_le32(flags); + id->flags = cpu_to_le32(idflags); id->nodeid = cpu_to_le32(node->nodeid); id->fence_external_node= cpu_to_le32(node->fence_external_node); id->fence_master = cpu_to_le32(node->fence_master); @@ -1128,8 +1208,8 @@ static void send_info(struct fd *fd, int type) id++; } - log_debug("send_%s cg %u flags %x counts %u %d %d %d %d", - type == FD_MSG_START ? "start" : "complete", + log_debug("send_%s %d:%u flags %x started %u m %d j %d r %d f %d", + type == FD_MSG_START ? "start" : "complete", our_nodeid, cg->seq, hd->flags, fd->started_count, cg->member_count, cg->joined_count, cg->remove_count, cg->failed_count); @@ -1138,9 +1218,45 @@ static void send_info(struct fd *fd, int type) free(buf); } +static int same_members(struct change *cg1, struct change *cg2) +{ + struct member *memb; + + list_for_each_entry(memb, &cg1->members, list) { + if (!find_memb(cg2, memb->nodeid)) + return 0; + } + return 1; +} + static void send_start(struct fd *fd) { - send_info(fd, FD_MSG_START); + struct change *cg = list_first_entry(&fd->changes, struct change, list); + struct change *cgtmp; + uint32_t flags = 0; + + /* look for a previous matching cg that we don't want others to + confuse for this one */ + + list_for_each_entry(cgtmp, &fd->changes, list) { + if (cgtmp->sent_start) + continue; + + if (cgtmp->seq < cg->seq && + cgtmp->member_count == cg->member_count && + cgtmp->joined_count == cg->joined_count && + cgtmp->remove_count == cg->remove_count && + cgtmp->failed_count == cg->failed_count && + same_members(cgtmp, cg)) { + log_debug("duplicate old cg %u new cg %u", + cgtmp->seq, cg->seq); + flags = FD_MFLG_DUPLICATE_CG; + } + } + + cg->sent_start = 1; + + send_info(fd, cg, FD_MSG_START, flags); } /* same content as a start message, a new (incomplete) node will look for @@ -1149,7 +1265,9 @@ static void send_start(struct fd *fd) static void send_complete(struct fd *fd) { - send_info(fd, FD_MSG_COMPLETE); + struct change *cg = list_first_entry(&fd->changes, struct change, list); + + send_info(fd, cg, FD_MSG_COMPLETE, 0); } /* FIXME: better to just look in victims list for any nodes with init_victim? */ @@ -1317,9 +1435,9 @@ static int add_change(struct fd *fd, list_add_tail(&memb->list, &cg->removed); if (memb->failed) - node_history_fail(fd, memb->nodeid); + node_history_fail(fd, memb->nodeid, cg->seq); else - node_history_left(fd, memb->nodeid); + node_history_left(fd, memb->nodeid, cg->seq); log_debug("add_change cg %u remove nodeid %d reason %d", cg->seq, memb->nodeid, left_list[i].reason); @@ -1350,8 +1468,8 @@ static int add_change(struct fd *fd, list_for_each_entry(memb, &cg->members, list) node_history_init(fd, memb->nodeid); - log_debug("add_change cg %u counts member %d joined %d remove %d " - "failed %d", cg->seq, cg->member_count, cg->joined_count, + log_debug("add_change cg %u m %d j %d r %d f %d", + cg->seq, cg->member_count, cg->joined_count, cg->remove_count, cg->failed_count); list_add(&cg->list, &fd->changes); @@ -1526,9 +1644,36 @@ static void deliver_cb_domain(cpg_handle_t handle, apply_changes(fd); } -static cpg_callbacks_t cpg_callbacks_domain = { +/* save ringid to compare with cman's. + also save member_list to double check with cman's member list? + they should match */ + +static void totem_cb_domain(cpg_handle_t handle, + struct cpg_ring_id ring_id, + uint32_t member_list_entries, + const uint32_t *member_list) +{ + struct fd *fd; + + log_ringid(handle, &ring_id, member_list, member_list_entries); + + fd = find_fd_handle(handle); + if (!fd) { + log_error("totem_cb no fence domain for handle"); + return; + } + + fd->cpg_ringid.nodeid = ring_id.nodeid; + fd->cpg_ringid.seq = ring_id.seq; + + apply_changes(fd); +} + +static cpg_model_v1_data_t cpg_callbacks_domain = { .cpg_deliver_fn = deliver_cb_domain, .cpg_confchg_fn = confchg_cb_domain, + .cpg_totem_confchg_fn = totem_cb_domain, + .flags = CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF, }; static void process_cpg_domain(int ci) @@ -1552,32 +1697,35 @@ static void process_cpg_domain(int ci) int fd_join(struct fd *fd) { cpg_error_t error; - cpg_handle_t h; struct cpg_name name; - int i = 0, f, ci; + int i = 0, ci; - error = cpg_initialize(&h, &cpg_callbacks_domain); + error = cpg_model_initialize(&cpg_handle_domain, CPG_MODEL_V1, + (cpg_model_data_t *)&cpg_callbacks_domain, + NULL); if (error != CPG_OK) { - log_error("cpg_initialize error %d", error); + log_error("cpg_model_initialize error %d", error); goto fail_free; } - cpg_fd_get(h, &f); + cpg_fd_get(cpg_handle_domain, &cpg_fd_domain); - ci = client_add(f, process_cpg_domain, NULL); + ci = client_add(cpg_fd_domain, process_cpg_domain, NULL); list_add(&fd->list, &domains); - fd->cpg_handle = h; + fd->cpg_handle = cpg_handle_domain; fd->cpg_client = ci; - fd->cpg_fd = f; + fd->cpg_fd = cpg_fd_domain; fd->joining_group = 1; memset(&name, 0, sizeof(name)); sprintf(name.value, "fenced:%s", fd->name); name.length = strlen(name.value) + 1; + memcpy(&group_name_domain, &name, sizeof(struct cpg_name)); + log_debug("cpg_join %s ...", name.value); retry: - error = cpg_join(h, &name); + error = cpg_join(cpg_handle_domain, &name); if (error == CPG_ERR_TRY_AGAIN) { sleep(1); if (!(++i % 10)) @@ -1594,7 +1742,7 @@ int fd_join(struct fd *fd) fail: list_del(&fd->list); client_dead(ci); - cpg_finalize(h); + cpg_finalize(cpg_handle_domain); fail_free: free(fd); return error; @@ -1612,6 +1760,7 @@ int fd_leave(struct fd *fd) sprintf(name.value, "fenced:%s", fd->name); name.length = strlen(name.value) + 1; + log_debug("cpg_leave %s ...", name.value); retry: error = cpg_leave(fd->cpg_handle, &name); if (error == CPG_ERR_TRY_AGAIN) { @@ -2084,9 +2233,19 @@ static void confchg_cb_daemon(cpg_handle_t handle, } } -static cpg_callbacks_t cpg_callbacks_daemon = { +static void totem_cb_daemon(cpg_handle_t handle, + struct cpg_ring_id ring_id, + uint32_t member_list_entries, + const uint32_t *member_list) +{ + log_ringid(handle, &ring_id, member_list, member_list_entries); +} + +static cpg_model_v1_data_t cpg_callbacks_daemon = { .cpg_deliver_fn = deliver_cb_daemon, .cpg_confchg_fn = confchg_cb_daemon, + .cpg_totem_confchg_fn = totem_cb_daemon, + .flags = CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF, }; void process_cpg_daemon(int ci) @@ -2145,9 +2304,11 @@ int setup_cpg_daemon(void) our_protocol.daemon_max[1] = 1; our_protocol.daemon_max[2] = 1; - error = cpg_initialize(&cpg_handle_daemon, &cpg_callbacks_daemon); + error = cpg_model_initialize(&cpg_handle_daemon, CPG_MODEL_V1, + (cpg_model_data_t *)&cpg_callbacks_daemon, + NULL); if (error != CPG_OK) { - log_error("daemon cpg_initialize error %d", error); + log_error("daemon cpg_model_initialize error %d", error); goto ret; } @@ -2156,6 +2317,7 @@ int setup_cpg_daemon(void) memset(&name, 0, sizeof(name)); sprintf(name.value, "fenced:daemon"); name.length = strlen(name.value) + 1; + memcpy(&group_name_daemon, &name, sizeof(struct cpg_name)); log_debug("cpg_join %s ...", name.value); retry: @@ -2196,6 +2358,7 @@ void close_cpg_daemon(void) sprintf(name.value, "fenced:daemon"); name.length = strlen(name.value) + 1; + log_debug("cpg_leave %s ...", name.value); retry: error = cpg_leave(cpg_handle_daemon, &name); if (error == CPG_ERR_TRY_AGAIN) { diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h index 9f64dff..39a34ad 100644 --- a/fence/fenced/fd.h +++ b/fence/fenced/fd.h @@ -64,6 +64,7 @@ extern int daemon_quit; extern int cluster_down; extern struct list_head domains; extern int cluster_quorate; +extern uint32_t cluster_ringid_seq; extern uint64_t quorate_time; extern int our_nodeid; extern char our_name[MAX_NODENAME_LEN+1]; @@ -95,6 +96,7 @@ do { \ #define FD_MFLG_JOINING 1 /* accompanies start, we are joining */ #define FD_MFLG_COMPLETE 2 /* accompanies start, we have complete info */ +#define FD_MFLG_DUPLICATE_CG 4 struct fd_header { uint16_t version[3]; @@ -122,6 +124,7 @@ struct change { int failed_count; int state; /* CGST_ */ int we_joined; + int sent_start; uint32_t seq; /* just used as a reference when debugging */ uint64_t create_time; }; @@ -146,6 +149,9 @@ struct node_history { int fence_external_node; int fence_master; int fence_how; /* VIC_DONE_ */ + uint32_t last_match_seq; + uint32_t fail_seq; + uint32_t left_seq; }; struct node { @@ -172,6 +178,7 @@ struct fd { struct list_head node_history; int init_complete; int local_init_complete; + struct cpg_ring_id cpg_ringid; /* general domain membership */ diff --git a/fence/fenced/main.c b/fence/fenced/main.c index deb9515..a371dc8 100644 --- a/fence/fenced/main.c +++ b/fence/fenced/main.c @@ -1069,6 +1069,7 @@ int daemon_quit; int cluster_down; struct list_head domains; int cluster_quorate; +uint32_t cluster_ringid_seq; uint64_t quorate_time; int our_nodeid; char our_name[MAX_NODENAME_LEN+1]; diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c index a245adf..b9d8341 100644 --- a/fence/fenced/member_cman.c +++ b/fence/fenced/member_cman.c @@ -148,9 +148,17 @@ int name_to_nodeid(char *name) static void update_cluster(void) { + cman_cluster_t info; int quorate = cluster_quorate; int i, rv; + rv = cman_get_cluster(ch, &info); + if (rv < 0) { + log_error("cman_get_cluster error %d %d", rv, errno); + return; + } + cluster_ringid_seq = info.ci_generation; + cluster_quorate = cman_is_quorate(ch); if (!quorate && cluster_quorate) @@ -171,8 +179,8 @@ static void update_cluster(void) if (old_nodes[i].cn_member && !is_cluster_member(old_nodes[i].cn_nodeid)) { - log_debug("cluster node %d removed", - old_nodes[i].cn_nodeid); + log_debug("cluster node %d removed seq %u", + old_nodes[i].cn_nodeid, cluster_ringid_seq); node_history_cluster_remove(old_nodes[i].cn_nodeid); } @@ -182,8 +190,8 @@ static void update_cluster(void) if (cman_nodes[i].cn_member && !is_old_member(cman_nodes[i].cn_nodeid)) { - log_debug("cluster node %d added", - cman_nodes[i].cn_nodeid); + log_debug("cluster node %d added seq %u", + cman_nodes[i].cn_nodeid, cluster_ringid_seq); node_history_cluster_add(cman_nodes[i].cn_nodeid); }
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor