Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP2:Update
pacemaker
pacemaker-pengine-pseudo-fence-guest-node-recov...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File pacemaker-pengine-pseudo-fence-guest-node-recovery.patch of Package pacemaker
commit beab7718e14a54f1b50d7c5ff4b0086e09332da3 Author: Ken Gaillot <kgaillot@redhat.com> Date: Fri Apr 15 13:10:17 2016 -0500 Fix: pengine: create a pseudo-fence for guest node recovery If a guest node needs to be recovered, the PE would previously order actions in relation to the stop action for the guest's container resource, if one was scheduled. This had problems: for implied stops due to fencing the guest's host, there would be no stop action, so no ordering could be done; ordering in relation to the stop action made stonith_constraints() mistakenly assume that the host node (the node for the stop action) was the fence target, and thus mistakenly mark the wrong stops/demotes as implied; and, clone notifications for fence events would not get called for guest node recoveries, whether explicit or implied. Now, a fence pseudo-event is created for guest node recovery, regardless of whether there is an explicit stop action scheduled for the container. This addresses all those issues, and will allow the crmd to be able to detect implied stops. This also allows us to simplify the implied stop/demote detection, since we will check the pseudo-op for implied actions -- we don't need to check the real fence op for implied actions on guest nodes. Index: pacemaker/crmd/te_utils.c =================================================================== --- pacemaker.orig/crmd/te_utils.c +++ pacemaker/crmd/te_utils.c @@ -331,6 +331,14 @@ tengine_stonith_notify(stonith_t * st, s /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); + /* @TODO Ideally, at this point, we'd check whether the fenced node + * hosted any guest nodes, and call remote_node_down() for them. + * Unfortunately, the crmd doesn't have a simple, reliable way to + * map hosts to guests. It might be possible to track this in the + * peer cache via crm_remote_peer_cache_refresh(). For now, we rely + * on the PE creating fence pseudo-events for the guests. 
+ */ + if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us Index: pacemaker/pengine/allocate.c =================================================================== --- pacemaker.orig/pengine/allocate.c +++ pacemaker/pengine/allocate.c @@ -1337,6 +1337,69 @@ any_managed_resources(pe_working_set_t * return FALSE; } +/*! + * \internal + * \brief Create pseudo-op for guest node fence, and order relative to it + * + * \param[in] node Guest node to fence + * \param[in] done STONITH_DONE operation + * \param[in] data_set Working set of CIB state + */ +static void +fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) +{ + resource_t *container = node->details->remote_rsc->container; + pe_action_t *stop = NULL; + pe_action_t *stonith_op = NULL; + + /* The fence action is just a label; we don't do anything differently for + * off vs. reboot. We specify it explicitly, rather than let it default to + * cluster's default action, because we are not _initiating_ fencing -- we + * are creating a pseudo-event to describe fencing that is already occurring + * by other means (container recovery). + */ + const char *fence_action = "off"; + + /* Check whether guest's container resource has any explicit stop or + * start (the stop may be implied by fencing of the guest's host). + */ + if (container) { + stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL); + + if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) { + fence_action = "reboot"; + } + } + + /* Create a fence pseudo-event, so we have an event to order actions + * against, and crmd can always detect it. 
+ */ + stonith_op = pe_fence_op(node, fence_action, FALSE, data_set); + update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable); + + /* We want to imply stops/demotes after the guest is stopped, not wait until + * it is restarted, so we always order pseudo-fencing after stop, not start + * (even though start might be closer to what is done for a real reboot). + */ + if (stop) { + order_actions(stop, stonith_op, + pe_order_runnable_left|pe_order_implies_then); + crm_info("Implying guest node %s is down (action %d) " + "after container %s is stopped (action %d)", + node->details->uname, stonith_op->id, + container->id, stop->id); + } else { + crm_info("Implying guest node %s is down (action %d) ", + node->details->uname, stonith_op->id); + } + + /* @TODO: Order pseudo-fence after any (optional) fence of guest's host */ + + /* Order/imply other actions relative to pseudo-fence as with real fence */ + stonith_constraints(node, stonith_op, data_set); + order_actions(stonith_op, done, pe_order_implies_then); +} + /* * Create dependencies for stonith and shutdown operations */ @@ -1365,21 +1429,12 @@ stage6(pe_working_set_t * data_set) for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; - /* remote-nodes associated with a container resource (such as a vm) are not fenced */ + /* Guest nodes are "fenced" by recovering their container resource, + * so handle them separately. 
+ */ if (is_container_remote_node(node)) { if (node->details->remote_requires_reset && need_stonith) { - resource_t *container = node->details->remote_rsc->container; - char *key = stop_key(container); - GListPtr stop_list = find_actions(container->actions, key, NULL); - - crm_info("Implying node %s is down when container %s is stopped (%p)", - node->details->uname, container->id, stop_list); - if(stop_list) { - stonith_constraints(node, stop_list->data, data_set); - } - - g_list_free(stop_list); - free(key); + fence_guest(node, done, data_set); } continue; } Index: pacemaker/pengine/graph.c =================================================================== --- pacemaker.orig/pengine/graph.c +++ pacemaker/pengine/graph.c @@ -713,13 +713,7 @@ stonith_constraints(node_t * node, actio CRM_CHECK(stonith_op != NULL, return FALSE); for (r = data_set->resources; r != NULL; r = r->next) { - resource_t *rsc = (resource_t *) r->data; - - if ((stonith_op->rsc == NULL) - || ((stonith_op->rsc != rsc) && (stonith_op->rsc != rsc->container))) { - - rsc_stonith_ordering(rsc, stonith_op, data_set); - } + rsc_stonith_ordering((resource_t *) r->data, stonith_op, data_set); } return TRUE; } @@ -886,7 +880,11 @@ action2xml(action_t * action, gboolean a } if (safe_str_eq(action->task, CRM_OP_FENCE)) { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); + /* All fences need node info; guest node fences are pseudo-events */ + action_xml = create_xml_node(NULL, + is_set(action->flags, pe_action_pseudo)? 
+ XML_GRAPH_TAG_PSEUDO_EVENT : + XML_GRAPH_TAG_CRM_EVENT); } else if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); Index: pacemaker/pengine/native.c =================================================================== --- pacemaker.orig/pengine/native.c +++ pacemaker/pengine/native.c @@ -2903,48 +2903,6 @@ native_start_constraints(resource_t * rs } } -/* User data to pass to guest node iterator */ -struct action_list_s { - GListPtr search_list; /* list of actions to search */ - GListPtr result_list; /* list of matching actions for this node */ - const char *key; /* action key to match */ -}; - -/*! - * \internal - * \brief Prepend a node's actions matching a key to a list - * - * \param[in] node Guest node - * \param[in/out] data User data - */ -static void prepend_node_actions(const node_t *node, void *data) -{ - GListPtr actions; - struct action_list_s *info = (struct action_list_s *) data; - - actions = find_actions(info->search_list, info->key, node); - info->result_list = g_list_concat(actions, info->result_list); -} - -static GListPtr -find_fence_target_node_actions(GListPtr search_list, const char *key, node_t *fence_target, pe_working_set_t *data_set) -{ - struct action_list_s action_list; - - /* Actions on the target that match the key are implied by the fencing */ - action_list.search_list = search_list; - action_list.result_list = find_actions(search_list, key, fence_target); - action_list.key = key; - - /* - * If the target is a host for any guest nodes, actions on those nodes - * that match the key are also implied by the fencing. 
- */ - pe_foreach_guest_node(data_set, fence_target, prepend_node_actions, &action_list); - - return action_list.result_list; -} - static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { @@ -2964,8 +2922,7 @@ native_stop_constraints(resource_t * rsc /* Get a list of stop actions potentially implied by the fencing */ key = stop_key(rsc); - action_list = find_fence_target_node_actions(rsc->actions, key, target, - data_set); + action_list = find_actions(rsc->actions, key, target); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { @@ -3071,8 +3028,7 @@ native_stop_constraints(resource_t * rsc /* Get a list of demote actions potentially implied by the fencing */ key = demote_key(rsc); - action_list = find_fence_target_node_actions(rsc->actions, key, target, - data_set); + action_list = find_actions(rsc->actions, key, target); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor