Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP2:Update
pacemaker
pacemaker-pengine-fencing-messages.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File pacemaker-pengine-fencing-messages.patch of Package pacemaker
commit 083c3a49ad41bd17387c8ae661c23b44d4b845c6 Author: Ken Gaillot <kgaillot@redhat.com> Date: Tue May 30 14:43:25 2017 -0500 Log: pengine,libpe_status: revisit fencing messages Index: pacemaker/lib/pengine/unpack.c =================================================================== --- pacemaker.orig/lib/pengine/unpack.c +++ pacemaker/lib/pengine/unpack.c @@ -63,6 +63,13 @@ is_dangling_container_remote_node(node_t } +/*! + * \brief Schedule a fence action for a node + * + * \param[in,out] data_set Current working set of cluster + * \param[in,out] node Node to fence + * \param[in] reason Text description of why fencing is needed + */ void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { @@ -73,11 +80,13 @@ pe_fence_node(pe_working_set_t * data_se resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { - crm_notice("Not fencing node %s due to '%s': container %s is" - " unmanaged" - "%s", node->details->uname, reason, rsc->id); + crm_notice("Not fencing guest node %s " + "(otherwise would because %s): " + "its guest resource %s is unmanaged", + node->details->uname, reason, rsc->id); } else { - crm_warn("Remote node %s will be fenced due to '%s' by recovering %s", + crm_warn("Guest node %s will be fenced " + "(by recovering its guest resource %s): %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the @@ -90,8 +99,9 @@ pe_fence_node(pe_working_set_t * data_se } } else if (is_dangling_container_remote_node(node)) { - crm_info("Cleaning up dangling connection resource for guest node %s due to '%s'" - " (fencing is already done, guest resource no longer exists)", + crm_info("Cleaning up dangling connection for guest node %s: " + "fencing was already done because %s, " + "and guest resource no longer exists", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); @@ -99,31 +109,29 @@ pe_fence_node(pe_working_set_t * data_se resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { - crm_notice("Not fencing node %s due to '%s': connection is unmanaged", + crm_notice("Not fencing remote node %s " + "(otherwise would because %s): connection is unmanaged", node->details->uname, reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; - if (pe_can_fence(data_set, node)) { - crm_warn("Remote node %s will be fenced due to %s", node->details->uname, reason); - } else { - crm_warn("Remote node %s is unclean due to %s", node->details->uname, reason); - } + crm_warn("Remote node %s %s: %s", + node->details->uname, + pe_can_fence(data_set, node)? "will be fenced" : "is unclean", + reason); } node->details->unclean = TRUE; } else if (node->details->unclean) { - if (pe_can_fence(data_set, node)) { - crm_trace("Node %s would also be fenced due to '%s'", node->details->uname, reason); - } else { - crm_trace("Node %s is also unclean due to '%s'", node->details->uname, reason); - } - - } else if (pe_can_fence(data_set, node)) { - crm_warn("Node %s will be fenced due to %s", node->details->uname, reason); - node->details->unclean = TRUE; + crm_trace("Cluster node %s %s because %s", + node->details->uname, + pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", + reason); } else { - crm_warn("Node %s is unclean due to %s", node->details->uname, reason); + crm_warn("Cluster node %s %s: %s", + node->details->uname, + pe_can_fence(data_set, node)? "will be fenced" : "is unclean", + reason); node->details->unclean = TRUE; } } @@ -1877,6 +1885,8 @@ process_rsc_state(resource_t * rsc, node xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; + char *reason = NULL; + CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); @@ -1906,7 +1916,6 @@ process_rsc_state(resource_t * rsc, node && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { - char *reason = NULL; gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is @@ -1921,14 +1930,19 @@ process_rsc_state(resource_t * rsc, node should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { - if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { + if (is_baremetal_remote_node(node) && node->details->remote_rsc + && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { + /* setting unseen = true means that fencing of the remote node will * only occur if the connection resource is not going to start somewhere. * This allows connection resources on a failed cluster-node to move to * another node without requiring the baremetal remote nodes to be fenced * as well. */ node->details->unseen = TRUE; - reason = crm_strdup_printf("%s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id); + reason = crm_strdup_printf("%s is active there (fencing will be" + " revoked if remote connection can " + "be re-established elsewhere)", + rsc->id); } should_fence = TRUE; } @@ -1958,7 +1972,9 @@ process_rsc_state(resource_t * rsc, node /* treat it as if it is still running * but also mark the node as unclean */ - pe_fence_node(data_set, node, "resource failure(s)"); + reason = crm_strdup_printf("%s failed there", rsc->id); + pe_fence_node(data_set, node, reason); + free(reason); break; case action_fail_standby: @@ -2001,6 +2017,7 @@ process_rsc_state(resource_t * rsc, node stop_action(rsc, node, FALSE); } break; + case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { @@ -2014,7 +2031,8 @@ process_rsc_state(resource_t * rsc, node /* connection resource to baremetal resource failed in a way that * should result in fencing the remote-node. */ - pe_fence_node(data_set, tmpnode, "of connection failure(s)"); + pe_fence_node(data_set, tmpnode, + "remote connection is unrecoverable"); } } Index: pacemaker/pengine/allocate.c =================================================================== --- pacemaker.orig/pengine/allocate.c +++ pacemaker/pengine/allocate.c @@ -453,7 +453,7 @@ check_actions_for(xmlNode * rsc_entry, r set_bit(action_clear->flags, pe_action_runnable); crm_notice("Clearing failure of %s on %s " - "action definition changed " CRM_XS " %s", + "because action definition changed " CRM_XS " %s", rsc->id, node->details->uname, action_clear->uuid); } } @@ -1770,7 +1770,6 @@ apply_container_ordering(action_t *actio CRM_ASSERT(action->node); CRM_ASSERT(is_remote_node(action->node)); - CRM_ASSERT(action->node->details->remote_rsc); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); @@ -1782,7 +1781,13 @@ apply_container_ordering(action_t *actio pe_fence_node(data_set, action->node, "container failed"); } - crm_trace("%s %s %s %s %d", action->uuid, action->task, remote_rsc->id, container->id, is_set(container->flags, pe_rsc_failed)); + crm_trace("Order %s action %s relative to %s%s for %s%s", + action->task, action->uuid, + is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", + remote_rsc->id, + is_set(container->flags, pe_rsc_failed)? "failed " : "", + container->id); + switch (task) { case start_rsc: case action_promote: @@ -1855,6 +1860,7 @@ apply_remote_ordering(action_t *action, node_t *cluster_node = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = remote_state_unknown; + enum pe_ordering order_opts = pe_order_none; if (action->rsc == NULL) { return; @@ -1862,7 +1868,6 @@ apply_remote_ordering(action_t *action, CRM_ASSERT(action->node); CRM_ASSERT(is_remote_node(action->node)); - CRM_ASSERT(action->node->details->remote_rsc); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); @@ -1876,7 +1881,7 @@ apply_remote_ordering(action_t *action, * on that remote node until after it starts elsewhere. */ if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) { - /* There is no-where left to run the connection resource + /* There is nowhere left to run the connection resource, * and the resource is in a failed state (either directly * or because it is located on a failed node). * @@ -1884,8 +1889,7 @@ apply_remote_ordering(action_t *action, * or if there are resources in an unknown state (probe), we * must assume the worst and fence it. */ - - if(is_set(action->node->details->remote_rsc->flags, pe_rsc_failed)) { + if (is_set(remote_rsc->flags, pe_rsc_failed)) { state = remote_state_failed; } else if(cluster_node && cluster_node->details->unclean) { state = remote_state_failed; @@ -1915,22 +1919,31 @@ apply_remote_ordering(action_t *action, state = remote_state_alive; } - crm_trace("%s %s %s %d %d", action->uuid, action->task, action->node->details->uname, state, is_set(remote_rsc->flags, pe_rsc_failed)); + crm_trace("Order %s action %s relative to %s%s (state %d)", + action->task, action->uuid, + is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", + remote_rsc->id, state); switch (task) { case start_rsc: case action_promote: - if(state == remote_state_failed) { - /* Wait for the connection resource to be up and force recovery */ - custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, - action->rsc, NULL, action, - pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set); - } else { - /* Ensure the connection resource is up and assume everything is as we left it */ - custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, - action->rsc, NULL, action, - pe_order_preserve | pe_order_runnable_left, data_set); + /* This as an internally generated constraint exempt from + * user constraint prohibitions, and this action isn't runnable + * if the connection start isn't runnable. + */ + order_opts = pe_order_preserve | pe_order_runnable_left; + + if (state == remote_state_failed) { + /* Force recovery, by making this action required */ + order_opts |= pe_order_implies_then; } + + /* Ensure connection is up before running this action */ + custom_action_order(remote_rsc, + generate_op_key(remote_rsc->id, RSC_START, 0), + NULL, action->rsc, NULL, action, order_opts, + data_set); break; + case stop_rsc: /* Handle special case with remote node where stop actions need to be * ordered after the connection resource starts somewhere else. @@ -1956,22 +1969,19 @@ apply_remote_ordering(action_t *action, pe_order_preserve | pe_order_implies_first, data_set); } break; - case action_demote: - /* If the connection is being torn down, we don't want - * to build a constraint between a resource's demotion and - * the connection resource starting... because the connection - * resource can not start. The connection might already be up, - * but the "start" action would not be allowed, which in turn would - * block the demotion of any resources living in the node. + case action_demote: + /* Only order this demote relative to the connection start if the + * connection isn't being torn down. Otherwise, the demote would be + * blocked because the connection start would not be allowed. */ - if(state == remote_state_resting || state == remote_state_unknown) { custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, action->rsc, NULL, action, pe_order_preserve, data_set); } /* Otherwise we can rely on the stop ordering */ break; + default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { @@ -2241,15 +2251,12 @@ stage7(pe_working_set_t * data_set) order_probes(data_set); crm_trace("Updating %d actions", g_list_length(data_set->actions)); - for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_action(action); } - crm_trace("Processing reloads"); - LogNodeActions(data_set, FALSE); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor