Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-15-SP1:Update
pacemaker.29834
bsc#1181744-0004-Fix-fence-history-resync-fence...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File bsc#1181744-0004-Fix-fence-history-resync-fence-history-after-fenced-.patch of Package pacemaker.29834
From 03c4455fced74f093deb782198b1ba3076e52015 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger <klaus.wenninger@aon.at> Date: Tue, 18 Jun 2019 14:12:27 +0200 Subject: [PATCH 4/7] Fix: fence-history: resync fence-history after fenced crash Setting up a 30s fallback timer to trigger history-sync if the sync via DC doesn't happen --- daemons/controld/controld_callbacks.c | 2 +- daemons/controld/controld_control.c | 2 + daemons/controld/controld_te_utils.c | 86 +++++++++++++++++++++++---- daemons/controld/controld_transition.h | 3 +- 4 files changed, 79 insertions(+), 14 deletions(-) Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_callbacks.c =================================================================== --- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/controld/controld_callbacks.c +++ pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_callbacks.c @@ -210,7 +210,7 @@ peer_update_callback(enum crm_status_typ } else if(AM_I_DC) { if (appeared) { - te_trigger_stonith_history_sync(); + te_trigger_stonith_history_sync(FALSE); } else { erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); } Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_control.c =================================================================== --- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/controld/controld_control.c +++ pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_control.c @@ -118,7 +118,12 @@ do_shutdown(long long action, clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting from fencer"); - stonith_api->cmds->disconnect(stonith_api); + if (stonith_api->state != stonith_disconnected) { + stonith_api->cmds->disconnect(stonith_api); + } + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE); + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED); } } @@ 
-272,6 +277,8 @@ crmd_exit(crm_exit_t exit_code) crm_timer_stop(wait_timer); crm_timer_stop(recheck_timer); + te_cleanup_stonith_history_sync(NULL, TRUE); + free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer); finalization_timer = NULL; Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_te_utils.c =================================================================== --- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/controld/controld_te_utils.c +++ pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_te_utils.c @@ -22,7 +22,33 @@ crm_trigger_t *stonith_reconnect = NULL; static crm_trigger_t *stonith_history_sync_trigger = NULL; -static mainloop_timer_t *stonith_history_sync_timer = NULL; +static mainloop_timer_t *stonith_history_sync_timer_short = NULL; +static mainloop_timer_t *stonith_history_sync_timer_long = NULL; + +void +te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) +{ + if (free_timers) { + mainloop_timer_del(stonith_history_sync_timer_short); + stonith_history_sync_timer_short = NULL; + mainloop_timer_del(stonith_history_sync_timer_long); + stonith_history_sync_timer_long = NULL; + } else { + mainloop_timer_stop(stonith_history_sync_timer_short); + mainloop_timer_stop(stonith_history_sync_timer_long); + } + + if (st) { + st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); + } +} + +static void +tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) +{ + te_cleanup_stonith_history_sync(st, FALSE); + crm_debug("Fence-history synced - cancel all timers"); +} /* * stonith cleanup list @@ -163,6 +189,8 @@ fail_incompletable_stonith(crm_graph_t * static void tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) { + te_cleanup_stonith_history_sync(st, FALSE); + if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); @@ 
-176,11 +204,12 @@ tengine_stonith_connection_destroy(stoni /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ - stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); - stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE); + stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED); } if (AM_I_DC) { @@ -197,6 +226,9 @@ char *te_client_id = NULL; #endif static void +tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); + +static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { @@ -345,6 +377,7 @@ do_stonith_history_sync(gpointer user_da if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; + te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); @@ -364,11 +397,18 @@ stonith_history_sync_set_trigger(gpointe } void -te_trigger_stonith_history_sync(void) +te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic + * + * the long timeout of 30s is there as a fallback + * so that after a successful connection to fenced + * we will wait for 30s for the DC to trigger a + * history-sync + * if this doesn't happen we trigger a sync locally + * (e.g. 
fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection @@ -382,14 +422,26 @@ te_trigger_stonith_history_sync(void) do_stonith_history_sync, NULL); } - if(stonith_history_sync_timer == NULL) { - stonith_history_sync_timer = - mainloop_timer_add("history_sync", 5000, - FALSE, stonith_history_sync_set_trigger, - NULL); + if (long_timeout) { + if(stonith_history_sync_timer_long == NULL) { + stonith_history_sync_timer_long = + mainloop_timer_add("history_sync_long", 30000, + FALSE, stonith_history_sync_set_trigger, + NULL); + } + crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); + mainloop_timer_start(stonith_history_sync_timer_long); + } else { + if(stonith_history_sync_timer_short == NULL) { + stonith_history_sync_timer_short = + mainloop_timer_add("history_sync_short", 5000, + FALSE, stonith_history_sync_set_trigger, + NULL); + } + crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); + mainloop_timer_start(stonith_history_sync_timer_short); } - crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); - mainloop_timer_start(stonith_history_sync_timer); + } gboolean @@ -437,6 +489,11 @@ te_connect_stonith(gpointer user_data) stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); + stonith_api->cmds->register_notification(stonith_api, + T_STONITH_NOTIFY_HISTORY_SYNCED, + tengine_stonith_history_synced); + + te_trigger_stonith_history_sync(TRUE); crm_trace("Connected"); return TRUE; Index: pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_transition.h =================================================================== --- pacemaker-2.0.1+20190417.13d370ca9.orig/daemons/controld/controld_transition.h +++ pacemaker-2.0.1+20190417.13d370ca9/daemons/controld/controld_transition.h @@ -70,7 +70,8 @@ extern void abort_transition_graph(int a extern gboolean te_connect_stonith(gpointer user_data); 
-extern void te_trigger_stonith_history_sync(void); +extern void te_trigger_stonith_history_sync(bool long_timeout); +extern void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers); extern crm_trigger_t *transition_trigger; extern crm_trigger_t *stonith_reconnect;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor