Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:42.3:Staging:A
pacemaker
pacemaker-libcrmcommon-avoid-evicting-IPC-clien...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File pacemaker-libcrmcommon-avoid-evicting-IPC-client.patch of Package pacemaker
commit 5d94da7af947810ca4b12dc1d9dce18a7d638f73 Author: Ken Gaillot <kgaillot@redhat.com> Date: Mon May 1 17:39:02 2017 -0500 Fix: libcrmcommon: avoid evicting IPC client if messages spike briefly Before, an IP server would evict a client if its event queue grew to 500 messages. Now, we avoid evicting if the backlog is new (a quick spike of many messages that immediately crosses the threshold). diff --git a/include/crm/common/ipcs.h b/include/crm/common/ipcs.h index 2fec93178..ba1ccefc7 100644 --- a/include/crm/common/ipcs.h +++ b/include/crm/common/ipcs.h @@ -73,6 +73,8 @@ struct crm_client_s { char *name; char *user; + /* Provided for server use (not used by library) */ + /* @TODO merge options, flags, and kind (reserving lower bits for server) */ long long options; int request_id; @@ -80,7 +82,7 @@ struct crm_client_s { void *userdata; int event_timer; - GList *event_queue; + GList *event_queue; /* @TODO use GQueue instead */ /* Depending on the value of kind, only some of the following * will be populated/valid @@ -91,6 +93,7 @@ struct crm_client_s { struct crm_remote_s *remote; /* TCP/TLS */ + unsigned int backlog_len; /* IPC queue length after last flush */ }; extern GHashTable *client_connections; diff --git a/lib/common/ipc.c b/lib/common/ipc.c index c9e3da2bb..d32e37390 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -37,6 +37,9 @@ #define PCMK_IPC_VERSION 1 +/* Evict clients whose event queue grows this large */ +#define PCMK_IPC_MAX_QUEUE 500 + struct crm_ipc_response_header { struct qb_ipc_response_header qb; uint32_t size_uncompressed; @@ -523,24 +526,44 @@ crm_ipcs_flush_events(crm_client_t * c) } queue_len -= sent; - if (sent > 0 || c->event_queue) { + if (sent > 0 || queue_len) { crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", sent, queue_len, c->ipcs, c->pid, pcmk_strerror(rc < 0 ? rc : 0), (long long) rc); } - if (c->event_queue) { - if (queue_len % 100 == 0 && queue_len > 99) { - crm_warn("Event queue for %p[%d] has grown to %d", c->ipcs, c->pid, queue_len); + if (queue_len) { + /* We want to allow clients to briefly fall behind on processing + * incoming messages, but drop completely unresponsive clients so the + * connection doesn't consume resources indefinitely. + * + * @TODO It is possible that the queue could reasonably grow large in a + * short time. An example is a reprobe of hundreds of resources on many + * nodes resulting in a surge of CIB replies to the crmd. We could + * possibly give cluster daemons a higher threshold here, and/or prevent + * such a surge by throttling LRM history writes in the crmd. + */ - } else if (queue_len > 500) { - crm_err("Evicting slow client %p[%d]: event queue reached %d entries", - c->ipcs, c->pid, queue_len); - qb_ipcs_disconnect(c->ipcs); - return rc; + if (queue_len > PCMK_IPC_MAX_QUEUE) { + if ((c->backlog_len <= 1) || (queue_len < c->backlog_len)) { + /* Don't evict for a new or shrinking backlog */ + crm_warn("Client with process ID %u has a backlog of %u messages " + CRM_XS " %p", c->pid, queue_len, c->ipcs); + } else { + crm_err("Evicting client with process ID %u due to backlog of %u messages " + CRM_XS " %p", c->pid, queue_len, c->ipcs); + c->backlog_len = 0; + qb_ipcs_disconnect(c->ipcs); + return rc; + } } + + c->backlog_len = queue_len; delay_next_flush(c, queue_len); + } else { + /* Event queue is empty, there is no backlog */ + c->backlog_len = 0; } return rc;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor