Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Step:FrontRunner
pacemaker.15718
0002-Revert-use-common-service-interface-for-fe...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 0002-Revert-use-common-service-interface-for-fence-agents.patch of Package pacemaker.15718
From 5e862acfe98fa659095c651b6e7d97fd2ed39a07 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" <ygao@suse.com> Date: Thu, 11 Apr 2019 17:20:33 +0200 Subject: [PATCH 2/2] Revert "use common service interface for fence-agents and RAs" This reverts commit 18c321e792a279d81008cbd99cb5ec7f81db096f. --- include/crm/services.h | 5 +- lib/fencing/Makefile.am | 1 - lib/fencing/st_client.c | 453 +++++++++++++++++++++++++++++++--------- lib/services/services_linux.c | 93 --------- lib/services/services_private.h | 2 - 5 files changed, 355 insertions(+), 199 deletions(-) diff --git a/include/crm/services.h b/include/crm/services.h index c13fc0f04..013f0b851 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -156,10 +156,7 @@ typedef struct svc_action_s { char *agent; int timeout; - GHashTable *params; /* used for setting up environment for ocf-ra & - alert agents - and to be sent via stdin for fence-agents - */ + GHashTable *params; /* used by OCF agents and alert agents */ int rc; int pid; diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am index 6191cb9e2..486dd7d7a 100644 --- a/lib/fencing/Makefile.am +++ b/lib/fencing/Makefile.am @@ -15,7 +15,6 @@ libstonithd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la -libstonithd_la_LIBADD += $(top_builddir)/lib/services/libcrmservice.la libstonithd_la_SOURCES = st_client.c st_rhcs.c if BUILD_LHA_SUPPORT diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 0f2c33012..8f6734d33 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -11,7 +11,6 @@ #include <stdio.h> #include <string.h> #include <ctype.h> -#include <libgen.h> #include <sys/stat.h> #include <sys/types.h> @@ -38,18 +37,23 @@ struct stonith_action_s { char *agent; char *action; char *victim; - GHashTable *args; + char *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); - svc_action_t *svc_action; + /*! internal async track data */ + int fd_stdout; + int fd_stderr; + int last_timeout_signo; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; + guint timer_sigterm; + guint timer_sigkill; int max_retries; /* device output data */ @@ -431,11 +435,13 @@ stonith_api_register_level(stonith_t * st, int options, const char *node, int le } static void -append_arg(const char *key, const char *value, GHashTable **args) +append_arg(const char *key, const char *value, char **args) { + int len = 3; /* =, \n, \0 */ + int last = 0; + CRM_CHECK(key != NULL, return); CRM_CHECK(value != NULL, return); - CRM_CHECK(args != NULL, return); if (strstr(key, "pcmk_")) { return; @@ -445,13 +451,15 @@ append_arg(const char *key, const char *value, GHashTable **args) return; } - if (!*args) { - *args = crm_str_table_new(); + len += strlen(key); + len += strlen(value); + if (*args != NULL) { + last = strlen(*args); } - CRM_CHECK(*args != NULL, return); + *args = realloc_safe(*args, last + len); crm_trace("Appending: %s=%s", key, value); - g_hash_table_replace(*args, strdup(key), strdup(value)); + sprintf((*args) + last, "%s=%s\n", key, value); } static void @@ -467,12 +475,12 @@ append_config_arg(gpointer key, gpointer value, gpointer user_data) } } -static GHashTable * +static char * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map) { char buffer[512]; - GHashTable *arg_list = NULL; + char *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); @@ -538,6 +546,66 @@ make_args(const char *agent, const char *action, const char *victim, uint32_t vi return arg_list; } +static gboolean +st_child_term(gpointer data) +{ + int rc = 0; + stonith_action_t *track = data; + + crm_info("Child %d timed out, sending SIGTERM", track->pid); + track->timer_sigterm = 0; + track->last_timeout_signo = SIGTERM; + rc = kill(-track->pid, SIGTERM); + if (rc < 0) { + crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid); + } + return FALSE; +} + +static gboolean +st_child_kill(gpointer data) +{ + int rc = 0; + stonith_action_t *track = data; + + crm_info("Child %d timed out, sending SIGKILL", track->pid); + track->timer_sigkill = 0; + track->last_timeout_signo = SIGKILL; + rc = kill(-track->pid, SIGKILL); + if (rc < 0) { + crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid); + } + return FALSE; +} + +static void +stonith_action_clear_tracking_data(stonith_action_t * action) +{ + if (action->timer_sigterm > 0) { + g_source_remove(action->timer_sigterm); + action->timer_sigterm = 0; + } + if (action->timer_sigkill > 0) { + g_source_remove(action->timer_sigkill); + action->timer_sigkill = 0; + } + if (action->fd_stdout) { + close(action->fd_stdout); + action->fd_stdout = 0; + } + if (action->fd_stderr) { + close(action->fd_stderr); + action->fd_stderr = 0; + } + free(action->output); + action->output = NULL; + free(action->error); + action->error = NULL; + action->rc = 0; + action->pid = 0; + action->last_timeout_signo = 0; +} + /*! * \internal * \brief Free all memory used by a stonith action @@ -548,17 +616,11 @@ void stonith__destroy_action(stonith_action_t *action) { if (action) { + stonith_action_clear_tracking_data(action); free(action->agent); - if (action->args) { - g_hash_table_destroy(action->args); - } + free(action->args); free(action->action); free(action->victim); - if (action->svc_action) { - services_action_free(action->svc_action); - } - free(action->output); - free(action->error); free(action); } } @@ -640,6 +702,38 @@ stonith_action_create(const char *agent, return action; } +#define READ_MAX 500 +static char * +read_output(int fd) +{ + char buffer[READ_MAX]; + char *output = NULL; + int len = 0; + int more = 0; + + if (!fd) { + return NULL; + } + + do { + errno = 0; + memset(&buffer, 0, READ_MAX); + more = read(fd, buffer, READ_MAX - 1); + + if (more > 0) { + buffer[more] = 0; /* Make sure it's nul-terminated for logging + * 'more' is always less than our buffer size + */ + output = realloc_safe(output, len + more + 1); + snprintf(output + len, more + 1, "%s", buffer); + len += more; + } + + } while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR)); + + return output; +} + static gboolean update_remaining_timeout(stonith_action_t * action) { @@ -659,51 +753,58 @@ update_remaining_timeout(stonith_action_t * action) return action->remaining_timeout ? TRUE : FALSE; } -static int -svc_action_to_errno(svc_action_t *svc_action) { - int rv = pcmk_ok; +static void +stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) +{ + stonith_action_t *action = mainloop_child_userdata(p); - if (svc_action->rc > 0) { - /* Try to provide a useful error code based on the fence agent's - * error output. - */ - if (svc_action->rc == PCMK_OCF_TIMEOUT) { - rv = -ETIME; + if (action->timer_sigterm > 0) { + g_source_remove(action->timer_sigterm); + action->timer_sigterm = 0; + } + if (action->timer_sigkill > 0) { + g_source_remove(action->timer_sigkill); + action->timer_sigkill = 0; + } - } else if (svc_action->stderr_data == NULL) { - rv = -ENODATA; + action->output = read_output(action->fd_stdout); + action->error = read_output(action->fd_stderr); - } else if (strstr(svc_action->stderr_data, "imed out")) { - /* Some agents have their own internal timeouts */ - rv = -ETIME; + if (action->last_timeout_signo) { + action->rc = -ETIME; + crm_notice("Child process %d performing action '%s' timed out with signal %d", + pid, action->action, action->last_timeout_signo); - } else if (strstr(svc_action->stderr_data, "Unrecognised action")) { - rv = -EOPNOTSUPP; + } else if (signo) { + action->rc = -ECONNABORTED; + crm_notice("Child process %d performing action '%s' timed out with signal %d", + pid, action->action, signo); - } else { - rv = -pcmk_err_generic; + } else { + crm_debug("Child process %d performing action '%s' exited with rc %d", + pid, action->action, exitcode); + if (exitcode > 0) { + /* Try to provide a useful error code based on the fence agent's + * error output. + */ + if (action->error == NULL) { + exitcode = -ENODATA; + + } else if (strstr(action->error, "imed out")) { + /* Some agents have their own internal timeouts */ + exitcode = -ETIMEDOUT; + + } else if (strstr(action->error, "Unrecognised action")) { + exitcode = -EOPNOTSUPP; + + } else { + exitcode = -pcmk_err_generic; + } } + action->rc = exitcode; } - return rv; -} - -static void -stonith_action_async_done(svc_action_t *svc_action) -{ - stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; - - action->rc = svc_action_to_errno(svc_action); - action->output = svc_action->stdout_data; - svc_action->stdout_data = NULL; - action->error = svc_action->stderr_data; - svc_action->stderr_data = NULL; - - svc_action->params = NULL; - - crm_debug("Child process %d performing action '%s' exited with rc %d", - action->pid, action->action, svc_action->rc); - log_action(action, action->pid); + log_action(action, pid); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); @@ -713,21 +814,28 @@ stonith_action_async_done(svc_action_t *svc_action) } if (action->done_cb) { - action->done_cb(action->pid, action->rc, action->output, action->userdata); + action->done_cb(pid, action->rc, action->output, action->userdata); } - action->svc_action = NULL; // don't remove our caller stonith__destroy_action(action); } static int internal_stonith_action_execute(stonith_action_t * action) { - int rc = -EPROTO; + int pid, status = 0, len, rc = -EPROTO; + int ret; + int total = 0; + int p_read_fd, p_write_fd; /* parent read/write file descriptors */ + int c_read_fd, c_write_fd; /* child read/write file descriptors */ + int c_stderr_fd, p_stderr_fd; /* parent/child side file descriptors for stderr */ + int fd1[2]; + int fd2[2]; + int fd3[2]; int is_retry = 0; - svc_action_t *svc_action = NULL; - static int stonith_sequence = 0; - char *buffer = NULL; + + /* clear any previous tracking data */ + stonith_action_clear_tracking_data(action); if (!action->tries) { action->initial_start_time = time(NULL); @@ -740,60 +848,207 @@ internal_stonith_action_execute(stonith_action_t * action) is_retry = 1; } + c_read_fd = c_write_fd = p_read_fd = p_write_fd = c_stderr_fd = p_stderr_fd = -1; + if (action->args == NULL || action->agent == NULL) goto fail; + len = strlen(action->args); + + if (pipe(fd1)) + goto fail; + p_read_fd = fd1[0]; + c_write_fd = fd1[1]; + + if (pipe(fd2)) + goto fail; + c_read_fd = fd2[0]; + p_write_fd = fd2[1]; + + if (pipe(fd3)) + goto fail; + p_stderr_fd = fd3[0]; + c_stderr_fd = fd3[1]; + + crm_debug("forking"); + pid = fork(); + if (pid < 0) { + rc = -ECHILD; + goto fail; + } + + if (!pid) { + /* child */ + setpgid(0, 0); + + close(1); + /* coverity[leaked_handle] False positive */ + if (dup(c_write_fd) < 0) + goto fail; + close(2); + /* coverity[leaked_handle] False positive */ + if (dup(c_stderr_fd) < 0) + goto fail; + close(0); + /* coverity[leaked_handle] False positive */ + if (dup(c_read_fd) < 0) + goto fail; + + /* keep c_stderr_fd open so parent can report all errors. */ + /* keep c_write_fd open so hostlist can be sent to parent. */ + close(c_read_fd); + close(p_read_fd); + close(p_write_fd); + close(p_stderr_fd); + + /* keep retries from executing out of control */ + if (is_retry) { + sleep(1); + } + execlp(action->agent, action->agent, NULL); + exit(CRM_EX_ERROR); + } + + /* parent */ + action->pid = pid; + ret = crm_set_nonblocking(p_read_fd); + if (ret < 0) { + crm_notice("Could not set output of %s to be non-blocking: %s " + CRM_XS " rc=%d", + action->agent, pcmk_strerror(rc), rc); + } + ret = crm_set_nonblocking(p_stderr_fd); + if (ret < 0) { + crm_notice("Could not set error output of %s to be non-blocking: %s " + CRM_XS " rc=%d", + action->agent, pcmk_strerror(rc), rc); + } + + errno = 0; + do { + crm_debug("sending args"); + ret = write(p_write_fd, action->args + total, len - total); + if (ret > 0) { + total += ret; + } - buffer = crm_strdup_printf(RH_STONITH_DIR "/%s", basename(action->agent)); - svc_action = services_action_create_generic(buffer, NULL); - free(buffer); - svc_action->timeout = 1000 * action->remaining_timeout; - svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); - svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), - action->action, action->tries); - svc_action->agent = strdup(action->agent); - svc_action->sequence = stonith_sequence++; - svc_action->params = action->args; - svc_action->cb_data = (void *) action; - - /* keep retries from executing out of control and free previous results */ - if (is_retry) { - free(action->output); - action->output = NULL; - free(action->error); - action->error = NULL; - sleep(1); + } while (errno == EINTR && total < len); + + if (total != len) { + crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); + if (ret >= 0) { + rc = -ECOMM; + } + goto fail; } + close(p_write_fd); p_write_fd = -1; + + /* async */ if (action->async) { - /* async */ - if(services_action_async(svc_action, &stonith_action_async_done) == FALSE) { - services_action_free(svc_action); - svc_action = NULL; + action->fd_stdout = p_read_fd; + action->fd_stderr = p_stderr_fd; + mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done); + crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, + action->remaining_timeout); + action->last_timeout_signo = 0; + if (action->remaining_timeout) { + action->timer_sigterm = + g_timeout_add(1000 * action->remaining_timeout, st_child_term, action); + action->timer_sigkill = + g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action); } else { - action->pid = svc_action->pid; - action->svc_action = svc_action; - rc = 0; + crm_err("No timeout set for stonith operation %s with device %s", + action->action, action->agent); } + close(c_write_fd); + close(c_read_fd); + close(c_stderr_fd); + return 0; + } else { /* sync */ - if (services_action_sync(svc_action)) { + int timeout = action->remaining_timeout + 1; + pid_t p = 0; + + while (action->remaining_timeout < 0 || timeout > 0) { + p = waitpid(pid, &status, WNOHANG); + if (p > 0) { + break; + } + sleep(1); + timeout--; + } + + if (timeout == 0) { + int killrc = kill(-pid, SIGKILL); + + if (killrc && errno != ESRCH) { + crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); + } + /* + * From sigprocmask(2): + * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. + * + * This makes it safe to skip WNOHANG here + */ + p = waitpid(pid, &status, 0); + } + + if (p <= 0) { + crm_perror(LOG_ERR, "waitpid(%d)", pid); + + } else if (p != pid) { + crm_err("Waited for %d, got %d", pid, p); + } + + action->output = read_output(p_read_fd); + action->error = read_output(p_stderr_fd); + + action->rc = -ECONNABORTED; + + log_action(action, pid); + + rc = action->rc; + if (timeout == 0) { + action->rc = -ETIME; + } else if (WIFEXITED(status)) { + crm_debug("result = %d", WEXITSTATUS(status)); + action->rc = -WEXITSTATUS(status); rc = 0; - action->rc = svc_action_to_errno(svc_action); - action->output = svc_action->stdout_data; - svc_action->stdout_data = NULL; - action->error = svc_action->stderr_data; - svc_action->stderr_data = NULL; + + } else if (WIFSIGNALED(status)) { + crm_err("call %s for %s exited due to signal %d", action->action, action->agent, + WTERMSIG(status)); + } else { - action->rc = -ECONNABORTED; - rc = action->rc; + crm_err("call %s for %s returned unexpected status %#x", + action->action, action->agent, status); } - - svc_action->params = NULL; - services_action_free(svc_action); } fail: + + if (p_read_fd >= 0) { + close(p_read_fd); + } + if (p_write_fd >= 0) { + close(p_write_fd); + } + if (p_stderr_fd >= 0) { + close(p_stderr_fd); + } + + if (c_read_fd >= 0) { + close(c_read_fd); + } + if (c_write_fd >= 0) { + close(c_write_fd); + } + if (c_stderr_fd >= 0) { + close(c_stderr_fd); + } + return rc; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index 66f0fbfc7..23428a828 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -196,39 +196,6 @@ add_action_env_vars(const svc_action_t *op) } } -static void -pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data) -{ - svc_action_t *op = user_data; - char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value); - int ret, total = 0, len = strlen(buffer); - - do { - errno = 0; - ret = write(op->opaque->stdin_fd, buffer + total, len - total); - if (ret > 0) { - total += ret; - } - - } while ((errno == EINTR) && (total < len)); - free(buffer); -} - -/*! - * \internal - * \brief Pipe parameters in via stdin for action - * - * \param[in] op Action to use - */ -static void -pipe_in_action_stdin_parameters(const svc_action_t *op) -{ - crm_debug("sending args"); - if (op->params) { - g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op); - } -} - gboolean recurring_action_timer(gpointer data) { @@ -318,10 +285,6 @@ operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exi op->opaque->stdout_gsource = NULL; } - if (op->opaque->stdin_fd >= 0) { - close(op->opaque->stdin_fd); - } - if (signo) { if (mainloop_child_timeout(p)) { crm_warn("%s - timed out after %dms", prefix, op->timeout); @@ -653,9 +616,6 @@ action_synced_wait(svc_action_t * op, sigset_t *mask) close(op->opaque->stdout_fd); close(op->opaque->stderr_fd); - if (op->opaque->stdin_fd >= 0) { - close(op->opaque->stdin_fd); - } #ifdef HAVE_SYS_SIGNALFD_H close(sfd); @@ -669,7 +629,6 @@ services_os_action_execute(svc_action_t * op) { int stdout_fd[2]; int stderr_fd[2]; - int stdin_fd[2] = {-1, -1}; int rc; struct stat st; sigset_t *pmask = NULL; @@ -735,25 +694,6 @@ services_os_action_execute(svc_action_t * op) return FALSE; } - if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH)) { - if (pipe(stdin_fd) < 0) { - rc = errno; - - close(stdout_fd[0]); - close(stdout_fd[1]); - close(stderr_fd[0]); - close(stderr_fd[1]); - - crm_err("pipe(stdin_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); - - services_handle_exec_error(op, rc); - if (!op->synchronous) { - return operation_finalize(op); - } - return FALSE; - } - } - if (op->synchronous) { #ifdef HAVE_SYS_SIGNALFD_H sigemptyset(&mask); @@ -801,10 +741,6 @@ services_os_action_execute(svc_action_t * op) close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); - if (stdin_fd[0] >= 0) { - close(stdin_fd[0]); - close(stdin_fd[1]); - } crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); @@ -818,9 +754,6 @@ services_os_action_execute(svc_action_t * op) case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); - if (stdin_fd[1] >= 0) { - close(stdin_fd[1]); - } if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err("dup2() failed (stdout)"); @@ -833,13 +766,6 @@ services_os_action_execute(svc_action_t * op) } close(stderr_fd[1]); } - if ((stdin_fd[0] >= 0) && - (STDIN_FILENO != stdin_fd[0])) { - if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) { - crm_err("dup2() failed (stdin)"); - } - close(stdin_fd[0]); - } if (op->synchronous) { sigchld_cleanup(); @@ -852,9 +778,6 @@ services_os_action_execute(svc_action_t * op) /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); - if (stdin_fd[0] >= 0) { - close(stdin_fd[0]); - } op->opaque->stdout_fd = stdout_fd[0]; rc = crm_set_nonblocking(op->opaque->stdout_fd); @@ -872,22 +795,6 @@ services_os_action_execute(svc_action_t * op) pcmk_strerror(rc), rc); } - op->opaque->stdin_fd = stdin_fd[1]; - if (op->opaque->stdin_fd >= 0) { - // using buffer behind non-blocking-fd here - that could be improved - // as long as no other standard uses stdin_fd assume stonith - rc = crm_set_nonblocking(op->opaque->stdin_fd); - if (rc < 0) { - crm_warn("Could not set child input non-blocking: %s " - CRM_XS " fd=%d,rc=%d", - pcmk_strerror(rc), op->opaque->stdin_fd, rc); - } - pipe_in_action_stdin_parameters(op); - // as long as we are handling parameters directly in here just close - close(op->opaque->stdin_fd); - op->opaque->stdin_fd = -1; - } - if (op->synchronous) { action_synced_wait(op, pmask); sigchld_cleanup(); diff --git a/lib/services/services_private.h b/lib/services/services_private.h index bb4a7b6a3..23395e20e 100644 --- a/lib/services/services_private.h +++ b/lib/services/services_private.h @@ -31,8 +31,6 @@ struct svc_action_private_s { int stdout_fd; mainloop_io_t *stdout_gsource; - - int stdin_fd; #if SUPPORT_DBUS DBusPendingCall* pending; unsigned timerid; -- 2.16.4
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor