Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Step:15-SP4
slurm.32299
Prevent-credential-abuse.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File Prevent-credential-abuse.patch of Package slurm.32299
From e799a1a3e17aaae2e2076ce43bf2f7a4fa162b69 Mon Sep 17 00:00:00 2001 From: Dominik Bartkiewicz <bart@schedmd.com> Date: Wed, 4 May 2022 13:06:36 -0600 Subject: [PATCH 01/14] Prevent credential abuse. CVE-2022-29500 --- NEWS | 1 + src/api/config_info.c | 1 + src/api/job_info.c | 2 + src/api/job_step_info.c | 2 + src/api/node_info.c | 1 + src/api/pmi_server.c | 1 + src/api/reconfigure.c | 1 + src/api/signal.c | 3 + src/api/slurm_pmi.c | 3 + src/api/step_launch.c | 2 + src/bcast/file_bcast.c | 1 + src/common/forward.c | 5 +- src/common/slurm_auth.c | 43 +- src/common/slurm_auth.h | 16 +- src/common/slurm_persist_conn.c | 1 + src/common/slurm_persist_conn.h | 1 + src/common/slurm_protocol_api.c | 243 ++++++++++- src/common/slurm_protocol_api.h | 3 + src/common/slurm_protocol_defs.c | 4 + src/common/slurm_protocol_defs.h | 14 + src/common/slurmdb_defs.c | 1 + src/common/stepd_api.c | 3 +- src/common/stepd_api.h | 3 +- .../accounting_storage/common/common_as.c | 1 + .../accounting_storage/slurmdbd/dbd_conn.c | 380 ++++++++++++++++++ src/plugins/auth/jwt/auth_jwt.c | 16 +- src/plugins/auth/munge/auth_munge.c | 49 ++- src/plugins/auth/none/auth_none.c | 17 +- src/plugins/mpi/pmi2/setup.c | 2 + src/plugins/mpi/pmi2/setup.h | 1 + src/plugins/mpi/pmi2/spawn.c | 4 +- src/plugins/mpi/pmix/pmixp_dconn.c | 1 + src/plugins/mpi/pmix/pmixp_dconn.h | 3 + src/plugins/mpi/pmix/pmixp_server.c | 27 +- src/plugins/mpi/pmix/pmixp_utils.c | 1 + src/sattach/sattach.c | 1 + src/slurmctld/agent.c | 23 +- src/slurmctld/agent.h | 5 + src/slurmctld/backup.c | 9 +- src/slurmctld/controller.c | 2 + src/slurmctld/fed_mgr.c | 2 + src/slurmctld/job_mgr.c | 14 +- src/slurmctld/job_scheduler.c | 3 + src/slurmctld/node_mgr.c | 3 + src/slurmctld/node_scheduler.c | 3 + src/slurmctld/ping_nodes.c | 4 + src/slurmctld/proc_req.c | 2 + src/slurmctld/srun_comm.c | 37 +- src/slurmctld/step_mgr.c | 3 + src/slurmd/slurmd/req.c | 21 +- src/slurmd/slurmd/slurmd.c | 6 +- src/slurmd/slurmstepd/io.c | 1 + 
src/slurmd/slurmstepd/mgr.c | 16 +- src/slurmd/slurmstepd/mgr.h | 2 +- src/slurmd/slurmstepd/req.c | 1 + src/slurmd/slurmstepd/slurmstepd.c | 23 +- src/slurmd/slurmstepd/slurmstepd_job.c | 7 +- src/slurmd/slurmstepd/slurmstepd_job.h | 4 +- src/slurmd/slurmstepd/x11_forwarding.c | 4 + src/slurmdbd/read_config.c | 1 + src/slurmdbd/slurmdbd.c | 1 + 61 files changed, 988 insertions(+), 67 deletions(-) create mode 100644 src/plugins/accounting_storage/slurmdbd/dbd_conn.c diff --git a/NEWS b/NEWS index 4ed7678514..3340c918a7 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,7 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. + -- CVE-2022-29500 - Prevent credential abuse. -- CVE-2022-29501 - Prevent abuse of REQUEST_FORWARD_DATA. * Changes in Slurm 20.02.7 diff --git a/src/api/config_info.c b/src/api/config_info.c index f8c9f08dcb..0f2a373b44 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -1911,6 +1911,7 @@ slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr) } req_msg.msg_type = REQUEST_DAEMON_STATUS; req_msg.data = NULL; + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); diff --git a/src/api/job_info.c b/src/api/job_info.c index 5d47db0f82..412e5eb059 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -1500,6 +1500,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid) req.job_pid = job_pid; req_msg.msg_type = REQUEST_JOB_ID; req_msg.data = &req; + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); @@ -1875,6 +1876,7 @@ slurm_network_callerid (network_callerid_msg_t req, uint32_t *job_id, req_msg.msg_type = REQUEST_NETWORK_CALLERID; req_msg.data = &req; + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0) return SLURM_ERROR; diff --git a/src/api/job_step_info.c 
b/src/api/job_step_info.c index c53aa51e9d..ebe406a1c1 100644 --- a/src/api/job_step_info.c +++ b/src/api/job_step_info.c @@ -605,6 +605,7 @@ extern int slurm_job_step_stat(uint32_t job_id, uint32_t step_id, __func__, job_id, step_id, node_list); slurm_msg_t_init(&req_msg); + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); memset(&req, 0, sizeof(req)); resp_out->job_id = req.job_id = job_id; @@ -720,6 +721,7 @@ extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id, __func__, job_id, step_id, node_list); slurm_msg_t_init(&req_msg); + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); memset(&req, 0, sizeof(req)); resp_out->job_id = req.job_id = job_id; diff --git a/src/api/node_info.c b/src/api/node_info.c index 8f9cb0e26e..22dbeb13fa 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -859,6 +859,7 @@ extern int slurm_get_node_energy(char *host, uint16_t context_id, req.delta = delta; req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY; req_msg.data = &req; + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); diff --git a/src/api/pmi_server.c b/src/api/pmi_server.c index 203bf0504f..520eea944f 100644 --- a/src/api/pmi_server.c +++ b/src/api/pmi_server.c @@ -141,6 +141,7 @@ static void *_msg_thread(void *x) slurm_msg_t msg_send; slurm_msg_t_init(&msg_send); + slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); debug2("KVS_Barrier msg to %s:%hu", msg_arg_ptr->bar_ptr->hostname, diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c index fd428c36d0..789a060777 100644 --- a/src/api/reconfigure.c +++ b/src/api/reconfigure.c @@ -157,6 +157,7 @@ static int _send_message_controller(int dest, slurm_msg_t *req) slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR); } + slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id); if (slurm_send_node_msg(fd, req) < 0) { close(fd); slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR); diff --git a/src/api/signal.c b/src/api/signal.c index 
8f585db6a3..bd536e97f4 100644 --- a/src/api/signal.c +++ b/src/api/signal.c @@ -59,6 +59,7 @@ static int _local_send_recv_rc_msgs(const char *nodelist, slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(msg); + slurm_msg_set_r_uid(msg, SLURM_AUTH_UID_ANY); msg->msg_type = type; msg->data = data; @@ -101,6 +102,7 @@ static int _signal_batch_script_step(const resource_allocation_response_msg_t rpc.flags = KILL_JOB_BATCH; slurm_msg_t_init(&msg); + slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); msg.msg_type = REQUEST_SIGNAL_TASKS; msg.data = &rpc; if (slurm_conf_get_addr(name, &msg.address, msg.flags) @@ -159,6 +161,7 @@ static int _terminate_batch_script_step(const resource_allocation_response_msg_t slurm_msg_t_init(&msg); msg.msg_type = REQUEST_TERMINATE_TASKS; + slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); msg.data = &rpc; if (slurm_conf_get_addr(name, &msg.address, msg.flags) diff --git a/src/api/slurm_pmi.c b/src/api/slurm_pmi.c index c2c4dce054..4028105b26 100644 --- a/src/api/slurm_pmi.c +++ b/src/api/slurm_pmi.c @@ -178,6 +178,7 @@ int slurm_send_kvs_comm_set(kvs_comm_set_t *kvs_set_ptr, _set_pmi_time(); slurm_msg_t_init(&msg_send); + slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_PUT_REQ; msg_send.data = (void *) kvs_set_ptr; @@ -260,6 +261,7 @@ int slurm_get_kvs_comm_set(kvs_comm_set_t **kvs_set_ptr, data.port = port; data.hostname = hostname; slurm_msg_t_init(&msg_send); + slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); slurm_msg_t_init(&msg_rcv); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_GET_REQ; @@ -344,6 +346,7 @@ static int _forward_comm_set(kvs_comm_set_t *kvs_set_ptr) if (kvs_set_ptr->kvs_host_ptr[i].port == 0) continue; /* empty */ slurm_msg_t_init(&msg_send); + slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); msg_send.msg_type = PMI_KVS_GET_RESP; msg_send.data = (void *) kvs_set_ptr; slurm_set_addr(&msg_send.address, diff --git 
a/src/api/step_launch.c b/src/api/step_launch.c index 09b441f084..c75f3dc403 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -900,6 +900,7 @@ extern void slurm_step_launch_fwd_signal(slurm_step_ctx_t *ctx, int signo) hostlist_destroy(hl); RESEND: slurm_msg_t_init(&req); + slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY); req.msg_type = REQUEST_SIGNAL_TASKS; req.data = &msg; @@ -1723,6 +1724,7 @@ static int _launch_tasks(slurm_step_ctx_t *ctx, } slurm_msg_t_init(&msg); + slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); msg.msg_type = REQUEST_LAUNCH_TASKS; msg.data = launch_msg; diff --git a/src/bcast/file_bcast.c b/src/bcast/file_bcast.c index 149a0ed3a1..0a1064704b 100644 --- a/src/bcast/file_bcast.c +++ b/src/bcast/file_bcast.c @@ -190,6 +190,7 @@ static int _file_bcast(struct bcast_parameters *params, slurm_msg_t msg; slurm_msg_t_init(&msg); + slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); msg.data = bcast_msg; msg.flags = USE_BCAST_NETWORK; msg.forward.tree_width = params->fanout; diff --git a/src/common/forward.c b/src/common/forward.c index bdcfe09c74..eaf6ceb62a 100644 --- a/src/common/forward.c +++ b/src/common/forward.c @@ -248,7 +248,7 @@ void *_forward_thread(void *arg) /* steps, fwd_msg->timeout); */ } - ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout); + ret_list = slurm_receive_resp_msgs(fd, steps, fwd_msg->timeout); /* info("sent %d forwards got %d back", */ /* fwd_msg->header.forward.cnt, list_count(ret_list)); */ @@ -357,6 +357,9 @@ void *_fwd_tree_thread(void *arg) send_msg.flags = fwd_tree->orig_msg->flags; send_msg.data = fwd_tree->orig_msg->data; send_msg.protocol_version = fwd_tree->orig_msg->protocol_version; + if (fwd_tree->orig_msg->restrict_uid_set) + slurm_msg_set_r_uid(&send_msg, + fwd_tree->orig_msg->restrict_uid); /* repeat until we are sure the message was sent */ while ((name = hostlist_shift(fwd_tree->tree_hl))) { diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c index 69e5c4de3e..f9431b209c 100644 
--- a/src/common/slurm_auth.c +++ b/src/common/slurm_auth.c @@ -62,12 +62,16 @@ typedef struct { typedef struct { uint32_t (*plugin_id); char (*plugin_type); - void * (*create) (char *auth_info); + bool (*hash_enable); + void * (*create) (char *auth_info, uid_t r_uid, + void *data, int dlen); int (*destroy) (void *cred); int (*verify) (void *cred, char *auth_info); uid_t (*get_uid) (void *cred); gid_t (*get_gid) (void *cred); char * (*get_host) (void *cred); + int (*get_data) (void *cred, char **data, + uint32_t *len); int (*pack) (void *cred, Buf buf, uint16_t protocol_version); void * (*unpack) (Buf buf, uint16_t protocol_version); @@ -82,12 +86,14 @@ typedef struct { static const char *syms[] = { "plugin_id", "plugin_type", + "hash_enable", "slurm_auth_create", "slurm_auth_destroy", "slurm_auth_verify", "slurm_auth_get_uid", "slurm_auth_get_gid", "slurm_auth_get_host", + "auth_p_get_data", "slurm_auth_pack", "slurm_auth_unpack", "slurm_auth_thread_config", @@ -95,6 +101,17 @@ static const char *syms[] = { "slurm_auth_token_generate", }; +typedef struct { + int plugin_id; + char *type; +} auth_plugin_types_t; + +auth_plugin_types_t auth_plugin_types[] = { + { AUTH_PLUGIN_NONE, "auth/none" }, + { AUTH_PLUGIN_MUNGE, "auth/munge" }, + { AUTH_PLUGIN_JWT, "auth/jwt" }, +}; + /* * A global authentication context. "Global" in the sense that there's * only one, with static bindings. We don't export it. @@ -104,6 +121,15 @@ static plugin_context_t **g_context = NULL; static int g_context_num = -1; static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; +extern bool slurm_get_plugin_hash_enable(int index) +{ + if (slurm_auth_init(NULL) < 0) + return true; + + return *(ops[index].hash_enable); + +} + extern int slurm_auth_init(char *auth_type) { int retval = SLURM_SUCCESS; @@ -232,14 +258,15 @@ int slurm_auth_index(void *cred) * the API function dispatcher. 
*/ -void *g_slurm_auth_create(int index, char *auth_info) +void *g_slurm_auth_create(int index, char *auth_info, uid_t r_uid, + void *data, int dlen) { cred_wrapper_t *cred; if (slurm_auth_init(NULL) < 0) return NULL; - cred = (*(ops[index].create))(auth_info); + cred = (*(ops[index].create))(auth_info, r_uid, data, dlen); if (cred) cred->index = index; return cred; @@ -295,6 +322,16 @@ char *g_slurm_auth_get_host(void *cred) return (*(ops[wrap->index].get_host))(cred); } +int auth_g_get_data(void *cred, char **data, uint32_t *len) +{ + cred_wrapper_t *wrap = (cred_wrapper_t *) cred; + + if (!wrap || slurm_auth_init(NULL) < 0) + return SLURM_ERROR; + + return (*(ops[wrap->index].get_data))(cred, data, len); +} + int g_slurm_auth_pack(void *cred, Buf buf, uint16_t protocol_version) { cred_wrapper_t *wrap = (cred_wrapper_t *) cred; diff --git a/src/common/slurm_auth.h b/src/common/slurm_auth.h index 8a607a5b3c..7b6ca17cf8 100644 --- a/src/common/slurm_auth.h +++ b/src/common/slurm_auth.h @@ -63,6 +63,12 @@ */ #define SLURM_AUTH_NOBODY 99 +/* + * This should be equal to MUNGE_UID_ANY + * do not restrict decode via uid + */ +#define SLURM_AUTH_UID_ANY -1 + /* * Default auth_index value, corresponds to the primary AuthType used. */ @@ -86,15 +92,23 @@ extern int slurm_auth_fini(void); */ extern int slurm_auth_index(void *cred); +/* + * Check if plugin type corresponding to the authentication + * plugin index supports hash. + */ +extern bool slurm_get_plugin_hash_enable(int index); + /* * Static bindings for the global authentication context. 
*/ -extern void *g_slurm_auth_create(int index, char *auth_info); +extern void *g_slurm_auth_create(int index, char *auth_info, uid_t r_uid, + void *data, int dlen); extern int g_slurm_auth_destroy(void *cred); extern int g_slurm_auth_verify(void *cred, char *auth_info); extern uid_t g_slurm_auth_get_uid(void *cred); extern gid_t g_slurm_auth_get_gid(void *cred); extern char *g_slurm_auth_get_host(void *cred); +extern int auth_g_get_data(void *cred, char **data, uint32_t *len); extern int g_slurm_auth_pack(void *cred, Buf buf, uint16_t protocol_version); extern void *g_slurm_auth_unpack(Buf buf, uint16_t protocol_version); diff --git a/src/common/slurm_persist_conn.c b/src/common/slurm_persist_conn.c index bb1a0bccbd..82b9d204a2 100644 --- a/src/common/slurm_persist_conn.c +++ b/src/common/slurm_persist_conn.c @@ -600,6 +600,7 @@ extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn) req_msg.flags |= SLURM_GLOBAL_AUTH_KEY; if (persist_conn->flags & PERSIST_FLAG_DBD) req_msg.flags |= SLURMDBD_CONNECTION; + slurm_msg_set_r_uid(&req_msg, persist_conn->r_uid); memset(&req, 0, sizeof(persist_init_req_msg_t)); req.cluster_name = persist_conn->cluster_name; diff --git a/src/common/slurm_persist_conn.h b/src/common/slurm_persist_conn.h index 405a393d58..a8bfcba2c9 100644 --- a/src/common/slurm_persist_conn.h +++ b/src/common/slurm_persist_conn.h @@ -74,6 +74,7 @@ typedef struct { uint16_t flags; bool inited; persist_conn_type_t persist_type; + uid_t r_uid; char *rem_host; uint16_t rem_port; time_t *shutdown; diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 5eb87d7b3f..c0aa1fd2ec 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -980,6 +980,36 @@ char *slurm_get_prep_plugins(void) return plugins; } +static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg, + void *cred) +{ + char *cred_hash = NULL; + uint32_t cred_hash_len = 0; + int rc; + static time_t config_update = 
(time_t) -1; + static bool block_null_hash = true; + + if (config_update != slurm_conf.last_update) { + block_null_hash = (xstrcasestr(slurm_conf.comm_params, + "block_null_hash")); + config_update = slurm_conf.last_update; + } + + rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len); + + if (cred_hash || cred_hash_len) { + if (cred_hash_len != 3 || cred_hash[0] != 1 || + memcmp(cred_hash + 1, + &msg->msg_type, sizeof(msg->msg_type))) + rc = SLURM_ERROR; + } else if (block_null_hash && + slurm_get_plugin_hash_enable(msg->auth_index)) + rc = SLURM_ERROR; + + xfree(cred_hash); + return rc; +} + static int _get_tres_id(char *type, char *name) { slurmdb_tres_rec_t tres_rec; @@ -3435,6 +3465,9 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer) goto total_return; } + msg->auth_uid = g_slurm_auth_get_uid(auth_cred); + msg->auth_uid_set = true; + /* * Unpack message body */ @@ -3445,6 +3478,7 @@ extern int slurm_unpack_received_msg(slurm_msg_t *msg, int fd, Buf buffer) msg->body_offset = get_buf_offset(buffer); if ((header.body_length > remaining_buf(buffer)) || + _check_hash(buffer, &header, msg, auth_cred) || (unpack_msg(msg, buffer) != SLURM_SUCCESS)) { rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET; (void) g_slurm_auth_destroy(auth_cred); @@ -3548,6 +3582,8 @@ int slurm_receive_msg(int fd, slurm_msg_t *msg, int timeout) */ if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) { rc = errno; + if (!rc) + rc = SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR; goto endit; } @@ -3710,6 +3746,8 @@ List slurm_receive_msgs(int fd, int steps, int timeout) goto total_return; } + msg.auth_uid = g_slurm_auth_get_uid(auth_cred); + msg.auth_uid_set = true; /* * Unpack message body */ @@ -3718,6 +3756,7 @@ List slurm_receive_msgs(int fd, int steps, int timeout) msg.flags = header.flags; if ((header.body_length > remaining_buf(buffer)) || + _check_hash(buffer, &header, &msg, auth_cred) || (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) { (void) 
g_slurm_auth_destroy(auth_cred); free_buf(buffer); @@ -3759,6 +3798,155 @@ total_return: } +List slurm_receive_resp_msgs(int fd, int steps, int timeout) +{ + char *buf = NULL; + size_t buflen = 0; + header_t header; + int rc; + void *auth_cred = NULL; + slurm_msg_t msg; + buf_t *buffer; + ret_data_info_t *ret_data_info = NULL; + List ret_list = NULL; + int orig_timeout = timeout; + + xassert(fd >= 0); + + slurm_msg_t_init(&msg); + msg.conn_fd = fd; + + if (timeout <= 0) { + /* convert secs to msec */ + timeout = slurm_conf.msg_timeout * 1000; + orig_timeout = timeout; + } + if (steps) { + if (message_timeout < 0) + message_timeout = slurm_conf.msg_timeout * 1000; + orig_timeout = (timeout - + (message_timeout*(steps-1)))/steps; + steps--; + } + + log_flag(NET, "%s: orig_timeout was %d we have %d steps and a timeout of %d", + __func__, orig_timeout, steps, timeout); + /* we compare to the orig_timeout here because that is really + * what we are going to wait for each step + */ + if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) { + log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds", + __func__, (slurm_conf.msg_timeout * 10), + (timeout/1000)); + } else if (orig_timeout < 1000) { + log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds", + __func__, timeout, orig_timeout); + } + + + /* + * Receive a msg. slurm_msg_recvfrom() will read the message + * length and allocate space on the heap for a buffer containing + * the message. 
+ */ + if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) { + forward_init(&header.forward); + rc = errno; + goto total_return; + } + + log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__); + buffer = create_buf(buf, buflen); + + if (unpack_header(&header, buffer) == SLURM_ERROR) { + free_buf(buffer); + rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR; + goto total_return; + } + + if (check_header_version(&header) < 0) { + slurm_addr_t resp_addr; + if (!slurm_get_peer_addr(fd, &resp_addr)) { + error("%s: Invalid Protocol Version %u from at %pA", + __func__, header.version, &resp_addr); + } else { + error("%s: Invalid Protocol Version %u from problem connection: %m", + __func__, header.version); + } + + free_buf(buffer); + rc = SLURM_PROTOCOL_VERSION_ERROR; + goto total_return; + } + //info("ret_cnt = %d",header.ret_cnt); + if (header.ret_cnt > 0) { + if (header.ret_list) + ret_list = header.ret_list; + else + ret_list = list_create(destroy_data_info); + header.ret_cnt = 0; + header.ret_list = NULL; + } + + /* Forward message to other nodes */ + if (header.forward.cnt > 0) { + error("%s: We need to forward this to other nodes use slurm_receive_msg_and_forward instead", + __func__); + } + + if (!(auth_cred = g_slurm_auth_unpack(buffer, header.version))) { + error("%s: auth_g_unpack: %m", __func__); + free_buf(buffer); + rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET; + goto total_return; + } + g_slurm_auth_destroy(auth_cred); + /* + * Unpack message body + */ + msg.protocol_version = header.version; + msg.msg_type = header.msg_type; + msg.flags = header.flags; + + if ((header.body_length > remaining_buf(buffer)) || + (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) { + free_buf(buffer); + rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET; + goto total_return; + } + free_buf(buffer); + rc = SLURM_SUCCESS; + +total_return: + destroy_forward(&header.forward); + + if (rc != SLURM_SUCCESS) { + if (ret_list) { + ret_data_info = xmalloc(sizeof(ret_data_info_t)); + ret_data_info->err = 
rc; + ret_data_info->type = RESPONSE_FORWARD_FAILED; + ret_data_info->data = NULL; + list_push(ret_list, ret_data_info); + } + + error("%s: failed: %s", + __func__, slurm_strerror(rc)); + usleep(10000); /* Discourage brute force attack */ + } else { + if (!ret_list) + ret_list = list_create(destroy_data_info); + ret_data_info = xmalloc(sizeof(ret_data_info_t)); + ret_data_info->err = rc; + ret_data_info->node_name = NULL; + ret_data_info->type = msg.msg_type; + ret_data_info->data = msg.data; + list_push(ret_list, ret_data_info); + } + + errno = rc; + return ret_list; + +} /* try to determine the UID associated with a message with different * message header version, return -1 if we can't tell */ static int _unpack_msg_uid(Buf buffer, uint16_t protocol_version) @@ -3946,6 +4134,9 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr, goto total_return; } + msg->auth_uid = g_slurm_auth_get_uid(auth_cred); + msg->auth_uid_set = true; + /* * Unpack message body */ @@ -3960,6 +4151,7 @@ int slurm_receive_msg_and_forward(int fd, slurm_addr_t *orig_addr, } if ( (header.body_length > remaining_buf(buffer)) || + _check_hash(buffer, &header, msg, auth_cred) || (unpack_msg(msg, buffer) != SLURM_SUCCESS) ) { (void) g_slurm_auth_destroy(auth_cred); free_buf(buffer); @@ -4027,6 +4219,7 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg) int rc; void * auth_cred; time_t start_time = time(NULL); + unsigned char auth_payload[3] = { 1 }; /* uint8_t + uint16_t (msg_type) */ if (msg->conn) { persist_msg_t persist_msg; @@ -4062,6 +4255,9 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg) return rc; } + if (!msg->restrict_uid_set) + fatal("%s: restrict_uid is not set", __func__); + memcpy(auth_payload + 1, &msg->msg_type, sizeof(msg->msg_type)); /* * Initialize header with Auth credential and message type. 
* We get the credential now rather than later so the work can @@ -4071,10 +4267,15 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg) */ if (msg->flags & SLURM_GLOBAL_AUTH_KEY) { auth_cred = g_slurm_auth_create(msg->auth_index, - _global_auth_key()); + _global_auth_key(), + msg->restrict_uid, auth_payload, + sizeof(auth_payload)); } else { char *auth_info = slurm_get_auth_info(); - auth_cred = g_slurm_auth_create(msg->auth_index, auth_info); + auth_cred = g_slurm_auth_create(msg->auth_index, + slurm_conf.authinfo, + msg->restrict_uid, auth_payload, + sizeof(auth_payload)); xfree(auth_info); } @@ -4092,11 +4293,17 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg) (void) g_slurm_auth_destroy(auth_cred); if (msg->flags & SLURM_GLOBAL_AUTH_KEY) { auth_cred = g_slurm_auth_create(msg->auth_index, - _global_auth_key()); + _global_auth_key(), + msg->restrict_uid, + auth_payload, + sizeof(auth_payload)); } else { char *auth_info = slurm_get_auth_info(); auth_cred = g_slurm_auth_create(msg->auth_index, - auth_info); + slurm_conf.authinfo, + msg->restrict_uid, + auth_payload, + sizeof(auth_payload)); xfree(auth_info); } } @@ -4331,6 +4538,24 @@ static void _resp_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg, resp_msg->protocol_version = msg->protocol_version; resp_msg->ret_list = msg->ret_list; resp_msg->orig_addr = msg->orig_addr; + /* + * Extra sanity check. This should always be set. But if for some + * reason it isn't, restrict the decode to avoid leaking an + * unrestricted authentication token. + * + * Implicitly trust communications initiated by SlurmUser and + * SlurmdUser. In future releases this won't matter - there's + * no point packing an auth token on the reply as it isn't checked, + * but we're stuck doing that on older protocol versions for + * backwards-compatibility. 
+ */ + if (!msg->auth_uid_set) + slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY); + else if ((msg->auth_uid != slurm_conf.slurm_user_id) && + (msg->auth_uid != slurm_conf.slurmd_user_id)) + slurm_msg_set_r_uid(resp_msg, msg->auth_uid); + else + slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_UID_ANY); } static void _rc_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg, @@ -4612,6 +4837,7 @@ extern int slurm_send_recv_controller_msg(slurm_msg_t * request_msg, forward_init(&request_msg->forward); request_msg->ret_list = NULL; request_msg->forward_struct = NULL; + slurm_msg_set_r_uid(request_msg, SLURM_AUTH_UID_ANY); tryagain: retry = 1; @@ -4740,6 +4966,8 @@ extern int slurm_send_only_controller_msg(slurm_msg_t *req, goto cleanup; } + slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id); + if ((rc = slurm_send_node_msg(fd, req)) < 0) { rc = SLURM_ERROR; } else { @@ -5131,6 +5359,12 @@ extern void slurm_free_msg(slurm_msg_t *msg) } } +extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid) +{ + msg->restrict_uid = r_uid; + msg->restrict_uid_set = true; +} + extern char *nodelist_nth_host(const char *nodelist, int inx) { hostlist_t hl = hostlist_create(nodelist); @@ -5329,6 +5563,7 @@ extern int slurm_forward_data( req.len = len; req.data = (char *)data; + slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); msg.msg_type = REQUEST_FORWARD_DATA; msg.data = &req; diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index cadaf0130c..34384e0bcc 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -1082,6 +1082,7 @@ int slurm_receive_msg(int fd, slurm_msg_t *msg, int timeout); * errno set. 
*/ List slurm_receive_msgs(int fd, int steps, int timeout); +List slurm_receive_resp_msgs(int fd, int steps, int timeout); /* * Receive a slurm message on the open slurm descriptor "fd" waiting @@ -1390,6 +1391,8 @@ extern int *set_span(int total, uint16_t tree_width); extern void slurm_free_msg_members(slurm_msg_t *msg); extern void slurm_free_msg(slurm_msg_t * msg); +extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid); + /* must free this memory with free not xfree */ extern char *nodelist_nth_host(const char *nodelist, int inx); extern int nodelist_find(const char *nodelist, const char *name); diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index bd00716147..1bd4dcbf03 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -52,6 +52,7 @@ #include "src/common/power.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_auth.h" #include "src/common/slurm_cred.h" #include "src/common/slurm_ext_sensors.h" #include "src/common/slurm_jobacct_gather.h" @@ -124,6 +125,7 @@ extern void slurm_msg_t_init(slurm_msg_t *msg) { memset(msg, 0, sizeof(slurm_msg_t)); + msg->auth_uid = SLURM_AUTH_NOBODY; msg->conn_fd = -1; msg->msg_type = NO_VAL16; msg->protocol_version = NO_VAL16; @@ -151,6 +153,8 @@ extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src) dest->ret_list = src->ret_list; dest->forward_struct = src->forward_struct; dest->orig_addr.sin_addr.s_addr = 0; + if (src->auth_uid_set) + slurm_msg_set_r_uid(dest, src->auth_uid); return; } diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index b5ead888ba..e9971bbfbd 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -59,6 +59,7 @@ #include "src/common/job_options.h" #include "src/common/list.h" #include "src/common/macros.h" +#include "src/common/slurm_auth.h" #include "src/common/slurm_cred.h" 
#include "src/common/slurm_protocol_common.h" #include "src/common/slurm_persist_conn.h" @@ -493,6 +494,19 @@ typedef struct slurm_msg { * so that we'll respond with the same auth * plugin used to connect to us originally. */ + uid_t auth_uid; /* NEVER PACK. Authenticated uid from auth + * credential. Only valid if auth_uid_set is + * true. Set to SLURM_AUTH_NOBODY if not set + * yet. + */ + bool auth_uid_set; /* NEVER PACK. True when auth_uid has been set. + * This is a safety measure against handling + * a slurm_msg_t that has been xmalloc()'d but + * slurm_msg_t_init() was not called since + * auth_uid would be root. + */ + uid_t restrict_uid; + bool restrict_uid_set; uint32_t body_offset; /* DON'T PACK: offset in buffer where body part of buffer starts. */ Buf buffer; /* DON't PACK! ptr to buffer that msg was unpacked from. */ diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index f4f606b66b..35e812742e 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -3042,6 +3042,7 @@ extern int slurmdb_send_accounting_update(List update_list, char *cluster, slurm_set_addr_char(&req.address, port, host); req.protocol_version = rpc_version; + slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY); req.msg_type = ACCOUNTING_UPDATE_MSG; if (slurmdbd_conf) diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index ed385e5c11..ef80af2cd1 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -523,7 +523,7 @@ rwfail: int stepd_attach(int fd, uint16_t protocol_version, slurm_addr_t *ioaddr, slurm_addr_t *respaddr, - void *job_cred_sig, reattach_tasks_response_msg_t *resp) + void *job_cred_sig, uid_t uid, reattach_tasks_response_msg_t *resp) { int req = REQUEST_ATTACH; int rc = SLURM_SUCCESS; @@ -533,6 +533,7 @@ stepd_attach(int fd, uint16_t protocol_version, safe_write(fd, ioaddr, sizeof(slurm_addr_t)); safe_write(fd, respaddr, sizeof(slurm_addr_t)); safe_write(fd, job_cred_sig, SLURM_IO_KEY_SIZE); + safe_write(fd, &uid, 
sizeof(uid_t)); safe_write(fd, &protocol_version, sizeof(uint16_t)); } else goto rwfail; diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h index 425a42fec5..137fadd3df 100644 --- a/src/common/stepd_api.h +++ b/src/common/stepd_api.h @@ -200,7 +200,8 @@ int stepd_signal_container(int fd, uint16_t protocol_version, int signal, */ int stepd_attach(int fd, uint16_t protocol_version, slurm_addr_t *ioaddr, slurm_addr_t *respaddr, - void *job_cred_sig, reattach_tasks_response_msg_t *resp); + void *job_cred_sig, uid_t uid, + reattach_tasks_response_msg_t *resp); /* * Scan for available running slurm step daemons by checking diff --git a/src/plugins/accounting_storage/common/common_as.c b/src/plugins/accounting_storage/common/common_as.c index 73d90144e0..e3311e30e6 100644 --- a/src/plugins/accounting_storage/common/common_as.c +++ b/src/plugins/accounting_storage/common/common_as.c @@ -396,6 +396,7 @@ extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version) out_msg.msg_type = ACCOUNTING_FIRST_REG; out_msg.flags = SLURM_GLOBAL_AUTH_KEY; out_msg.data = &update; + slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY); slurm_send_node_msg(fd, &out_msg); /* We probably need to add matching recv_msg function * for an arbitray fd or should these be fire diff --git a/src/plugins/accounting_storage/slurmdbd/dbd_conn.c b/src/plugins/accounting_storage/slurmdbd/dbd_conn.c new file mode 100644 index 0000000000..c9672a732f --- /dev/null +++ b/src/plugins/accounting_storage/slurmdbd/dbd_conn.c @@ -0,0 +1,380 @@ +/****************************************************************************\ + * dbd_conn.c - functions to manage the connection to the SlurmDBD + ***************************************************************************** + * Copyright (C) 2011-2020 SchedMD LLC. + * Copyright (C) 2008-2010 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
+ * Written by Danny Auble <da@schedmd.com> + * Written by Morris Jette <jette1@llnl.gov> + * CODE-OCEC-09-009. All rights reserved. + * + * This file is part of Slurm, a resource management program. + * For details, see <https://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER. + * + * Slurm is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Slurm; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include "src/common/slurm_xlator.h" +#include "src/common/slurmdbd_pack.h" +#include "src/slurmctld/trigger_mgr.h" +#include "slurmdbd_agent.h" + +#define SLURMDBD_TIMEOUT 900 /* Seconds SlurmDBD for response */ + +static void _acct_full(void) +{ + if (running_in_slurmctld()) + trigger_primary_ctld_acct_full(); +} + +static void _dbd_fail(void) +{ + if (running_in_slurmctld()) + trigger_primary_dbd_fail(); +} + +static void _dbd_res_op(void) +{ + if (running_in_slurmctld()) + trigger_primary_dbd_res_op(); +} + +static void _db_fail(void) +{ + if (running_in_slurmctld()) + trigger_primary_db_fail(); +} + +static void _db_res_op(void) +{ + if (running_in_slurmctld()) + trigger_primary_db_res_op(); +} + +static int _connect_dbd_conn(slurm_persist_conn_t *pc) +{ + + int rc; + char *backup_host = NULL; + + xassert(pc); + + /* Only setup a backup host on a non ext_dbd connection */ + if (!(pc->flags & PERSIST_FLAG_EXT_DBD)) + backup_host = + xstrdup(slurm_conf.accounting_storage_backup_host); +again: + // A connection failure is only an error if backup dne or also fails + if (backup_host) + pc->flags |= PERSIST_FLAG_SUPPRESS_ERR; + else + pc->flags &= (~PERSIST_FLAG_SUPPRESS_ERR); + + pc->r_uid = SLURM_AUTH_UID_ANY; + + if (((rc = slurm_persist_conn_open(pc)) != SLURM_SUCCESS) && + backup_host) { + xfree(pc->rem_host); + // Force the next error to display + pc->comm_fail_time = 0; + pc->rem_host = backup_host; + backup_host = NULL; + goto again; + } + + xfree(backup_host); + + if (rc == SLURM_SUCCESS) { + /* + * Increase SLURMDBD_TIMEOUT to wait as long as we need for a + * query to complete. 
+ */ + pc->timeout = MAX(pc->timeout, SLURMDBD_TIMEOUT * 1000); + (pc->trigger_callbacks.dbd_resumed)(); + (pc->trigger_callbacks.db_resumed)(); + } + + if (rc == SLURM_SUCCESS) { + debug("Sent PersistInit msg"); + /* clear errno (checked after this for errors) */ + errno = 0; + } else { + if (rc == ESLURM_DB_CONNECTION) + (pc->trigger_callbacks.db_fail)(); + slurm_persist_conn_close(pc); + + /* This means errno was already set correctly */ + if (rc != SLURM_ERROR) + errno = rc; + error("Sending PersistInit msg: %m"); + } + + return rc; +} + +extern slurm_persist_conn_t *dbd_conn_open(uint16_t *persist_conn_flags, + char *cluster_name, + char *rem_host, + uint16_t rem_port) +{ + slurm_persist_conn_t *pc = xmalloc(sizeof(*pc)); + + if (persist_conn_flags) + pc->flags = *persist_conn_flags; + pc->flags |= (PERSIST_FLAG_DBD | PERSIST_FLAG_RECONNECT); + pc->persist_type = PERSIST_TYPE_DBD; + if (cluster_name) + pc->cluster_name = xstrdup(cluster_name); + else + pc->cluster_name = xstrdup(slurm_conf.cluster_name); + pc->timeout = (slurm_conf.msg_timeout + 35) * 1000; + if (rem_host) + pc->rem_host = xstrdup(rem_host); + else + pc->rem_host = xstrdup(slurm_conf.accounting_storage_host); + if (rem_port) + pc->rem_port = rem_port; + else + pc->rem_port = slurm_conf.accounting_storage_port; + pc->version = SLURM_PROTOCOL_VERSION; + + /* Initialize the callback pointers */ + pc->trigger_callbacks.acct_full = _acct_full; + pc->trigger_callbacks.dbd_fail = _dbd_fail; + pc->trigger_callbacks.dbd_resumed = _dbd_res_op; + pc->trigger_callbacks.db_fail = _db_fail; + pc->trigger_callbacks.db_resumed = _db_res_op; + + (void)_connect_dbd_conn(pc); + + if (persist_conn_flags) + *persist_conn_flags = pc->flags; + + return pc; +} + +extern int dbd_conn_check_and_reopen(slurm_persist_conn_t *pc) +{ + xassert(pc); + + if (pc && pc->fd >= 0) { + debug("Attempt to re-open slurmdbd socket"); + /* clear errno (checked after this for errors) */ + errno = 0; + return SLURM_SUCCESS; + } + + /* + 
* Reset the rem_host just in case we were connected to the backup + * before. + */ + xfree(pc->rem_host); + pc->rem_host = xstrdup(slurm_conf.accounting_storage_host); + + return _connect_dbd_conn(pc); +} + +extern void dbd_conn_close(slurm_persist_conn_t **pc) +{ + int rc; + Buf buffer; + dbd_fini_msg_t req; + + if (!pc) + return; + + /* + * Only send the FINI message if we haven't shutdown + * (i.e. not slurmctld) + */ + if (*(*pc)->shutdown) { + log_flag(NET, "We are shutdown, not sending DB_FINI to %s:%u", + (*pc)->rem_host, + (*pc)->rem_port); + return; + } + + /* If the connection is already gone, we don't need to send a fini. */ + if (slurm_persist_conn_writeable(*pc) == -1) { + log_flag(NET, "unable to send DB_FINI msg to %s:%u", + (*pc)->rem_host, + (*pc)->rem_port); + return; + } + + buffer = init_buf(1024); + pack16((uint16_t) DBD_FINI, buffer); + req.commit = 0; + req.close_conn = 1; + slurmdbd_pack_fini_msg(&req, SLURM_PROTOCOL_VERSION, buffer); + + rc = slurm_persist_send_msg(*pc, buffer); + free_buf(buffer); + + log_flag(NET, "sent DB_FINI msg to %s:%u rc(%d):%s", + (*pc)->rem_host, (*pc)->rem_port, + rc, slurm_strerror(rc)); + + slurm_persist_conn_destroy(*pc); + *pc = NULL; +} + +/* + * Send an RPC to the SlurmDBD and wait for an arbitrary reply message. + * The RPC will not be queued if an error occurs. + * The "resp" message must be freed by the caller. 
+ * Returns SLURM_SUCCESS or an error code + */ +extern int dbd_conn_send_recv_direct(uint16_t rpc_version, + persist_msg_t *req, + persist_msg_t *resp) +{ + int rc = SLURM_SUCCESS; + Buf buffer; + slurm_persist_conn_t *use_conn = req->conn; + + xassert(req); + xassert(resp); + xassert(use_conn); + + if (use_conn->fd < 0) { + /* The connection has been closed, reopen */ + rc = dbd_conn_check_and_reopen(use_conn); + + if (rc != SLURM_SUCCESS || (use_conn->fd < 0)) { + rc = SLURM_ERROR; + goto end_it; + } + } + + if (!(buffer = pack_slurmdbd_msg(req, rpc_version))) { + rc = SLURM_ERROR; + goto end_it; + } + + rc = slurm_persist_send_msg(use_conn, buffer); + free_buf(buffer); + if (rc != SLURM_SUCCESS) { + error("Sending message type %s: %d: %s", + slurmdbd_msg_type_2_str(req->msg_type, 1), rc, + slurm_strerror(rc)); + goto end_it; + } + + buffer = slurm_persist_recv_msg(use_conn); + if (buffer == NULL) { + error("Getting response to message type: %s", + slurmdbd_msg_type_2_str(req->msg_type, 1)); + rc = SLURM_ERROR; + goto end_it; + } + + rc = unpack_slurmdbd_msg(resp, rpc_version, buffer); + /* check for the rc of the start job message */ + if (rc == SLURM_SUCCESS && resp->msg_type == DBD_ID_RC) + rc = ((dbd_id_rc_msg_t *)resp->data)->return_code; + + free_buf(buffer); +end_it: + + log_flag(PROTOCOL, "msg_type:%s protocol_version:%hu return_code:%d response_msg_type:%s", + slurmdbd_msg_type_2_str(req->msg_type, 1), + rpc_version, rc, slurmdbd_msg_type_2_str(resp->msg_type, 1)); + + return rc; +} + +extern int dbd_conn_send_recv_rc_msg(uint16_t rpc_version, + persist_msg_t *req, + int *resp_code) +{ + int rc; + persist_msg_t resp; + + xassert(req); + xassert(resp_code); + + memset(&resp, 0, sizeof(persist_msg_t)); + rc = dbd_conn_send_recv(rpc_version, req, &resp); + if (rc != SLURM_SUCCESS) { + ; /* error message already sent */ + } else if (resp.msg_type != PERSIST_RC) { + error("response is not type PERSIST_RC: %s(%u)", + slurmdbd_msg_type_2_str(resp.msg_type, 1), 
+ resp.msg_type); + rc = SLURM_ERROR; + } else { /* resp.msg_type == PERSIST_RC */ + persist_rc_msg_t *msg = resp.data; + *resp_code = msg->rc; + if (msg->rc != SLURM_SUCCESS && + msg->rc != ACCOUNTING_FIRST_REG && + msg->rc != ACCOUNTING_TRES_CHANGE_DB && + msg->rc != ACCOUNTING_NODES_CHANGE_DB) { + char *comment = msg->comment; + if (!comment) + comment = slurm_strerror(msg->rc); + if (!req->conn && + (msg->ret_info == DBD_REGISTER_CTLD) && + slurm_conf.accounting_storage_enforce) { + error("Issue with call " + "%s(%u): %u(%s)", + slurmdbd_msg_type_2_str( + msg->ret_info, 1), + msg->ret_info, msg->rc, + comment); + fatal("You need to add this cluster " + "to accounting if you want to " + "enforce associations, or no " + "jobs will ever run."); + } else + debug("Issue with call " + "%s(%u): %u(%s)", + slurmdbd_msg_type_2_str( + msg->ret_info, 1), + msg->ret_info, msg->rc, + comment); + } + slurm_persist_free_rc_msg(msg); + } + + log_flag(PROTOCOL, "msg_type:%s protocol_version:%hu return_code:%d", + slurmdbd_msg_type_2_str(req->msg_type, 1), + rpc_version, rc); + + return rc; +} + +extern int dbd_conn_send_recv(uint16_t rpc_version, + persist_msg_t *req, + persist_msg_t *resp) +{ + if (running_in_slurmctld() && + (!req->conn || (req->conn == slurmdbd_conn))) + return slurmdbd_agent_send_recv(rpc_version, req, resp); + else + return dbd_conn_send_recv_direct(rpc_version, req, resp); +} diff --git a/src/plugins/auth/jwt/auth_jwt.c b/src/plugins/auth/jwt/auth_jwt.c index 6eab3be5f0..aeb1e3e652 100644 --- a/src/plugins/auth/jwt/auth_jwt.c +++ b/src/plugins/auth/jwt/auth_jwt.c @@ -75,6 +75,7 @@ const char plugin_name[] = "JWT authentication plugin"; const char plugin_type[] = "auth/jwt"; const uint32_t plugin_id = AUTH_PLUGIN_JWT; const uint32_t plugin_version = SLURM_VERSION_NUMBER; +bool hash_enable = false; typedef struct { int index; /* MUST ALWAYS BE FIRST. DO NOT PACK. 
*/ @@ -149,7 +150,8 @@ extern int fini(void) return SLURM_SUCCESS; } -auth_token_t *slurm_auth_create(char *auth_info) +auth_token_t *slurm_auth_create(char *auth_info, uid_t r_uid, + void *data, int dlen) { return xmalloc(sizeof(auth_token_t)); } @@ -306,6 +308,18 @@ char *slurm_auth_get_host(auth_token_t *cred) return NULL; } +int auth_p_get_data(auth_token_t *cred, char **data, uint32_t *len) +{ + if (cred == NULL) { + slurm_seterrno(ESLURM_AUTH_BADARG); + return SLURM_ERROR; + } + + *data = NULL; + *len = 0; + return SLURM_SUCCESS; +} + int slurm_auth_pack(auth_token_t *cred, Buf buf, uint16_t protocol_version) { char *pack_this = (thread_token) ? thread_token : token; diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c index 789119b2f9..3361f74156 100644 --- a/src/plugins/auth/munge/auth_munge.c +++ b/src/plugins/auth/munge/auth_munge.c @@ -85,6 +85,7 @@ const char plugin_name[] = "Munge authentication plugin"; const char plugin_type[] = "auth/munge"; const uint32_t plugin_id = AUTH_PLUGIN_MUNGE; const uint32_t plugin_version = SLURM_VERSION_NUMBER; +bool hash_enable = true; static int bad_cred_test = -1; @@ -102,6 +103,8 @@ typedef struct _slurm_auth_credential { bool verified; /* true if this cred has been verified */ uid_t uid; /* UID. valid only if verified == true */ gid_t gid; /* GID. valid only if verified == true */ + void *data; /* payload data */ + int dlen; /* payload data length */ } slurm_auth_credential_t; /* Static prototypes */ @@ -130,7 +133,8 @@ int init(void) * allocate a credential. Whether the credential is populated with useful * data at this time is implementation-dependent. 
*/ -slurm_auth_credential_t *slurm_auth_create(char *opts) +slurm_auth_credential_t *slurm_auth_create(char *opts, uid_t r_uid, + void *data, int dlen) { int rc, retry = RETRY_COUNT, auth_ttl; slurm_auth_credential_t *cred = NULL; @@ -155,6 +159,13 @@ slurm_auth_credential_t *slurm_auth_create(char *opts) } } + rc = munge_ctx_set(ctx, MUNGE_OPT_UID_RESTRICTION, r_uid); + if (rc != EMUNGE_SUCCESS) { + error("munge_ctx_set failure"); + munge_ctx_destroy(ctx); + return NULL; + } + auth_ttl = slurm_get_auth_ttl(); if (auth_ttl) (void) munge_ctx_set(ctx, MUNGE_OPT_TTL, auth_ttl); @@ -162,6 +173,8 @@ slurm_auth_credential_t *slurm_auth_create(char *opts) cred = xmalloc(sizeof(*cred)); cred->verified = false; cred->m_str = NULL; + cred->data = NULL; + cred->dlen = 0; xassert((cred->magic = MUNGE_MAGIC)); @@ -174,7 +187,7 @@ slurm_auth_credential_t *slurm_auth_create(char *opts) ohandler = xsignal(SIGALRM, (SigFunc *)SIG_BLOCK); again: - err = munge_encode(&cred->m_str, ctx, NULL, 0); + err = munge_encode(&cred->m_str, ctx, data, dlen); if (err != EMUNGE_SUCCESS) { if ((err == EMUNGE_SOCKET) && retry--) { debug("Munge encode failed: %s (retrying ...)", @@ -215,6 +228,8 @@ int slurm_auth_destroy(slurm_auth_credential_t *cred) /* Note: Munge cred string not encoded with xmalloc() */ if (cred->m_str) free(cred->m_str); + if (cred->data) + free(cred->data); xfree(cred); return SLURM_SUCCESS; @@ -336,6 +351,34 @@ char *slurm_auth_get_host(slurm_auth_credential_t *cred) return hostname; } +/* + * auth_p_verify() must be called first. + */ +int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len) +{ + if (!cred || !cred->verified) { + /* + * This xassert will trigger on a development build if + * the calling path did not verify the credential first. 
+ */ + xassert(!cred); + slurm_seterrno(ESLURM_AUTH_BADARG); + return SLURM_ERROR; + } + + xassert(cred->magic == MUNGE_MAGIC); + + if (cred->data && cred->dlen) { + *data = xmalloc(cred->dlen); + memcpy(*data, cred->data, cred->dlen); + *len = cred->dlen; + } else { + *data = NULL; + *len = 0; + } + return SLURM_SUCCESS; +} + /* * Marshall a credential for transmission over the network, according to * Slurm's marshalling protocol. @@ -428,7 +471,7 @@ static int _decode_cred(slurm_auth_credential_t *c, char *socket) } again: - err = munge_decode(c->m_str, ctx, NULL, NULL, &c->uid, &c->gid); + err = munge_decode(c->m_str, ctx, &c->data, &c->dlen, &c->uid, &c->gid); if (err != EMUNGE_SUCCESS) { if ((err == EMUNGE_SOCKET) && retry--) { debug("Munge decode failed: %s (retrying ...)", diff --git a/src/plugins/auth/none/auth_none.c b/src/plugins/auth/none/auth_none.c index 321377da53..19f4331829 100644 --- a/src/plugins/auth/none/auth_none.c +++ b/src/plugins/auth/none/auth_none.c @@ -75,6 +75,7 @@ const char plugin_name[] = "Null authentication plugin"; const char plugin_type[] = "auth/none"; const uint32_t plugin_id = AUTH_PLUGIN_NONE; const uint32_t plugin_version = SLURM_VERSION_NUMBER; +bool hash_enable = false; /* * An opaque type representing authentication credentials. This type can be @@ -138,7 +139,8 @@ extern int fini(void) * Allocate and initializes a credential. This function should return * NULL if it cannot allocate a credential. 
*/ -slurm_auth_credential_t *slurm_auth_create(char *auth_info) +slurm_auth_credential_t *slurm_auth_create(char *auth_info, uid_t r_uid, + void *data, int dlen) { slurm_auth_credential_t *cred = xmalloc(sizeof(*cred)); @@ -217,6 +219,19 @@ char *slurm_auth_get_host(slurm_auth_credential_t *cred) return xstrdup(cred->hostname); } +int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len) +{ + if (!cred) { + slurm_seterrno(ESLURM_AUTH_BADARG); + return SLURM_ERROR; + } + + *data = NULL; + *len = 0; + + return SLURM_SUCCESS; +} + /* * Marshall a credential for transmission over the network, according to * Slurm's marshalling protocol. diff --git a/src/plugins/mpi/pmi2/setup.c b/src/plugins/mpi/pmi2/setup.c index 3dd074f0af..116c5f25fe 100644 --- a/src/plugins/mpi/pmi2/setup.c +++ b/src/plugins/mpi/pmi2/setup.c @@ -106,6 +106,8 @@ _setup_stepd_job_info(const stepd_step_rec_t *job, char ***env) memset(&job_info, 0, sizeof(job_info)); + job_info.uid = job->uid; + if (job->het_job_id && (job->het_job_id != NO_VAL)) { job_info.jobid = job->het_job_id; job_info.stepid = job->stepid; diff --git a/src/plugins/mpi/pmi2/setup.h b/src/plugins/mpi/pmi2/setup.h index 6f25d372e7..e329b5d215 100644 --- a/src/plugins/mpi/pmi2/setup.h +++ b/src/plugins/mpi/pmi2/setup.h @@ -59,6 +59,7 @@ typedef struct pmi2_job_info { uint32_t jobid; /* Current Slurm job id */ uint32_t stepid; /* Current step id (or NO_VAL) */ + uid_t uid; /* user id for job */ uint32_t nnodes; /* number of nodes in current job step */ uint32_t nodeid; /* relative position of this node in job */ uint32_t ntasks; /* total number of tasks in current job */ diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c index 50ea79027c..182edae3e4 100644 --- a/src/plugins/mpi/pmi2/spawn.c +++ b/src/plugins/mpi/pmi2/spawn.c @@ -151,7 +151,8 @@ spawn_req_pack(spawn_req_t *req, Buf buf) void *auth_cred; char *auth_info = slurm_get_auth_info(); - auth_cred = 
g_slurm_auth_create(AUTH_DEFAULT_INDEX, auth_info); + auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, slurm_conf.authinfo, + job_info.uid, NULL, 0); xfree(auth_info); if (auth_cred == NULL) { error("authentication: %m"); @@ -214,6 +215,7 @@ spawn_req_unpack(spawn_req_t **req_ptr, Buf buf) if (g_slurm_auth_verify(auth_cred, auth_info)) { error("authentication: %m"); xfree(auth_info); + g_slurm_auth_destroy(auth_cred); return SLURM_ERROR; } xfree(auth_info); diff --git a/src/plugins/mpi/pmix/pmixp_dconn.c b/src/plugins/mpi/pmix/pmixp_dconn.c index e8524a8ce3..594230bfc1 100644 --- a/src/plugins/mpi/pmix/pmixp_dconn.c +++ b/src/plugins/mpi/pmix/pmixp_dconn.c @@ -79,6 +79,7 @@ int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr) _pmixp_dconn_conns[i].nodeid = i; _pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT; _pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr); + _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id; } return SLURM_SUCCESS; } diff --git a/src/plugins/mpi/pmix/pmixp_dconn.h b/src/plugins/mpi/pmix/pmixp_dconn.h index 77f302f103..7e34fbdad2 100644 --- a/src/plugins/mpi/pmix/pmixp_dconn.h +++ b/src/plugins/mpi/pmix/pmixp_dconn.h @@ -82,6 +82,9 @@ typedef struct { /* remote node info */ int nodeid; void *priv; + + /* authenticated uid on remote */ + uid_t uid; } pmixp_dconn_t; typedef void *(*pmixp_dconn_p2p_init_t)(int nodeid, diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c index 20c1b17f9e..4ced93c14b 100644 --- a/src/plugins/mpi/pmix/pmixp_server.c +++ b/src/plugins/mpi/pmix/pmixp_server.c @@ -494,13 +494,14 @@ void pmixp_server_cleanup(void) * --------------------- Authentication functionality ------------------- */ -static int _auth_cred_create(Buf buf) +static int _auth_cred_create(Buf buf, uid_t uid) { void *auth_cred = NULL; char *auth_info = slurm_get_auth_info(); int rc = SLURM_SUCCESS; - auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, auth_info); + auth_cred = 
g_slurm_auth_create(AUTH_DEFAULT_INDEX, slurm_conf.authinfo, + uid, NULL, 0); xfree(auth_info); if (!auth_cred) { PMIXP_ERROR("Creating authentication credential: %m"); @@ -520,7 +521,7 @@ static int _auth_cred_create(Buf buf) return rc; } -static int _auth_cred_verify(Buf buf) +static int _auth_cred_verify(Buf buf, uid_t *uid) { void *auth_cred = NULL; char *auth_info = NULL; @@ -540,8 +541,18 @@ static int _auth_cred_verify(Buf buf) rc = g_slurm_auth_verify(auth_cred, auth_info); xfree(auth_info); - if (rc) + if (rc) { PMIXP_ERROR("Verifying authentication credential: %m"); + } else { + uid_t auth_uid; + auth_uid = g_slurm_auth_get_uid(auth_cred); + if ((auth_uid != slurm_conf.slurmd_user_id) && + (auth_uid != _pmixp_job_info.uid)) { + PMIXP_ERROR("Credential from uid %u", auth_uid); + rc = SLURM_ERROR; + } + *uid = auth_uid; + } g_slurm_auth_destroy(auth_cred); return rc; } @@ -708,7 +719,7 @@ static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf) pmixp_base_hdr_t bhdr; init_msg = xmalloc(sizeof(*init_msg)); - rc = _auth_cred_create(buf_init); + rc = _auth_cred_create(buf_init, dconn->uid); if (rc) { FREE_NULL_BUFFER(init_msg->buf_ptr); xfree(init_msg); @@ -1162,6 +1173,7 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg) Buf buf_msg; int rc; char *nodename = NULL; + uid_t uid = SLURM_AUTH_NOBODY; if (!hdr->ext_flag) { nodename = pmixp_info_job_host(hdr->nodeid); @@ -1185,7 +1197,7 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg) return; } /* Unpack and verify the auth credential */ - rc = _auth_cred_verify(buf_msg); + rc = _auth_cred_verify(buf_msg, &uid); FREE_NULL_BUFFER(buf_msg); if (rc) { close(fd); @@ -1209,6 +1221,9 @@ _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg) xfree(nodename); return; } + + dconn->uid = uid; + new_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT, pmixp_dconn_engine(dconn), _direct_new_msg_conn, diff --git a/src/plugins/mpi/pmix/pmixp_utils.c 
b/src/plugins/mpi/pmix/pmixp_utils.c index 3d1c56847c..5d22cacf83 100644 --- a/src/plugins/mpi/pmix/pmixp_utils.c +++ b/src/plugins/mpi/pmix/pmixp_utils.c @@ -418,6 +418,7 @@ static int _pmix_p2p_send_core(const char *nodename, const char *address, msg.forward.timeout = timeout; msg.forward.cnt = 0; msg.forward.nodelist = NULL; + slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout); if (!ret_list) { /* This should never happen (when this was diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c index 8870db653e..948f1156d3 100644 --- a/src/sattach/sattach.c +++ b/src/sattach/sattach.c @@ -405,6 +405,7 @@ static int _attach_to_tasks(uint32_t jobid, reattach_msg.io_port = io_ports; reattach_msg.cred = fake_cred; + slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); msg.msg_type = REQUEST_REATTACH_TASKS; msg.data = &reattach_msg; msg.protocol_version = layout->start_protocol_ver; diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index ec557308ac..cebc4cf158 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -144,6 +144,7 @@ typedef struct agent_info { uint16_t retry; /* if set, keep trying */ thd_t *thread_struct; /* thread structures */ bool get_reply; /* flag if reply expected */ + uid_t r_uid; /* receiver UID */ slurm_msg_type_t msg_type; /* RPC to be issued */ void **msg_args_pptr; /* RPC data to be used */ uint16_t protocol_version; /* if set, use this version */ @@ -157,6 +158,7 @@ typedef struct task_info { uint32_t *threads_active_ptr; /* currently active thread ptr */ thd_t *thread_struct_ptr; /* thread structures ptr */ bool get_reply; /* flag if reply expected */ + uid_t r_uid; /* receiver UID */ slurm_msg_type_t msg_type; /* RPC to be issued */ void *msg_args_ptr; /* ptr to RPC data to be used */ uint16_t protocol_version; /* if set, use this version */ @@ -316,14 +318,16 @@ void *agent(void *args) slurm_thread_create(&thread_wdog, _wdog, agent_info_ptr); if 
(slurmctld_conf.debug_flags & DEBUG_FLAG_AGENT) { - info("%s: New agent thread_count:%d threads_active:%d retry:%c get_reply:%c msg_type:%s protocol_version:%hu", + info("%s: New agent thread_count:%d threads_active:%d retry:%c get_reply:%c r_uid:%u msg_type:%s protocol_version:%hu", __func__, agent_info_ptr->thread_count, agent_info_ptr->threads_active, agent_info_ptr->retry ? 'T' : 'F', agent_info_ptr->get_reply ? 'T' : 'F', + agent_info_ptr->r_uid, rpc_num2string(agent_arg_ptr->msg_type), agent_info_ptr->protocol_version); } + /* start all the other threads (up to AGENT_THREAD_COUNT active) */ for (i = 0; i < agent_info_ptr->thread_count; i++) { /* wait until "room" for another thread */ @@ -421,6 +425,11 @@ static int _valid_agent_arg(agent_arg_t *agent_arg_ptr) __func__, agent_arg_ptr->node_count, hostlist_cnt); return SLURM_ERROR; /* no messages to be sent */ } + if (!agent_arg_ptr->r_uid_set) { + error("%s: r_uid not set for message:%u ", + __func__, agent_arg_ptr->msg_type); + return SLURM_ERROR; + } return SLURM_SUCCESS; } @@ -443,6 +452,7 @@ static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr) thread_ptr = xcalloc(agent_info_ptr->thread_count, sizeof(thd_t)); memset(thread_ptr, 0, (agent_info_ptr->thread_count * sizeof(thd_t))); agent_info_ptr->thread_struct = thread_ptr; + agent_info_ptr->r_uid = agent_arg_ptr->r_uid; agent_info_ptr->msg_type = agent_arg_ptr->msg_type; agent_info_ptr->msg_args_pptr = &agent_arg_ptr->msg_args; agent_info_ptr->protocol_version = agent_arg_ptr->protocol_version; @@ -529,6 +539,7 @@ static task_info_t *_make_task_data(agent_info_t *agent_info_ptr, int inx) task_info_ptr->threads_active_ptr= &agent_info_ptr->threads_active; task_info_ptr->thread_struct_ptr = &agent_info_ptr->thread_struct[inx]; task_info_ptr->get_reply = agent_info_ptr->get_reply; + task_info_ptr->r_uid = agent_info_ptr->r_uid; task_info_ptr->msg_type = agent_info_ptr->msg_type; task_info_ptr->msg_args_ptr = *agent_info_ptr->msg_args_pptr; 
task_info_ptr->protocol_version = agent_info_ptr->protocol_version; @@ -931,6 +942,7 @@ static void *_thread_per_group_rpc(void *args) msg.msg_type = msg_type; msg.data = task_ptr->msg_args_ptr; + slurm_msg_set_r_uid(&msg, task_ptr->r_uid); if (slurmctld_conf.debug_flags & DEBUG_FLAG_AGENT) { info("%s: sending %s to %s", __func__, rpc_num2string(msg_type), @@ -1304,6 +1316,8 @@ static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count) agent_arg_ptr->msg_args = *(agent_info_ptr->msg_args_pptr); *(agent_info_ptr->msg_args_pptr) = NULL; + set_agent_arg_r_uid(agent_arg_ptr, agent_info_ptr->r_uid); + j = 0; for (i = 0; i < agent_info_ptr->thread_count; i++) { if (!thread_ptr[i].ret_list) { @@ -2312,3 +2326,10 @@ static void _reboot_from_ctld(agent_arg_t *agent_arg_ptr) } xfree(argv[1]); } + +/* Set r_uid of agent_arg */ +extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid) +{ + agent_arg_ptr->r_uid = r_uid; + agent_arg_ptr->r_uid_set = true; +} diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h index 2da751b1f6..2da73b0d37 100644 --- a/src/slurmctld/agent.h +++ b/src/slurmctld/agent.h @@ -54,6 +54,8 @@ typedef struct agent_arg { uint32_t node_count; /* number of nodes to communicate * with */ uint16_t retry; /* if set, keep trying */ + uid_t r_uid; /* receiver UID */ + bool r_uid_set; /* True if receiver UID set*/ slurm_addr_t *addr; /* if set will send to this addr not hostlist */ hostlist_t hostlist; /* hostlist containing the @@ -114,4 +116,7 @@ extern void mail_job_info(job_record_t *job_ptr, uint16_t mail_type); /* Return length of agent's retry_list */ extern int retry_list_size(void); +/* Set r_uid of agent_arg */ +extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid); + #endif /* !_AGENT_H */ diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index 17b4c2f6d2..70cf9e2750 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -394,8 +394,9 @@ static void *_background_rpc_mgr(void 
*no_data) slurm_msg_t_init(&msg); if (slurm_receive_msg(newsockfd, &msg, 0) != 0) error("slurm_receive_msg: %m"); + else + error_code = _background_process_msg(&msg); - error_code = _background_process_msg(&msg); if ((error_code == SLURM_SUCCESS) && (msg.msg_type == REQUEST_SHUTDOWN_IMMEDIATE) && (slurmctld_config.shutdown_time == 0)) @@ -420,6 +421,10 @@ static int _background_process_msg(slurm_msg_t *msg) int error_code = SLURM_SUCCESS; bool send_rc = true; + if (!msg->auth_uid_set) + fatal("%s: received message without previously validated auth", + __func__); + if (msg->msg_type != REQUEST_PING) { bool super_user = false; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred); @@ -471,6 +476,7 @@ static void *_ping_ctld_thread(void *arg) slurm_msg_t_init(&req); slurm_set_addr(&req.address, ping->slurmctld_port, ping->control_addr); req.msg_type = REQUEST_CONTROL_STATUS; + slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY); if (slurm_send_recv_node_msg(&req, &resp, 0) == SLURM_SUCCESS) { switch (resp.msg_type) { case RESPONSE_CONTROL_STATUS: @@ -607,6 +613,7 @@ static void *_shutdown_controller(void *arg) xfree(arg); slurm_msg_t_init(&req); + slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id); slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, slurmctld_conf.control_addr[shutdown_inx]); req.msg_type = REQUEST_CONTROL; diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 99478f4381..611a4e210c 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1886,6 +1886,7 @@ static void _queue_reboot_msg(void) reboot_agent_args->hostlist); debug("Queuing reboot request for nodes %s", host_str); xfree(host_str); + set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(reboot_agent_args); last_node_update = now; schedule_node_save(); @@ -2747,6 +2748,7 @@ static void *_shutdown_bu_thread(void *arg) xfree(arg); slurm_msg_t_init(&req); + slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id); 
slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, slurmctld_conf.control_addr[bu_inx]); req.msg_type = REQUEST_CONTROL; diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c index fe9571be02..a461cbd68e 100644 --- a/src/slurmctld/fed_mgr.c +++ b/src/slurmctld/fed_mgr.c @@ -368,6 +368,8 @@ static int _open_controller_conn(slurmdb_cluster_rec_t *cluster, bool locked) persist_conn->rem_port = cluster->control_port; } + persist_conn->r_uid = SLURM_AUTH_UID_ANY; + rc = slurm_persist_conn_open(persist_conn); if (rc != SLURM_SUCCESS) { if (_comm_fail_log(cluster)) { diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index b2d0fc987e..702e6ae38d 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -5954,6 +5954,7 @@ static void _signal_batch_job(job_record_t *job_ptr, uint16_t signal, signal_tasks_msg->signal = signal; agent_args->msg_args = signal_tasks_msg; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -14343,8 +14344,7 @@ extern int update_job_str(slurm_msg_t *msg, uid_t uid) reply: if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) { - slurm_msg_t_init(&resp_msg); - resp_msg.protocol_version = msg->protocol_version; + response_init(&resp_msg, msg); if (resp_array) { resp_array_msg = _resp_array_xlate(resp_array, job_id); resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS; @@ -14354,7 +14354,6 @@ reply: rc_msg.return_code = rc; resp_msg.data = &rc_msg; } - resp_msg.conn = msg->conn; slurm_send_node_msg(msg->conn_fd, &resp_msg); if (resp_array_msg) { @@ -14441,6 +14440,7 @@ static void _send_job_kill(job_record_t *job_ptr) } agent_args->msg_args = kill_job; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -14857,6 +14857,7 @@ extern void abort_job_on_node(uint32_t job_id, job_record_t *job_ptr, agent_info->msg_type = REQUEST_ABORT_JOB; agent_info->msg_args = kill_req; + set_agent_arg_r_uid(agent_info, 
SLURM_AUTH_UID_ANY); agent_queue_request(agent_info); } @@ -14924,6 +14925,7 @@ extern void abort_job_on_nodes(job_record_t *job_ptr, agent_info->msg_type = REQUEST_ABORT_JOB; agent_info->msg_args = kill_req; agent_info->protocol_version = protocol_version; + set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY); agent_queue_request(agent_info); bit_free(tmp_node_bitmap); } @@ -14976,6 +14978,7 @@ extern void kill_job_on_node(job_record_t *job_ptr, agent_info->msg_type = REQUEST_TERMINATE_JOB; agent_info->msg_args = kill_req; + set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY); agent_queue_request(agent_info); } @@ -15970,6 +15973,7 @@ static void _signal_job(job_record_t *job_ptr, int signal, uint16_t flags) } agent_args->msg_args = signal_job_msg; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -16049,6 +16053,7 @@ static void _suspend_job(job_record_t *job_ptr, uint16_t op, bool indf_susp) } agent_args->msg_args = sus_ptr; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -16414,6 +16419,7 @@ extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid, memset(&rc_msg, 0, sizeof(rc_msg)); rc_msg.return_code = rc; resp_msg.data = &rc_msg; + slurm_msg_set_r_uid(&resp_msg, uid); slurm_send_node_msg(conn_fd, &resp_msg); } return rc; @@ -16564,6 +16570,7 @@ extern int job_suspend2(suspend_msg_t *sus_ptr, uid_t uid, rc_msg.return_code = rc; resp_msg.data = &rc_msg; } + slurm_msg_set_r_uid(&resp_msg, uid); slurm_send_node_msg(conn_fd, &resp_msg); if (resp_array_msg) { @@ -17299,6 +17306,7 @@ reply: FREE_NULL_LIST(top_job_list); memset(&rc_msg, 0, sizeof(rc_msg)); rc_msg.return_code = rc; resp_msg.data = &rc_msg; + slurm_msg_set_r_uid(&resp_msg, uid); slurm_send_node_msg(conn_fd, &resp_msg); } diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 59406e43be..60e18462f3 100644 --- a/src/slurmctld/job_scheduler.c +++ 
b/src/slurmctld/job_scheduler.c @@ -2575,6 +2575,7 @@ extern void launch_job(job_record_t *job_ptr) agent_arg_ptr->hostlist = hostlist_create(launch_job_ptr->batch_host); agent_arg_ptr->msg_type = REQUEST_BATCH_JOB_LAUNCH; agent_arg_ptr->msg_args = (void *) launch_msg_ptr; + set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY); /* Launch the RPC via agent */ agent_queue_request(agent_arg_ptr); @@ -4318,6 +4319,7 @@ extern int reboot_job_nodes(job_record_t *job_ptr) rc = SLURM_ERROR; } xfree(nodes); + set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(reboot_agent_args); } @@ -4349,6 +4351,7 @@ extern int reboot_job_nodes(job_record_t *job_ptr) rc = SLURM_ERROR; } xfree(nodes); + set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(reboot_agent_args); } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 87cb0569cc..df7dec5173 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -3551,6 +3551,7 @@ void msg_to_slurmd (slurm_msg_type_t msg_type) xfree (kill_agent_args); } else { debug ("Spawning agent msg_type=%d", msg_type); + set_agent_arg_r_uid(kill_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(kill_agent_args); } } @@ -3612,6 +3613,7 @@ void push_reconfig_to_slurmd(void) xfree(new_args); } else { debug("Spawning agent msg_type=%d", new_args->msg_type); + set_agent_arg_r_uid(new_args, SLURM_AUTH_UID_ANY); agent_queue_request(new_args); } @@ -3620,6 +3622,7 @@ void push_reconfig_to_slurmd(void) xfree(old_args); } else { debug("Spawning agent msg_type=%d", old_args->msg_type); + set_agent_arg_r_uid(old_args, SLURM_AUTH_UID_ANY); agent_queue_request(old_args); } #else diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index d6c35f9604..e02088c853 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -632,6 +632,7 @@ extern void deallocate_nodes(job_record_t *job_ptr, bool timeout, } agent_args->msg_args = 
kill_job; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -3388,6 +3389,7 @@ extern void launch_prolog(job_record_t *job_ptr) select_g_step_start(build_extern_step(job_ptr)); /* Launch the RPC via agent */ + set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY); agent_queue_request(agent_arg_ptr); } @@ -4748,6 +4750,7 @@ extern void re_kill_job(job_record_t *job_ptr) last_job_id = job_ptr->job_id; hostlist_destroy(kill_hostlist); agent_args->msg_args = kill_job; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c index 8459f05137..5789634601 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -352,6 +352,7 @@ void ping_nodes (void) debug("Spawning ping agent for %s", host_str); xfree(host_str); ping_begin(); + set_agent_arg_r_uid(ping_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(ping_agent_args); } @@ -366,6 +367,7 @@ void ping_nodes (void) host_str, reg_agent_args->node_count); xfree(host_str); ping_begin(); + set_agent_arg_r_uid(reg_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(reg_agent_args); } @@ -522,6 +524,7 @@ extern void run_health_check(void) debug("Spawning health check agent for %s", host_str); xfree(host_str); ping_begin(); + set_agent_arg_r_uid(check_agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(check_agent_args); } } @@ -581,6 +584,7 @@ extern void update_nodes_acct_gather_data(void) info("Updating acct_gather data for %s", host_str); xfree(host_str); ping_begin(); + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); } } diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index da234ac77c..df29339da2 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -662,6 +662,8 @@ extern void response_init(slurm_msg_t *resp, slurm_msg_t *msg) resp->conn = msg->conn; resp->flags = 
msg->flags; resp->protocol_version = msg->protocol_version; + if (msg->auth_uid_set) + slurm_msg_set_r_uid(resp, msg->auth_uid); } /* diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c index 5781708830..1011229729 100644 --- a/src/slurmctld/srun_comm.c +++ b/src/slurmctld/srun_comm.c @@ -57,7 +57,7 @@ */ static void _srun_agent_launch(slurm_addr_t *addr, char *host, slurm_msg_type_t type, void *msg_args, - uint16_t protocol_version) + uid_t r_uid, uint16_t protocol_version) { agent_arg_t *agent_args = xmalloc(sizeof(agent_arg_t)); @@ -67,6 +67,7 @@ static void _srun_agent_launch(slurm_addr_t *addr, char *host, agent_args->hostlist = hostlist_create(host); agent_args->msg_type = type; agent_args->msg_args = msg_args; + set_agent_arg_r_uid(agent_args, r_uid); agent_args->protocol_version = protocol_version; agent_queue_request(agent_args); @@ -145,6 +146,7 @@ extern void srun_allocate(job_record_t *job_ptr) msg_arg = build_alloc_msg(job_ptr, SLURM_SUCCESS, NULL); _srun_agent_launch(addr, job_ptr->alloc_node, RESPONSE_RESOURCE_ALLOCATION, msg_arg, + job_ptr->user_id, job_ptr->start_protocol_ver); } else if (_pending_het_jobs(job_ptr)) { return; @@ -169,6 +171,7 @@ extern void srun_allocate(job_record_t *job_ptr) list_iterator_destroy(iter); _srun_agent_launch(addr, job_ptr->alloc_node, RESPONSE_HET_JOB_ALLOCATION, job_resp_list, + job_ptr->user_id, job_ptr->start_protocol_ver); } else { error("%s: Can not find hetjob leader %pJ", @@ -194,7 +197,7 @@ extern void srun_allocate_abort(job_record_t *job_ptr) msg_arg->step_id = NO_VAL; _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_JOB_COMPLETE, - msg_arg, + msg_arg, job_ptr->user_id, job_ptr->start_protocol_ver); } } @@ -247,7 +250,8 @@ extern void srun_node_fail(job_record_t *job_ptr, char *node_name) msg_arg->step_id = step_ptr->step_id; msg_arg->nodelist = xstrdup(node_name); _srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL, - msg_arg, step_ptr->start_protocol_ver); + msg_arg, 
job_ptr->user_id, + step_ptr->start_protocol_ver); } list_iterator_destroy(step_iterator); @@ -259,7 +263,8 @@ extern void srun_node_fail(job_record_t *job_ptr, char *node_name) msg_arg->step_id = NO_VAL; msg_arg->nodelist = xstrdup(node_name); _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL, - msg_arg, job_ptr->start_protocol_ver); + msg_arg, job_ptr->user_id, + job_ptr->start_protocol_ver); } } @@ -293,7 +298,7 @@ extern void srun_ping (void) msg_arg->job_id = job_ptr->job_id; msg_arg->step_id = NO_VAL; _srun_agent_launch(addr, job_ptr->alloc_node, - SRUN_PING, msg_arg, + SRUN_PING, msg_arg, job_ptr->user_id, job_ptr->start_protocol_ver); } } @@ -324,6 +329,7 @@ extern void srun_step_timeout(step_record_t *step_ptr, time_t timeout_val) msg_arg->step_id = step_ptr->step_id; msg_arg->timeout = timeout_val; _srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT, msg_arg, + step_ptr->job_ptr->user_id, step_ptr->start_protocol_ver); } @@ -350,7 +356,8 @@ extern void srun_timeout(job_record_t *job_ptr) msg_arg->step_id = NO_VAL; msg_arg->timeout = job_ptr->end_time; _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT, - msg_arg, job_ptr->start_protocol_ver); + msg_arg, job_ptr->user_id, + job_ptr->start_protocol_ver); } @@ -380,7 +387,8 @@ extern int srun_user_message(job_record_t *job_ptr, char *msg) msg_arg->job_id = job_ptr->job_id; msg_arg->msg = xstrdup(msg); _srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG, - msg_arg, job_ptr->start_protocol_ver); + msg_arg, job_ptr->user_id, + job_ptr->start_protocol_ver); return SLURM_SUCCESS; } else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) { #ifndef HAVE_FRONT_END @@ -419,6 +427,7 @@ extern int srun_user_message(job_record_t *job_ptr, char *msg) agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY; agent_arg_ptr->msg_args = (void *) notify_msg_ptr; /* Launch the RPC via agent */ + set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY); agent_queue_request(agent_arg_ptr); return SLURM_SUCCESS; } 
@@ -446,6 +455,7 @@ extern void srun_job_complete(job_record_t *job_ptr) msg_arg->step_id = NO_VAL; _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_JOB_COMPLETE, msg_arg, + job_ptr->user_id, job_ptr->start_protocol_ver); } @@ -480,6 +490,7 @@ extern bool srun_job_suspend(job_record_t *job_ptr, uint16_t op) msg_arg->op = op; _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_REQUEST_SUSPEND, msg_arg, + job_ptr->user_id, job_ptr->start_protocol_ver); msg_sent = true; } @@ -503,7 +514,8 @@ extern void srun_step_complete(step_record_t *step_ptr) msg_arg->job_id = step_ptr->job_ptr->job_id; msg_arg->step_id = step_ptr->step_id; _srun_agent_launch(addr, step_ptr->host, SRUN_JOB_COMPLETE, - msg_arg, step_ptr->start_protocol_ver); + msg_arg, step_ptr->job_ptr->user_id, + step_ptr->start_protocol_ver); } } @@ -527,7 +539,8 @@ extern void srun_step_missing(step_record_t *step_ptr, char *node_list) msg_arg->step_id = step_ptr->step_id; msg_arg->nodelist = xstrdup(node_list); _srun_agent_launch(addr, step_ptr->host, SRUN_STEP_MISSING, - msg_arg, step_ptr->start_protocol_ver); + msg_arg, step_ptr->job_ptr->user_id, + step_ptr->start_protocol_ver); } } @@ -551,7 +564,8 @@ extern void srun_step_signal(step_record_t *step_ptr, uint16_t signal) msg_arg->job_step_id = step_ptr->step_id; msg_arg->signal = signal; _srun_agent_launch(addr, step_ptr->host, SRUN_STEP_SIGNAL, - msg_arg, step_ptr->start_protocol_ver); + msg_arg, step_ptr->job_ptr->user_id, + step_ptr->start_protocol_ver); } } @@ -582,7 +596,8 @@ extern void srun_exec(step_record_t *step_ptr, char **argv) for (i=0; i<cnt ; i++) msg_arg->argv[i] = xstrdup(argv[i]); _srun_agent_launch(addr, step_ptr->host, SRUN_EXEC, - msg_arg, step_ptr->start_protocol_ver); + msg_arg, step_ptr->job_ptr->user_id, + step_ptr->start_protocol_ver); } else { error("srun_exec %pS lacks communication channel", step_ptr); diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 236a66c805..f68f4348c9 100644 --- 
a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -670,6 +670,7 @@ void signal_step_tasks(step_record_t *step_ptr, uint16_t signal, } agent_args->msg_args = signal_tasks_msg; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -715,6 +716,7 @@ void signal_step_tasks_on_node(char* node_name, step_record_t *step_ptr, signal_tasks_msg->job_step_id = step_ptr->step_id; signal_tasks_msg->signal = signal; agent_args->msg_args = signal_tasks_msg; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } @@ -4252,6 +4254,7 @@ static void _signal_step_timelimit(job_record_t *job_ptr, step_record_t *step_pt } agent_args->msg_args = kill_step; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 65b33d382f..80699e5318 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -494,7 +494,7 @@ rwfail: static int _send_slurmstepd_init(int fd, int type, void *req, - slurm_addr_t *cli, slurm_addr_t *self, + slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self, hostset_t step_hset, uint16_t protocol_version) { int len = 0; @@ -623,6 +623,7 @@ _send_slurmstepd_init(int fd, int type, void *req, safe_write(fd, get_buf_data(buffer), len); free_buf(buffer); buffer = NULL; + safe_write(fd, &cli_uid, sizeof(uid_t)); /* send self address over to slurmstepd */ if (self) { @@ -696,7 +697,7 @@ rwfail: */ static int _forkexec_slurmstepd(uint16_t type, void *req, - slurm_addr_t *cli, slurm_addr_t *self, + slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self, const hostset_t step_hset, uint16_t protocol_version) { pid_t pid; @@ -738,7 +739,7 @@ _forkexec_slurmstepd(uint16_t type, void *req, error("Unable to close write to_slurmd in parent: %m"); if ((rc = _send_slurmstepd_init(to_stepd[1], type, - req, cli, self, + req, cli, cli_uid, self, step_hset, protocol_version)) != 
0) { error("Unable to init slurmstepd"); @@ -1594,8 +1595,9 @@ _rpc_launch_tasks(slurm_msg_t *msg) } debug3("%s: call to _forkexec_slurmstepd", __func__); - errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli, &self, - step_hset, msg->protocol_version); + errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli, + msg->auth_uid, &self, step_hset, + msg->protocol_version); debug3("%s: return from _forkexec_slurmstepd", __func__); _launch_complete_add(req->job_id); @@ -2249,7 +2251,7 @@ static int _spawn_prolog_stepd(slurm_msg_t *msg) debug3("%s: call to _forkexec_slurmstepd", __func__); rc = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)launch_req, - cli, &self, step_hset, + cli, msg->auth_uid, &self, step_hset, msg->protocol_version); debug3("%s: return from _forkexec_slurmstepd %d", __func__, rc); @@ -2597,8 +2599,9 @@ _rpc_batch_job(slurm_msg_t *msg, bool new_msg) info("Launching batch job %u for UID %u", req->job_id, req->uid); debug3("_rpc_batch_job: call to _forkexec_slurmstepd"); - rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, NULL, - (hostset_t)NULL, SLURM_PROTOCOL_VERSION); + rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, + msg->auth_uid, NULL, (hostset_t)NULL, + SLURM_PROTOCOL_VERSION); debug3("_rpc_batch_job: return from _forkexec_slurmstepd: %d", rc); slurm_mutex_unlock(&launch_mutex); @@ -4515,7 +4518,7 @@ _rpc_reattach_tasks(slurm_msg_t *msg) /* Following call fills in gtids and local_pids when successful. */ rc = stepd_attach(fd, protocol_version, &ioaddr, - &resp_msg.address, job_cred_sig, resp); + &resp_msg.address, job_cred_sig, msg->auth_uid, resp); if (rc != SLURM_SUCCESS) { debug2("stepd_attach call failed"); goto done2; diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 2a59331546..8a7dd9c076 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -574,7 +574,11 @@ _service_connection(void *arg) * to are taken care of and sent back. 
This way the control * also has a better idea what happened to us */ - slurm_send_rc_msg(msg, rc); + if (msg->auth_uid_set) + slurm_send_rc_msg(msg, rc); + else + debug("%s: incomplete message", __func__); + goto cleanup; } debug2("Start processing RPC: %s", rpc_num2string(msg->msg_type)); diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c index d2abfa3be3..6bee94d135 100644 --- a/src/slurmd/slurmstepd/io.c +++ b/src/slurmd/slurmstepd/io.c @@ -1940,6 +1940,7 @@ _user_managed_io_connect(srun_info_t *srun, uint32_t gtid) slurm_msg_t_init(&msg); msg.protocol_version = srun->protocol_version; msg.msg_type = TASK_USER_MANAGED_IO_STREAM; + slurm_msg_set_r_uid(&msg, srun->uid); msg.data = &user_io_msg; user_io_msg.task_id = gtid; diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index e10348f4a9..1bb68c491f 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -161,7 +161,7 @@ typedef struct kill_thread { static bool _access(const char *path, int modes, uid_t uid, int ngids, gid_t *gids); static void _send_launch_failure(launch_tasks_request_msg_t *, - slurm_addr_t *, int, uint16_t); + slurm_addr_t *, uid_t, int, uint16_t); static int _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized); static int _become_user(stepd_step_rec_t *job, struct priv_state *ps); static void _set_prio_process (stepd_step_rec_t *job); @@ -199,7 +199,8 @@ static stepd_step_rec_t *reattach_job; */ extern stepd_step_rec_t * mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, - slurm_addr_t *self, uint16_t protocol_version) + uid_t cli_uid, slurm_addr_t *self, + uint16_t protocol_version) { stepd_step_rec_t *job = NULL; @@ -210,7 +211,8 @@ mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, * reset in _send_launch_failure. 
*/ int fail = errno; - _send_launch_failure(msg, cli, errno, protocol_version); + _send_launch_failure(msg, cli, cli_uid, errno, + protocol_version); errno = fail; return NULL; } @@ -665,6 +667,7 @@ _send_exit_msg(stepd_step_rec_t *job, uint32_t *tid, int n, int status) /* This should always be set to something else we have a bug. */ xassert(srun->protocol_version); resp.protocol_version = srun->protocol_version; + slurm_msg_set_r_uid(&resp, srun->uid); if (_send_srun_resp_msg(&resp, job->nnodes) != SLURM_SUCCESS) error("Failed to send MESSAGE_TASK_EXIT: %m"); @@ -762,6 +765,7 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last) } /*********************************************/ slurm_msg_t_init(&req); + slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id); req.msg_type = REQUEST_STEP_COMPLETE; req.data = &msg; req.address = step_complete.parent_addr; @@ -2402,8 +2406,8 @@ extern int stepd_drain_node(char *reason) } static void -_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc, - uint16_t protocol_version) +_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, + uid_t cli_uid, int rc, uint16_t protocol_version) { slurm_msg_t resp_msg; launch_tasks_response_msg_t resp; @@ -2439,6 +2443,7 @@ _send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc, resp_msg.data = &resp; resp_msg.msg_type = RESPONSE_LAUNCH_TASKS; resp_msg.protocol_version = protocol_version; + slurm_msg_set_r_uid(&resp_msg, cli_uid); resp.job_id = msg->job_id; resp.step_id = msg->job_step_id; @@ -2467,6 +2472,7 @@ _send_launch_resp(stepd_step_rec_t *job, int rc) slurm_msg_t_init(&resp_msg); resp_msg.address = srun->resp_addr; + slurm_msg_set_r_uid(&resp_msg, srun->uid); resp_msg.protocol_version = srun->protocol_version; resp_msg.data = &resp; resp_msg.msg_type = RESPONSE_LAUNCH_TASKS; diff --git a/src/slurmd/slurmstepd/mgr.h b/src/slurmd/slurmstepd/mgr.h index 5c2d62c2a5..4b5808661a 100644 --- 
a/src/slurmd/slurmstepd/mgr.h +++ b/src/slurmd/slurmstepd/mgr.h @@ -53,7 +53,7 @@ void batch_finish(stepd_step_rec_t *job, int rc); * Initialize a stepd_step_rec_t structure for a launch tasks */ stepd_step_rec_t *mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, - slurm_addr_t *client, + slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self, uint16_t protocol_version); diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index f8e11b4735..0aebeb7140 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -1021,6 +1021,7 @@ _handle_attach(int fd, stepd_step_rec_t *job, uid_t uid) safe_read(fd, &srun->ioaddr, sizeof(slurm_addr_t)); safe_read(fd, &srun->resp_addr, sizeof(slurm_addr_t)); safe_read(fd, srun->key, SLURM_IO_KEY_SIZE); + safe_read(fd, &srun->uid, sizeof(uid_t)); safe_read(fd, &srun->protocol_version, sizeof(uint16_t)); if (!srun->protocol_version) diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index c0c469eed5..e6b59c771a 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -75,15 +75,16 @@ #include "src/slurmd/slurmstepd/slurmstepd.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" -static int _init_from_slurmd(int sock, char **argv, slurm_addr_t **_cli, +static int _init_from_slurmd(int sock, char **argv, + slurm_addr_t **_cli, uid_t *_cli_uid, slurm_addr_t **_self, slurm_msg_t **_msg); static void _dump_user_env(void); static void _send_ok_to_slurmd(int sock); static void _send_fail_to_slurmd(int sock); static void _got_ack_from_slurmd(int); -static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, slurm_addr_t *self, - slurm_msg_t *msg); +static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, uid_t cli_uid, + slurm_addr_t *self, slurm_msg_t *msg); #ifdef MEMORY_LEAK_DEBUG static void _step_cleanup(stepd_step_rec_t *job, slurm_msg_t *msg, int rc); #endif @@ -107,6 +108,7 @@ main (int argc, char **argv) { log_options_t lopts = 
LOG_OPTS_INITIALIZER; slurm_addr_t *cli; + uid_t cli_uid; slurm_addr_t *self; slurm_msg_t *msg; stepd_step_rec_t *job; @@ -130,11 +132,11 @@ main (int argc, char **argv) fatal( "failed to initialize authentication plugin" ); /* Receive job parameters from the slurmd */ - _init_from_slurmd(STDIN_FILENO, argv, &cli, &self, &msg); + _init_from_slurmd(STDIN_FILENO, argv, &cli, &cli_uid, &self, &msg); /* Create the stepd_step_rec_t, mostly from info in a * launch_tasks_request_msg_t or a batch_job_launch_msg_t */ - if (!(job = _step_setup(cli, self, msg))) { + if (!(job = _step_setup(cli, cli_uid, self, msg))) { _send_fail_to_slurmd(STDOUT_FILENO); rc = SLURM_ERROR; goto ending; @@ -507,7 +509,8 @@ static void _set_job_log_prefix(uint32_t jobid, uint32_t stepid) */ static int _init_from_slurmd(int sock, char **argv, - slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg) + slurm_addr_t **_cli, uid_t *_cli_uid, slurm_addr_t **_self, + slurm_msg_t **_msg) { char *incoming_buffer = NULL; Buf buffer; @@ -515,6 +518,7 @@ _init_from_slurmd(int sock, char **argv, int len; uint16_t proto; slurm_addr_t *cli = NULL; + uid_t cli_uid; slurm_addr_t *self = NULL; slurm_msg_t *msg = NULL; uint16_t port; @@ -565,6 +569,7 @@ _init_from_slurmd(int sock, char **argv, if (slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR) fatal("slurmstepd: problem with unpack of slurmd_conf"); free_buf(buffer); + safe_read(sock, &cli_uid, sizeof(uid_t)); /* receive self from slurmd */ safe_read(sock, &len, sizeof(int)); @@ -650,6 +655,7 @@ _init_from_slurmd(int sock, char **argv, msg->protocol_version = proto; *_cli = cli; + *_cli_uid = cli_uid; *_self = self; *_msg = msg; @@ -661,7 +667,8 @@ rwfail: } static stepd_step_rec_t * -_step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg) +_step_setup(slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self, + slurm_msg_t *msg) { stepd_step_rec_t *job = NULL; @@ -672,7 +679,7 @@ _step_setup(slurm_addr_t *cli, slurm_addr_t *self, 
slurm_msg_t *msg) break; case REQUEST_LAUNCH_TASKS: debug2("setup for a launch_task"); - job = mgr_launch_tasks_setup(msg->data, cli, self, + job = mgr_launch_tasks_setup(msg->data, cli, cli_uid, self, msg->protocol_version); break; default: diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index f79ee97af7..d8ecff3772 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -445,7 +445,7 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg, memset(&io_addr, 0, sizeof(slurm_addr_t)); } - srun = srun_info_create(msg->cred, &resp_addr, &io_addr, + srun = srun_info_create(msg->cred, &resp_addr, &io_addr, job->uid, protocol_version); job->profile = msg->profile; @@ -608,7 +608,7 @@ batch_stepd_step_rec_create(batch_job_launch_msg_t *msg) get_cred_gres(msg->cred, conf->node_name, &job->job_gres_list, &job->step_gres_list); - srun = srun_info_create(NULL, NULL, NULL, NO_VAL16); + srun = srun_info_create(NULL, NULL, NULL, job->uid, NO_VAL16); list_append(job->sruns, (void *) srun); @@ -698,7 +698,7 @@ stepd_step_rec_destroy(stepd_step_rec_t *job) extern srun_info_t * srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr, - slurm_addr_t *ioaddr, uint16_t protocol_version) + slurm_addr_t *ioaddr, uid_t uid, uint16_t protocol_version) { char *data = NULL; uint32_t len = 0; @@ -709,6 +709,7 @@ srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr, if (!protocol_version || (protocol_version == NO_VAL16)) protocol_version = SLURM_PROTOCOL_VERSION; srun->protocol_version = protocol_version; + srun->uid = uid; /* * If no credential was provided, return the empty * srun info object. 
(This is used, for example, when diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index cb25a836c1..c31770a48f 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -68,6 +68,7 @@ typedef struct { slurm_addr_t ioaddr; /* Address to connect on for normal I/O. Spawn IO uses messages to the normal resp_addr. */ + uid_t uid; /* user id for job */ uint16_t protocol_version; /* protocol_version of the srun */ } srun_info_t; @@ -263,7 +264,8 @@ stepd_step_rec_t * batch_stepd_step_rec_create(batch_job_launch_msg_t *msg); void stepd_step_rec_destroy(stepd_step_rec_t *job); srun_info_t * srun_info_create(slurm_cred_t *cred, slurm_addr_t *respaddr, - slurm_addr_t *ioaddr, uint16_t protocol_version); + slurm_addr_t *ioaddr, uid_t uid, + uint16_t protocol_version); void srun_info_destroy(srun_info_t *srun); diff --git a/src/slurmd/slurmstepd/x11_forwarding.c b/src/slurmd/slurmstepd/x11_forwarding.c index 248fe63fed..1c56b90768 100644 --- a/src/slurmd/slurmstepd/x11_forwarding.c +++ b/src/slurmd/slurmstepd/x11_forwarding.c @@ -75,6 +75,8 @@ static eio_handle_t *eio_handle; /* Target salloc/srun host/port */ static slurm_addr_t alloc_node; +/* Target UID */ +static uid_t job_uid; /* X11 display hostname on target, or UNIX socket. */ static char *x11_target = NULL; /* X11 display port on target (if not a UNIX socket). 
*/ @@ -129,6 +131,7 @@ static int _x11_socket_read(eio_obj_t *obj, List objs) slurm_msg_t_init(&resp); req.msg_type = SRUN_NET_FORWARD; + slurm_msg_set_r_uid(&req, job_uid); req.data = &rpc; slurm_send_recv_msg(*remote, &req, &resp, 0); @@ -253,6 +256,7 @@ extern int setup_x11_forward(stepd_step_rec_t *job, int *display, xsignal_unblock(sig_array); slurm_set_addr(&alloc_node, job->x11_alloc_port, job->x11_alloc_host); + job_uid = job->uid; debug("X11Parameters: %s", conf->x11_params); diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c index 31b833fb00..d3f9580aa8 100644 --- a/src/slurmdbd/read_config.c +++ b/src/slurmdbd/read_config.c @@ -628,6 +628,7 @@ extern int read_slurmdbd_conf(void) if (!slurmdbd_conf->purge_usage) slurmdbd_conf->purge_usage = NO_VAL; + slurm_conf.last_update = time(NULL); slurm_mutex_unlock(&conf_mutex); return SLURM_SUCCESS; } diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c index b68c2d2802..5125c701dd 100644 --- a/src/slurmdbd/slurmdbd.c +++ b/src/slurmdbd/slurmdbd.c @@ -849,6 +849,7 @@ static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec) } else { slurm_msg_t out_msg; slurm_msg_t_init(&out_msg); + slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY); out_msg.msg_type = ACCOUNTING_REGISTER_CTLD; out_msg.flags = SLURM_GLOBAL_AUTH_KEY; out_msg.protocol_version = cluster_rec->rpc_version; -- 2.35.3 From 987b53a7e8feaec627acea1015b22eed6b3c9b1e Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:39:56 +0200 Subject: [PATCH 02/14] Convert slurm_conf.slurmd_user_id -> slurm_get_slurmd_user_id() Signed-off-by: Egbert Eich <eich@suse.com> --- src/api/signal.c | 4 ++-- src/common/slurm_protocol_api.c | 2 +- src/plugins/mpi/pmix/pmixp_dconn.c | 2 +- src/plugins/mpi/pmix/pmixp_server.c | 2 +- src/plugins/mpi/pmix/pmixp_utils.c | 2 +- src/slurmd/slurmstepd/mgr.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/api/signal.c b/src/api/signal.c 
index bd536e97f4..8659be7d49 100644 --- a/src/api/signal.c +++ b/src/api/signal.c @@ -102,7 +102,7 @@ static int _signal_batch_script_step(const resource_allocation_response_msg_t rpc.flags = KILL_JOB_BATCH; slurm_msg_t_init(&msg); - slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); + slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id()); msg.msg_type = REQUEST_SIGNAL_TASKS; msg.data = &rpc; if (slurm_conf_get_addr(name, &msg.address, msg.flags) @@ -161,7 +161,7 @@ static int _terminate_batch_script_step(const resource_allocation_response_msg_t slurm_msg_t_init(&msg); msg.msg_type = REQUEST_TERMINATE_TASKS; - slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); + slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id()); msg.data = &rpc; if (slurm_conf_get_addr(name, &msg.address, msg.flags) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index c0aa1fd2ec..44a70a1d25 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -4552,7 +4552,7 @@ static void _resp_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg, if (!msg->auth_uid_set) slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY); else if ((msg->auth_uid != slurm_conf.slurm_user_id) && - (msg->auth_uid != slurm_conf.slurmd_user_id)) + (msg->auth_uid != slurm_get_slurmd_user_id())) slurm_msg_set_r_uid(resp_msg, msg->auth_uid); else slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_UID_ANY); diff --git a/src/plugins/mpi/pmix/pmixp_dconn.c b/src/plugins/mpi/pmix/pmixp_dconn.c index 594230bfc1..6a1fb42732 100644 --- a/src/plugins/mpi/pmix/pmixp_dconn.c +++ b/src/plugins/mpi/pmix/pmixp_dconn.c @@ -79,7 +79,7 @@ int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr) _pmixp_dconn_conns[i].nodeid = i; _pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT; _pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr); - _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id; + _pmixp_dconn_conns[i].uid = slurm_get_slurmd_user_id(); } return SLURM_SUCCESS; } diff --git 
a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c index 4ced93c14b..a3861cb83a 100644 --- a/src/plugins/mpi/pmix/pmixp_server.c +++ b/src/plugins/mpi/pmix/pmixp_server.c @@ -546,7 +546,7 @@ static int _auth_cred_verify(Buf buf, uid_t *uid) } else { uid_t auth_uid; auth_uid = g_slurm_auth_get_uid(auth_cred); - if ((auth_uid != slurm_conf.slurmd_user_id) && + if ((auth_uid != slurm_get_slurmd_user_id()) && (auth_uid != _pmixp_job_info.uid)) { PMIXP_ERROR("Credential from uid %u", auth_uid); rc = SLURM_ERROR; diff --git a/src/plugins/mpi/pmix/pmixp_utils.c b/src/plugins/mpi/pmix/pmixp_utils.c index 5d22cacf83..4218629524 100644 --- a/src/plugins/mpi/pmix/pmixp_utils.c +++ b/src/plugins/mpi/pmix/pmixp_utils.c @@ -418,7 +418,7 @@ static int _pmix_p2p_send_core(const char *nodename, const char *address, msg.forward.timeout = timeout; msg.forward.cnt = 0; msg.forward.nodelist = NULL; - slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); + slurm_msg_set_r_uid(&msg, slurm_get_slurmd_user_id()); ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout); if (!ret_list) { /* This should never happen (when this was diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 1bb68c491f..887f0dbf6b 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -765,7 +765,7 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last) } /*********************************************/ slurm_msg_t_init(&req); - slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id); + slurm_msg_set_r_uid(&req, slurm_get_slurmd_user_id()); req.msg_type = REQUEST_STEP_COMPLETE; req.data = &msg; req.address = step_complete.parent_addr; -- 2.35.3 From 07e24624ced60de5becd105ecab906d15bd128b9 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:44:04 +0200 Subject: [PATCH 03/14] Convert slurm_conf.slurm_user_id -> slurm_get_slurm_user_id() Signed-off-by: Egbert Eich <eich@suse.com> --- 
src/api/reconfigure.c | 2 +- src/common/slurm_protocol_api.c | 4 ++-- src/slurmctld/backup.c | 2 +- src/slurmctld/controller.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c index 789a060777..f55f9e3c86 100644 --- a/src/api/reconfigure.c +++ b/src/api/reconfigure.c @@ -157,7 +157,7 @@ static int _send_message_controller(int dest, slurm_msg_t *req) slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR); } - slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id); + slurm_msg_set_r_uid(req, slurm_get_slurm_user_id()); if (slurm_send_node_msg(fd, req) < 0) { close(fd); slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR); diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 44a70a1d25..5668c945f1 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -4551,7 +4551,7 @@ static void _resp_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg, */ if (!msg->auth_uid_set) slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY); - else if ((msg->auth_uid != slurm_conf.slurm_user_id) && + else if ((msg->auth_uid != slurm_get_slurm_user_id()) && (msg->auth_uid != slurm_get_slurmd_user_id())) slurm_msg_set_r_uid(resp_msg, msg->auth_uid); else @@ -4966,7 +4966,7 @@ extern int slurm_send_only_controller_msg(slurm_msg_t *req, goto cleanup; } - slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id); + slurm_msg_set_r_uid(req, slurm_get_slurm_user_id()); if ((rc = slurm_send_node_msg(fd, req)) < 0) { rc = SLURM_ERROR; diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index 70cf9e2750..63b7209265 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -613,7 +613,7 @@ static void *_shutdown_controller(void *arg) xfree(arg); slurm_msg_t_init(&req); - slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id); + slurm_msg_set_r_uid(&req, slurm_get_slurm_user_id()); slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, 
slurmctld_conf.control_addr[shutdown_inx]); req.msg_type = REQUEST_CONTROL; diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 611a4e210c..016bb00b74 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -2748,7 +2748,7 @@ static void *_shutdown_bu_thread(void *arg) xfree(arg); slurm_msg_t_init(&req); - slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id); + slurm_msg_set_r_uid(&req, slurm_get_slurm_user_id()); slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, slurmctld_conf.control_addr[bu_inx]); req.msg_type = REQUEST_CONTROL; -- 2.35.3 From f8fe3288a412863f34ccc3210e6e3ed66b02fa78 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:46:37 +0200 Subject: [PATCH 04/14] Convert slurm_conf.last_update -> slurmctld_conf.last_update Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/slurm_protocol_api.c | 4 ++-- src/slurmdbd/read_config.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 5668c945f1..60a02b4499 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -989,10 +989,10 @@ static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg, static time_t config_update = (time_t) -1; static bool block_null_hash = true; - if (config_update != slurm_conf.last_update) { + if (config_update != slurmctld_conf.last_update) { block_null_hash = (xstrcasestr(slurm_conf.comm_params, "block_null_hash")); - config_update = slurm_conf.last_update; + config_update = slurmctld_conf.last_update; } rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len); diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c index d3f9580aa8..c5ec3ef7a7 100644 --- a/src/slurmdbd/read_config.c +++ b/src/slurmdbd/read_config.c @@ -628,7 +628,7 @@ extern int read_slurmdbd_conf(void) if (!slurmdbd_conf->purge_usage) slurmdbd_conf->purge_usage = NO_VAL; - 
slurm_conf.last_update = time(NULL); + slurmctld_conf.last_update = time(NULL); slurm_mutex_unlock(&conf_mutex); return SLURM_SUCCESS; } -- 2.35.3 From 94a2496e418566fde8600a57cfdf544c3e4bda9f Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:48:51 +0200 Subject: [PATCH 05/14] Convert slurm_conf.authinfo -> slurm_get_auth_info() Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/slurm_protocol_api.c | 4 ++-- src/plugins/mpi/pmi2/spawn.c | 2 +- src/plugins/mpi/pmix/pmixp_server.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 60a02b4499..dbee444d8c 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -4273,7 +4273,7 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg) } else { char *auth_info = slurm_get_auth_info(); auth_cred = g_slurm_auth_create(msg->auth_index, - slurm_conf.authinfo, + auth_info, msg->restrict_uid, auth_payload, sizeof(auth_payload)); xfree(auth_info); @@ -4300,7 +4300,7 @@ int slurm_send_node_msg(int fd, slurm_msg_t * msg) } else { char *auth_info = slurm_get_auth_info(); auth_cred = g_slurm_auth_create(msg->auth_index, - slurm_conf.authinfo, + auth_info, msg->restrict_uid, auth_payload, sizeof(auth_payload)); diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c index 182edae3e4..09aaed9434 100644 --- a/src/plugins/mpi/pmi2/spawn.c +++ b/src/plugins/mpi/pmi2/spawn.c @@ -151,7 +151,7 @@ spawn_req_pack(spawn_req_t *req, Buf buf) void *auth_cred; char *auth_info = slurm_get_auth_info(); - auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, slurm_conf.authinfo, + auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, auth_info, job_info.uid, NULL, 0); xfree(auth_info); if (auth_cred == NULL) { diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c index a3861cb83a..d80e64dd72 100644 --- a/src/plugins/mpi/pmix/pmixp_server.c +++ 
b/src/plugins/mpi/pmix/pmixp_server.c @@ -500,7 +500,7 @@ static int _auth_cred_create(Buf buf, uid_t uid) char *auth_info = slurm_get_auth_info(); int rc = SLURM_SUCCESS; - auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, slurm_conf.authinfo, + auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, auth_info, uid, NULL, 0); xfree(auth_info); if (!auth_cred) { -- 2.35.3 From 81ff4a47c6d5f398681aaff5cd9af2b1a84664c1 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:54:56 +0200 Subject: [PATCH 06/14] Convert log_flag_hex() to local _print_data() Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/slurm_protocol_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index dbee444d8c..8e274e0f6e 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -3855,7 +3855,9 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout) goto total_return; } - log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__); +#if _DEBUG + _print_data (buf, buflen); +#endif buffer = create_buf(buf, buflen); if (unpack_header(&header, buffer) == SLURM_ERROR) { -- 2.35.3 From 88f5016e5558949ee13b74531096fccc6754b81d Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:57:09 +0200 Subject: [PATCH 07/14] Convert slurm_conf.comm_params -> slurm_get_comm_parameters() Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/slurm_protocol_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 8e274e0f6e..158606f242 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -990,8 +990,10 @@ static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg, static bool block_null_hash = true; if (config_update != slurmctld_conf.last_update) { - block_null_hash = 
(xstrcasestr(slurm_conf.comm_params, + char * comm_parameters = slurm_get_comm_parameters(); + block_null_hash = (xstrcasestr(comm_parameters, "block_null_hash")); + xfree(comm_parameters); config_update = slurmctld_conf.last_update; } -- 2.35.3 From 6811cefd3353c46003adaf9c590bc3001817cbc6 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Tue, 10 May 2022 21:58:46 +0200 Subject: [PATCH 08/14] Convert slurm_conf.msg_timeout -> slurm_get_msg_timeout() Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/slurm_protocol_api.c | 8 ++++---- src/plugins/accounting_storage/slurmdbd/dbd_conn.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 158606f242..35d7b9e2a7 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -3820,12 +3820,12 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout) if (timeout <= 0) { /* convert secs to msec */ - timeout = slurm_conf.msg_timeout * 1000; + timeout = slurm_get_msg_timeout() * 1000; orig_timeout = timeout; } if (steps) { if (message_timeout < 0) - message_timeout = slurm_conf.msg_timeout * 1000; + message_timeout = slurm_get_msg_timeout() * 1000; orig_timeout = (timeout - (message_timeout*(steps-1)))/steps; steps--; @@ -3836,9 +3836,9 @@ List slurm_receive_resp_msgs(int fd, int steps, int timeout) /* we compare to the orig_timeout here because that is really * what we are going to wait for each step */ - if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) { + if (orig_timeout >= (slurm_get_msg_timeout() * 10000)) { log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds", - __func__, (slurm_conf.msg_timeout * 10), + __func__, (slurm_get_msg_timeout() * 10), (timeout/1000)); } else if (orig_timeout < 1000) { log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d 
milliseconds", diff --git a/src/plugins/accounting_storage/slurmdbd/dbd_conn.c b/src/plugins/accounting_storage/slurmdbd/dbd_conn.c index c9672a732f..f9060ef417 100644 --- a/src/plugins/accounting_storage/slurmdbd/dbd_conn.c +++ b/src/plugins/accounting_storage/slurmdbd/dbd_conn.c @@ -151,7 +151,7 @@ extern slurm_persist_conn_t *dbd_conn_open(uint16_t *persist_conn_flags, pc->cluster_name = xstrdup(cluster_name); else pc->cluster_name = xstrdup(slurm_conf.cluster_name); - pc->timeout = (slurm_conf.msg_timeout + 35) * 1000; + pc->timeout = (slurm_get_msg_timeout() + 35) * 1000; if (rem_host) pc->rem_host = xstrdup(rem_host); else -- 2.35.3 From fdb124f657db1f5492fa03c5195af52dc9cb1eea Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Wed, 11 May 2022 08:36:38 +0200 Subject: [PATCH 09/14] Fix g_slurm_auth_create in _pack_composite_msg Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/slurm_protocol_pack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 115ab383a1..bafde811c7 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2614,7 +2614,8 @@ _pack_composite_msg(composite_msg_t *msg, Buf buffer, uint16_t protocol_version) /* FIXME: this should handle the * _global_auth_key() as well. */ tmp_info->auth_cred = g_slurm_auth_create( - tmp_info->auth_index, auth_info); + tmp_info->auth_index, auth_info, + tmp_info->restrict_uid, NULL, 0); xfree(auth_info); } -- 2.35.3 From 07140634c1333ece666a2a64ef645c84c984bbdb Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Wed, 11 May 2022 13:42:41 +0200 Subject: [PATCH 10/14] Remove legacy stepd connect not used anymore. This will break backward compatibility to Slurm versions prior to 19.05. For normal Slurm installations using the SLE packages, this should not be a problem as slurmd and slurmstepd are running on the same systems and are always updated together. 
Signed-off-by: Egbert Eich <eich@suse.com> --- src/common/stepd_api.c | 80 ------------------------------------------ 1 file changed, 80 deletions(-) diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index ef80af2cd1..447b60c0a7 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -224,77 +224,6 @@ _guess_nodename(void) return nodename; } -/* - * Legacy version for connecting to pre-19.05 stepds. - * Remove this two versions after 19.05 is released. - */ -static int _stepd_connect_legacy(const char *directory, const char *nodename, - uint32_t jobid, uint32_t stepid, - uint16_t *protocol_version) -{ - int req = REQUEST_CONNECT; - int fd = -1; - int rc; - void *auth_cred; - char *auth_info; - char *local_nodename = NULL; - Buf buffer; - int len; - - buffer = init_buf(0); - /* Create an auth credential */ - auth_info = slurm_get_auth_info(); - auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, auth_info); - xfree(auth_info); - if (auth_cred == NULL) { - error("Creating authentication credential: %m"); - slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); - goto fail1; - } - - /* - * Pack the auth credential. - * Always send SLURM_MIN_PROTOCOL_VERSION since we don't know the - * version at the moment. 
- */ - rc = g_slurm_auth_pack(auth_cred, buffer, SLURM_MIN_PROTOCOL_VERSION); - (void) g_slurm_auth_destroy(auth_cred); - if (rc) { - error("Packing authentication credential: %m"); - slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); - goto fail1; - } - - /* Connect to the step */ - fd = _step_connect(directory, nodename, jobid, stepid); - if (fd == -1) - goto fail1; - - safe_write(fd, &req, sizeof(int)); - len = size_buf(buffer); - safe_write(fd, &len, sizeof(int)); - safe_write(fd, get_buf_data(buffer), len); - - safe_read(fd, &rc, sizeof(int)); - if (rc < 0) { - error("slurmstepd refused authentication: %m"); - slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); - goto rwfail; - } else if (rc) { - *protocol_version = rc; - } - - free_buf(buffer); - xfree(local_nodename); - return fd; - -rwfail: - close(fd); -fail1: - free_buf(buffer); - xfree(local_nodename); - return -1; -} /* * Connect to a slurmstepd proccess by way of its unix domain socket. @@ -347,15 +276,6 @@ extern int stepd_connect(const char *directory, const char *nodename, rwfail: close(fd); - /* - * Most likely case for ending up here is when connecting to a - * pre-19.05 stepd. Assume that the stepd shut the connection down - * since we sent SLURM_PROTOCOL_VERSION instead of SOCKET_CONNECT, - * and retry with the older connection style. Remove this fallback - * 2 versions after 19.05. 
- */ - fd = _stepd_connect_legacy(directory, nodename, jobid, stepid, - protocol_version); fail1: xfree(local_nodename); return fd; -- 2.35.3 From cca32e3c0c8a7d8b87a7fed1bfb9dbc3c1e75c3a Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Wed, 11 May 2022 17:49:40 +0200 Subject: [PATCH 11/14] Add set_agent_arg_r_uid() call to _xmit_new_end_time() Signed-off-by: Egbert Eich <eich@suse.com> --- src/slurmctld/job_mgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 702e6ae38d..e4c740c5d4 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -15265,6 +15265,7 @@ static void _xmit_new_end_time(job_record_t *job_ptr) #endif agent_args->msg_args = job_time_msg_ptr; + set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); agent_queue_request(agent_args); return; } -- 2.35.3 From aaa9c2ecb3a55278fea2531c1f37b921c8783043 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Wed, 11 May 2022 19:41:13 +0200 Subject: [PATCH 12/14] Disable Message Aggregation Signed-off-by: Egbert Eich <eich@suse.com> --- src/slurmd/slurmd/slurmd.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 8a7dd9c076..240814e85a 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -188,7 +188,9 @@ static void _decrement_thd_count(void); static void _destroy_conf(void); static int _drain_node(char *reason); static void _fill_registration_msg(slurm_node_registration_status_msg_t *); +#if 0 static uint64_t _get_int(const char *my_str); +#endif static void _handle_connection(int fd, slurm_addr_t *client); static void _hup_handler(int); static void _increment_thd_count(void); @@ -2334,6 +2336,7 @@ static int _set_topo_info(void) return rc; } +#if 0 static uint64_t _get_int(const char *my_str) { char *end = NULL; @@ -2346,23 +2349,33 @@ static uint64_t _get_int(const char *my_str) return NO_VAL; 
return value; } +#endif static uint64_t _parse_msg_aggr_params(int type, char *params) { uint64_t value = NO_VAL; +#if 0 char *sub_str = NULL; - +#endif if (!params) return NO_VAL; switch (type) { case WINDOW_TIME: + info("Message aggregation has been disabled, " + "please check SLE release notes!"); +#if 0 if ((sub_str = xstrcasestr(params, "WindowTime="))) value = _get_int(sub_str + 11); +#endif break; case WINDOW_MSGS: + info("Message aggregation has been disabled, " + "please check SLE release notes!"); +#if 0 if ((sub_str = xstrcasestr(params, "WindowMsgs="))) value = _get_int(sub_str + 11); +#endif break; default: fatal("invalid message aggregation parameters: %s", params); -- 2.35.3 From 8eae3fa02d6a932369bc1d6ca31c9bb38187b7d4 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Fri, 13 May 2022 09:01:15 +0200 Subject: [PATCH 13/14] Add missing slurm_msg_set_r_uid() Signed-off-by: Egbert Eich <eich@suse.com> --- src/slurmd/slurmstepd/mgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 887f0dbf6b..bf10a87bf7 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -2540,6 +2540,7 @@ _send_complete_batch_script_msg(stepd_step_rec_t *job, int err, int status) slurm_set_addr_char(&req_msg.address, conf->port, conf->hostname); } + slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); msg_rc = slurm_send_recv_rc_msg_only_one(&req_msg, &rc, 0); } -- 2.35.3 From 2bac013d24891a833a91eb82bc1c514959a6bcf1 Mon Sep 17 00:00:00 2001 From: Egbert Eich <eich@suse.com> Date: Wed, 10 Aug 2022 16:37:38 +0200 Subject: [PATCH 14/14] Make sure r_uid is set SLURM_AUTH_UID_ANY for database access Signed-off-by: Egbert Eich <eich@suse.com> --- src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c b/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c index 
26c6959ca3..4009b9d7f3 100644 --- a/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c +++ b/src/plugins/accounting_storage/slurmdbd/slurmdbd_agent.c @@ -618,6 +618,7 @@ static void _open_slurmdbd_conn(bool need_db, else memset(&slurmdbd_conn->trigger_callbacks, 0, sizeof(slurm_trigger_callbacks_t)); + slurmdbd_conn->r_uid = SLURM_AUTH_UID_ANY; } slurmdbd_shutdown = 0; slurmdbd_conn->shutdown = &slurmdbd_shutdown; -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor