Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-15-SP7:Update
openmpi3
rmaps-simplify-the-lookup-for-the-binding-objec...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File rmaps-simplify-the-lookup-for-the-binding-object-and-fix-for-hwloc-2.0.patch of Package openmpi3
commit 8992b7c5996de5e261bbfc9e57b270c8717852f9 Author: Brice Goglin <Brice.Goglin@inria.fr> Date: Fri Jan 26 16:19:52 2018 +0100 rmaps: simplify the lookup for the binding object and fix for hwloc 2.0 Don't bother doing a lookup upwards or downwards for the target object type. Just use the target depth, iterate over the level until we find the min_bound object that intersects the locale cpuset. Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr> diff --git orte/mca/rmaps/base/rmaps_base_binding.c orte/mca/rmaps/base/rmaps_base_binding.c index df3799947514..d6781608f36f 100644 --- orte/mca/rmaps/base/rmaps_base_binding.c +++ orte/mca/rmaps/base/rmaps_base_binding.c @@ -15,6 +15,7 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Inria. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -120,142 +121,21 @@ static void unbind_procs(orte_job_t *jdata) } } -static int bind_upwards(orte_job_t *jdata, +static int bind_generic(orte_job_t *jdata, orte_node_t *node, - hwloc_obj_type_t target, - unsigned cache_level) -{ - /* traverse the hwloc topology tree on each node upwards - * until we find an object of type target - and then bind - * the process to that target - */ - int j; - orte_job_map_t *map; - orte_proc_t *proc; - hwloc_obj_t obj; - unsigned int idx, ncpus; - opal_hwloc_obj_data_t *data; - hwloc_obj_t locale; - char *cpu_bitmap; - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: bind upwards for job %s with bindings %s", - ORTE_JOBID_PRINT(jdata->jobid), - opal_hwloc_base_print_binding(jdata->map->binding)); - /* initialize */ - map = jdata->map; - - - /* cycle thru the procs */ - for (j=0; j < node->procs->size; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { - continue; - } - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - continue; - } - /* bozo check */ - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { - orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-locale", true, ORTE_NAME_PRINT(&proc->name)); - return ORTE_ERR_SILENT; - } - /* starting at the locale, move up thru the parents - * to find the target object type - */ - cpu_bitmap = NULL; - for (obj = locale->parent; NULL != obj; obj = obj->parent) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s bind:upward target %s type %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - hwloc_obj_type_string(target), - hwloc_obj_type_string(obj->type)); - if (target == obj->type) { -#if HWLOC_API_VERSION < 0x20000 - if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { - continue; - } -#endif - /* get its index */ - if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology->topo, obj, OPAL_HWLOC_AVAILABLE))) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_SILENT; - } - /* track the number bound */ - data = (opal_hwloc_obj_data_t*)obj->userdata; - data->num_bound++; - /* get the number of cpus under this location */ - if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology->topo, obj))) { - orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); - return ORTE_ERR_SILENT; - } - /* error out if adding a proc would cause overload and that wasn't allowed, - * and it wasn't a default binding policy (i.e., the user requested it) - */ - if (ncpus < data->num_bound && - !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* if the user specified a binding policy, then we cannot meet - * it since overload isn't allowed, so error out - have the - * message indicate that setting overload allowed will remove - * this restriction */ - orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, - opal_hwloc_base_print_binding(map->binding), node->name, - data->num_bound, ncpus); - return ORTE_ERR_SILENT; - } else { - /* if we have the default binding policy, then just don't bind */ - OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE); - unbind_procs(jdata); - return ORTE_SUCCESS; - } - } - /* bind it here */ - hwloc_bitmap_list_asprintf(&cpu_bitmap, obj->cpuset); - orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); - /* record the location */ - orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, obj, OPAL_PTR); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s BOUND PROC %s TO %s[%s:%u] on node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), - cpu_bitmap, - hwloc_obj_type_string(target), - idx, node->name); - break; - } - } - if (NULL == cpu_bitmap && OPAL_BINDING_REQUIRED(jdata->map->binding)) { - /* didn't find anyone to bind to - this is an error - * unless the user specified if-supported - */ - orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-target-not-found", true, - opal_hwloc_base_print_binding(map->binding), node->name); - return ORTE_ERR_SILENT; - } - if (NULL != cpu_bitmap) { - free(cpu_bitmap); - } - } - - return ORTE_SUCCESS; -} - -static int bind_downwards(orte_job_t *jdata, - orte_node_t *node, - hwloc_obj_type_t target, - unsigned cache_level) + int target_depth) { int j; orte_job_map_t *map; orte_proc_t *proc; - hwloc_obj_t trg_obj, nxt_obj; + hwloc_obj_t trg_obj, tmp_obj, nxt_obj; unsigned int ncpus; opal_hwloc_obj_data_t *data; int total_cpus; hwloc_cpuset_t totalcpuset; hwloc_obj_t locale; char *cpu_bitmap; + unsigned min_bound; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bind downward for job %s with bindings %s", @@ -282,12 +162,24 @@ static int bind_downwards(orte_job_t *jdata, hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } - /* we don't know if the target is a direct child of this locale, - * or if it is some depth below it, so we have to conduct a bit - * of a search. Let hwloc find the min usage one for us. - */ - trg_obj = opal_hwloc_base_find_min_bound_target_under_obj(node->topology->topo, locale, - target, cache_level); + + /* use the min_bound object that intersects locale->cpuset at target_depth */ + tmp_obj = NULL; + trg_obj = NULL; + min_bound = UINT_MAX; + while (tmp_obj = hwloc_get_next_obj_by_depth(node->topology->topo, target_depth, tmp_obj)) { + if (!hwloc_bitmap_intersects(locale->cpuset, tmp_obj->cpuset)) + continue; + data = (opal_hwloc_obj_data_t*)tmp_obj->userdata; + if (NULL == data) { + data = OBJ_NEW(opal_hwloc_obj_data_t); + tmp_obj->userdata = data; + } + if (data->num_bound < min_bound) { + min_bound = data->num_bound; + trg_obj = tmp_obj; + } + } if (NULL == trg_obj) { /* there aren't any such targets under this object */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); @@ -296,6 +188,7 @@ static int bind_downwards(orte_job_t *jdata, } /* record the location */ orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, trg_obj, OPAL_PTR); + /* start with a clean slate */ hwloc_bitmap_zero(totalcpuset); total_cpus = 0; @@ -685,7 +578,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) int i, rc; struct hwloc_topology_support *support; bool force_down = false; - int bind_depth, map_depth; + int bind_depth; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: compute bindings for job %s with policy %s[%x]", @@ -904,62 +797,35 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) */ reset_usage(node, jdata->jobid); - if (force_down) { - if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - /* determine the relative depth on this node */ + /* determine the relative depth on this node */ #if HWLOC_API_VERSION < 0x20000 - if (HWLOC_OBJ_CACHE == hwb) { - /* must use a unique function because blasted hwloc - * just doesn't deal with caches very well...sigh - */ - bind_depth = hwloc_get_cache_type_depth(node->topology->topo, clvl, (hwloc_obj_cache_type_t)-1); - } else + if (HWLOC_OBJ_CACHE == hwb) { + /* must use a unique function because blasted hwloc + * just doesn't deal with caches very well...sigh + */ + bind_depth = hwloc_get_cache_type_depth(node->topology->topo, clvl, (hwloc_obj_cache_type_t)-1); + } else #endif - bind_depth = hwloc_get_type_depth(node->topology->topo, hwb); - if (0 > bind_depth) { - /* didn't find such an object */ - orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", - true, hwloc_obj_type_string(hwb), node->name); - return ORTE_ERR_SILENT; - } + bind_depth = hwloc_get_type_depth(node->topology->topo, hwb); #if HWLOC_API_VERSION < 0x20000 - if (HWLOC_OBJ_CACHE == hwm) { - /* must use a unique function because blasted hwloc - * just doesn't deal with caches very well...sigh - */ - map_depth = hwloc_get_cache_type_depth(node->topology->topo, clvm, (hwloc_obj_cache_type_t)-1); - } else + if (0 > bind_depth) #else - /* do something with clvm to silence compiler warnings */ - ++clvm; + if (0 > bind_depth && HWLOC_TYPE_DEPTH_NUMANODE != bind_depth) #endif - map_depth = hwloc_get_type_depth(node->topology->topo, hwm); - if (0 > map_depth) { - /* didn't find such an object */ - orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", - true, hwloc_obj_type_string(hwm), node->name); - return ORTE_ERR_SILENT; - } - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s bind_depth: %d map_depth %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - bind_depth, map_depth); - if (bind_depth > map_depth) { - if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - if (ORTE_SUCCESS != (rc = bind_upwards(jdata, node, hwb, clvl))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } + { + /* didn't find such an object */ + orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", + true, hwloc_obj_type_string(hwb), node->name); + return ORTE_ERR_SILENT; + } + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "%s bind_depth: %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + bind_depth); + if (ORTE_SUCCESS != (rc = bind_generic(jdata, node, bind_depth))) { + ORTE_ERROR_LOG(rc); + return rc; + } } return ORTE_SUCCESS;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor