Example #1
def thread_target():
    # `name`, `index`, `worker`, `consumer`, `work_queue` and the
    # `_prctl_available`/`_numa_available` flags come from the enclosing scope.
    try:
        logger.debug(f"thread '{name}' starting")
        if _prctl_available:
            prctl.set_name(name)
        if _numa_available and numa.available():
            numa.set_localalloc()
            logger.debug(
                f"Set NUMA local allocation policy on thread {name}")
        worker(index)
        logger.debug(f"thread '{name}' finishing")
    except Exception:
        logger.critical("Exception occurred in thread; exiting")
        logger.critical(traceback.format_exc())
        # Communicate back to the main thread that something bad has happened.
        # This seems to be the only reliable way to do it.
        _thread.interrupt_main()
        # Now we still need to make sure that the main thread doesn't block
        # on the queue.get/join (as it won't be interrupted). This is an attempt
        # to make sure that it unblocks. May not be fool-proof though.
        #
        # TODO This doesn't really work. We can still block on pushing things
        # onto the queue. We'll probably have to do something ourselves using
        # timeouts and stuff to see if an error has occurred.
        if consumer:
            while True:
                try:
                    work_queue.task_done()
                except ValueError:
                    break
        else:
            work_queue.put(None)
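As the TODO above admits, _thread.interrupt_main() does not unblock a main thread that is sitting in queue.get()/queue.join(), and a producer can still block on a full queue. Below is a minimal sketch of the timeout-based approach the comment hints at; error_event, worker_wrapper and produce are hypothetical names, not part of the original code.

import queue
import threading

# Hypothetical names: error_event and work_queue stand in for whatever the
# surrounding module actually uses; this only sketches the timeout idea.
error_event = threading.Event()
work_queue = queue.Queue(maxsize=16)

def worker_wrapper(worker, index):
    try:
        worker(index)
    except Exception:
        # Record the failure so other threads can notice it via the event,
        # instead of relying on interrupt_main() alone.
        error_event.set()
        raise

def produce(items):
    for item in items:
        while True:
            if error_event.is_set():
                raise RuntimeError("worker thread failed")
            try:
                # Short timeout so we periodically re-check the error flag
                # instead of blocking forever on a full queue.
                work_queue.put(item, timeout=0.5)
                break
            except queue.Full:
                continue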
Example #2
import sys

def check_numa():
    try:
        import numa
    except ImportError:
        return
    if not numa.available():
        return
    if numa.get_max_node() > 0 and len(numa.get_run_on_node_mask()) > 1:
        print("Warning: NUMA settings may be suboptimal!", file=sys.stderr)
Example #3
import itertools
from collections import deque

def coremap():
    try:
        import numa
    except ImportError:
        print('This script requires the libnuma python bindings')
        raise RuntimeError("Numa not available")
    if not numa.available():
        raise RuntimeError("Numa not available")
    node_to_core = {int(i): deque([int(k) for k in numa.node_to_cpus(i)])
                    for i in range(numa.get_max_node() + 1)}
    total_core = max(itertools.chain(*node_to_core.values())) + 1
    return node_to_core, total_core
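A possible usage sketch for coremap() (pick_cores is a hypothetical helper, not part of the original source): because each node maps to a deque, cores can be handed out one node at a time, spreading consecutive picks across NUMA nodes.

def pick_cores(n):
    # Hypothetical helper: hand out up to n cores, taking one core per NUMA
    # node in turn so consecutive picks spread across nodes where possible.
    node_to_core, total_core = coremap()
    picked = []
    while len(picked) < n and any(node_to_core.values()):
        for node in sorted(node_to_core):
            if len(picked) == n:
                break
            if node_to_core[node]:
                picked.append(node_to_core[node].popleft())
    return picked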
Example #4
import numa

def load_numa():
    """ Load information about core numbers and numa patterns """

    if not numa.available():
        raise Exception('Numa detection not available')

    max_node = numa.get_max_node()

    nodes = {}
    for i in range(max_node + 1):
        nodes[i] = list(numa.node_to_cpus(i))

    return nodes
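A short usage sketch (hypothetical, not part of the original example): inverting the mapping returned by load_numa() gives the NUMA node that owns any particular CPU id.

def cpu_to_node():
    # Hypothetical helper built on load_numa() above: CPU id -> NUMA node.
    nodes = load_numa()
    return {cpu: node for node, cpus in nodes.items() for cpu in cpus}

# Example: cpu_to_node().get(12) returns the NUMA node that owns CPU 12,
# or None if that CPU id does not exist on this machine.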
Example #5
    def _configure_numa(self):
        self._numa_available = \
            numa.available() and which('numactl') is not None
        if not self._numa_available:
            return
        num_numa_nodes = numa.get_max_node() + 1
        self._numa_cpu_map = {}
        num_gpus = len(self._gpu_ids)

        # Calculate how many CPUs to allocate for each GPU. Ensure this number
        # is a power of 2.
        num_cpus = 0
        for i in range(num_numa_nodes):
            num_cpus += len(numa.node_to_cpus(i))
        num_cpus_per_gpu = min(MAX_CPUS_PER_GPU, max(num_cpus // num_gpus, 1))
        num_cpus_per_gpu = pow(2, round(math.log(num_cpus_per_gpu, 2)))

        # Find blocks of contiguous CPUs.
        contiguous_blocks = []
        for i in range(num_numa_nodes):
            cpus = sorted(numa.node_to_cpus(i))
            contiguous_block = [cpus[0]]
            for j in range(1, len(cpus)):
                if (cpus[j] - cpus[j - 1] == 1
                        and len(contiguous_block) < num_cpus_per_gpu):
                    contiguous_block.append(cpus[j])
                else:
                    contiguous_blocks.append(
                        (contiguous_block, len(contiguous_block)))
                    contiguous_block = [cpus[j]]
            if len(contiguous_block) > 0:
                contiguous_blocks.append(
                    (contiguous_block, len(contiguous_block)))
        contiguous_blocks.sort(key=lambda x: x[-1], reverse=True)

        # Assign CPUs to GPUs.
        block_idx = 0
        for i in range(num_gpus):
            self._numa_cpu_map[i] = []
            while len(self._numa_cpu_map[i]) < num_cpus_per_gpu:
                self._numa_cpu_map[i] += contiguous_blocks[block_idx][0]
                block_idx = (block_idx + 1) % len(contiguous_blocks)
            self._logger.info('GPU {gpu} assigned CPUs {cpus}'.format(
                gpu=i, cpus=str(self._numa_cpu_map[i])))
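The rounding step above snaps the per-GPU CPU count to a power of two in log space, which is easy to misread. Below is a standalone toy sketch of just that step; MAX_CPUS_PER_GPU here is an illustrative value, the real constant is defined elsewhere in the original module.

import math

MAX_CPUS_PER_GPU = 8  # illustrative value, not taken from the original source

def cpus_per_gpu(num_cpus, num_gpus):
    # Same rounding as _configure_numa: cap, then snap to a power of 2
    # by rounding in log space.
    n = min(MAX_CPUS_PER_GPU, max(num_cpus // num_gpus, 1))
    return pow(2, round(math.log(n, 2)))

# 24 CPUs shared by 4 GPUs:  24 // 4 = 6,  round(log2(6)) = 3  -> 8 CPUs per GPU
# 24 CPUs shared by 16 GPUs: 24 // 16 = 1, round(log2(1)) = 0  -> 1 CPU per GPU
print(cpus_per_gpu(24, 4), cpus_per_gpu(24, 16))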
Example #6
    def _lazy_cpu_and_mem_set_init(self):
        # Implicitly assumes the caller already holds the lock.
        if len(self._numa_nodes) != 0:
            # Init already happened
            return
        if (self._available_cpu_ids is None or self._cpus_per_job is None
                or self._use_memset_of_nearest_node is None):
            raise Exception('Cannot do init. One or more params were None')
        import numa
        if not numa.available():
            raise Exception('NUMA not available')
        numa_nodes = list(range(0, numa.get_max_node() + 1))
        cpu_count = 0
        for numa_node in numa_nodes:
            cpus = numa.node_to_cpus(numa_node)
            for cpu_id in cpus:
                if cpu_id in self._available_cpu_ids:
                    try:
                        self._numa_nodes[numa_node].add(cpu_id)
                    except KeyError:
                        self._numa_nodes[numa_node] = set()
                        self._numa_nodes[numa_node].add(cpu_id)
                    try:
                        self._numa_node_pool[numa_node].add(cpu_id)
                    except KeyError:
                        self._numa_node_pool[numa_node] = set()
                        self._numa_node_pool[numa_node].add(cpu_id)
                    _logger.info(
                        'Putting CPU {} in NUMA node {} in resource pool'.
                        format(cpu_id, numa_node))
                    cpu_count += 1
                else:
                    _logger.info(
                        'CPU {} in NUMA node {} is NOT IN resource pool'.
                        format(cpu_id, numa_node))

        if cpu_count == 0:
            raise Exception('Found no available CPUs')
        if cpu_count != len(self._available_cpu_ids):
            raise Exception(
                'Mismatch between provided available CPU ids and what was found on system'
            )
        assert len(self._numa_node_pool) == len(self._numa_nodes)
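The try/except KeyError blocks above are one way to grow the per-node sets; the same grouping can be sketched more compactly with dict.setdefault. The helper below is a standalone, hypothetical rewrite of just that grouping step, using only numa calls that already appear in this example.

import numa

def group_available_cpus(available_cpu_ids):
    # Hypothetical standalone sketch of the grouping done in
    # _lazy_cpu_and_mem_set_init: NUMA node -> set of usable CPU ids.
    if not numa.available():
        raise RuntimeError('NUMA not available')
    numa_nodes = {}
    for node in range(numa.get_max_node() + 1):
        for cpu_id in numa.node_to_cpus(node):
            if cpu_id in available_cpu_ids:
                numa_nodes.setdefault(node, set()).add(cpu_id)
    return numa_nodes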
Example #7
    def test_available(self):
        self.failUnlessEqual(True, numa.available())
Example #8
    def test_available(self):
        self.assertEqual(True, numa.available())
Example #9
    def test_available(self):
        self.failUnlessEqual(True, numa.available())