Example #1
def onlinecpu():
    import numa

    o_cpus = []
    for node in range(0, numa.get_max_node() + 1):
        for cpu in sorted(numa.node_to_cpus(node)):
            o_cpus.append(cpu)

    return o_cpus
Example #2
def check_numa():
    import sys
    try:
        import numa
    except ImportError:
        return
    if not numa.available():
        return
    if numa.get_max_node() > 0 and len(numa.get_run_on_node_mask()) > 1:
        print("Warning: NUMA settings may be suboptimal!", file=sys.stderr)
Example #3
def setNuma():
    """Without subapAllocation... whole rmx on each numa node"""
    d = darc.Control()
    nthr = d.Get("ncamThreads").sum()
    nnodes = numa.get_max_node() + 1
    #specify which numa nodes the threads are closest to:
    threadToNuma = numpy.zeros(nthr, numpy.int32)  #all node 0 for now...!

    rmx = d.Get("gainReconmxT")
    d.Set("threadToNuma", threadToNuma)
    for i in range(nnodes):
        d.SetNuma("gainReconmxT%d" % i, rmx, i)
Example #4
def load_numa():
    """ Load information about core numbers and numa patterns """

    if not numa.available():
        raise Exception('Numa detection not available')

    max_node = numa.get_max_node()

    nodes = {}
    for i in range(max_node + 1):
        nodes[i] = list(numa.node_to_cpus(i))

    return nodes
Example #5
def coremap():
    try:
        import numa
    except ImportError:
        print('This script requires the libnuma python bindings')
        raise RuntimeError("Numa not available")
    if not numa.available():
        raise RuntimeError("Numa not available")
    node_to_core = {
        int(i): deque([int(k) for k in numa.node_to_cpus(i)])
        for i in range(numa.get_max_node() + 1)
    }
    total_core = max(itertools.chain(*node_to_core.values())) + 1
    return node_to_core, total_core
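
A possible usage sketch (illustrative, not part of the original example): because coremap() stores each node's cores in a deque, cores can be handed out round-robin across NUMA nodes with popleft(). The helper assign_cores below is hypothetical:

import itertools

def assign_cores(node_to_core, n):
    """Take up to n cores from the coremap() deques, alternating between nodes."""
    total = sum(len(cores) for cores in node_to_core.values())
    picked = []
    nodes = itertools.cycle(sorted(node_to_core))
    while len(picked) < min(n, total):
        node = next(nodes)
        if node_to_core[node]:
            picked.append(node_to_core[node].popleft())
    return picked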
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-membw", default="./membw/membw")
    parser.add_argument("-bandwidth", type=int, default=9999)
    parser.add_argument("-operation", default="nt-write")
    args = parser.parse_args()

    max_nid = numa.get_max_node()
    if max_nid < 1:  # need at least two NUMA nodes (ids 0 and 1)
        print("This tool requires two sockets at least")
        sys.exit()

    cpus = numa.node_to_cpus(max_nid)
    for cpuid in cpus:
        cmd = "%s -c %d -b %d --%s &" % (args.membw, cpuid, args.bandwidth, args.operation)
        print(cmd)
        os.system(cmd)
Example #7
    def _configure_numa(self):
        self._numa_available = \
            numa.available() and which('numactl') is not None
        if not self._numa_available:
            return
        num_numa_nodes = numa.get_max_node() + 1
        self._numa_cpu_map = {}
        num_gpus = len(self._gpu_ids)

        # Calculate how many CPUs to allocate for each GPU. Ensure this number
        # is a power of 2.
        num_cpus = 0
        for i in range(num_numa_nodes):
            num_cpus += len(numa.node_to_cpus(i))
        num_cpus_per_gpu = min(MAX_CPUS_PER_GPU, max(num_cpus // num_gpus, 1))
        num_cpus_per_gpu = pow(2, round(math.log(num_cpus_per_gpu, 2)))

        # Find blocks of contiguous CPUs.
        contiguous_blocks = []
        for i in range(num_numa_nodes):
            cpus = sorted(numa.node_to_cpus(i))
            contiguous_block = [cpus[0]]
            for j in range(1, len(cpus)):
                if (cpus[j] - cpus[j - 1] == 1
                        and len(contiguous_block) < num_cpus_per_gpu):
                    contiguous_block.append(cpus[j])
                else:
                    contiguous_blocks.append(
                        (contiguous_block, len(contiguous_block)))
                    contiguous_block = [cpus[j]]
            if len(contiguous_block) > 0:
                contiguous_blocks.append(
                    (contiguous_block, len(contiguous_block)))
        contiguous_blocks.sort(key=lambda x: x[-1], reverse=True)

        # Assign CPUs to GPUs.
        block_idx = 0
        for i in range(num_gpus):
            self._numa_cpu_map[i] = []
            while len(self._numa_cpu_map[i]) < num_cpus_per_gpu:
                self._numa_cpu_map[i] += contiguous_blocks[block_idx][0]
                block_idx = (block_idx + 1) % len(contiguous_blocks)
            self._logger.info('GPU {gpu} assigned CPUs {cpus}'.format(
                gpu=i, cpus=str(self._numa_cpu_map[i])))
Example #8
    def _lazy_cpu_and_mem_set_init(self):
        # Implicitly assume lock is already held
        if len(self._numa_nodes) != 0:
            # Init already happened
            return
        if (self._available_cpu_ids is None or self._cpus_per_job is None
                or self._use_memset_of_nearest_node is None):
            raise Exception('Cannot do init. One or more params were None')
        import numa
        if not numa.available():
            raise Exception('NUMA not available')
        numa_nodes = list(range(0, numa.get_max_node() + 1))
        cpu_count = 0
        for numa_node in numa_nodes:
            cpus = numa.node_to_cpus(numa_node)
            for cpu_id in cpus:
                if cpu_id in self._available_cpu_ids:
                    try:
                        self._numa_nodes[numa_node].add(cpu_id)
                    except KeyError:
                        self._numa_nodes[numa_node] = set()
                        self._numa_nodes[numa_node].add(cpu_id)
                    try:
                        self._numa_node_pool[numa_node].add(cpu_id)
                    except KeyError:
                        self._numa_node_pool[numa_node] = set()
                        self._numa_node_pool[numa_node].add(cpu_id)
                    _logger.info(
                        'Putting CPU {} in NUMA node {} in resource pool'.
                        format(cpu_id, numa_node))
                    cpu_count += 1
                else:
                    _logger.info(
                        'CPU {} in NUMA node {} is NOT IN resource pool'.
                        format(cpu_id, numa_node))

        if cpu_count == 0:
            raise Exception('Found no available CPUs')
        if cpu_count != len(self._available_cpu_ids):
            raise Exception(
                'Mismatch between provided available CPU ids and what was found on system'
            )
        assert len(self._numa_node_pool) == len(self._numa_nodes)
Example #9
def setNuma2():
    """With subapAllocation: partial rmx on each numa node"""
    d = darc.Control()
    nthr = d.Get("ncamThreads").sum()
    nnodes = numa.get_max_node() + 1
    #specify which numa nodes the threads are closest to:
    threadToNuma = numpy.zeros(nthr, numpy.int32)  #all node 0 for now...!

    sf = d.Get("subapFlag")
    nsub = sf.size
    sa = numpy.zeros(nsub, numpy.int32)
    #divide all threads equally...
    sa[:] = nthr - 1
    start = 0
    for i in range(nthr):
        end = start + nsub // nthr
        sa[start:end] = i
        start = end
    print(numpy.reshape(sa, (7, 7)))
    print(numpy.reshape(sf, (7, 7)))

    d.Set("threadToNuma", threadToNuma, swap=1)
    rmx = d.Get("gainReconmxT")  #nslope,nact
    thrSubapCnt = numpy.zeros(nthr, numpy.int32)
    rmxPart = []
    for i in range(nthr):
        rmxPart.append([])
    indx = 0
    for i in range(nsub):
        if sf[i]:
            thrSubapCnt[sa[i]] += 1
            rmxPart[sa[i]].append(rmx[indx])
            rmxPart[sa[i]].append(rmx[indx + 1])
            indx += 2
    for i in range(nthr):
        r = numpy.array(rmxPart[i])
        print "Thread %d rmx shape %s, dtype %s" % (i, str(r.shape), r.dtype)
        d.SetNuma("gainReconmxT%d" % i, r, int(threadToNuma[i]), swap=1)
    d.Set("subapAllocation", sa, swap=1)
Example #10
    def __init__(self):
        gr.top_block.__init__(self, "ATA New SNAP X-Engine")

        ##################################################
        # Variables
        ##################################################
        self.starting_channel = starting_channel = clparam_starting_channel
        self.num_channels = num_channels = clparam_num_channels
        self.output_file = output_file = clparam_output_directory + '/casa_2021_jan_04_sync_v3_xeng'
        self.ending_channel = ending_channel = starting_channel + num_channels - 1

        ##################################################
        # Blocks
        ##################################################
        self.clenabled_clXEngine_0 = clenabled.clXEngine(
            1, 2, 0, 0, False, 6, 2, clparam_num_antennas, 1, starting_channel,
            num_channels, clparam_integration_frames, clparam_antenna_list,
            True, output_file, 0, True, clparam_snap_sync, clparam_object_name,
            clparam_starting_chan_freq, clparam_channel_width,
            clparam_no_output, clparam_cpu_integration)

        if clparam_enable_affinity:
            # With affinity enabled, the goal is to keep threads on the NUMA node where
            # their memory was allocated, so allocate deliberately here: set affinity to
            # all of the cores on each processor until a full set has been assigned.
            num_nodes = numa.get_max_node() + 1

            # core_pairs = []
            cpu_core_list = []
            cores_per_cpu = 0
            cores_per_cpu_2 = 0

            for cur_node in range(0, num_nodes):
                cpu_to_node = list(numa.node_to_cpus(cur_node))
                cpu_core_list.append(cpu_to_node)
                if cores_per_cpu == 0:
                    cores_per_cpu = len(cpu_to_node)
                    cores_per_cpu_2 = cores_per_cpu // 2
                print("CPU" + str(cur_node) + " has " + str(len(cpu_to_node)) +
                      " cores: " + str(cpu_to_node))
                #i = 0

                #for cur_cpu in cpu_to_node:
                #   if i % 2 == 0:
                #        cpu_pair = [cur_cpu]
                #    else:
                #        cpu_pair.append(cur_cpu)
                #        core_pairs.append(cpu_pair)
                #
                #    i += 1

            #print("Setting xEngine affinity to cores " + str(core_pairs[0]))
            #self.clenabled_clXEngine_0.set_processor_affinity(core_pairs[0])
            #core_pairs = core_pairs[1:]
            # or to all cores on CPU0
            if num_nodes > 1:
                self.clenabled_clXEngine_0.set_processor_affinity(
                    cpu_core_list[0])

        self.antenna_list = []
        for i in range(0, clparam_num_antennas):
            if i == 0:
                input_file = '/home/sonata/casa_pcap/snap_2_ant_1f.pcap'
                input_port = clparam_base_port + i
            elif i == 1:
                input_file = '/home/sonata/casa_pcap/snap_7_ant_3c.pcap'
                input_port = clparam_base_port + i
            else:
                input_file = '/home/sonata/casa_pcap/snap_8_ant_4g.pcap'
                input_port = 10002

            new_ant = ata.snap_source(input_port, 1, True, False, False,
                                      starting_channel, ending_channel, 3,
                                      input_file, False, True, '224.1.1.10',
                                      False)

            if clparam_enable_affinity and num_nodes > 1:
                cpu2 = cores_per_cpu_2 - 2 + cores_per_cpu_2
                if i < cores_per_cpu_2 - 2:
                    # Subtract 2 for the xengine on the first node.
                    print("Setting affinity for PCAP " + input_file +
                          " to CPU0")
                    new_ant.set_processor_affinity(cpu_core_list[0])
                elif i < cpu2:
                    print("Setting affinity for PCAP " + input_file +
                          " to CPU1")
                    new_ant.set_processor_affinity(cpu_core_list[1])
                else:
                    #   just balance
                    index = i % 2
                    print("Setting affinity for PCAP " + input_file +
                          " to CPU" + str(index))
                    new_ant.set_processor_affinity(cpu_core_list[index])

            ##################################################
            # Connections
            ##################################################
            self.msg_connect((self.clenabled_clXEngine_0, 'sync'),
                             (new_ant, 'sync'))
            self.connect((new_ant, 0), (self.clenabled_clXEngine_0, i))
            self.antenna_list.append(new_ant)
Example #11
    def get_dst_numa_node_from_pcpu(self, pcpu_id):
        # the numa module does not implement the numa_node_of_cpu() call of the numa(3) library
        for i in range(0, numa.get_max_node() + 1):
            if pcpu_id in numa.node_to_cpus(i):
                return i
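
As the comment notes, these bindings expose numa.get_max_node() and numa.node_to_cpus() but no numa_node_of_cpu(), so every lookup rescans the nodes. A small sketch (the name cpu_to_node_map is illustrative) that precomputes the mapping once instead:

import numa

def cpu_to_node_map():
    """Build a {cpu_id: node_id} lookup by scanning each NUMA node once."""
    mapping = {}
    for node in range(numa.get_max_node() + 1):
        for cpu in numa.node_to_cpus(node):
            mapping[int(cpu)] = node
    return mapping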
Example #12
def onlinecpu():
    import numa

    for node in range(0, numa.get_max_node() + 1):
        for cpu in sorted(numa.node_to_cpus(node)):
            yield str(cpu)
Example #13
    def test_node_size(self):
        for node in range(numa.get_max_node() + 1):
            print('Node: %d, size: %r' % (node, numa.get_node_size(node)))
Example #14
    "figureOpen": 0,
    "figureName": "libfigureSL240.so",
    "figureParams": None,
    "reconName": "libreconmvm.so",
    "fluxThreshold": 0,
    "printUnused": 1,
    "useBrightest": 0,
    "figureGain": 1,
    "decayFactor": None,  #used in libreconmvm.so
    "reconlibOpen": 1,
    "maxAdapOffset": 10,
    "noPrePostThread": 0,
}

nthr = control.get("ncamThreads").sum()
nnodes = numa.get_max_node() + 1
threadToNuma = numpy.zeros(nthr, numpy.int32)
sf = control.get("subapFlag")
nsub = sf.size
sa = numpy.zeros(nsub, numpy.int32)
#divide all threads equally....
sa[:] = nthr - 1
start = 0
for i in range(nnodes):
    control["numa%d" % i] = {}
for i in range(nthr):
    end = start + nsub // nthr
    sa[start:end] = i
    start = end
control["threadToNuma"] = threadToNuma
gainReconmxT = control["rmx"].transpose().astype(numpy.float32)
Example #15
    def test_node_size(self):
        for node in range(numa.get_max_node() + 1):
            print('Node: %d, size: %r' % (node, numa.get_node_size(node)))
Example #16
def onlinecpu():
    import numa

    for node in range(0, numa.get_max_node() + 1):
        for cpu in sorted(numa.node_to_cpus(node)):
            yield str(cpu)
Example #17
    def __init__(self, **kwargs):
        test_module = kwargs.pop('test_module')
        viewers = kwargs.pop('viewers', False)
        n_nodes = kwargs.pop('n_nodes')
        n_workers = kwargs.pop('n_workers')
        partition = kwargs.pop('partition')
        do_numa = kwargs.pop('numa')
        log_file = kwargs.pop('log_file')
        input_files = kwargs.pop('input_files')
        self.__logger = kwargs.pop('logger')

        from collections import defaultdict
        self.__results = defaultdict(list)

        import numa
        max_nodes = numa.get_max_node() + 1
        if do_numa:
            if n_nodes > max_nodes:
                print(
                    'WARNING: %d NUMA nodes have been requested to be used, '
                    'but only %d are available and will be used.' %
                    (n_nodes, max_nodes))
                n_nodes = max_nodes
            n_nodes = max_nodes if n_nodes == -1 else n_nodes
            CalibLauncher = NumaLauncher
        else:
            n_nodes = 1 if n_nodes == -1 else n_nodes
            CalibLauncher = Launcher

        if n_workers != -1:
            n_work = n_workers
        else:
            import multiprocessing
            n_work = int(
                float(multiprocessing.cpu_count()) / float(max_nodes) + 0.5)

        tms = __import__("CalibTests.TestModules", globals(), locals(),
                         [test_module])
        tm = getattr(tms, test_module)

        import os
        directory = os.path.dirname(input_files[0])

        self.__logger.info("Will start %d workers on %d nodes." %
                           (n_work, n_nodes))
        launcher = Launcher(partition, self.__logger)
        self.__components = [
            (LogServer, dict(launcher=launcher, priority=1)),
            (Iterator,
             dict(launcher=launcher, priority=3,
                  python=tm.Iterator(directory))),
            (Analyser,
             dict(n_nodes=n_nodes,
                  n_workers=n_work,
                  python=tm.Analyzer(input_files),
                  launcher=CalibLauncher(partition, self.__logger),
                  priority=4))
        ]
        if log_file:
            self.__components.append((LogViewer,
                                      dict(launcher=launcher,
                                           priority=2,
                                           output_file=log_file,
                                           timing_width=90,
                                           split_regex=r'CALIBWORK_(\d+)')))
        if viewers:
            self.__components.append(
                (LogViewer, dict(launcher=launcher, priority=2)))

        self.__calib_process = CalibrationSteering("CalibrationTest",
                                                   self.__logger,
                                                   self.__components)

        # To get an idea of overall CPU usage
        from OfflineOnline.BenchmarkProcess import CPUMeasurer
        self.__cpu_process = CPUMeasurer(self.__logger)