Example #1
def get_dev(n=1, ok=(0, 1, 2, 3, 4, 5, 6, 7)):
    import GPUtil, time
    print('Auto select gpu')
    GPUtil.showUtilization()

    def _limit(devs, ok):
        return [dev for dev in devs if dev in ok]

    devs = GPUtil.getAvailable(order='memory',
                               maxLoad=0.5,
                               maxMemory=0.5,
                               limit=n)
    devs = _limit(devs, ok)
    if len(devs) >= 1:
        print('available {}'.format(devs))
        # GPUtil.showUtilization()
        return devs[0] if n == 1 else devs
    while len(devs) == 0:
        devs = GPUtil.getAvailable(order='random',
                                   maxLoad=0.98,
                                   maxMemory=0.98,
                                   limit=n)
        devs = _limit(devs, ok)
        if len(devs) >= 1:
            print('available {}'.format(devs))
            GPUtil.showUtilization()
            return devs[0] if n == 1 else devs
        print('no device available')
        GPUtil.showUtilization()
        time.sleep(60)  # 60 * 3
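A minimal usage sketch (not part of the original project; the variable names are illustrative): the returned id is typically pinned via CUDA_VISIBLE_DEVICES before any CUDA context is created, so the process only sees the selected card.

import os

dev = get_dev(n=1)  # blocks until a device in `ok` frees up
os.environ['CUDA_VISIBLE_DEVICES'] = str(dev)  # pin before any framework initializes CUDA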
Example #2
File: lz.py Project: luzai/sample-cls
def get_dev(n=1, ok=(0, 1, 2, 3, 4, 5, 6, 7), mem=(0.5, 0.9), sleep=60):
    import GPUtil, time, logging
    logging.info('Auto select gpu')
    GPUtil.showUtilization()

    def _limit(devs, ok):
        return [int(dev) for dev in devs if dev in ok]

    devs = GPUtil.getAvailable(order='memory', maxLoad=1, maxMemory=mem[0], limit=n)

    devs = _limit(devs, ok)
    if len(devs) >= 1:
        logging.info('available {}'.format(devs))
        # GPUtil.showUtilization()
        return int(devs[0]) if n == 1 else devs
    while len(devs) == 0:
        devs = GPUtil.getAvailable(order='random', maxLoad=1, maxMemory=mem[1], limit=n)
        devs = _limit(devs, ok)
        if len(devs) >= 1:
            logging.info('available {}'.format(devs))
            GPUtil.showUtilization()
            return devs[0] if n == 1 else devs
        logging.info('no device available')
        GPUtil.showUtilization()
        time.sleep(sleep)
Example #3
def check_configs():
    if cfg.MODE in ('train', ):
        cfg.TEST.USE_SAVED_PRED_RES = 'none'
    elif cfg.MODE in ('vis', ):
        cfg.TEST.EVAL_SEG_TAG_ON_GT = False
        cfg.LOG_IN_FILE = False
    elif cfg.MODE in ('demo', 'batch'):
        cfg.TEST.USE_SAVED_PRED_RES = 'none'
        cfg.TEST.EVAL_SEG_TAG_ON_GT = False

    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    assert scales == cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    if cfg.MODEL.BACKBONE.FEATURE_UPSAMPLE:
        assert len(scales) == 1 and scales[0] == 1. / 2**(
            cfg.MODEL.BACKBONE.FEATURE_UPSAMPLE_LEVEL - 1)
    anchor = cfg.MODEL.RPN.ANCHOR_STRIDE
    assert len(anchor) == 1 and anchor[0] == 1. / scales[0]

    if not cfg.MODEL.USE_3D_FUSION:
        assert cfg.INPUT.NUM_IMAGES_3DCE == 1
        assert cfg.MODEL.BACKBONE.FEATURE_FUSION_LEVELS == [False] * 3

    if cfg.GPU == '':
        import GPUtil
        # 'load' orders by least-loaded GPU first ('lowest' is not a valid GPUtil order)
        deviceIDs = GPUtil.getAvailable(order='load', limit=1, maxMemory=.2)
        if len(deviceIDs) == 0:
            deviceIDs = GPUtil.getAvailable(order='load',
                                            limit=1,
                                            maxMemory=.9,
                                            maxLoad=1)
        cfg.GPU = str(deviceIDs[0])
Example #4
def allocate_GPU(gpu_num):
    device_list = GPUtil.getAvailable(limit=gpu_num)
    while len(device_list) != gpu_num:
        print('Cannot allocate GPU. Waiting...')
        time.sleep(60)
        device_list = GPUtil.getAvailable(limit=gpu_num)
    devices = ','.join(str(device) for device in device_list)
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices
Example #5
def Assign_GPU():
    excluded_IDs = [2]
    GPU_2_use = GPUtil.getAvailable(order='memory', excludeID=excluded_IDs)
    if len(GPU_2_use) == 0:
        print('No available GPUs. waiting...')
        while len(GPU_2_use) == 0:
            time.sleep(10)
            GPU_2_use = GPUtil.getAvailable(order='memory',
                                            excludeID=excluded_IDs)
    print('Using GPU #%d' % (GPU_2_use[0]))
    return GPU_2_use
Example #6
def Assign_GPU(max_GPUs=1, **kwargs):
    excluded_IDs = []
    GPU_2_use = GPUtil.getAvailable(order='memory', excludeID=excluded_IDs,
                                    limit=max_GPUs if max_GPUs is not None else 100,
                                    **kwargs)
    if len(GPU_2_use) == 0:
        print('No available GPUs. waiting...')
        while len(GPU_2_use) == 0:
            time.sleep(10)
            GPU_2_use = GPUtil.getAvailable(order='memory', excludeID=excluded_IDs)
    assert len(GPU_2_use) > 0, 'No available GPUs...'
    if max_GPUs is not None:
        print('Using GPU #%d' % (GPU_2_use[0]))
        os.environ["CUDA_VISIBLE_DEVICES"] = "%d" % (GPU_2_use[0])  # Limit to 1 GPU when using an interactive session
        return [GPU_2_use[0]]
    else:
        return GPU_2_use
Example #7
def getAvailableGPU(maxload, maxmem, check_docker=True):
    # First, filter GPUs by resource usage.
    availableIDs = GPUtil.getAvailable(order='first',
                                       limit=8,
                                       maxLoad=maxload,
                                       maxMemory=maxmem)
    if len(availableIDs) < 1:
        return None
    elif check_docker:
        # Then exclude GPUs already claimed by running docker containers.
        tmp_ids = os.popen(
            "docker inspect $(docker ps -q)|grep NVIDIA_VISIBLE_DEVICES").read(
            ).replace("NVIDIA_VISIBLE_DEVICES=", "").replace('"', "").split()
        print(tmp_ids)
        try:
            invalid_gpus = [int(x)
                            for ids in tmp_ids
                            for x in ids.split(',')
                            if x not in ('', 'all')]
        except ValueError:
            invalid_gpus = []
        print(invalid_gpus)
        final_availableIDs = [x for x in availableIDs if x not in invalid_gpus]
        print(final_availableIDs)
        if len(final_availableIDs) > 0:
            return final_availableIDs.pop()
        return None
    else:
        return availableIDs.pop()
Example #8
def runTask():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--maxGPU', type=int, default=1000)
    parser.add_argument('--needGPU', type=int, default=1)
    parser.add_argument('--maxLoad', type=float, default=0.1)
    parser.add_argument('--maxMemory', type=float, default=0.1)
    parser.add_argument('--sleeptime', type=float, default=60)
    parser.add_argument('--user', type=str)
    parser.add_argument('file', nargs=1)
    args = parser.parse_args()

    import os
    import time
    from subprocess import Popen, PIPE

    import GPUtil

    maxGPU = args.maxGPU
    needGPU = args.needGPU
    maxLoad = args.maxLoad
    maxMemory = args.maxMemory
    file = args.file[0]
    user = args.user
    sleeptime = args.sleeptime

    while True:
        with open(file) as f:
            lines = [line for line in f if line.strip()]
        if lines:
            while True:
                s = 'for x in $(nvidia-smi --query-compute-apps=pid --format=csv,noheader,nounits); do ps -f -p $x | grep "%s"; done' % user
                p = Popen(s, stdout=PIPE, shell=True)
                ans = p.stdout.read()
                mygpu = len(ans.splitlines())
                deviceIDs = GPUtil.getAvailable(order='first',
                                                limit=needGPU,
                                                maxLoad=maxLoad,
                                                maxMemory=maxMemory,
                                                includeNan=False,
                                                excludeID=[],
                                                excludeUUID=[])
                find = False
                if mygpu < maxGPU and len(deviceIDs) >= needGPU:
                    os.system(lines[0].strip())
                    print('running command (%s)' % lines[0].strip())
                    find = True
                time.sleep(sleeptime)
                if find:
                    break
            with open(file, 'w') as f:
                for line in lines[1:]:
                    f.write(line)
        else:
            break
Example #9
def configure_tf_devices(visible_ids=None):
    # Do nothing if no visible GPU IDs
    if not visible_ids or visible_ids[0] == -1:
        return

    try:
        deviceIDs = GPUtil.getAvailable(order='load',
                                        limit=100,
                                        maxLoad=0.5,
                                        maxMemory=0.5,
                                        includeNan=False,
                                        excludeID=[],
                                        excludeUUID=[])
    except ValueError:
        cprint(NO_NVIDIA_GPUS, 'yellow')
        return

    deviceIDs = [id_ for id_ in deviceIDs if id_ in visible_ids]

    if not deviceIDs:
        cprint(
            "Error: Currently, no GPU is eligible (available memory and load at <=50%)",
            "red")
        GPUtil.showUtilization()
        return

    cprint(
        "GPUs with utilization and memory load <50%: {}".format(', '.join(
            [str(x) for x in deviceIDs])), "green")

    return deviceIDs
Example #10
File: utils.py Project: zizai/hvae-nips
def set_gpus(n_gpus):
    """
    Find GPUs to use, if possible. Return TF config
    """

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True

    try:
        device_IDs = GPUtil.getAvailable(order='load', limit=n_gpus)
    except FileNotFoundError:
        print('\n---- No GPUs on this machine ----\n')
        return (gpu_config, 0)

    if len(device_IDs) > 0:
        str_device_list = ','.join([str(x) for x in device_IDs])
        gpu_config.gpu_options.visible_device_list = str_device_list

        if len(device_IDs) < n_gpus:
            print('\n**** Note: {0} GPUs requested, but only {1} found ****'.
                  format(n_gpus, len(device_IDs)))

        print('\n---- Running on GPU(s) {} ----\n'.format(str_device_list))

    else:
        print('\n---- No GPUs available! ----\n')

    return (gpu_config, len(device_IDs))
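A hedged usage sketch for the function above (TF1-style session API, matching the ConfigProto it builds; the variable names are illustrative):

import tensorflow as tf

gpu_config, n_found = set_gpus(2)  # ask for two GPUs, degrade gracefully if fewer exist
with tf.Session(config=gpu_config) as sess:
    print('session sees {} GPU(s)'.format(n_found))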
Example #11
File: lbm.py Project: gfrisch/sparsebm
    def _check_params(self):
        self._np = np
        self._cupyx = None
        self.loglikelihood_ = -np.inf
        self.trained_successfully_ = False

        if self.use_gpu and (not _CUPY_INSTALLED or not _DEFAULT_USE_GPU
                             or not cupy.cuda.is_available()):
            self.gpu_number = None
            self.use_gpu = False
            logger.warning(
                "GPU not used as cupy library seems not to be installed or CUDA is not available"
            )

        if (self.use_gpu and _CUPY_INSTALLED and _DEFAULT_USE_GPU
                and cupy.cuda.is_available()):
            if self.gpu_index is not None:
                cupy.cuda.Device(self.gpu_index).use()
                self._np = cupy
                self._cupyx = cupyx
            else:
                free_idx = GPUtil.getAvailable("memory", limit=10)
                if not free_idx:
                    self.use_gpu = False
                    logger.warning("GPU not used as no gpu is free")
                else:
                    self._np = cupy
                    self._cupyx = cupyx
                    gpu_number = free_idx[0]
                    cupy.cuda.Device(gpu_number).use()
Example #12
    def get_gpu_info(self) -> Dict[str, str]:
        try:
            gpus: List[GPU] = GPUtil.getGPUs()
            available_gpus: List = GPUtil.getAvailable(order='memory',
                                                       limit=10,
                                                       maxLoad=0.4,
                                                       maxMemory=0.4,
                                                       includeNan=False,
                                                       excludeID=[],
                                                       excludeUUID=[])
            available_gpus: List = list(
                filter(lambda gpu: gpu.id in available_gpus, gpus))

            # return -1 if no nvidia-smi visible else return available gpus
            # {"id":"gpu name"}
            if len(gpus) == 0:
                return {"-1": "CPU"}
            else:
                gpus_dict: Dict[str, str] = {}

                for gpu in available_gpus:
                    gpus_dict[str(
                        gpu.id)] = "GPU " + str(gpu.id) + " - " + str(
                            gpu.name) + "- Available Memory: " + str(
                                int(gpu.memoryFree)) + "MB/" + str(
                                    int(gpu.memoryTotal)) + "MB"

                return gpus_dict
        except Exception as e:
            raise GpuInfoInvalid(str(e))
Example #13
def get_device():
    """
        Get one gpu id that has the most available memory.
    Returns:
        (int, str): The gpu id (None if no gpu is available) and the device string (pytorch style).
    """
    gpu_id_list = GPUtil.getAvailable(
        order="memory", limit=3
    )  # gpu ids sorted by most free memory first
    if len(gpu_id_list) < 1:
        gpu_id = None
        device_str = "cpu"
    else:
        gpu_id = gpu_id_list[0]
        device_str = "cuda:" + str(gpu_id)
        # need to set 0 if ray only specifies 1 gpu
        if "CUDA_VISIBLE_DEVICES" in os.environ:
            if len(os.environ["CUDA_VISIBLE_DEVICES"].split(",")) == 1:
                #  gpu_id = int(os.environ["CUDA_VISIBLE_DEVICES"])
                gpu_id = 0
                print("Found only one gpu with id: ", gpu_id)
                device_str = "cuda:" + str(gpu_id)
        #                     print(os.system("nvidia-smi"))
        else:
            print("Got a gpu id list sorted by the most available memory:", gpu_id_list)
    return gpu_id, device_str
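A short PyTorch-flavored usage sketch (torch is an assumption here, not part of the snippet):

import torch

gpu_id, device_str = get_device()
device = torch.device(device_str)          # "cuda:<id>" or "cpu"
model = torch.nn.Linear(8, 2).to(device)   # place a toy module on the selected device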
Example #14
def set_gpu():
    gpu = GPUtil.getAvailable('last', limit=5, excludeID=[0, 1])
    vis_gpu = ",".join(str(g) for g in gpu)
    os.environ["CUDA_VISIBLE_DEVICES"] = vis_gpu
Example #15
 def __init__(self,
              video_dir,
              cache_dir=None,
              stage_n=None,
              n_gpu=4,
              batch_size=4,
              stride=1):
     available_gpus = GPUtil.getAvailable(limit=n_gpu,
                                          maxLoad=0.2,
                                          maxMemory=0.2)
     n_available_gpu = len(available_gpus)
     stages = [
         LoaderStage(),
         DetectorStage(available_gpus, 2),
         TrackerStage(),
         MinotorStage(cache_dir is not None)
     ]
     stages = stages[:stage_n]
     super().__init__(stages, video_dir, cache_dir, batch_size, stride)
     if n_available_gpu == 0:
         self.logger.warn('No gpus available, running on cpu')
     elif n_available_gpu < n_gpu:
         self.logger.warn(
             '%d gpus requested, but only %d gpus (gpu id: %s) available',
             n_gpu, n_available_gpu, available_gpus)
     else:
         self.logger.info('Running on %d gpus (gpu id: %s)', n_gpu,
                          available_gpus)
     if isinstance(stages[-1], (DetectorStage.func)) and n_gpu > 1:
         self.logger.warn(
             'Last stage is %s with %d gpus, '
             'results may be out of order and incomplete',
             stages[-1].__class__.__name__, n_gpu)
     self.videos_processed = []
     self.events = []
Example #16
 def generate_jobs(self, n, wait_seconds=20):
     for i in range(n):
         params = {
             name: spec.sample()
             for name, spec in self.params.items()
         }
         if self.name is not None:
             ids = ['{}={}'.format(k, params[k]) for k in self.keys]
             params[self.name] = hashlib.sha256(
                 ','.join(ids).encode()).hexdigest()
         gpu_prefix = ''
         if self.gpu is not None:
             while True:
                 available = GPUtil.getAvailable(order='first',
                                                 limit=1,
                                                 maxMemory=0.01)
                 if available:
                     break
                 else:
                     time.sleep(wait_seconds)
             params['gpu'] = 0
             gpu_prefix = 'CUDA_VISIBLE_DEVICES={} '.format(available[0])
         command = '{}{} '.format(gpu_prefix, self.executable)
         specs = []
         for k in sorted(list(params.keys())):
             v = params[k]
             specs.append('--{} {}'.format(k, v))
         command += ' '.join(specs)
         yield command, params
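One hedged way to drive the generator (the consumer loop below is illustrative; `scheduler` stands in for an instance of the class this method belongs to): each yielded command already carries its CUDA_VISIBLE_DEVICES prefix, so it can be handed straight to a shell.

import subprocess

for command, params in scheduler.generate_jobs(n=4):
    print('launching:', command)
    subprocess.run(command, shell=True, check=True)  # run each sampled job sequentially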
Example #17
def compile_sequential_model():
    # Compiles and Trains neural network

    data = request.json

    notebook = get_notebook_data(data['notebook_name'])
    notebook['hyperparameters'] = data['hyperparameters']
    notebook["history"] = {
        "acc": [],
        "val_acc": [],
        "loss": [],
        "val_loss": []
    }

    # allocate specified device while creating notebook

    config = tensorflow.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = (
        notebook["GPU_count"] / len(GPUtil.getAvailable()))
    keras.backend.tensorflow_backend.set_session(
        tensorflow.Session(config=config))

    notebook['is_online'] = True

    # load created model
    model = keras.models.model_from_json(notebook['model'])

    # compile with client-sent hyperparameters
    model.compile(loss=data['hyperparameters']['loss'],
                  optimizer=keras.optimizers.SGD(
                      lr=float(data['hyperparameters']['learning_rate']),
                      momentum=float(data['hyperparameters']['momentum']),
                      nesterov=bool(data['hyperparameters']['nesterov'])),
                  metrics=['acc'])

    # Training starts
    model.fit(x=notebook['x_train'],
              y=notebook['y_train'],
              batch_size=128,
              validation_data=(notebook['x_test'], notebook['y_test']),
              epochs=int(data['hyperparameters']['epochs']),
              callbacks=[on_epoch_end_callback(notebook=notebook)])

    # save model separately as model weights could not be pickled
    model.save("NOTEBOOK_" + data['notebook_name'] +
               "_neural_network_model.hdf5")

    notebook['model'] = model.to_json()
    set_notebook_data(data['notebook_name'])

    try:
        keras.backend.clear_session()
    except Exception:
        pass

    return json_encoder.encode({
        "message": "Success",
        "comment": "Compiled model and trained"
    })
Example #18
def select_devices(
    num_gpus_to_use=0, max_load=0.01, max_memory=0.01, exclude_gpu_ids=None
):

    if num_gpus_to_use == 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    else:
        if exclude_gpu_ids is None:
            exclude_gpu_ids = []
        gpu_to_use = GPUtil.getAvailable(
            order="first",
            limit=num_gpus_to_use,
            maxLoad=max_load,
            maxMemory=max_memory,
            includeNan=False,
            excludeID=exclude_gpu_ids,
            excludeUUID=[],
        )
        if len(gpu_to_use) < num_gpus_to_use:
            raise OSError(
                "Couldnt find enough GPU(s) as required by the user, stopping program "
                "- consider reducing "
                "the requirements or using num_gpus_to_use=0 to use CPU"
            )

        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
            str(gpu_idx) for gpu_idx in gpu_to_use
        )

        print("GPUs selected have IDs {}".format(os.environ["CUDA_VISIBLE_DEVICES"]))
Example #19
    def _get_device_map(self):
        self.logger.info('get devices')
        run_on_gpu = False
        device_map = [-1] * self.num_worker
        if not self.args.cpu:
            try:
                import GPUtil
                num_all_gpu = len(GPUtil.getGPUs())
                avail_gpu = GPUtil.getAvailable(order='memory', limit=min(num_all_gpu, self.num_worker))
                num_avail_gpu = len(avail_gpu)

                if num_avail_gpu >= self.num_worker:
                    run_on_gpu = True
                elif 0 < num_avail_gpu < self.num_worker:
                    self.logger.warning('only %d out of %d GPU(s) is available/free, but "-num_worker=%d"' %
                                        (num_avail_gpu, num_all_gpu, self.num_worker))
                    if not self.args.device_map:
                        self.logger.warning('multiple workers will be allocated to one GPU, '
                                            'may not scale well and may raise out-of-memory')
                    else:
                        self.logger.warning('workers will be allocated based on "-device_map=%s", '
                                            'may not scale well and may raise out-of-memory' % self.args.device_map)
                    run_on_gpu = True
                else:
                    self.logger.warning('no GPU available, fall back to CPU')

                if run_on_gpu:
                    device_map = ((self.args.device_map or avail_gpu) * self.num_worker)[: self.num_worker]
            except FileNotFoundError:
                self.logger.warning('nvidia-smi is missing, often means no gpu on this machine. '
                                    'fall back to cpu!')
        self.logger.info('device map: \n\t\t%s' % '\n\t\t'.join(
            'worker %2d -> %s' % (w_id, ('gpu %2d' % g_id) if g_id >= 0 else 'cpu') for w_id, g_id in
            enumerate(device_map)))
        return device_map
Example #20
def print_gpu_stat(gpu_id=None):
    """Print GPU status."""
    if gpu_id is None:
        gpu_ids = GPUtil.getAvailable(limit=10)
        for gpu_id in gpu_ids:
            GPU = GPUtil.getGPUs()[gpu_id]
            GPU_load = GPU.load * 100              # utilization as a percentage
            GPU_memoryUtil = GPU.memoryUtil * 100  # memory utilization as a percentage
            GPU_memoryTotal = GPU.memoryTotal / 2.0**10  # MB -> GB
            GPU_memoryUsed = GPU.memoryUsed / 2.0**10
            GPU_memoryFree = GPU.memoryFree / 2.0**10
            print("Current GPU (ID:{:d}) name:\t{:s}".format(gpu_id, GPU.name))
            print("Total_GPU_memory:\t{:.3f}GB;".format(GPU_memoryTotal))
            print("GPU_memoryUtil:\t{:.3f}%;".format(GPU_memoryUtil))
            print("GPU_memoryUsed:\t{:.3f}GB;".format(GPU_memoryUsed))
            print("GPU_memoryFree:\t{:.3f}GB;".format(GPU_memoryFree))
            print("GPU_load:\t{:.3f}%;".format(GPU_load))
    else:
        GPU = GPUtil.getGPUs()[gpu_id]
        GPU_load = GPU.load * 100
        GPU_memoryUtil = GPU.memoryUtil * 100
        GPU_memoryTotal = GPU.memoryTotal / 2.0**10
        GPU_memoryUsed = GPU.memoryUsed / 2.0**10
        GPU_memoryFree = GPU.memoryFree / 2.0**10
        print("Current GPU (ID:{:d}) name: {:s}".format(gpu_id, GPU.name))
        print("Total_GPU_memory: {:.3f}GB;".format(GPU_memoryTotal))
        print("GPU_memoryUsed: {:.3f}GB;".format(GPU_memoryUsed))
        print("GPU_memoryFree: {:.3f}GB;".format(GPU_memoryFree))
        print("GPU_load: {:.3f}%;".format(GPU_load))
Example #21
File: utils.py Project: xywswsxy/virtual
def set_free_gpus(num):
    # num: integer; number of GPUs that shall be allocated
    # returns: string; listing a total of 'num' available GPUs.

    list_gpu = GPUtil.getAvailable(limit=num, maxMemory=0.01)
    print(list_gpu)
    return str(list_gpu)[1:-1]
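str(list_gpu)[1:-1] yields a string like '0, 1'; a hedged usage sketch (whitespace is stripped since some CUDA versions reject device lists containing spaces):

import os

os.environ['CUDA_VISIBLE_DEVICES'] = set_free_gpus(2).replace(' ', '')  # e.g. "0,1"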
Example #22
def autoset_settings(set_var):
    """Autoset GPU parameters using CUDA_VISIBLE_DEVICES variables.

    Return default config if variable not set.
    :param set_var: Variable to set. Must be of type ConfigSettings
    """
    try:
        devices = ast.literal_eval(os.environ["CUDA_VISIBLE_DEVICES"])
        if not isinstance(devices, (list, tuple)):
            devices = [devices]
        if len(devices) != 0:
            set_var.GPU = len(devices)
            set_var.NJOBS = len(devices)
            warnings.warn("Detecting CUDA devices : {}".format(devices))

    except KeyError:
        set_var.GPU = len(
            GPUtil.getAvailable(order='first',
                                limit=8,
                                maxLoad=0.5,
                                maxMemory=0.5,
                                includeNan=False))
        if not set_var.GPU:
            warnings.warn(
                "No GPU automatically detected. Setting SETTINGS.GPU to 0, " +
                "and SETTINGS.NJOBS to cpu_count.")
            set_var.GPU = 0
            set_var.NJOBS = multiprocessing.cpu_count()
        else:
            set_var.NJOBS = set_var.GPU
            warnings.warn("Detecting {} CUDA devices.".format(set_var.GPU))

    return set_var
Example #23
    def get_gpus(self, **kwargs):
        """Gets a list of qualifying GPU IDs
        """
        max_load = self._round_to_between_0_and_1(1.0 - self.minFreeLoad)
        max_mem = self._round_to_between_0_and_1(1.0 - self.minFreeMemory)

        log.debug("GPU Requirements")

        table = [
            ("order", self.priority),
            ("maxLoad", max_load),
            ("maxMemory", max_mem),
            ("excludeID", self.ignoreIDs),
            ("excludeUUID", self.ignoreUUIDs),
        ]

        table = tabulate(table,
                         headers=["Parameter", "Value"],
                         tablefmt="simple")
        log.debug(table)

        availableGPUids = GPUtil.getAvailable(
            order=self.priority,
            maxLoad=max_load,
            maxMemory=max_mem,
            excludeID=self.ignoreIDs,
            excludeUUID=self.ignoreUUIDs,
        )
        log.debug("GPU Util Found GPU IDs: " + str(availableGPUids))
        availableGPUids = self._filter_gpus(availableGPUids)
        log.debug("Filtered GPU IDs are: " + str(availableGPUids))

        return availableGPUids
Example #24
def check_if_gpu():
    gpus = GPUtil.getAvailable()

    if len(gpus) == 0:
        print('No gpu found')
    else:
        print(gpus)
Example #25
def main(conf_name, gpu):
    # Initialize configs and prepare result dir with date
    if conf_name is None:
        conf = configs.Config()
    else:
        conf = configs.X2_REAL_CONF
        # conf = None
        # exec ('conf = configs.%s' % conf_name)
    res_dir = prepare_result_dir(conf)
    local_dir = os.path.dirname(__file__)

    # We take all png files that are not ground truth
    files = [file_path for file_path in glob.glob('%s/*.png' % conf.input_path)
             if not file_path[-7:-4] == '_gt']

    # Loop over all the files
    for file_ind, input_file in enumerate(files):

        # Ground-truth file needs to be like the input file with _gt (if exists)
        ground_truth_file = input_file[:-4] + '_gt.png'
        if not os.path.isfile(ground_truth_file):
            ground_truth_file = '0'

        # Numeric kernel files need to be like the input file with serial number
        kernel_files = ['%s_%d.mat;' % (input_file[:-4], ind) for ind in range(len(conf.scale_factors))]
        kernel_files_str = ''.join(kernel_files)
        for kernel_file in kernel_files:
            if not os.path.isfile(kernel_file[:-1]):
                kernel_files_str = '0'
                print('no kernel loaded')
                break

        print(kernel_files)

        # This option uses all the gpu resources efficiently
        if gpu == 'all':

            # Stay stuck in this loop until there is some gpu available with at least half capacity
            gpus = []
            while not gpus:
                gpus = GPUtil.getAvailable(order='memory')

            # Take the gpu with the most free memory (order='memory' sorts
            # ascending by memory utilization, so it is the first entry)
            cur_gpu = gpus[0]

            # Run ZSSR from command line, open xterm for each run
            os.system("xterm -hold -e " + conf.python_path +
                      " %s/run_ZSSR_single_input.py '%s' '%s' '%s' '%s' '%s' '%s' alias python &"
                      % (local_dir, input_file, ground_truth_file, kernel_files_str, cur_gpu, conf_name, res_dir))

            # Verbose
            print('Ran file #%d: %s on GPU %d\n' % (file_ind, input_file, cur_gpu))

            # Wait 5 seconds for the previous process to start using the GPU. If we didn't wait, GPU memory would not
            # yet be taken and all processes would start on the same GPU at once and later collapse.
            sleep(5)

        # The other option is just to run sequentially on a chosen GPU.
        else:
            run_ZSSR_single_input.main(input_file, ground_truth_file, kernel_files_str, gpu, conf_name, res_dir)
Example #26
 def get_available_gpus(self, limit):
     exclude = self.exclude_gpus + [p['gpu_idx'] for p in self.active_procs]
     return GPUtil.getAvailable(order='random',
                                limit=limit,
                                maxLoad=self.gpu_max_load,
                                maxMemory=self.gpu_max_mem,
                                excludeID=exclude)
Example #27
def parallel(func: Callable,
             filelist: Iterable,
             use_gpu: bool = False,
             nbprocesses: int = None) -> None:
    """Parallel processing with multiprocessing.Pool(), works better 
    with functools.partial().
    
    If ``use_gpu`` is True, ``gpu_queue`` will be passed to ``func`` as 
    a keyword argument. The input ``func`` needs to handle the keyword
    parameter ``gpu_queue`` and select the GPU with gpu_queue.get(). 
    Don't forget to put the GPU id back to the gpu_queue at the end of
    ``func``.
    
    Parameters
    ----------
    func : `Callable`
        The target function for parallel processing.
    filelist : `Iterable`
        The file list to process with the input function.
    use_gpu : `bool`, optional
        True for running NN-based PCC algs., False otherwise. 
        Defaults to False.
    nbprocesses : `int`, optional
        Specify the number of cpu parallel processes. If None, it will
        equal the cpu count. Defaults to None.
    
    Raises
    ------
    `ValueError`
        No available GPU.
    """
    if use_gpu is True:
        # Get the number of available GPUs
        deviceIDs = GPUtil.getAvailable(order='first',
                                        limit=8,
                                        maxLoad=0.5,
                                        maxMemory=0.2,
                                        includeNan=False,
                                        excludeID=[],
                                        excludeUUID=[])
        process = len(deviceIDs)

        if process <= 0:
            logger.error(
                "No available GPU. Check with the threshold parameters "
                "of ``GPUtil.getAvailable()``")
            raise ValueError

        manager = Manager()
        gpu_queue = manager.Queue()
        for id in deviceIDs:
            gpu_queue.put(id)
        pfunc = partial(func, gpu_queue=gpu_queue)
    else:
        process = nbprocesses
        pfunc = func

    with Pool(process) as pool:
        list(tqdm(pool.imap_unordered(pfunc, filelist), total=len(filelist)))
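A minimal sketch of a worker that follows the gpu_queue protocol described in the docstring (the function name and body are illustrative; only the get/put discipline comes from the source). It would then be launched as parallel(compress_one_file, filelist, use_gpu=True).

def compress_one_file(path, gpu_queue=None):
    gpu_id = gpu_queue.get()  # borrow a free GPU id from the shared queue
    try:
        env = {'CUDA_VISIBLE_DEVICES': str(gpu_id)}
        # ... run the NN-based PCC algorithm on `path` with `env` ...
    finally:
        gpu_queue.put(gpu_id)  # always return the id so other workers can reuse it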
Example #28
File: lz.py Project: luzai/opt18
 def get_dev_one(mem):
     devs = GPUtil.getAvailable(order='memory', maxLoad=1, maxMemory=mem, limit=n)
     devs = _limit(devs, ok)
     if len(devs) >= n:
         logging.info('available {}'.format(devs))
         return devs
     else:
         return []
Example #29
def set_gpu():
    gpu = GPUtil.getAvailable(limit=3, excludeID=[0, 1])
    vis_gpu = ",".join(str(g) for g in gpu)
    os.environ["CUDA_VISIBLE_DEVICES"] = vis_gpu
    print("Setting GPUS: ", vis_gpu)
Example #30
async def get_gpu_info():
    return GPUtil.getAvailable(order='memory',
                               limit=10,
                               maxLoad=0.25,
                               maxMemory=0.25,
                               includeNan=False,
                               excludeID=[],
                               excludeUUID=[])
Example #31
def main():
    max_devices = 16
    # Check which devices we have locally
    available_devices = GPUtil.getAvailable(limit=max_devices)
    # Use one worker per device
    cluster = LocalCluster(n_workers=len(available_devices), threads_per_worker=4)
    client = Client(cluster)

    # Set up a relatively large regression problem
    n = 100
    m = 10000000
    partition_size = 100000
    X = da.random.random((m, n), chunks=partition_size)
    y = da.random.random(m, chunks=partition_size)

    xgb.dask.run(client, train, X, y, available_devices)
Example #32
import os
import sys
import subprocess
import GPUtil


deviceIDs = GPUtil.getAvailable(order='first', limit=3, maxLoad=0.5, maxMemory=0.5)

print(','.join(str(e) for e in deviceIDs))

task_queue_file = os.path.join(os.environ.get("HOME", ""), "task_queue.txt")

if not os.path.isfile(task_queue_file):
	sys.exit(0)

task_list = open(task_queue_file).readlines()

print(task_list)

for i in range(min(len(deviceIDs), len(task_list))):
	task = task_list[i].strip() + " --gpu-id=%d" % i
	print(task)
	subprocess.Popen(task, shell=True, cwd="/home/xyang22/project/research/active-learning-dnn")
Example #33
def available_gpu(*args, **kwargs):
    """This function is an alias for ``GPUtil.getAvailable``. If
    ``GPUtil`` is not installed, it returns [0,] as a default GPU ID."""
    try:
        import GPUtil
    except ImportError:
        return [0]
    return GPUtil.getAvailable(*args, **kwargs)