Example #1
    def __init__(self, debug_mode=False, challenge_mode=False, track=None, timeout=20.0):
        """
        Init requires scenario as input
        """
        self.scenario = None
        self.scenario_tree = None
        self.scenario_class = None
        self.ego_vehicles = None
        self.other_actors = None

        self._debug_mode = debug_mode
        self._challenge_mode = challenge_mode
        self._track = track
        self._agent = None
        self._running = False
        self._timestamp_last_run = 0.0
        self._timeout = timeout
        self._watchdog = Watchdog(float(self._timeout))

        self.scenario_duration_system = 0.0
        self.scenario_duration_game = 0.0
        self.start_system_time = None
        self.end_system_time = None
        nvidia_smi.nvmlInit()
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)

        # Register a signal handler so a running scenario can be interrupted externally (e.g. Ctrl-C)
        signal.signal(signal.SIGINT, self._signal_handler)
Example #2
def get_gpu_memory():
    import nvidia_smi
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Used GPU memory: {}%".format((info.used * 100) // info.total))
    nvidia_smi.nvmlShutdown()
Example #3
    def __init__(self, gpus=[]):
        r"""CO2 consumption tracker for deep learning models.
        Look at https://arxiv.org/abs/1906.02243 for details.
        """
        # temporal variables
        self._start = None
        self._step = None

        # power variables
        self._cpu_power = 0
        self._gpu_power = 0
        self._ram_power = 0
        self.total_energy = 0

        # GPU-specific constants
        self._cuda = torch.cuda.is_available()
        print(gpus)
        if self._cuda:
            nvidia_smi.nvmlInit()
            self._handles = [
                nvidia_smi.nvmlDeviceGetHandleByIndex(gpu) for gpu in gpus
            ]

        # energy consumption constants
        self._pue_coeff = 1.58
        self._co2_coeff = 0.477
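The two constants above come from the methodology of the referenced paper (https://arxiv.org/abs/1906.02243): a PUE (power usage effectiveness) factor of 1.58 for datacenter overhead and an average grid intensity of roughly 0.477 kg of CO2 per kWh. A minimal sketch of how a tracker like this could turn accumulated energy into an emissions estimate, assuming total_energy is kept in kWh; the report_co2 helper is hypothetical and not part of the original class:

    def report_co2(self):
        """Hypothetical helper (not in the original class): convert tracked energy in kWh to kg of CO2."""
        # Scale measured energy by the PUE factor to include cooling/infrastructure overhead,
        # then apply the average grid emission factor (kg CO2 per kWh).
        facility_energy_kwh = self._pue_coeff * self.total_energy
        return self._co2_coeff * facility_energy_kwh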
Example #4
    def __init__(self, batch_size, validation_split=0.2, gpu=0, smooth=0.05):
        self.cuda = torch.cuda.is_available()
        self.gpu = gpu
        self.smooth = smooth
        self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

        if (NVIDIA_SMI):
            nvidia_smi.nvmlInit()
            self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
            print("Computing in {0} : {1}".format(self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))
        
        self.batch_size = batch_size
        self.validation_split = validation_split        
                
        kwargs = {'num_workers': 2, 'pin_memory': False} if self.cuda else {}        
        
        self.model = model.Network(95*3+1, 100, 2).to(self.device)
        
        print('N. total parameters : {0}'.format(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

        self.dataset = Dataset()
        
        # Compute the fraction of data for training/validation
        idx = np.arange(self.dataset.n_training)

        self.train_index = idx[0:int((1-validation_split)*self.dataset.n_training)]
        self.validation_index = idx[int((1-validation_split)*self.dataset.n_training):]

        # Define samplers for the training and validation sets
        self.train_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.train_index)
        self.validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(self.validation_index)
                
        # Data loaders that will inject data during training
        self.train_loader = torch.utils.data.DataLoader(self.dataset, batch_size=self.batch_size, sampler=self.train_sampler, shuffle=False, **kwargs)
        self.validation_loader = torch.utils.data.DataLoader(self.dataset, batch_size=self.batch_size, sampler=self.validation_sampler, shuffle=False, **kwargs)
Example #5
def check_gpu_stat():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu{i}: {res.gpu}%, gpu-mem: {res.memory}%')
Example #6
    def __init__(self,
                 batch_size,
                 validation_split=0.2,
                 gpu=0,
                 smooth=0.05,
                 K=3,
                 model_class='conv1d'):
        self.cuda = torch.cuda.is_available()
        self.gpu = gpu
        self.smooth = smooth
        self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")
        # self.device = 'cpu'
        self.batch_size = batch_size
        self.model_class = model_class

        self.K = K

        if (NVIDIA_SMI):
            nvidia_smi.nvmlInit()
            self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
            print("Computing in {0} : {1}".format(
                self.device, nvidia_smi.nvmlDeviceGetName(self.handle)))

        self.validation_split = validation_split

        kwargs = {'num_workers': 4, 'pin_memory': False} if self.cuda else {}

        if (model_class == 'conv1d'):
            self.model = model.Network(K=self.K,
                                       L=32,
                                       device=self.device,
                                       model_class=model_class).to(self.device)

        if (model_class == 'conv2d'):
            self.model = model.Network(K=self.K,
                                       L=32,
                                       NSIDE=16,
                                       device=self.device,
                                       model_class=model_class).to(self.device)

        print('N. total parameters : {0}'.format(
            sum(p.numel() for p in self.model.parameters()
                if p.requires_grad)))

        self.train_dataset = Dataset(n_training=20000)
        self.validation_dataset = Dataset(n_training=2000)

        # Data loaders that will inject data during training
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            **kwargs)
        self.validation_loader = torch.utils.data.DataLoader(
            self.validation_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            **kwargs)
Example #7
    def __init__(self, device='cpu'):
        self.log = SummaryWriter()
        if nvidia_smi and device != 'cpu':
            nvidia_smi.nvmlInit()
            self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        else:
            self.handle = None
Example #8
def checkGPUsAvailability(n_gpus=1):
    '''
    Test that GPUs have free memory on 'n_gpus'.
    OUT:
        True: if they have
        False: if not
    '''
    # For every gpu to check
    for i_gpu in range(n_gpus):
        
        # Access to the memory used by the i-th gpu
        try:
            nvidia_smi.nvmlInit()
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i_gpu)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        except Exception:
            print('Warning: GPU could not be accessed')
            break
                
        # If more than 1GB is taken, then stop
        if (mem_res.used/(1024.**3) > 1.0):         # greater than 1GB of VRAM
            # Report it
            print('Memory used (gpu-%i): %.2f GB' % (i_gpu, mem_res.used/(1024**3)), end='')
            print(' - on total: %.2f GB' % (mem_res.total/(1024**3)))
            return False

    return True
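A typical call site for the helper above, assuming the caller simply wants to abort when the requested cards are already occupied (the error message is illustrative):

if not checkGPUsAvailability(n_gpus=2):
    raise RuntimeError('Requested GPUs already have memory allocated; aborting')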
Example #9
    def __init__(self, basis_wavefront='zernike', npix_image=128, n_modes=44, n_frames=10, gpu=0, smooth=0.05,\
        batch_size=16, arguments=None):

        self.pixel_size = 0.0303
        self.telescope_diameter = 256.0  # cm
        self.central_obscuration = 51.0  # cm
        self.wavelength = 8000.0
        self.n_frames = n_frames
        self.batch_size = batch_size
        self.arguments = arguments

        self.basis_for_wavefront = basis_wavefront
        self.npix_image = npix_image
        self.n_modes = n_modes
        self.gpu = gpu
        self.cuda = torch.cuda.is_available()
        self.device = torch.device(f"cuda:{self.gpu}" if self.cuda else "cpu")

        # Get handles to later check memory and usage of the GPUs
        if (NVIDIA_SMI):
            nvidia_smi.nvmlInit()
            self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(self.gpu)
            print("Computing in {0} : {1}".format(
                gpu, nvidia_smi.nvmlDeviceGetName(self.handle)))

        # Define the neural network model
        print("Defining the model...")
        self.model = model.Network(device=self.device, n_modes=self.n_modes, n_frames=self.n_frames, \
            pixel_size=self.pixel_size, telescope_diameter=self.telescope_diameter, central_obscuration=self.central_obscuration, wavelength=self.wavelength,\
            basis_for_wavefront=self.basis_for_wavefront, npix_image=self.npix_image).to(self.device)

        print('N. total parameters : {0}'.format(
            sum(p.numel() for p in self.model.parameters()
                if p.requires_grad)))

        kwargs = {'num_workers': 1, 'pin_memory': False} if self.cuda else {}
        # Data loaders that will inject data during training
        self.training_dataset = Dataset(
            filename='/scratch1/aasensio/fastcam/training_small.h5',
            n_training_per_star=1000,
            n_frames=self.n_frames)
        self.train_loader = torch.utils.data.DataLoader(
            self.training_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            **kwargs)

        self.validation_dataset = Dataset(
            filename='/scratch1/aasensio/fastcam/validation_small.h5',
            n_training_per_star=100,
            n_frames=self.n_frames,
            validation=True)
        self.validation_loader = torch.utils.data.DataLoader(
            self.validation_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            **kwargs)
Example #10
def get_mem_info(device_id):
    gpu_list = [device_id]
    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res[0]
Example #11
def show_memory_usage():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # GPU number
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # print('=' * 50)
    # print(f'mem: {mem_res.used / (1024 ** 3)} (GiB)')  # usage in GiB
    print(f'mem usage: {100 * (mem_res.used / mem_res.total):.3f}%')  # percentage
Example #12
def get_gpu_temp():
    try:
        nvmlInit()
        gpu = nvmlDeviceGetHandleByIndex(0)
        gpu_temp = nvmlDeviceGetTemperature(gpu, NVML_TEMPERATURE_GPU)
        return gpu_temp
    except NVMLError:
        return None
Example #13
def print_gpu_info(idx=0):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(idx)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Total memory:", info.total)
    print("Free memory:", info.free)
    print("Used memory:", info.used)
    nvidia_smi.nvmlShutdown()
Example #14
def gpu_memory_tracker():
    """returns nvidia gpu memory consumed"""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    used = info.used
    total = info.total
    percent = used / total * 100
    return percent
Example #15
def memory_check():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    mbs = mem_res.used / (1024**2)
    percent = mem_res.used / mem_res.total
    return mbs, percent
Example #16
def Watch_fin():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    time.sleep(1)
    if res.used == 0:
        return 0
    else:
        return 1
Example #17
    def get_usage(gpu_list=None, **kwargs):
        """ Track GPU memory usage. """
        _ = kwargs
        gpu_list = gpu_list or [0]
        nvidia_smi.nvmlInit()
        handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
        res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
        res = [100 * item.used / item.total for item in res]
        nvidia_smi.nvmlShutdown()
        return res
Example #18
    def get_usage(gpu_list=None, **kwargs):
        """ Track GPU memory utilization. """
        _ = kwargs
        gpu_list = gpu_list or [0]
        nvidia_smi.nvmlInit()
        handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
        res = [nvidia_smi.nvmlDeviceGetUtilizationRates(item) for item in handle]
        return [item.memory for item in res]
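Note that these two get_usage variants report different quantities: nvmlDeviceGetMemoryInfo returns byte counts, so 100 * used / total is the fraction of VRAM currently allocated, whereas nvmlDeviceGetUtilizationRates(handle).memory is the percentage of time the memory controller was busy during the last sampling interval. A small sketch querying both on the same handle (GPU index 0 assumed) makes the difference visible:

import nvidia_smi

nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)

mem = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)          # bytes: .total, .free, .used
util = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)   # percentages: .gpu, .memory

print(f'VRAM in use        : {100 * mem.used / mem.total:.1f}%')
print(f'Memory ctrl. active: {util.memory}%')

nvidia_smi.nvmlShutdown()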
Example #19
def use_gpu():
    nvidia_smi.nvmlInit()

    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    nvidia_smi.nvmlShutdown()

    if info.used > 1000000000:
        return True
    else:
        return False
Example #20
def gpu_usage():
    nvidia_smi.nvmlInit()

    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    usage = info.used

    nvidia_smi.nvmlShutdown()
    return usage
Example #21
def get_max_data_group_size():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    total_memory = info.total
    if total_memory >= 12 * (10 ** 9):
        return 2 ** 12
    elif total_memory >= 6 * (10 ** 9):
        return 2 ** 11
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        return 2 ** 12
Example #22
def check_cuda_memory():
    nvidia_smi.nvmlInit()

    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        print("Device {}: {}, Memory : ({:.2f}% free): {}(total), {} (free), {} (used)"\
              .format(i, nvidia_smi.nvmlDeviceGetName(handle), 100*info.free/info.total, \
                      info.total, info.free, info.used))
    nvidia_smi.nvmlShutdown()
    return
Example #23
def get_device(gpuID=False):
    """Checks available GPUs and selects the one with the most available memory

    Parameters
    ----------
    gpuID: bool or int
        whether to use GPU, or the device ID of a specific GPU to use. If False,
        use only CPU. If True, attempts to find the GPU with most available memory.

    Returns
    -------
    device : jax.device
        handle to gpu or cpu device selected

    """

    import jax

    if gpuID is False:
        return jax.devices('cpu')[0]

    try:
        gpus = jax.devices('gpu')
        # did the user request a specific GPU?
        if isinstance(gpuID, int) and gpuID < len(gpus):
            return gpus[gpuID]
        if isinstance(gpuID, int):
            from desc.backend import TextColors
            # ID was not valid
            warnings.warn(
                TextColors.WARNING +
                'gpuID did not match any found devices, trying default gpu option'
                + TextColors.ENDC)
        # find all available options and see which has the most space
        import nvidia_smi
        nvidia_smi.nvmlInit()
        maxmem = 0
        gpu = gpus[0]
        for i in range(len(gpus)):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            if info.free > maxmem:
                maxmem = info.free
                gpu = gpus[i]

        nvidia_smi.nvmlShutdown()
        return gpu

    except:
        from desc.backend import TextColors
        warnings.warn(TextColors.WARNING +
                      'No GPU found, falling back to CPU' + TextColors.ENDC)
        return jax.devices('cpu')[0]
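A short usage sketch for get_device; committing an array to the selected device with jax.device_put is the usual next step in JAX (the array here is purely illustrative):

import jax
import jax.numpy as jnp

device = get_device(gpuID=True)   # GPU with the most free memory, or CPU fallback
x = jax.device_put(jnp.ones((1024, 1024)), device)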
Example #24
    def on_train_batch_begin(self, batch, logs=None):

        nvidia_smi.nvmlInit()
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        res1 = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        #GPUs = GPU.getGPUs()
        #gpu = GPUs[0]

        print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
Example #25
    def __init__(self, exclude_gpu_ids: list = []):
        """
        Usage:

        g = GPUAllocator()
        gpu_id = g.get_gpu()

        ## do something with gpu_id 

        g.set_as_free(gpu_id)

        """

        nvidia_smi.nvmlInit()

        self.num_gpus = nvidia_smi.nvmlDeviceGetCount()
        self.gpu_names =  []

        for i in range(self.num_gpus):
            if i in exclude_gpu_ids:
                pass
            else:
                self.gpu_names.append('cuda:' +  str(i))

        self.usage = {}

        for i in range(self.num_gpus):
            if i in exclude_gpu_ids:
                pass
            else:
                self.usage[i] = False

        """
        on a good day, this is how the variables look like: 

        self.num_gpus= 2

        self.gpu_names= [
            'cuda:0', 
            'cuda:1'
        ]

        self.usage= {
            0: False,
            1: False
        }
        
        """

        print( "[" + Colors.CYAN+ "EDEN" +Colors.END+ "] " + 'Initialized GPUAllocator with devices: ', self.gpu_names)

        """
def predict_age(file_path="/media/original/data/vtps/sub-CC00050XX01_ses-7201_hemi-L_inflated_reduce50.vtp"):
    torch.manual_seed(0)
    if osp.isfile(file_path):

        # mesh = read(file_path)
        # reader = vtk.vtkPolyDataReader()
        reader = vtk.vtkXMLPolyDataReader()
        reader.SetFileName(file_path)
        reader.Update()
        # output = reader.GetOutput()

        points = torch.tensor(np.array(reader.GetOutput().GetPoints().GetData()))

        local_features = ['corrected_thickness', 'curvature', 'sulcal_depth']


        x = get_features(local_features, reader)
        transform = T.NormalizeScale()
        # transform_samp = T.FixedPoints(10000)
        data = Data(batch=torch.zeros_like(x[:, 0]).long(), x=x, pos=points)
        data = transform(data)
        # data = transform_samp(data)
        # data = Data(batch=torch.zeros_like(x[:, 0]).long(), x=x, pos=points)
        # data = Data(x=x, pos=points)

        try:
            nvidia_smi.nvmlInit()
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            free_mem = mem_res.free / 1024 ** 2
        except:
            free_mem = 0

        device = torch.device('cuda' if torch.cuda.is_available() and free_mem >= GPU_MEM_LIMIT else 'cpu')

        numb_local_features = x.size(1)
        numb_global_features = 0

        model = Net(numb_local_features, numb_global_features).to(device)

        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
        model.eval()

        # data_loader = DataLoader([data], batch_size=1, shuffle=False)
        # print(len(data_loader))
        # pred = model(next(iter(data_loader)).to(device))
        pred = model(data.to(device))

        return pred.item()
    else:
        return 'Unable to predict..'
Example #27
def reserve(h=24):
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    free_mem = []
    for i in range(deviceCount):
        total, used, free = check_mem(i)
        free_mem.append(free)
    block_mem = int(max(free_mem) * 0.9)
    x = torch.cuda.FloatTensor(256, 1024, block_mem)
    try:
        sleep(h * 3600)
    except KeyboardInterrupt:
        print('\nMemory Released')
    del x
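reserve() relies on a check_mem(i) helper that is not shown in this excerpt; given the (total, used, free) unpacking and the FloatTensor sizing above, it presumably reports per-device memory in MiB. A minimal sketch under that assumption (nvmlInit is already called by reserve()):

def check_mem(device_index):
    """Hypothetical sketch of the helper used by reserve(): memory stats for one GPU in MiB."""
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_index)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    to_mib = 1024 ** 2
    return info.total // to_mib, info.used // to_mib, info.free // to_mib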
Example #28
def get_free_gpu_mem(gpu_index):
    nvidia_smi.nvmlInit()

    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_index)

    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)

    # logging.info("Total GPU memory: {}".format(info.total))
    # logging.info("Free GPU memory: {}".format(info.free))
    # logging.info("Used GPU memory: {}".format(info.used))

    nvidia_smi.nvmlShutdown()

    return info.free
Example #29
def watch(memory_max):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    time.sleep(1)
    if memory_max < res.used:
        memory_max = res.used
    with open(
            "/mnt/mqs02/data/ogawa/BERT/preprocess-for-BERT/conbination/NUMAS/vocab_cost/recipe/32000/memory_cost_3.txt",
            "a",
            encoding="utf-8") as f:
        result = str(memory_max) + "\n"
        f.write(result)
    print(memory_max)
    return memory_max
Example #30
    def cal_gpu_util(job):
        ct = 0
        gpu = 0
        nvidia_smi.nvmlInit()
        for key in job.gpus_loc.keys():
            for i in job.gpus_loc[key]:
                ct += 1
                handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
                res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
                gpu += res.gpu
        if ct > 0:
            return gpu / ct
        else:
            print('job has no GPUs assigned')
            return 0
Example #31
    if os.path.exists(fn):
        config = configparser.ConfigParser()
        with open(fn) as f:
            config.read_file(f)
            cfg_dev = config.get("global", "device")
            if cfg_dev is not None:
                dev = cfg_dev
    if 'THEANO_FLAGS' in os.environ:
        res = re.match(r'device=(\w+)', os.environ['THEANO_FLAGS'])
        if res:
            dev = res.group(1)
    return dev


device = get_default_device()
if device == 'gpu':
    nvidia_smi.nvmlInit()
    print "default is", device
    if device == 'gpu':
        gpu = get_gpu()
        if 'THEANO_FLAGS' in os.environ:
            flags = os.environ['THEANO_FLAGS']
        else:
            flags = ""
        os.environ['THEANO_FLAGS'] = flags + ",device=gpu%d" % gpu
        print "Using device gpu", gpu

if __name__ == "__main__":
    import theano
    print(theano.config.device)