def testListLocalDevices(self):
    """Check local device enumeration with and without an explicit config.

    Both listings must be non-empty and start with a CPU device.  When a GPU
    is reported as available, the second listing must additionally contain
    more than one device, one of which is of type "GPU" or "SYCL".
    """

    def expect_cpu_first(found):
        # Shared checks applied to each listing in turn.
        self.assertGreater(len(found), 0)
        self.assertEqual(found[0].device_type, "CPU")

    devices = device_lib.list_local_devices()
    expect_cpu_first(devices)

    devices = device_lib.list_local_devices(config_pb2.ConfigProto())
    expect_cpu_first(devices)

    # GPU test
    if not test.is_gpu_available():
        return
    self.assertGreater(len(devices), 1)
    kinds = [d.device_type for d in devices]
    self.assertTrue("GPU" in kinds or "SYCL" in kinds)
def is_gpu_available(cuda_only=False):
    """Returns whether TensorFlow can access a GPU.

    Args:
      cuda_only: limit the search to CUDA gpus.

    Returns:
      True iff a gpu device of the requested kind is available.
    """
    # Devices of type 'SYCL' only count when the search is not CUDA-only.
    accepted_types = ('GPU',) if cuda_only else ('GPU', 'SYCL')
    for local_device in _device_lib.list_local_devices():
        if local_device.device_type in accepted_types:
            return True
    return False
def get_config(model_type, prior_pi, log_sigma1, log_sigma2):
    """Get model config.

    Args:
        model_type: one of "small", "medium", "large", "test" or
            "artificial".  The historical misspelling "aritificial" is still
            accepted so existing callers keep working.
        prior_pi: stored unchanged on the returned config as ``prior_pi``.
        log_sigma1: stored unchanged on the returned config as ``log_sigma1``.
        log_sigma2: stored unchanged on the returned config as ``log_sigma2``.

    Returns:
        The configuration object for ``model_type`` with the prior settings
        and ``num_gpus`` filled in.

    Raises:
        ValueError: if ``model_type`` is not one of the names above.
    """
    print("Using Model configuration: %s" % model_type)
    if model_type == "small":
        config = SmallConfig()
    elif model_type == "medium":
        config = MediumConfig()
    elif model_type == "large":
        config = LargeConfig()
    elif model_type == "test":
        config = TestConfig()
    elif model_type in ("artificial", "aritificial"):
        config = ArtificialDataConfig()
    else:
        # Interpolate the name; passing it as a second argument would leave
        # the "%s" placeholder unformatted in the error message.
        raise ValueError("Invalid model: %s" % model_type)

    config.prior_pi = prior_pi
    config.log_sigma1 = log_sigma1
    config.log_sigma2 = log_sigma2

    # Automatically get the number of GPUs we have
    gpus = [x.name for x in device_lib.list_local_devices()
            if x.device_type == "GPU"]
    if gpus:
        config.num_gpus = len(gpus)
        print("$$$$$$$$$$$ YOU ACTUALLY HAVE GPUs DUDE $$$$$$$$$$$")
    else:
        # TODO: We need to set it to at least one.
        config.num_gpus = 1
    return config
def count_gpus():
    """Return how many local devices TensorFlow reports with type "GPU"."""
    from tensorflow.python.client import device_lib

    return sum(
        1 for device in device_lib.list_local_devices()
        if device.device_type == "GPU")
def main(args):
    """Build the networks, start one Worker per actor thread and wait for them.

    A global network is created on the CPU, followed by ``args.threads`` actor
    networks tied to it.  A single session is shared by all workers, each of
    which gets its own environment.

    Args:
        args: parsed options; this function reads ``gpu``, ``game``,
            ``threads`` and ``checkpoint_dir``.
    """
    logging.info(args)
    device = 'gpu' if args.gpu else 'cpu'

    devices = device_lib.list_local_devices()
    # Count by device type rather than by a case-sensitive '/gpu' substring of
    # the name, which misses names spelled '/device:GPU:N'.
    num_gpus = sum(1 for d in devices if d.device_type == 'GPU')

    # This environment is only used to find out how many actions exist.
    env = gym.make(args.game)
    env = Env(env, resized_width=84, resized_height=84, agent_history_length=4)
    num_actions = len(env.gym_actions)

    global_net = Network(num_actions, -1, 'cpu')
    actor_networks = []
    for t in range(args.threads):
        # Strings must be compared with ==; identity ("is") only works by the
        # accident of literal interning.
        if device == 'cpu':
            device_index = 0
        else:
            # One GPU per thread only when there are enough GPUs for all of
            # the threads; otherwise everything shares index 0.
            device_index = t if args.threads <= num_gpus else 0
        n = Network(num_actions, t, device, device_index)
        n.tie_global_net(global_net)
        actor_networks.append(n)

    sess = tf.Session(config=tf.ConfigProto(
        intra_op_parallelism_threads=args.threads,
        inter_op_parallelism_threads=args.threads))
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    threads = []
    for t, net in enumerate(actor_networks):
        e = Env(gym.make(args.game), net.width, net.height, net.depth)
        w = Worker(t, e, net, sess, saver, args.checkpoint_dir)
        w.start()
        threads.append(w)

    for t in threads:
        t.join()
def is_gpu_available(cuda_only=False, min_cuda_compute_capability=None):
    """Returns whether TensorFlow can access a GPU.

    Args:
      cuda_only: limit the search to CUDA gpus.
      min_cuda_compute_capability: a (major,minor) pair that indicates the
        minimum CUDA compute capability required, or None if no requirement.

    Returns:
      True iff a gpu device of the requested kind is available.
    """

    def capability_of(device_desc):
        """Parse (major, minor) from a device description; (0, 0) if absent."""
        # TODO(jingyue): The device description generator has to be in sync
        # with this file. Another option is to put compute capability in
        # DeviceAttributes, but I avoided that to keep DeviceAttributes
        # target-independent. Reconsider this option when we have more things
        # like this to keep in sync.
        # LINT.IfChange
        found = re.search(r"compute capability: (\d+)\.(\d+)", device_desc)
        # LINT.ThenChange(//tensorflow/core/\
        #                 common_runtime/gpu/gpu_device.cc)
        if found is None:
            return 0, 0
        major, minor = found.group(1), found.group(2)
        return int(major), int(minor)

    for local_device in device_lib.list_local_devices():
        kind = local_device.device_type
        if kind == "GPU":
            if min_cuda_compute_capability is None:
                return True
            capability = capability_of(local_device.physical_device_desc)
            if capability >= min_cuda_compute_capability:
                return True
        elif kind == "SYCL" and not cuda_only:
            return True
    return False
def setUp(self):
    """Load the rime operation library and record the local GPU device names."""
    from montblanc.impl.rime.tensorflow import load_tf_lib

    self.rime = load_tf_lib()

    # Names of all local devices of type 'GPU', e.g. ['/gpu:0', '/gpu:1', ...]
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    self.gpu_devs = gpu_names
def _get_local_devices(device_type):
    """Return the names of all local devices whose type equals `device_type`."""
    matching_names = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == device_type:
            matching_names.append(proto.name)
    return matching_names
def get_available_gpus():
    """Return the names of all local devices TensorFlow reports as 'GPU'."""
    # recipe from here:
    # https://stackoverflow.com/questions/38559755/how-to-get-current-available-gpus-in-tensorflow?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    from tensorflow.python.client import device_lib

    gpu_names = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            gpu_names.append(proto.name)
    return gpu_names
def get_available_gpus():
    """Return the names of all GPU devices visible to TensorFlow.

    This is the same helper as the one used in the train_multi_gpu script.
    A possible later change is to also return CPU device names, so that
    inference can run on machines without any GPU.

    Returns:
        list: the ``name`` of every local device whose type is 'GPU'.
    """
    all_gpu_name = []
    # The listing contains every device, CPUs included, so filter it.
    for device in device_lib.list_local_devices():
        # Open question kept from the original author: test whether inference
        # is affected by running on different devices, since the saved
        # batch-norm statistics may be device-specific; the same graph might
        # be needed after restoring a checkpoint unless all weights
        # (including batch norm) were on the CPU.
        if device.device_type == 'GPU':
            all_gpu_name.append(device.name)
    return all_gpu_name
def validate_batch_size_for_multi_gpu(batch_size):
    """Check that `batch_size` divides evenly across the local GPUs.

    Note that this should eventually be handled by replicate_model_fn
    directly. Multi-GPU support is currently experimental, however, so the
    check lives here until that feature is in place.

    Args:
      batch_size: the number of examples processed in each training batch.

    Raises:
      ValueError: if no GPUs are found, or selected batch_size is invalid.
    """
    from tensorflow.python.client import device_lib  # pylint: disable=g-import-not-at-top

    num_gpus = 0
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            num_gpus += 1

    if num_gpus == 0:
        raise ValueError('Multi-GPU mode was specified, but no GPUs '
                         'were found. To use CPU, run without --multi_gpu.')

    leftover = batch_size % num_gpus
    if not leftover:
        return

    # Suggest the nearest smaller batch size that divides evenly.
    message = ('When running with multiple GPUs, batch size '
               'must be a multiple of the number of available GPUs. '
               'Found {} GPUs with a batch size of {}; try --batch_size={} instead.'
               ).format(num_gpus, batch_size, batch_size - leftover)
    raise ValueError(message)
def get_available_gpus():
    """
    Returns a list of the identifiers of all visible GPUs.
    """
    from tensorflow.python.client import device_lib

    visible = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            visible.append(proto.name)
    return visible
def get_nr_gpu():
    """
    Returns:
        int: #available GPUs in CUDA_VISIBLE_DEVICES, or in the system.

    The sources are tried in this order: the ``CUDA_VISIBLE_DEVICES``
    environment variable, the output of ``nvidia-smi -L``, NVML, and finally
    TensorFlow's own device listing.
    """
    env = os.environ.get('CUDA_VISIBLE_DEVICES', None)
    if env is not None:
        # Count only non-empty entries: an empty variable means "no GPU
        # visible", whereas ''.split(',') has length 1.
        return len([entry for entry in env.split(',') if entry.strip()])

    output, code = subproc_call("nvidia-smi -L", timeout=5)
    if code == 0:
        output = output.decode('utf-8')
        return len(output.strip().split('\n'))

    try:
        # Use NVML to query device properties
        with NVMLContext() as ctx:
            return ctx.num_devices()
    except Exception:
        # Fallback
        # Note this will initialize all GPUs and therefore has side effect
        # https://github.com/tensorflow/tensorflow/issues/8136
        logger.info("Loading local devices by TensorFlow ...")
        from tensorflow.python.client import device_lib
        local_device_protos = device_lib.list_local_devices()
        return len([x.name for x in local_device_protos
                    if x.device_type == 'GPU'])
def get_num_gpus(flags_obj):
    """Return the requested GPU count, treating num_gpus=-1 as 'use all'.

    Any value other than -1 is returned as given; for -1 the local devices of
    type "GPU" are counted instead.
    """
    requested = flags_obj.num_gpus
    if requested != -1:
        return requested

    from tensorflow.python.client import device_lib  # pylint: disable=g-import-not-at-top

    gpu_count = 0
    for device in device_lib.list_local_devices():
        if device.device_type == "GPU":
            gpu_count += 1
    return gpu_count
def get_available_gpus(ngpus=-1):
    '''
    List the names of the local GPU devices, optionally truncated.

    :param int ngpus: GPUs max to use. Default -1 means all gpus.
    :returns: List of gpu devices. Ex.: ['/gpu:0', '/gpu:1', ...]
    '''
    gpus_list = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            gpus_list.append(proto.name)

    if ngpus > -1:
        return gpus_list[:ngpus]
    return gpus_list
def _testListLocalDevices(self):
    """Check that the device listing starts with a CPU and, on CUDA builds, has a GPU."""
    devices = device_lib.list_local_devices()
    self.assertGreater(len(devices), 0)
    self.assertEqual(devices[0].device_type, "CPU")

    # GPU test
    if not tf.test.is_built_with_cuda():
        return
    self.assertGreater(len(devices), 1)
    device_types = [d.device_type for d in devices]
    self.assertTrue("GPU" in device_types)
def all_gpus():
    """
    Get integer ids of all available GPUs
    """
    gpu_ids = []
    for proto in device_lib.list_local_devices():
        if proto.device_type != 'GPU':
            continue
        # The id is whatever follows the last ':' in the device name.
        suffix = proto.name.rsplit(':', 1)[-1]
        gpu_ids.append(int(suffix))
    return gpu_ids
def is_gpu_available():
    """Return True if TensorFlow reports at least one local 'GPU' device.

    When GPUs are found their device names are printed before returning.

    Returns:
        bool: True if one or more GPU devices are listed, False otherwise.
    """
    from tensorflow.python.client import device_lib

    local_device_protos = device_lib.list_local_devices()
    gpu_list = [x.name for x in local_device_protos if x.device_type == 'GPU']
    # A length can never be negative, so a "len(...) < 0" test would make this
    # function always return False; test for a non-empty list instead.
    if gpu_list:
        print("Tensorflow GPU:", gpu_list)
        return True
    return False
def _check_gpu_existence():
    r"""Return True if at least one GPU available"""
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    try:
        # The devices are listed while a session with allow_growth is open.
        with tf.Session(config=sess_config):
            for device in device_lib.list_local_devices():
                if device.device_type == 'GPU':
                    return True
            return False
    except AttributeError as e:
        log.warning(f'Got an AttributeError `{e}`, assuming documentation building')
        return False
def get_available_gpus(session_config=None):
    """Return the names of the local 'GPU' devices, listed under `session_config`.

    When no config is given, the one held by ``get_session()`` is used.
    """
    # based on recipe from https://stackoverflow.com/a/38580201
    # Unless we allocate a session here, subsequent attempts to create one
    # will ignore our custom config (in particular, allow_growth=True will have
    # no effect).
    if session_config is None:
        session_config = get_session()._config

    from tensorflow.python.client import device_lib

    gpu_names = []
    for proto in device_lib.list_local_devices(session_config):
        if proto.device_type == 'GPU':
            gpu_names.append(proto.name)
    return gpu_names
def xla_device_name():
    """Return the name of the preferred local XLA device as a ``str``.

    Device types are tried in the order "TPU", "XLA_GPU", "XLA_CPU"; the first
    device found for the first type that yields a name wins.

    Raises:
      ValueError: if none of those device types is present.
    """
    devices = device_lib.list_local_devices()

    def first_name_of(device_type):
        # Name of the first listed device of this type, or None.
        matches = (d.name for d in devices if d.device_type == device_type)
        return next(matches, None)

    name = None
    for preferred in ("TPU", "XLA_GPU", "XLA_CPU"):
        name = first_name_of(preferred)
        if name:
            break

    if name is None:
        raise ValueError(
            "Can't find any XLA device. Available devices:\n%s" % devices)
    return str(name)
def _train_batch_sizes(self):
    """Choose batch sizes based on GPU capability.

    Returns:
      A tuple of batch sizes selected from the first local device that
      matches one of the known cases, or (16, 32) if none does.
    """
    for device in device_lib.list_local_devices():
        kind = tf.DeviceSpec.from_string(device.name).device_type
        if kind == 'TPU':
            return (32,)
        if kind != 'GPU':
            continue
        description = device.physical_device_desc
        if 'K20' in description:
            return (16,)
        if 'P100' in description:
            return (16, 32, 64)
    return (16, 32)
def get_nr_gpu():
    """
    Returns:
        int: the number of GPU from ``CUDA_VISIBLE_DEVICES``.

    If the variable is not set, the GPUs are counted from TensorFlow's local
    device listing instead.
    """
    env = os.environ.get('CUDA_VISIBLE_DEVICES', None)
    if env is not None:
        # Count only non-empty entries: an empty variable means "no GPU
        # visible", whereas ''.split(',') has length 1.
        return len([entry for entry in env.split(',') if entry.strip()])

    logger.info("Loading local devices by TensorFlow ...")
    from tensorflow.python.client import device_lib
    device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in device_protos if x.device_type == 'GPU']
    return len(gpus)
def _train_batch_sizes(self):
    """Choose batch sizes based on GPU capability.

    Only devices whose name contains 'GPU:0' are inspected; (16, 32) is the
    fallback when no known GPU model is recognised.
    """
    for device in device_lib.list_local_devices():
        if 'GPU:0' not in device.name:
            continue
        # Avoid OOM errors with larger batch sizes, which seem to cause errors
        # later on even if caught.
        #
        # TODO(allenl): Base this on device memory; memory limit information
        # during the test seems to exclude the amount TensorFlow has allocated,
        # which isn't useful.
        description = device.physical_device_desc
        if 'K20' in description:
            return (16,)
        if 'P100' in description:
            return (16, 32, 64)
    return (16, 32)
def _collect_gpu_info(run_info):
    """Collect local GPU information by TF device library.

    Stores a dict with the GPU "count" and, when at least one GPU exists, its
    "model" under ``run_info["machine_config"]["gpu_info"]``.
    """
    gpu_protos = [d for d in device_lib.list_local_devices()
                  if d.device_type == "GPU"]
    gpu_info = {"count": len(gpu_protos)}

    if gpu_protos:
        # The device description is a string that contains the GPU model
        # info, eg:
        # "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0"
        # Assume all the GPU connected are same model, so the first suffices.
        first_desc = gpu_protos[0].physical_device_desc
        gpu_info["model"] = _parse_gpu_model(first_desc)

    run_info["machine_config"]["gpu_info"] = gpu_info
def versions(list_devices=False):
    """
    Prints system info and version strings for finicky libraries.

    When `list_devices` is true, the local TensorFlow device listing is
    printed as well.
    """
    import keras
    import tensorflow as tf
    import h5py
    import platform

    print("Platform:", platform.platform())
    # numpy is not printed separately: h5py's info already reports it.
    print("h5py:\n" + h5py.version.info)
    for label, module in (("Keras:", keras), ("Tensorflow:", tf)):
        print(label, str(module.__version__))

    if not list_devices:
        return
    from tensorflow.python.client import device_lib
    print("Devices:\n" + str(device_lib.list_local_devices()))
def gather_available_device_info():
    """Gather list of devices available to TensorFlow.

    Returns:
      A list of test_log_pb2.AvailableDeviceInfo messages.
    """

    def describe(device):
        # Copy the fields of one listed device into a new message.
        info = test_log_pb2.AvailableDeviceInfo()
        info.name = device.name
        info.type = device.device_type
        info.memory_limit = device.memory_limit
        info.physical_description = device.physical_device_desc
        return info

    return [describe(device) for device in device_lib.list_local_devices()]
def _train_batch_sizes(self):
    """Shamelessly copied from `resnet50_test.py`.

    Note: This is targeted towards ImageNet. CIFAR-10 should allow more
    aggressive batch sizes.

    Returns:
      A tuple of possible batch sizes
    """
    for device in device_lib.list_local_devices():
        kind = tf.DeviceSpec.from_string(device.name).device_type
        if kind == "TPU":
            return (32,)
        if kind != "GPU":
            continue
        description = device.physical_device_desc
        if "K20" in description:
            return (16,)
        if "P100" in description:
            return (16, 32, 64)
    return (16, 32)
def _train_batch_sizes(self):
    """Choose batch sizes based on GPU capability.

    Returns:
      A tuple of batch sizes selected from the first local device that
      matches one of the known cases, or (16, 32) if none does.
    """
    for device in device_lib.list_local_devices():
        kind = tf.DeviceSpec.from_string(device.name).device_type
        if kind == 'TPU':
            # TODO(iga): Training fails with batch size of 16, probably because
            # of no layout optimizations with op-by-op mode. Investigate more.
            return (8,)
        if kind != 'GPU':
            continue
        # Avoid OOM errors with larger batch sizes, which seem to cause errors
        # later on even if caught.
        #
        # TODO(allenl): Base this on device memory; memory limit information
        # during the test seems to exclude the amount TensorFlow has allocated,
        # which isn't useful.
        description = device.physical_device_desc
        if 'K20' in description:
            return (16,)
        if 'P100' in description:
            return (16, 32, 64)
    return (16, 32)
def testMultiGPUSessionRun(self):
    """Run a graph placed on two GPUs with debug watches and check the dumps.

    The test is skipped when fewer than two local GPU devices are listed.
    """
    gpu_device_names = sorted(
        device.name for device in device_lib.list_local_devices()
        if device.device_type == "GPU")
    gpu_count = len(gpu_device_names)
    if gpu_count < 2:
        self.skipTest(
            "This test requires at least 2 GPUs, but only %d is available." %
            gpu_count)

    with session.Session() as sess:
        v = variables.Variable([10.0, 15.0], dtype=dtypes.float32, name="v")
        with ops.device(gpu_device_names[0]):
            u0 = math_ops.add(v, v, name="u0")
        with ops.device(gpu_device_names[1]):
            u1 = math_ops.multiply(v, v, name="u1")
        w = math_ops.subtract(u1, u0, name="w")

        sess.run(v.initializer)

        run_options = config_pb2.RunOptions(output_partition_graphs=True)
        debug_utils.watch_graph(run_options, sess.graph,
                                debug_urls="file://" + self._dump_root)
        run_metadata = config_pb2.RunMetadata()
        result = sess.run(w, options=run_options, run_metadata=run_metadata)
        self.assertAllClose([80.0, 195.0], result)

        debug_dump_dir = debug_data.DebugDumpDir(
            self._dump_root, partition_graphs=run_metadata.partition_graphs)
        self.assertEqual(3, len(debug_dump_dir.devices()))

        # Expected dumped value for each watched node, checked in turn.
        expected_dumps = (
            ("v", [10.0, 15.0]),
            ("u0", [20.0, 30.0]),
            ("u1", [100.0, 225.0]),
        )
        for node_name, expected in expected_dumps:
            dumped = debug_dump_dir.get_tensors(node_name, 0, "DebugIdentity")
            self.assertAllClose(expected, dumped[0])
def get_available_gpus():
    """Print how many local devices TensorFlow reports with type 'GPU'.

    Despite the name, nothing is returned; the count is only printed.
    """
    count = 0
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            count += 1
    print("Number of GPU : ", count)
def get_cpu_devices():
    """Return the names of all local devices TensorFlow reports as 'CPU'."""
    cpu_devices = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'CPU':
            cpu_devices.append(proto.name)
    return cpu_devices
# cnn_line_lstm_ctc: build a Keras model that maps a line image to a CTC loss
# and a CTC-decoded output.
#
# Inputs declared by the model: 'image' (shape input_shape), 'y_true'
# (shape (output_length,)), 'input_length' and 'label_length' (shape (1,)).
# The image is reshaped to add a channel axis and passed through five Conv2D
# stages (16, 32, 48, 64, 80 filters), each followed by BatchNormalization and
# LeakyReLU; the first three stages are followed by MaxPooling2D(2, 2).
# The result is permuted, reshaped to [num_windows, 240] with num_windows
# hard-coded to 119, and fed through bidirectional LSTM and dropout layers
# into a softmax Dense layer of num_classes units.
# input_length is multiplied by num_windows before being passed to
# K.ctc_batch_cost and ctc_decode.
# Returns a KerasModel with inputs [image, y_true, input_length, label_length]
# and outputs [ctc_loss_output, ctc_decoded_output].
#
# NOTE(review): gpu_present is true whenever more than one local device is
#   listed, whatever its type; presumably a GPU is meant - confirm, since
#   CuDNNLSTM is selected on that basis.
# NOTE(review): the inline comment says "(num_windows, 128)" but the Reshape
#   target is [num_windows, 240]; confirm which is intended.
# NOTE(review): the extent of the `for i in range(5)` loop body cannot be
#   determined from this collapsed text; verify against the original layout.
# NOTE(review): **kwargs is not used anywhere in the visible body.
def cnn_line_lstm_ctc(input_shape, output_shape, **kwargs): image_height, image_width = input_shape output_length, num_classes = output_shape image_input = Input(shape=input_shape, name='image') y_true = Input(shape=(output_length, ), name='y_true') input_length = Input(shape=(1, ), name='input_length') label_length = Input(shape=(1, ), name='label_length') gpu_present = len(device_lib.list_local_devices()) > 1 lstm_fn = CuDNNLSTM if gpu_present else LSTM ##### Your code below (Lab 3) image_reshaped = Reshape((image_height, image_width, 1))(image_input) # (image_height, image_width, 1) convnet_outputs = image_reshaped # convnet_outputs = Dropout(0.5)(convnet_outputs) convnet_outputs = Conv2D(16, 3, padding='SAME')(convnet_outputs) convnet_outputs = BatchNormalization()(convnet_outputs) convnet_outputs = LeakyReLU()(convnet_outputs) convnet_outputs = MaxPooling2D(2, 2)(convnet_outputs) # convnet_outputs = Dropout(0.5)(convnet_outputs) convnet_outputs = Conv2D(32, 3, padding='SAME')(convnet_outputs) convnet_outputs = BatchNormalization()(convnet_outputs) convnet_outputs = LeakyReLU()(convnet_outputs) convnet_outputs = MaxPooling2D(2, 2)(convnet_outputs) convnet_outputs = Dropout(0.2)(convnet_outputs) convnet_outputs = Conv2D(48, 3, padding='SAME')(convnet_outputs) convnet_outputs = BatchNormalization()(convnet_outputs) convnet_outputs = LeakyReLU()(convnet_outputs) convnet_outputs = MaxPooling2D(2, 2)(convnet_outputs) convnet_outputs = Dropout(0.2)(convnet_outputs) convnet_outputs = Conv2D(64, 3, padding='SAME')(convnet_outputs) convnet_outputs = BatchNormalization()(convnet_outputs) convnet_outputs = LeakyReLU()(convnet_outputs) convnet_outputs = Dropout(0.2)(convnet_outputs) convnet_outputs = Conv2D(80, 3, padding='SAME')(convnet_outputs) convnet_outputs = BatchNormalization()(convnet_outputs) convnet_outputs = LeakyReLU()(convnet_outputs) num_windows = 119 convnet_outputs = Permute([2, 1, 3])(convnet_outputs) convnet_outputs = Reshape([num_windows, 
240])(convnet_outputs) # (num_windows, 128) for i in range(5): convnet_outputs = Dropout(0.5)(convnet_outputs) lstm_output = Bidirectional(lstm_fn( 256, return_sequences=True))(convnet_outputs) lstm_output = Dropout(0.5)(lstm_output) softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output) # (num_windows, num_classes) ##### Your code above (Lab 3) input_length_processed = Lambda( lambda x, num_windows=None: x * num_windows, arguments={'num_windows': num_windows})(input_length) ctc_loss_output = Lambda( lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss')( [y_true, softmax_output, input_length_processed, label_length]) ctc_decoded_output = Lambda( lambda x: ctc_decode(x[0], x[1], output_length), name='ctc_decoded')([softmax_output, input_length_processed]) model = KerasModel( inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output]) return model
version: Parallelized sampling on CPU C++ evaluation for top-k recommendation ''' import os import sys import threading import tensorflow as tf from tensorflow.python.client import device_lib from utility.helper import * from utility.batch_test import * os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' cpus = [ x.name for x in device_lib.list_local_devices() if x.device_type == 'CPU' ] class LightGCN(object): def __init__(self, data_config, pretrain_data): # argument settings self.model_type = 'LightGCN' self.adj_type = args.adj_type self.alg_type = args.alg_type self.pretrain_data = pretrain_data self.n_users = data_config['n_users'] self.n_items = data_config['n_items'] self.n_fold = 100 self.norm_adj = data_config['norm_adj'] self.n_nonzero_elems = self.norm_adj.count_nonzero()
def get_available_gpus():
    """Return the names of all local devices TensorFlow reports as 'GPU'."""
    # WARNING: this method will take all the memory on all devices!
    gpu_names = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            gpu_names.append(proto.name)
    return gpu_names
from __future__ import print_function import tensorflow as tf import numpy as np from model import SEGAN, SEAE import os from tensorflow.python.client import device_lib from scipy.io import wavfile from data_loader import pre_emph devices = device_lib.list_local_devices() flags = tf.app.flags flags.DEFINE_integer("seed", 111, "Random seed (Def: 111).") flags.DEFINE_integer("epoch", 150, "Epochs to train (Def: 150).") flags.DEFINE_integer("batch_size", 150, "Batch size (Def: 150).") flags.DEFINE_integer("save_freq", 500, "Batch save freq (Def: 500).") flags.DEFINE_integer("canvas_size", 2**14, "Canvas size (Def: 2^14).") flags.DEFINE_integer("denoise_epoch", 5, "Epoch where noise in disc is " "removed (Def: 5).") flags.DEFINE_integer("l1_remove_epoch", 150, "Epoch where L1 in G is " "removed (Def: 150).") flags.DEFINE_boolean("bias_deconv", False, "Flag to specify if we bias deconvs (Def: False)") flags.DEFINE_boolean("bias_downconv", False, "flag to specify if we bias downconvs (def: false)") flags.DEFINE_boolean("bias_D_conv", False, "flag to specify if we bias D_convs (def: false)") # TODO: noise decay is under check flags.DEFINE_float("denoise_lbound", 0.01, "Min noise std to be still alive (Def: 0.001)") flags.DEFINE_float("noise_decay", 0.7, "Decay rate of noise std (Def: 0.7)")
def get_available_gpus():
    """Return the physical device description of every local 'GPU' device.

    Note that the descriptions are returned, not the device names.
    """
    from tensorflow.python.client import device_lib

    descriptions = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            descriptions.append(proto.physical_device_desc)
    return descriptions
def get_available_gpus():
    """
    Returns a list of string names of all available GPUs
    """
    gpu_names = []
    for proto in device_lib.list_local_devices():
        if proto.device_type == 'GPU':
            gpu_names.append(proto.name)
    return gpu_names
# train_lstm: build the graph and run the training loop.
#
# Steps visible in the code below:
#   1. Create the zero-initialised float64 variables 'weight', 'bias' and
#      'U_weight', then call self.build_lstm().
#   2. If self.__tf_saver is not None, resume: load the training state from
#      lstm_config.SAVED_LSTM + '.pickle' and, when the saved batch count
#      equals self.max_train_triplet_count, start at the next epoch.
#      Otherwise call self.__prepare_files() and start from zero-filled
#      states and attention parameters at epoch 0.
#   3. List the local devices and remember the name of a CPU device, which is
#      used as the tf.device scope for self.gen_training_batches().
#   4. Inside a tf.Session, loop over epochs and batches: run the optimizer
#      with the current states fed in, write the updated state to
#      lstm_config.NEW_LSTM + '.pickle', write an extra snapshot under
#      lstm_config.NEW_LSTM_FILE_STACK when the batch count is in
#      self.save_batches, and save the network with tf.train.Saver().
#   5. After training, look up a process id from `ps ax | grep train.sh` and
#      kill it.
#
# NOTE(review): cpu_device is assigned only inside the loop over devices; if
#   the device list were empty the later `if cpu_device:` would find it
#   unbound - confirm the list is never empty.
# NOTE(review): the 'memory cost' debug message logs an elapsed time measured
#   with time.time(), not a memory figure.
# NOTE(review): pickle.load must only be used on trusted state files.
# NOTE(review): the first pid matched by the grep may not be the intended
#   process - verify before relying on the final kill.
# NOTE(review): block nesting cannot be recovered from this collapsed text;
#   verify against the original layout before restructuring.
def train_lstm(self): ''' Train the graph ''' self.W = tf.get_variable('weight', initializer=np.zeros((self.attention_length, 2 * self.state_size)), dtype=tf.float64) self.b = tf.get_variable('bias', initializer=np.zeros( (self.attention_length, self.max_sentence_length)), dtype=tf.float64) self.Uw = tf.get_variable('U_weight', initializer=np.zeros( (self.attention_length, 1)), dtype=tf.float64) self.build_lstm() # Initialise the LSTM states if self.__tf_saver is not None: with open(lstm_config.SAVED_LSTM + '.pickle', 'rb') as f: [self.__train_files, self.__file_trace, batch_count,last_epoch,external_weight, weight_attn, bias_attn, weightf, weightb, \ _statex_fw,_statex_bw , _statexp_fw,_statexp_bw,_statexm_fw,_statexm_bw,_,_] = pickle.load(f) self.__batch_count = batch_count if self.__batch_count == self.max_train_triplet_count: # last epoch was complete self.__batch_count = 0 last_epoch += 1 logging.debug('{} : RESUMING THE TRAINING '.format( datetime.now().strftime("%Y-%m-%d;%H:%M:%S"))) else: self.__prepare_files() zero_state = np.zeros((2, self.batch_size, self.state_size)) _statex_fw = zero_state _statex_bw = zero_state _statexp_fw = zero_state _statexp_bw = zero_state _statexm_fw = zero_state _statexm_bw = zero_state weight_attn = np.zeros( (self.attention_length, 2 * self.state_size)) bias_attn = np.zeros( (self.attention_length, self.max_sentence_length)) external_weight = np.zeros((self.attention_length, 1)) last_epoch = 0 logging.debug( '{} : NO PRE-TRAINED DATA FOUND. STARTING FRESH TRAINING '. 
format(datetime.now().strftime("%Y-%m-%d;%H:%M:%S"))) logging.debug('{} : Starting training with the files : {} , {}'\ .format(datetime.now().strftime("%Y-%m-%d;%H:%M:%S"),self.__file_trace['sentence_file'], self.__file_trace['title_file'])) logging.info('{} : Save batches : {}'.format( datetime.now().strftime("%Y-%m-%d;%H:%M:%S"), self.save_batches)) devices = device_lib.list_local_devices() logging.info('detected devices : {}'.format(devices)) for dev in devices: cpu_device = "" if dev.device_type == "CPU": cpu_device = dev.name logging.info( 'Generating batches with CPU : {}'.format(cpu_device)) break bar1 = ETA(self.num_epochs * self.max_train_triplet_count) bar1.start() with tf.Session( ) as sess: # config=tf.ConfigProto(log_device_placement=True) tf.global_variables_initializer().run() merged_summary = tf.summary.merge_all() #writer = tf.summary.FileWriter("/tmp/TensorBoard", sess.graph) for i in range(last_epoch, self.num_epochs): self.end_of_sen_data = False self.end_of_title_data = False self.__end_of_data = False while not self.__end_of_data: t1 = time.time() if cpu_device: with tf.device(cpu_device): self.gen_training_batches() else: self.gen_training_batches() logging.debug('memory cost : {}'.format( round(time.time() - t1, 3))) if not self.restore: self.restore = True for in_x in self.__x_batch: sess.run(self.outputx,\ feed_dict={self.batchX_placeholder : in_x,\ self.statex_fw_placeholder : _statex_fw,\ self.statex_bw_placeholder : _statex_bw }) self.cellx_fw.set_weights(weightf) self.cellx_bw.set_weights(weightb) if len(np.array(self.__x_batch).shape) != 4: continue if np.array(self.__x_batch).shape[1] != self.batch_size: continue t1 = time.time() for in_x, in_xp, in_xm in zip(self.__x_batch, self.__xp_batch, self.__xm_batch): external_weight,weight_attn,bias_attn,loss,_, _statex_fw, _statex_bw, _statexp_fw, _statexp_bw, \ _statexm_fw, _statexm_bw,accuracy = \ sess.run([self.Uw,self.W,self.b,self.loss, self.optimizer, self.statex_fw, self.statex_bw,\ 
self.statexp_fw, self.statexp_bw, self.statexm_fw, self.statexm_bw,self.accuracy],\ feed_dict={self.batchX_placeholder : in_x,\ self.batchXp_placeholder : in_xp,\ self.batchXm_placeholder : in_xm,\ self.statex_fw_placeholder : _statex_fw,\ self.statex_bw_placeholder : _statex_bw,\ self.statexp_fw_placeholder : _statexp_fw, \ self.statexp_bw_placeholder : _statexp_bw,\ self.statexm_fw_placeholder : _statexm_fw, \ self.statexm_bw_placeholder : _statexm_bw,\ self.W_placeholder : weight_attn,\ self.b_placeholder : bias_attn,\ self.Uw_placeholder : external_weight}) logging.debug('training cost : {}'.format( round(time.time() - t1, 3))) eta = bar1.update(self.max_train_triplet_count * i + self.__batch_count) logging.info('{} : {} % progress ---- ETA : {} '.format( datetime.now().strftime("%Y-%m-%d;%H:%M:%S"), eta[0], eta[1])) weightf = self.cellx_fw.get_weights() weightb = self.cellx_bw.get_weights() t1 = time.time() with open(lstm_config.NEW_LSTM + '.pickle', 'wb') as f: pickle.dump([self.__train_files, self.__file_trace, self.__batch_count,i, external_weight, weight_attn, bias_attn,\ weightf, weightb, _statex_fw,_statex_bw ,_statexp_fw,_statexp_bw,_statexm_fw,_statexm_bw,loss,accuracy], f) logging.debug('Data saving cost : {}'.format( round(time.time() - t1, 3))) logging.debug('{} : saving network with "batch count = {}"'.\ format(datetime.now().strftime("%Y-%m-%d;%H:%M:%S"),self.__batch_count)) if self.__batch_count in self.save_batches: os.system('mkdir -p ' + lstm_config.NEW_LSTM_FILE_STACK + 'epoch_' + str(i)) with open( lstm_config.NEW_LSTM_FILE_STACK + 'epoch_' + str(i) + '/model_' + str(self.__batch_count) + '.pickle', 'wb') as f: pickle.dump([self.__train_files, self.__file_trace, self.__batch_count,i, external_weight, weight_attn, bias_attn,\ weightf, weightb, _statex_fw,_statex_bw ,_statexp_fw,_statexp_bw,_statexm_fw,_statexm_bw,loss,accuracy], f) logging.debug('{} : saving network in datastack with "batch count = {}"'.\ 
format(datetime.now().strftime("%Y-%m-%d;%H:%M:%S"),self.__batch_count)) # Save the network tf.train.Saver().save(sess, lstm_config.NEW_LSTM) self.__batch_count = 0 # reset the batch count #tf.add_summary(summary,i) #writer.close() eta = bar1.finish() logging.info('{} : {} % progress ---- ETA : {} '.format( datetime.now().strftime("%Y-%m-%d;%H:%M:%S"), eta[0], eta[1])) print('Oh. Finally, its done !') pid = subprocess.check_output( 'ps ax | grep train.sh', shell=True).split()[0].decode('utf-8') os.system('kill ' + pid)
ar_save_model_path = path + "model" if ar_save_loss_path is not None: file1 = open(ar_save_loss_path, "w") file1.write("Params: {} \n".format(results)) file1.write("Losses: \n") file1.close() depth = arg_win_size height = 64 width = 64 num_classes = 27 os.environ['CUDA_VISIBLE_DEVICES'] = "{}".format(arg_visible_devices) os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2" print(device_lib.list_local_devices()) choose_device = "/device:GPU:{}".format(arg_num_device) with tf.device(choose_device): x_inpuT = tf.placeholder(tf.float32, shape=[arg_batch_size, depth, height, width, 3]) y_inpuT = tf.placeholder(tf.float32, shape=[arg_batch_size, num_classes]) train_neural_network(x_inpuT, y_inpuT, data_path, val_data_path, save_loss_path=ar_save_loss_path, save_model_path=ar_save_model_path, batch_size=arg_batch_size, learning_rate=arg_lr, weight_decay=arg_wd, epochs=arg_epochs, val_batch_size=arg_val_batch_size,
def main(_):
    """Train, validate and test the PTB model.

    The Train/Valid/Test models are built in one graph and exported through a
    metagraph, which is passed to ``util.auto_parallel`` when more than one
    GPU is requested.  The metagraph is then imported into a fresh graph and
    the epoch loop runs inside a ``tf.train.Supervisor`` managed session.

    Raises:
      ValueError: if --data_path is unset, if more GPUs are requested than
        are present, or if several GPUs are requested on a TensorFlow version
        below 1.1.0.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    local_gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == "GPU":
            local_gpu_names.append(device.name)
    gpu_total = len(local_gpu_names)
    if FLAGS.num_gpus > gpu_total:
        raise ValueError(
            "Your machine has only %d gpus "
            "which is less than the requested --num_gpus=%d."
            % (gpu_total, FLAGS.num_gpus))

    train_data, valid_data, test_data, _ = reader.ptb_raw_data(FLAGS.data_path)

    config = get_config()
    # The evaluation config differs only in batch size and step count.
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config, data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model", reuse=None,
                                   initializer=initializer):
                train_model = PTBModel(is_training=True, config=config,
                                       input_=train_input)
            tf.summary.scalar("Training Loss", train_model.cost)
            tf.summary.scalar("Learning Rate", train_model.lr)

        with tf.name_scope("Valid"):
            valid_input = PTBInput(config=config, data=valid_data,
                                   name="ValidInput")
            with tf.variable_scope("Model", reuse=True,
                                   initializer=initializer):
                valid_model = PTBModel(is_training=False, config=config,
                                       input_=valid_input)
            tf.summary.scalar("Validation Loss", valid_model.cost)

        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config, data=test_data,
                                  name="TestInput")
            with tf.variable_scope("Model", reuse=True,
                                   initializer=initializer):
                test_model = PTBModel(is_training=False, config=eval_config,
                                      input_=test_input)

        models = {"Train": train_model, "Valid": valid_model,
                  "Test": test_model}
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()

        if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
            raise ValueError(
                "num_gpus > 1 is not supported for TensorFlow versions "
                "below 1.1.0")

        # Soft placement is enabled exactly when several GPUs are requested.
        soft_placement = FLAGS.num_gpus > 1
        if soft_placement:
            util.auto_parallel(metagraph, train_model)

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()

        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        with sv.managed_session(config=config_proto) as session:
            for epoch in range(config.max_max_epoch):
                epoch_number = epoch + 1
                decay_exponent = max(epoch_number - config.max_epoch, 0.0)
                lr_decay = config.lr_decay ** decay_exponent
                train_model.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" %
                      (epoch_number, session.run(train_model.lr)))
                train_perplexity = run_epoch(session, train_model,
                                             eval_op=train_model.train_op,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" %
                      (epoch_number, train_perplexity))
                valid_perplexity = run_epoch(session, valid_model)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (epoch_number, valid_perplexity))

            test_perplexity = run_epoch(session, test_model)
            print("Test Perplexity: %.3f" % test_perplexity)

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session, FLAGS.save_path,
                              global_step=sv.global_step)
def get_available_gpus():
    """Return the names of all local devices whose device type is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

    This will only be called once in the life of
    a training model, because after the graph is created the model will be
    restored from a meta graph file rather than being recreated.

    The graph is built as one "tower" per selected GPU (or a single CPU tower
    when no GPU is found); per-tower gradients are combined before being
    applied. Results are published through graph collections ("global_step",
    "loss", "predictions", "input_batch_raw", "input_batch", "num_frames",
    "labels", "train_op"); the function itself returns nothing.

    Args:
        reader: The data file reader. It should inherit from BaseReader.
        model: The core model (e.g. logistic or neural net). It should inherit
            from BaseModel.
        train_data_pattern: glob path to the training data files.
        label_loss_fn: What kind of loss to apply to the model. It should
            inherit from BaseLoss.
        batch_size: How many examples to process at a time. This is the
            per-tower size; the input pipeline is asked for
            batch_size * num_towers examples.
        base_learning_rate: What learning rate to initialize the optimizer
            with.
        learning_rate_decay_examples: Passed as the decay-steps argument of
            tf.train.exponential_decay, measured against
            global_step * batch_size * num_towers.
        learning_rate_decay: Decay rate passed to tf.train.exponential_decay
            (staircase mode).
        optimizer_class: Which optimization algorithm to use.
        clip_gradient_norm: Magnitude of the gradient to clip to. Clipping is
            skipped when this is not positive.
        regularization_penalty: How much weight to give the regularization
            loss compared to the label loss.
        num_readers: How many threads to use for I/O operations.
        num_epochs: How many passes to make over the data. 'None' means an
            unlimited number of passes.
    """
    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == "GPU"]
    # Use at most FLAGS.num_gpu of the detected GPUs.
    gpus = gpus[:FLAGS.num_gpu]
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = "/gpu:%d"
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = "/cpu:%d"

    learning_rate = tf.train.exponential_decay(
        base_learning_rate,
        global_step * batch_size * num_towers,
        learning_rate_decay_examples,
        learning_rate_decay,
        staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)

    optimizer = optimizer_class(learning_rate)
    # One combined batch is read and later split evenly across the towers.
    input_data_dict = (get_input_data_tensors(reader,
                                              train_data_pattern,
                                              batch_size=batch_size * num_towers,
                                              num_readers=num_readers,
                                              num_epochs=num_epochs))
    model_input_raw = input_data_dict["video_matrix"]
    labels_batch = input_data_dict["labels"]
    num_frames = input_data_dict["num_frames"]
    print("model_input_shape, ", model_input_raw.shape)
    tf.summary.histogram("model/input_raw", model_input_raw)

    # L2-normalize along the last axis of the input.
    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            # Towers after the first reuse the variables created by tower 0.
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)):
                # Model variables are placed on the CPU unless exactly one GPU
                # is in use, in which case they are placed on that GPU.
                with (slim.arg_scope(
                        [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    result = model.create_model(tower_inputs[i],
                                                num_frames=tower_num_frames[i],
                                                vocab_size=reader.num_classes,
                                                labels=tower_labels[i])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    # A model may supply its own loss; otherwise fall back to
                    # label_loss_fn.
                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions, tower_labels[i])

                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in batch normalization) as
                    # a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(
                        final_loss, colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)
    # Report the mean of the per-tower losses.
    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)
    merged_gradients = utils.combine_gradients(tower_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope("clip_grads"):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    # Publish the tensors/ops through named graph collections.
    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14):
    """Build a sliding-window CNN + LSTM line recognizer trained with CTC loss.

    Args:
        input_shape: ``(image_height, image_width)`` of the input line image.
        output_shape: ``(output_length, num_classes)`` of the label sequence.
        window_width: Width of each window slid over the image.
        window_stride: Horizontal step between consecutive windows.

    Returns:
        A Keras model with inputs ``[image, y_true, input_length,
        label_length]`` and outputs ``[ctc_loss, ctc_decoded]``.

    Raises:
        ValueError: If the window settings yield fewer windows than
            ``output_length``.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})')

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length,), name='y_true')
    input_length = Input(shape=(1,), name='input_length')
    label_length = Input(shape=(1,), name='label_length')

    # Look for an actual GPU device rather than counting devices: extra
    # non-GPU entries (e.g. XLA_CPU) would otherwise select CuDNNLSTM on a
    # machine without a GPU.
    gpu_present = any(
        device.device_type == 'GPU'
        for device in device_lib.list_local_devices()
    )
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length).

    ##### Your code below (Lab 3)
    im = Reshape((image_height, image_width, 1))(image_input)
    im_patch = Lambda(
        slide_window,
        arguments={'window_width': window_width, 'window_stride': window_stride}
    )(im)

    # Make a LeNet and get rid of the last two layers (softmax and dropout)
    convnet = lenet((image_height, window_width, 1), (num_classes,))
    convnet = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output)
    convnet_outputs = TimeDistributed(convnet)(im_patch)

    lstm_output = lstm_fn(128, return_sequences=True)(convnet_outputs)
    softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output)
    ##### Your code above (Lab 3)

    # Scale the input_length tensor by the number of windows before passing
    # it to the CTC loss and decoder.
    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows}
    )(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss'
    )([y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded'
    )([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output]
    )
    return model
import tensorflow as tf
from tensorflow.python.client import device_lib

# Report the default GPU device, if TensorFlow can see one.
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python-2-only print statement.
if tf.test.gpu_device_name():
    print('Default GPU Device: %s' % tf.test.gpu_device_name())
else:
    print('Please install GPU version of TF')
# Dump every local device for diagnostics.
print(device_lib.list_local_devices())
def get_gpus(self):
    """Return ``locality.bus_id`` for every local device of type "GPU"."""
    bus_ids = []
    for device in device_lib.list_local_devices():
        if device.device_type == "GPU":
            bus_ids.append(device.locality.bus_id)
    return bus_ids
from tensorflow.keras import layers
from tensorflow.keras import utils
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model, Sequential, model_from_json
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Masking, Dropout
from tensorflow.keras.layers import LeakyReLU
from tensorflow.python.client import device_lib

# Enumerate the local devices at import time and print them for diagnostics.
# NOTE(review): the device list is queried twice; the stored result is not
# used by the print call.
local_device_protos = device_lib.list_local_devices()
print(device_lib.list_local_devices())


def recall_m(y_true, y_pred):
    """Return recall: true positives / (actual positives + epsilon).

    Values are clipped to [0, 1] and rounded before counting; ``K.epsilon()``
    guards against division by zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def precision_m(y_true, y_pred):
    """Return precision: true positives / (predicted positives + epsilon).

    Values are clipped to [0, 1] and rounded before counting; ``K.epsilon()``
    guards against division by zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision


# NOTE(review): the body of f1_m is not visible in this excerpt.
def f1_m(y_true, y_pred):
# Hyperparameters stored on the shared `config` object (created outside this
# excerpt).
config.embedding_dims = 50
config.filters = 50
config.kernel_size = 3
config.hidden_dims = 100
config.epochs = 10

# Load the IMDB dataset limited to config.vocab_size words and pad every
# sequence to config.maxlen.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=config.vocab_size)
X_train = sequence.pad_sequences(X_train, maxlen=config.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=config.maxlen)
print(X_train.shape)
print("After pre-processing", X_train[0])

# Override LSTM & GRU with their CuDNN variants when a GPU is listed.
# NOTE(review): this is a substring test on the printed device list, so any
# device whose description contains "GPU" (e.g. XLA_GPU) also matches.
if 'GPU' in str(device_lib.list_local_devices()):
    print("Using CUDA for RNN layers")
    LSTM = CuDNNLSTM
    GRU = CuDNNGRU

# Model so far: embedding -> 1D convolution (ReLU, 'valid' padding) -> flatten.
model = tf.keras.models.Sequential()
model.add(
    tf.keras.layers.Embedding(config.vocab_size,
                              config.embedding_dims,
                              input_length=config.maxlen))
model.add(
    tf.keras.layers.Conv1D(config.filters,
                           config.kernel_size,
                           padding='valid',
                           activation='relu'))
model.add(tf.keras.layers.Flatten())
def _get_available_devices():
    """Return the name of every local device, regardless of device type."""
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names
# pylint: disable=missing-docstring import unittest import numpy as np # pylint bug on next line from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module from src.FGSM.cleverhans.cleverhans.devtools.checks import CleverHansTest HAS_GPU = 'GPU' in {x.device_type for x in device_lib.list_local_devices()} class TestMNISTTutorialKeras(CleverHansTest): def test_mnist_tutorial_keras(self): import tensorflow as tf from src.FGSM.cleverhans.cleverhans_tutorials import mnist_tutorial_keras # Run the MNIST tutorial on a dataset of reduced size test_dataset_indices = { 'train_start': 0, 'train_end': 5000, 'test_start': 0, 'test_end': 333, 'nb_epochs': 2, 'testing': True } g = tf.Graph() with g.as_default(): np.random.seed(42) report = mnist_tutorial_keras.mnist_tutorial( **test_dataset_indices)
def get_num_of_available_gpus():
    """Return how many local devices have device type 'GPU'.

    Helps when building training loops with gpu.
    """
    return sum(
        1
        for device in device_lib.list_local_devices()
        if device.device_type == 'GPU'
    )
def train(self, X_train, X_test, y_train, y_test, model=None, model_fn=None,
          epochs_number=1000, batch_size=512):
    """Fit a Keras model on the training data and plot its error history.

    Args:
        X_train: Training features; must expose ``.shape[0]`` (sample count).
        X_test: Validation features.
        y_train: Training targets.
        y_test: Validation targets.
        model: Ready-made model to train. Takes precedence over ``model_fn``.
        model_fn: Callable ``model_fn(X_train, y_train)`` returning a model.
            When both ``model`` and ``model_fn`` are None,
            ``self.baseline_model`` is used.
        epochs_number: Number of epochs passed to ``fit``.
        batch_size: Batch size for the training and validation datasets.
    """
    print(device_lib.list_local_devices())
    self._X_train = X_train
    self._X_test = X_test
    self._y_train = y_train
    self._y_test = y_test

    num_training_samples = self._X_train.shape[0]

    if model is not None:
        self._model = model
    elif model_fn is None:
        self._model = self.baseline_model(X_train, y_train)
    else:
        self._model = model_fn(X_train, y_train)
    # model = self.build_model(X_train, y_train)

    print(self._model.summary())

    # Use the Datasets API to scale to large datasets or multi-device training
    # The original body referenced an undefined name `batches_size`; the
    # `batch_size` parameter is what is meant.
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    dataset = dataset.batch(batch_size).repeat()

    val_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    val_dataset = val_dataset.batch(batch_size).repeat()

    tbCallBack = keras.callbacks.TensorBoard(log_dir="./graphs",
                                             histogram_freq=0,
                                             write_graph=True,
                                             write_images=True)

    # The datasets repeat indefinitely, so the epoch length is set explicitly.
    history = self._model.fit(dataset,
                              epochs=epochs_number,
                              steps_per_epoch=(num_training_samples // batch_size),
                              validation_data=val_dataset,
                              validation_steps=3,
                              callbacks=[tbCallBack])

    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    print(hist.tail())

    def plot_history(history):
        """Plot train/validation MAE and MSE per epoch in two figures."""
        hist = pd.DataFrame(history.history)
        hist['epoch'] = history.epoch

        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel('Mean Abs Error [MPG]')
        plt.plot(hist['epoch'], hist['mean_absolute_error'],
                 label='Train Error')
        plt.plot(hist['epoch'], hist['val_mean_absolute_error'],
                 label='Val Error')
        plt.ylim([0, 5])
        plt.legend()

        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel('Mean Square Error [$MPG^2$]')
        plt.plot(hist['epoch'], hist['mean_squared_error'],
                 label='Train Error')
        plt.plot(hist['epoch'], hist['val_mean_squared_error'],
                 label='Val Error')
        plt.ylim([0, 20])
        plt.legend()
        plt.show()

    plot_history(history)
def show_device():
    """Return the list of local devices reported by TensorFlow."""
    # Imported inside the function rather than at module import time.
    from tensorflow.python.client import device_lib as tf_device_lib

    local_devices = tf_device_lib.list_local_devices()
    return local_devices
def main(): """Create the model and start the training.""" # argument parsing args = get_arguments() MODEL_DIR = args.model_dir IMG_DIR = args.img_dir VGG16_PATH = args.vgg16 OUT_DIR = args.output_dir print(args.save_labeled) # initialization print('[Available Computing Devices]') print(device_lib.list_local_devices()) postfix="" hypes = load_hypes_from_logdir(MODEL_DIR); modules = {} f = os.path.join("architecture.py") arch = imp.load_source("arch_%s" % postfix, f) modules['arch'] = arch f = os.path.join("objective.py") objective = imp.load_source("objective_%s" % postfix, f) modules['objective'] = objective color_seg={1:(0,153,255,127), 2:(0,255,0,127), 3:(255,0,0,127), 4:(255,0,255,127)} if os.path.isdir(OUT_DIR) == False: os.mkdir(OUT_DIR) output_folder_overlay = os.path.join(OUT_DIR, 'overlay') output_folder_labeled = os.path.join(OUT_DIR, 'labeled') output_folder_labeled_raw = os.path.join(OUT_DIR, 'labeled_raw') if os.path.isdir(output_folder_overlay) == False: os.mkdir(output_folder_overlay) if os.path.isdir(output_folder_labeled) == False: os.mkdir(output_folder_labeled) for i in range(hypes['arch']['num_classes']): folder_name = os.path.join(output_folder_labeled, 'class_{}'.format(i)) if os.path.isdir(os.path.join(output_folder_labeled, 'class_{}'.format(i))) == False: os.mkdir(folder_name) if os.path.isdir(output_folder_labeled_raw) == False : os.mkdir(output_folder_labeled_raw) # Create tf graph and build module. with tf.Graph().as_default(): # Create placeholder for input image_pl = tf.placeholder(tf.float32) image = tf.expand_dims(image_pl, 0) # build Tensorflow graph using the model from logdir logits = modules['arch'].inference(hypes, image, VGG16_PATH, train=False) prediction = modules['objective'].decoder(hypes, logits, train=False) print("Graph build successfully.") # Create a session for running Ops on the Graph. 
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess: saver = tf.train.Saver() # Load weights from logdir load_weights(MODEL_DIR, sess, saver) print('[log] wieghts are restored') print("Start inference") # Load and resize input image for file in os.listdir(IMG_DIR): if len(file.split('.')) != 2: continue if file.split('.')[1] != 'png': print(file + ' is skipped') continue image_file = os.path.join(IMG_DIR, file) print("Input image: {}".format(image_file)) img = scp.misc.imread(image_file) # Run KittiSeg model on image start_time = time.time() feed = {image_pl: img} softmax = prediction['softmax'] output = sess.run([softmax], feed_dict=feed) # list, [51200,4] dt = (time.time() - start_time) print('Speed (msec): {}'.format(1000*dt)) print('Speed (fps): {}'.format(1/dt)) # Reshape output from flat vector to 2D Image shape = img.shape img_sum = np.zeros_like(img[:,:,0]) for c in range(hypes['arch']['num_classes']): img_prob = output[0][:, c].reshape(shape[0], shape[1]) threshold_name = 'threshold_{:d}'.format(c) threshold = np.array(hypes['threshold'][threshold_name]) img_thresh = (img_prob > threshold)*(c+1) img_sum = img_sum + img_thresh if args.save_labeled == True: save_path = os.path.join(output_folder_labeled, 'class_{}'.format(c)) image_name = os.path.join(save_path, file) scp.misc.imsave(image_name, img_thresh) overlay_seg = utils.overlay_segmentation(img,img_sum,color_seg) overlay_image_name = os.path.join(output_folder_overlay, file.split('.')[0] + '.png') scp.misc.imsave(overlay_image_name, overlay_seg)
def _get_cuda_gpu_arch_string():
    """Return the ``sm_<major><minor>`` arch string of the first local GPU.

    Raises:
        RuntimeError: If no local device has device type 'GPU'.
    """
    gpu_devices = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_devices.append(device)
    if not gpu_devices:
        raise RuntimeError('No GPU devices found')
    # Only the first GPU is inspected.
    capability = _get_compute_cap(gpu_devices[0])
    (major, minor) = capability
    return 'sm_%s%s' % (major, minor)
def __init__(self, n_classes, input_dims, lr, top_rnns=True, metrics_eval_discard_first_classes=2):
    """Build and compile the sequence-tagging model.

    The network is an input named 'bert_encodings', optionally followed by two
    layers from ``get_bi_lstm()``, then a softmax ``Dense`` layer over
    ``n_classes``. The un-wrapped model is kept in ``self.model_save``;
    ``self.model`` is either the same object or a ``multi_gpu_model`` wrapper
    when more than one GPU is found. The compiled model's architecture is also
    written to 'model.png'.

    Args:
        n_classes: Number of output classes.
        input_dims: Size of the last axis of the input.
        lr: Learning rate for the Adam optimizer.
        top_rnns: Whether to add the recurrent layers before the classifier.
        metrics_eval_discard_first_classes: Number of leading classes excluded
            when building the mask used by the tp/fp/fn/total_count counters.
    """
    self.train_history = None

    # Variable-length sequences: the time axis is left as None.
    input = Input(shape=(None, input_dims), dtype='float32', name='bert_encodings')
    X = input
    if top_rnns:
        X = get_bi_lstm()(X)
        X = get_bi_lstm()(X)
    pred = Dense(n_classes, activation='softmax')(X)
    self.model_save = Model(input, pred)
    #logger.debug(f'available training devices:\n{device_lib.list_local_devices()}'.replace('\n', '\n\t'))
    devices = device_lib.list_local_devices()
    # take gpu count from device info manually, because virtual devices (e.g. XLA_GPU) cause wrong number
    gpus = len([None for d in devices if d.device_type == 'GPU'])
    if gpus > 1:
        self.model = multi_gpu_model(self.model_save, gpus=gpus, cpu_relocation=True)
        logging.info(f"Training using {gpus} GPUs...")
    else:
        self.model = self.model_save
        logging.info("Training using single GPU or CPU...")

    optimizer = Adam(lr=lr)
    # Each ANDCounter receives a tuple of condition tensors; presumably it
    # counts positions where all conditions hold -- verify against ANDCounter.
    self.model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=[
            # 'tp': gold label set, prediction set, position not masked.
            ANDCounter(
                conditions_and=lambda y_true, y_pred: (
                    y_true,
                    K.round(y_pred),
                    # This condition masks all entries where y_true has class=0, i.e. <PAD>:
                    #   1) gold values, except for the first class, are summed along the class-axis
                    #   2) the resulting vector is broadcast back to the original format (via stack and number of classes)
                    K.stack([
                        K.sum(y_true[:, :, metrics_eval_discard_first_classes:], axis=-1)
                    ] * n_classes, axis=-1),
                ),
                name='tp'),
            # 'fp': gold label not set (|y_true - 1|), prediction set.
            ANDCounter(
                conditions_and=lambda y_true, y_pred: (
                    K.abs(y_true - K.ones_like(y_true)),
                    K.round(y_pred),
                    # this condition masks all entries where y_true has class=0, i.e. <PAD> (see above)
                    K.stack([
                        K.sum(y_true[:, :, metrics_eval_discard_first_classes:], axis=-1)
                    ] * n_classes, axis=-1),
                ),
                name='fp'),
            # 'fn': gold label set, prediction not set (|round(y_pred) - 1|).
            ANDCounter(
                conditions_and=lambda y_true, y_pred: (
                    y_true,
                    K.abs(K.round(y_pred) - K.ones_like(y_pred)),
                    # this condition masks all entries where y_true has class=0, i.e. <PAD> (see above)
                    K.stack([
                        K.sum(y_true[:, :, metrics_eval_discard_first_classes:], axis=-1)
                    ] * n_classes, axis=-1),
                ),
                name='fn'),
            # 'total_count': gold label set, position not masked.
            ANDCounter(
                conditions_and=lambda y_true, y_pred: (
                    y_true,
                    # this condition masks all entries where y_true has class=0, i.e. <PAD> (see above)
                    K.stack([
                        K.sum(y_true[:, :, metrics_eval_discard_first_classes:], axis=-1)
                    ] * n_classes, axis=-1),
                ),
                name='total_count'),
            'acc',
        ])
    plot_model(self.model, to_file='model.png', show_shapes=True)
def get_available_gpus(self):
    """Return the device descriptors of all local devices of type 'GPU'."""
    from tensorflow.python.client import device_lib

    gpu_devices = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_devices.append(device)
    return gpu_devices
def train(self, data, session_name="weight_sets/test", session=None,
          shuffle=True, batch_size=5, max_iteration=100,
          continue_from_last=False):
    """Run the training loop and save the resulting weights.

    Args:
        data: Training data; must expose ``.shape`` with at least two axes
            (``shape[0] * shape[1]`` is used as the number of samples).
        session_name: Path used when saving/loading the session.
        session: Existing ``tf.Session`` to train in. When None, a session is
            created here and closed before returning, even on error.
        shuffle: Forwarded to ``build_training_graph``.
        batch_size: Requested batch size; capped at the number of samples.
        max_iteration: Number of outer training steps.
        continue_from_last: When True, restore weights from ``session_name``
            before training.
    """
    # builder = tf.profiler.ProfileOptionBuilder
    # opts = builder(builder.time_and_memory()).order_by('micros').build()
    # pwd = os.path.join(os.path.dirname(os.path.abspath(__file__)), "artifacts", "profile")

    # with tf.contrib.tfprof.ProfileContext(pwd, trace_steps=[], dump_steps=[]) as pctx:
    owns_session = session is None
    if owns_session:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.log_device_placement = True
        sess = tf.Session(config=config)
    else:
        sess = session

    # try/finally guarantees that a session created here is released even if
    # graph construction or a training step raises.
    try:
        batch_size = min(batch_size, data.shape[0] * data.shape[1])
        self.build_training_graph(data.shape, batch_size, shuffle)

        sess.run(tf.global_variables_initializer())
        if continue_from_last:
            self.load_session(sess, session_name)

        sess.run(self.upload_ops, feed_dict={self.input_data: data})

        # Each outer step runs sub_epoch passes over all batches.
        sub_epoch = 10
        start_time = time.time()
        for step in range(max_iteration):
            sess.run(self.rebatch_ops)
            sum_loss = 0.0
            total_batches = int(data.shape[0] * data.shape[1] / batch_size)
            for i in range(total_batches * sub_epoch):
                # pctx.trace_next_step()
                _, loss = sess.run((self.training_op, self.overall_cost))
                sum_loss += loss
            # Mean loss over this outer step.
            print(step, " : ", sum_loss / (total_batches * sub_epoch))
            if (step + 1) % 100 == 0:
                self.saver.save(sess, session_name)
                print("Checkpoint ...")
            # pctx.dump_next_step()
        elapsed_time = time.time() - start_time

        # Overwrite the log with the device list and the total training time.
        with open(
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "artifacts", "log.txt"), "w") as file:
            file.write(str(device_lib.list_local_devices()))
            file.write("Total time ... " + str(elapsed_time) + " seconds")
        # pctx.profiler.profile_operations(options=opts)
        self.saver.save(sess, session_name)
    finally:
        # Never close a session supplied by the caller.
        if owns_session:
            sess.close()
def gpu_device_name():
    """Returns the name of the first local GPU or SYCL device, else ""."""
    accelerator_types = ("GPU", "SYCL")
    matching_names = (
        device.name
        for device in device_lib.list_local_devices()
        if device.device_type in accelerator_types
    )
    # Lazily stop at the first match; fall back to the empty string.
    return next(matching_names, "")