def main(device_id):
    with tf.device('/gpu:{}'.format(device_id)):
        pipe = get_pipe(device_id)

        # Define shapes and types of the outputs
        shapes = [(batch_size, 224, 224, 3), (batch_size, 1)]
        dtypes = [tf.float32, tf.int32]

        # Create a TensorFlow dataset backed by the DALI pipeline
        out_dataset = dali_tf.DALIDataset(pipeline=pipe,
                                          batch_size=batch_size,
                                          shapes=shapes,
                                          dtypes=dtypes,
                                          device_id=device_id)

        m = get_resnet50_model()
        m.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

        # Train using the DALI dataset
        m.fit(out_dataset, epochs=10, steps_per_epoch=8 * 8)
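# A minimal sketch of the `get_pipe` helper assumed above, written with the
# classic class-based DALI pipeline API. The file_root path is a hypothetical
# placeholder; any pipeline returning (images, labels) with matching shapes
# and dtypes would work here.
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types

class TrainPipeline(Pipeline):
    def __init__(self, batch_size, device_id):
        super(TrainPipeline, self).__init__(batch_size, num_threads=4,
                                            device_id=device_id)
        self.reader = ops.FileReader(file_root="/data/train")  # hypothetical path
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.resize = ops.Resize(device="gpu", resize_x=224., resize_y=224.)
        # Emit float32 NHWC to match the shapes/dtypes declared above
        self.normalize = ops.CropMirrorNormalize(device="gpu",
                                                 output_dtype=types.FLOAT,
                                                 output_layout=types.NHWC)

    def define_graph(self):
        jpegs, labels = self.reader()
        images = self.normalize(self.resize(self.decode(jpegs)))
        return images, labels

def get_pipe(device_id):
    return TrainPipeline(batch_size, device_id)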
def dali_input_fn(batch_size, num_channels, image_height, image_width,
                  is_training):
    # Get all the file names
    tfrecords = get_filenames(is_training, "/mnt/data/")
    idx_paths = get_idx_filenames(is_training, "/home/builder/imagenet_idx_files")

    dali_pipe = ResnetPipeline(batch_size=batch_size,
                               num_threads=NUM_THREADS,
                               device_id=USED_GPU,
                               tfrecords=tfrecords,
                               idx_paths=idx_paths)

    # Serialize the pipeline into a protobuf string
    serialized = dali_pipe.serialize()

    # The iterator produces the (image, label) tensors
    daliop = dali_tf.DALIIterator()
    with tf.device("/gpu:0"):
        # daliop returns what define_graph() above specifies.
        # `shapes` are the expected output tensor shapes,
        # e.g. image shape = [32, 3, 224, 224], label shape = [] (unknown);
        # `dtypes` are the types of the two output tensors, respectively.
        img, label = daliop(
            serialized_pipeline=serialized,
            shapes=[(batch_size, num_channels, image_height, image_width), ()],
            dtypes=[tf.float32, tf.int32])

    # The pipeline emits NCHW; transpose (not reshape) to the NHWC layout
    img = tf.transpose(img, [0, 2, 3, 1])
    label = tf.reshape(label, shape=[batch_size])
    return (img, label)
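# Hypothetical usage of dali_input_fn above: since it returns an
# (image, label) tensor pair, it can serve directly as a TF1 Estimator
# input_fn. resnet_model_fn and the model_dir are placeholder names.
classifier = tf.estimator.Estimator(model_fn=resnet_model_fn,
                                    model_dir="/tmp/resnet_model")
classifier.train(
    input_fn=lambda: dali_input_fn(32, 3, 224, 224, is_training=True),
    max_steps=1000)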
def __init__(self, filenames, idx_filenames, height, width, batch_size,
             num_threads, dtype=tf.uint8, dali_cpu=True,
             deterministic=False, training=False):
    device_id = hvd.local_rank()
    shard_id = hvd.rank()
    num_gpus = hvd.size()
    pipe = HybridPipe(
        tfrec_filenames=filenames,
        tfrec_idx_filenames=idx_filenames,
        height=height,
        width=width,
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=device_id,
        shard_id=shard_id,
        num_gpus=num_gpus,
        deterministic=deterministic,
        dali_cpu=dali_cpu,
        training=training)

    daliop = dali_tf.DALIIterator()
    with tf.device("/gpu:0"):
        self.images, self.labels = daliop(
            pipeline=pipe,
            shapes=[(batch_size, height, width, 3), (batch_size, 1)],
            dtypes=[tf.float32, tf.int64],
            device_id=device_id)
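# A hedged sketch of how such a preprocessor class is typically consumed under
# Horovod: one process per GPU, each reading its own shard. DALIPreprocessor,
# model_fn, and the file lists are assumed/placeholder names.
import horovod.tensorflow as hvd

hvd.init()
inputs = DALIPreprocessor(train_files, train_idx_files, 224, 224,
                          batch_size=64, num_threads=4, training=True)
logits = model_fn(inputs.images)
loss = tf.losses.sparse_softmax_cross_entropy(
    labels=tf.reshape(inputs.labels, [-1]), logits=logits)
# Wrap the optimizer so gradients are averaged across all workers
opt = hvd.DistributedOptimizer(tf.train.MomentumOptimizer(0.1, momentum=0.9))
train_op = opt.minimize(loss)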
def __init__(self, filenames, idx_filenames, height, width, batch_size,
             num_threads, dtype=tf.uint8, dali_cpu=True, deterministic=False):
    pipe = HybridPipe(tfrec_filenames=filenames,
                      tfrec_idx_filenames=idx_filenames,
                      height=height,
                      width=width,
                      batch_size=batch_size,
                      num_threads=num_threads,
                      device_id=hvd.rank(),
                      num_gpus=hvd.size(),
                      deterministic=deterministic,
                      dali_cpu=dali_cpu)
    serialized_pipe = pipe.serialize()
    del pipe

    daliop = dali_tf.DALIIterator()
    with tf.device("/gpu:0"):
        # Older plugin signature: a single `shape` for the image output
        self.images, self.labels = daliop(
            serialized_pipeline=serialized_pipe,
            shape=[batch_size, height, width, 3],
            device_id=hvd.rank())
def inputs_dali(batch_size, devices, tfrecord):
    # Build the index file for the TFRecord if it does not exist yet
    tfrecord_idx = os.path.splitext(tfrecord)[0] + '.idx'
    tfrecord2idx_script = "tfrecord2idx"
    if not os.path.isfile(tfrecord_idx):
        call([tfrecord2idx_script, tfrecord, tfrecord_idx])

    # One pipeline per GPU, each serialized to a protobuf string
    pipes = [
        TFRecordPipeline(batch_size=batch_size,
                         num_threads=2,
                         device_id=device_id,
                         tfrecord=tfrecord,
                         tfrecord_idx=tfrecord_idx)
        for device_id in range(devices)
    ]
    serialized_pipes = [pipe.serialize() for pipe in pipes]
    del pipes

    daliop = dali_tf.DALIIterator()
    images = []
    labels = []
    for d in range(devices):
        with tf.device('/gpu:%i' % d):
            image, label = daliop(serialized_pipeline=serialized_pipes[d],
                                  batch_size=batch_size,
                                  height=224,
                                  width=224,
                                  device_id=d)
            images.append(image)
            labels.append(label)
    return images, labels
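# Hypothetical driver for inputs_dali above: fetch one batch per GPU with a
# single session.run; ITERATIONS is a placeholder.
images, labels = inputs_dali(batch_size=32, devices=2, tfrecord=FLAGS.tfrecord)
with tf.Session() as sess:
    for _ in range(ITERATIONS):
        image_batches, label_batches = sess.run([images, labels])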
def test_python_operator_error():
    # Using a pipeline that contains a Python operator through the TF plugin
    # is expected to raise an error.
    daliop = dali_tf.DALIIterator()
    pipe = PythonOperatorPipeline()
    with tf.device('/cpu:0'):
        output = daliop(pipeline=pipe,
                        shapes=[(1, 3, 3, 3)],
                        dtypes=[tf.float32],
                        device_id=0)
def get_data(batch_size, value):
    pipe = get_dali_pipe(batch_size=batch_size,
                         device_id=types.CPU_ONLY_DEVICE_ID,
                         num_threads=1,
                         value=value)
    daliop = dali_tf.DALIIterator()
    out = []
    with tf.device('/cpu'):
        data = daliop(pipeline=pipe,
                      shapes=[(batch_size,)],
                      dtypes=[tf.int32],
                      device_id=types.CPU_ONLY_DEVICE_ID)
        out.append(data)
    return [out]
def dali_generator(tfrecord_files, idx_files, batch_size, num_threads=4,
                   device_id=0, rank=0, total_devices=1):
    pipe = TFRecordPipeline(tfrecord_files, idx_files, batch_size, device_id,
                            rank, total_devices, num_threads)
    pipe.build()
    daliop = dali_tf.DALIIterator()
    while True:
        # Each call to the iterator op is expected to yield the next batch
        images, labels = daliop(pipeline=pipe,
                                shapes=[(batch_size, 224, 224, 3), ()],
                                dtypes=[tf.float16, tf.float16])
        # Shift labels from 1-based to 0-based
        labels -= 1
        yield images, labels
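# Hypothetical usage of dali_generator above: under eager execution the
# generator can feed Keras directly; model, the file lists, and
# steps_per_epoch are placeholders.
model.fit(dali_generator(train_tfrecords, train_idx_files, batch_size=32),
          steps_per_epoch=100,
          epochs=5)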
def get_pipeline_outs(pipe, device_id):
    daliop = dali_tf.DALIIterator()
    pipe.build()  # build the pipeline to be able to query the epoch size
    epoch_size = list(pipe.epoch_size().values())[0]
    epoch_size = math.ceil(epoch_size / pipe.batch_size)
    with tf.device("/device:GPU:{}".format(device_id)):
        images_tensor, labels_tensor = daliop(
            pipeline=pipe,
            shapes=[(pipe.batch_size, 224, 224, 3), (pipe.batch_size, 1)],
            dtypes=[tf.uint8, tf.int32],
            device_id=device_id,
        )
        # Scale uint8 images to [-1, 1]
        images_tensor = tf.cast(images_tensor, tf.float32) / 127.5 - 1.
    return images_tensor, labels_tensor, epoch_size
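# A possible way to consume get_pipeline_outs above: the returned epoch_size
# (batches per epoch) bounds the inner training loop. train_step is a
# placeholder for whatever builds the training op from the input tensors.
images, labels, steps_per_epoch = get_pipeline_outs(pipe, device_id=0)
train_op = train_step(images, labels)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(steps_per_epoch):
        sess.run(train_op)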
def get_batch_dali(batch_size, pipe_type, label_type, num_gpus=1):
    pipes = [pipe_type(batch_size=batch_size,
                       num_threads=2,
                       device_id=device_id,
                       num_gpus=num_gpus)
             for device_id in range(num_gpus)]
    daliop = dali_tf.DALIIterator()
    images = []
    labels = []
    for d in range(num_gpus):
        with tf.device('/gpu:%i' % d):
            image, label = daliop(pipeline=pipes[d],
                                  shapes=[(batch_size, 3, 227, 227), ()],
                                  dtypes=[tf.int32, label_type],
                                  device_id=d)
            images.append(image)
            labels.append(label)
    return [images, labels]
def __init__(self, filenames, idx_filenames, height, width, batch_size,
             num_threads, dtype=tf.uint8, dali_cpu=True,
             deterministic=False, training=False):
    device_id = hvd.local_rank()
    shard_id = hvd.rank()
    num_gpus = hvd.size()
    self.pipe = get_dali_pipeline(
        tfrec_filenames=filenames,
        tfrec_idx_filenames=idx_filenames,
        height=height,
        width=width,
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=device_id,
        shard_id=shard_id,
        num_gpus=num_gpus,
        dali_cpu=dali_cpu,
        training=training,
        seed=7 * (1 + hvd.rank()) if deterministic else None)

    self.daliop = dali_tf.DALIIterator()
    self.batch_size = batch_size
    self.height = height
    self.width = width
    self.device_id = device_id

    self.dalidataset = dali_tf.DALIDataset(
        pipeline=self.pipe,
        output_shapes=((batch_size, height, width, 3), (batch_size,)),
        batch_size=batch_size,
        output_dtypes=(tf.float32, tf.int64),
        device_id=device_id)
def __init__(self, filenames, idx_filenames, height, width, batch_size,
             num_threads, dtype=tf.uint8, dali_pipeline_variant="GPU",
             deterministic=False):
    if 'nvidia.dali.plugin.tf' not in sys.modules:
        raise ImportError("Module dali_tf is not available.")
    device_id = hvd.local_rank()
    shard_id = hvd.rank()
    num_gpus = hvd.size()
    pipe = HybridPipe(tfrec_filenames=filenames,
                      tfrec_idx_filenames=idx_filenames,
                      height=height,
                      width=width,
                      batch_size=batch_size,
                      num_threads=num_threads,
                      device_id=device_id,
                      shard_id=shard_id,
                      num_gpus=num_gpus,
                      deterministic=deterministic,
                      dali_pipeline_variant=dali_pipeline_variant)
    serialized_pipe = pipe.serialize()
    del pipe

    daliop = dali_tf.DALIIterator()
    with tf.device("/gpu:0"):
        self.images, self.labels = daliop(
            serialized_pipeline=serialized_pipe,
            shapes=[(batch_size, height, width, 3), ()],
            dtypes=[tf.float32, tf.int64],
            device_id=device_id)
def _prepare_dali(args, n_classes):
    if args.gpu_cores > 1:
        logger.error(
            'Have not built in support for more than one GPU at the moment.')
        sys.exit(1)

    # Non-NVIDIA cloud environments will not have DALI, so we
    # have to do the import here.
    from image_segmentation.dali_pipeline import CommonPipeline
    import nvidia.dali.plugin.tf as dali_tf

    batch_size = args.batch_size
    image_size = args.image_size
    device_id = 0

    # Download any gs:// inputs locally; local paths are used as-is
    filenames = []
    for filename in args.data:
        if filename.startswith('gs://'):
            parts = filename[5:].split('/')
            bucket_name, blob_name = parts[0], '/'.join(parts[1:])
            storage_client = storage.Client()
            bucket = storage_client.get_bucket(bucket_name)
            blob = bucket.blob(blob_name)
            download_filename = os.path.basename(blob_name)
            blob.download_to_filename(download_filename)
            filenames.append(download_filename)
        else:
            filenames.append(filename)

    # Build tfindex files if the caller did not supply them
    tfindex_files = args.tfindex_files or []
    if not tfindex_files:
        for path in filenames:
            tfindex_file = path.split('.')[0] + '.tfindex'
            build_tfindex_file(path, tfindex_file)
            logger.info('Created tfindex file: {input} -> {output}'.format(
                input=path, output=tfindex_file))
            tfindex_files.append(tfindex_file)

    config = dali_config.DaliConfig()
    config.summarize()

    pipe = CommonPipeline(
        args.batch_size,
        args.parallel_calls,
        device_id,
        args.image_size,
        filenames,
        tfindex_files,
        config
    )
    pipe.build()

    daliop = dali_tf.DALIIterator()
    with tf.device('/gpu:0'):
        results = daliop(
            serialized_pipeline=pipe.serialize(),
            shape=[args.batch_size, args.image_size, args.image_size, 3],
            label_type=tf.int64,
        )

    input_tensor = results.batch
    results.label.set_shape([batch_size, image_size, image_size, 3])
    mask = results.label

    # Downscale the mask once per network stride
    new_shape = [image_size // 4, image_size // 4]
    mask_4 = ADE20KDatasetBuilder.scale_mask(mask, 4, new_shape, n_classes)
    new_shape = [image_size // 8, image_size // 8]
    mask_8 = ADE20KDatasetBuilder.scale_mask(mask, 8, new_shape, n_classes)
    new_shape = [image_size // 16, image_size // 16]
    mask_16 = ADE20KDatasetBuilder.scale_mask(mask, 16, new_shape, n_classes)

    return {
        'input': input_tensor,
        'mask_4': mask_4,
        'mask_8': mask_8,
        'mask_16': mask_16,
    }
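# A possible implementation of the build_tfindex_file helper assumed above,
# shelling out to DALI's tfrecord2idx script (the same approach inputs_dali
# uses earlier in this section).
from subprocess import call

def build_tfindex_file(tfrecord_path, tfindex_path):
    call(["tfrecord2idx", tfrecord_path, tfindex_path])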
# ... tail of a helper that returns the absolute difference of two variances:
    variance_dis = tf.abs(t1_var - t2_var)
    return variance_dis

def distance(t1, t2):
    k = 1.0
    delta = 3.0
    return tf.reduce_mean(
        tf.exp(k * tf.abs(t1 - t2) + delta) - tf.exp(delta), [1, 2, 3])

sess = tf.Session()

# One pipeline for the dark (input) images and one for the ground truth
dark_pipe = SimplePipeline(batch_size, 1, 0, dark_img_dir)
gt_pipe = SimplePipeline(batch_size, 1, 0, gt_img_dir)
daliop = dali_tf.DALIIterator()

in_image = daliop(pipeline=dark_pipe,
                  shapes=[[batch_size, 400, 600, 3]],
                  dtypes=[tf.uint8])
in_image = tf.to_float(in_image[0]) / 255.0

gt_image = daliop(pipeline=gt_pipe,
                  shapes=[[batch_size, 400, 600, 3]],
                  dtypes=[tf.uint8])
gt_image = tf.to_float(gt_image[0]) / 255.0
print(in_image, gt_image)

input_patches, gt_patches = generate_batch(patches_num, in_image, gt_image)
output_patches = hist_stretching_layer_relu(input_patches, 10, 3)
out_max = tf.reduce_max(output_patches)