def test_capture(self):
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Some test computation
    some_weights = tf.get_variable("weights", [2, 128])
    computation = tf.nn.softmax(some_weights)

    hook = hooks.MetadataCaptureHook(step=5, output_dir=self.capture_dir)
    hook.begin()

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        # pylint: disable=W0212
        mon_sess = monitored_session._HookedSession(sess, [hook])
        # Should not trigger for step 0
        sess.run(tf.assign(global_step, 0))
        mon_sess.run(computation)
        self.assertEqual(gfile.ListDirectory(self.capture_dir), [])
        # Should trigger *after* step 5
        sess.run(tf.assign(global_step, 5))
        mon_sess.run(computation)
        self.assertEqual(gfile.ListDirectory(self.capture_dir), [])
        mon_sess.run(computation)
        self.assertEqual(
            set(gfile.ListDirectory(self.capture_dir)),
            set(["run_meta", "tfprof_log", "timeline.json"]))
def test_save_profile(self):
    logdir = self.get_temp_dir()
    profiler.start(logdir)
    with traceme.TraceMe('three_times_five'):
        three = constant_op.constant(3)
        five = constant_op.constant(5)
        product = three * five
    self.assertAllEqual(15, product)
    profiler.stop()

    file_list = gfile.ListDirectory(logdir)
    self.assertEqual(len(file_list), 2)
    for file_name in gfile.ListDirectory(logdir):
        if gfile.IsDirectory(os.path.join(logdir, file_name)):
            self.assertEqual(file_name, 'plugins')
        else:
            self.assertTrue(file_name.endswith('.profile-empty'))

    profile_dir = os.path.join(logdir, 'plugins', 'profile')
    run = gfile.ListDirectory(profile_dir)[0]
    hostname = socket.gethostname()
    overview_page = os.path.join(
        profile_dir, run, hostname + '.overview_page.pb')
    self.assertTrue(gfile.Exists(overview_page))
    input_pipeline = os.path.join(
        profile_dir, run, hostname + '.input_pipeline.pb')
    self.assertTrue(gfile.Exists(input_pipeline))
    tensorflow_stats = os.path.join(
        profile_dir, run, hostname + '.tensorflow_stats.pb')
    self.assertTrue(gfile.Exists(tensorflow_stats))
    kernel_stats = os.path.join(
        profile_dir, run, hostname + '.kernel_stats.pb')
    self.assertTrue(gfile.Exists(kernel_stats))
    trace_file = os.path.join(
        profile_dir, run, hostname + '.trace.json.gz')
    self.assertTrue(gfile.Exists(trace_file))
def testGC(self):
    export_path = os.path.join(tf.test.get_temp_dir(), "gc")
    self.doBasicsOneExportPath(export_path, global_step=100)
    self.assertEquals(gfile.ListDirectory(export_path), ["00000100"])
    self.doBasicsOneExportPath(export_path, global_step=101)
    self.assertEquals(sorted(gfile.ListDirectory(export_path)),
                      ["00000100", "00000101"])
    self.doBasicsOneExportPath(export_path, global_step=102)
    self.assertEquals(sorted(gfile.ListDirectory(export_path)),
                      ["00000101", "00000102"])
def test_save_profile(self):
    logdir = self.get_temp_dir()
    profile_pb = trace_events_pb2.Trace()
    profile_result = profile_pb.SerializeToString()
    profiler.save(logdir, profile_result)

    file_list = gfile.ListDirectory(logdir)
    self.assertEqual(len(file_list), 2)
    for file_name in gfile.ListDirectory(logdir):
        if gfile.IsDirectory(os.path.join(logdir, file_name)):
            self.assertEqual(file_name, 'plugins')
        else:
            self.assertTrue(file_name.endswith('.profile-empty'))
def test_dir_operations(self):
    """Test directory operations."""
    d = get_oss_path("d1/d2/d3/d4")
    gfile.MakeDirs(d)
    self.assertTrue(gfile.Stat(d).is_directory)

    # Test listing bucket directory with and without trailing '/'
    content = gfile.ListDirectory(
        "oss://%s\x01id=%s\x02key=%s\x02host=%s" %
        (bucket, access_id, access_key, host))
    content_s = gfile.ListDirectory(
        "oss://%s\x01id=%s\x02key=%s\x02host=%s/" %
        (bucket, access_id, access_key, host))
    self.assertEqual(content, content_s)
    self.assertIn("oss_fs_test", content)
    self.assertIn("oss_fs_test/d1", content)
    self.assertIn("oss_fs_test/d1/d2", content)

    # Test listing test directory with and without trailing '/'
    content = gfile.ListDirectory(
        "oss://%s\x01id=%s\x02key=%s\x02host=%s" %
        (bucket, access_id, access_key, host) + "/oss_fs_test")
    content_s = gfile.ListDirectory(
        "oss://%s\x01id=%s\x02key=%s\x02host=%s" %
        (bucket, access_id, access_key, host) + "/oss_fs_test/")
    self.assertEqual(content, content_s)
    self.assertIn("d1", content)
    self.assertIn("d1/d2", content)

    # Test listing sub directories.
    content = gfile.ListDirectory(get_oss_path("d1"))
    content_s = gfile.ListDirectory(get_oss_path("d1/"))
    self.assertEqual(content, content_s)
    self.assertIn("d2", content)

    content = gfile.ListDirectory(get_oss_path("d1/d2/d3/d4"))
    content_s = gfile.ListDirectory(get_oss_path("d1/d2/d3/d4/"))
    self.assertEqual(content, content_s)
    self.assertEqual([], content)

    # Test renaming directories
    self.assertTrue(gfile.Exists(get_oss_path("d1")))
    gfile.Rename(get_oss_path("d1"), get_oss_path("rename_d1"), overwrite=True)
    self.assertTrue(gfile.Exists(get_oss_path("rename_d1")))
    self.assertFalse(gfile.Exists(get_oss_path("d1")))
    content = gfile.ListDirectory(get_oss_path("rename_d1"))
    content_s = gfile.ListDirectory(get_oss_path("rename_d1/"))
    self.assertEqual(content, content_s)
    self.assertIn("d2", content)
def combine_blog_to_one(blog_dir):
    """Combine the blog content under blog_dir into one txt file."""
    files = gfile.ListDirectory(blog_dir)
    print(len(files))
    for f in files:
        one_dir = os.path.join(blog_dir, f)
        blog_files = gfile.ListDirectory(one_dir)
        content_file = open(os.path.join(one_dir, "all_content.txt"), "w+")
        for bf in blog_files:
            filename = os.path.join(one_dir, bf)
            content = extract_content(filename)
            content_file.write(content)
        content_file.close()
def test_dir_operations(self):
    """Test directory operations."""
    d = get_oss_path("d1/d2")
    gfile.MakeDirs(d)
    self.assertTrue(gfile.Stat(d).is_directory)

    # Test listing bucket directory with and without trailing '/'
    content = gfile.ListDirectory("oss://" + bucket)
    content_s = gfile.ListDirectory("oss://" + bucket + "/")
    self.assertEqual(content, content_s)
    self.assertIn("oss_fs_test", content)
    self.assertIn("oss_fs_test/d1", content)
    self.assertIn("oss_fs_test/d1/d2", content)

    # Test listing test directory with and without trailing '/'
    content = gfile.ListDirectory("oss://" + bucket + "/oss_fs_test")
    content_s = gfile.ListDirectory("oss://" + bucket + "/oss_fs_test/")
    self.assertEqual(content, content_s)
    self.assertIn("d1", content)
    self.assertIn("d1/d2", content)

    # Test listing sub directories.
    content = gfile.ListDirectory(get_oss_path("d1"))
    content_s = gfile.ListDirectory(get_oss_path("d1/"))
    self.assertEqual(content, content_s)
    self.assertIn("d2", content)

    content = gfile.ListDirectory(get_oss_path("d1/d2"))
    content_s = gfile.ListDirectory(get_oss_path("d1/d2/"))
    self.assertEqual(content, content_s)
    self.assertEqual([], content)
def __init__(self, sess, session_root, watch_fn=None, thread_name_filter=None,
             pass_through_operrors=None, log_usage=True):
    """Constructor of DumpingDebugWrapperSession.

    Args:
      sess: The TensorFlow `Session` object being wrapped.
      session_root: (`str`) Path to the session root directory. Must be a
        directory that does not exist or an empty directory. If the directory
        does not exist, it will be created by the debugger core during debug
        `tf.Session.run` calls.
        As the `run()` calls occur, subdirectories will be added to
        `session_root`. The subdirectories' names have the following pattern:
          run_<epoch_time_stamp>_<zero_based_run_counter>
        E.g., run_1480734393835964_ad4c953a85444900ae79fc1b652fb324
      watch_fn: (`Callable`) A Callable that can be used to define per-run
        debug ops and watched tensors. See the doc of
        `NonInteractiveDebugWrapperSession.__init__()` for details.
      thread_name_filter: Regular-expression white list for threads on which
        the wrapper session will be active. See doc of
        `BaseDebugWrapperSession` for more details.
      pass_through_operrors: If true, all captured OpErrors will be
        propagated. By default this captures all OpErrors.
      log_usage: (`bool`) whether the usage of this class is to be logged.

    Raises:
      ValueError: If `session_root` is an existing and non-empty directory or
        if `session_root` is a file.
    """

    if log_usage:
        pass  # No logging for open-source.

    framework.NonInteractiveDebugWrapperSession.__init__(
        self, sess, watch_fn=watch_fn, thread_name_filter=thread_name_filter,
        pass_through_operrors=pass_through_operrors)

    session_root = os.path.expanduser(session_root)
    if gfile.Exists(session_root):
        if not gfile.IsDirectory(session_root):
            raise ValueError(
                "session_root path points to a file: %s" % session_root)
        elif gfile.ListDirectory(session_root):
            raise ValueError(
                "session_root path points to a non-empty directory: %s" %
                session_root)
    else:
        gfile.MakeDirs(session_root)
    self._session_root = session_root

    self._run_counter = 0
    self._run_counter_lock = threading.Lock()
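# A minimal usage sketch for the constructor above, assuming the TF 1.x tfdbg
# packaging in which this class is exposed via tensorflow.python.debug; the
# dump path and fetch below are hypothetical illustrations, not part of the
# snippet's own test setup.
import tensorflow as tf
from tensorflow.python import debug as tf_debug

loss = tf.constant(1.0, name="loss")
sess = tf.Session()
# session_root must be a nonexistent or empty directory, otherwise __init__
# raises ValueError (see the checks above).
sess = tf_debug.DumpingDebugWrapperSession(sess, "/tmp/tfdbg_dumps")
# Per the docstring, each run() adds a run_<timestamp>_<counter> subdirectory
# of dumped debug data under /tmp/tfdbg_dumps.
sess.run(loss)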
def walk_dir(image_dir):
    if not gfile.Exists(image_dir):
        tf.logging.error("Image directory '" + image_dir + "' not found.")
        return
    with concurrent.futures.ProcessPoolExecutor() as executor:
        result = collections.OrderedDict()
        sub_dirs = [
            os.path.join(image_dir, item)
            for item in gfile.ListDirectory(image_dir)]
        sub_dirs = sorted(item for item in sub_dirs if gfile.IsDirectory(item))
        extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
        for sub_dir in sub_dirs:
            file_list = []
            dir_name = os.path.basename(sub_dir)
            tf.logging.info('processing {} {}'.format(sub_dir, dir_name))
            if dir_name == image_dir:
                continue
            tf.logging.info("Looking for images in '" + dir_name + "'")
            for extension in extensions:
                file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
                file_list.extend(gfile.Glob(file_glob))
            if not file_list:
                tf.logging.warning('No files found')
                continue
            executor.map(processImage, file_list)
    return
def get_paths(base_dir, parser):
    """Gets a list of Paths in a given directory.

    Args:
      base_dir: directory.
      parser: a function which gets the raw Path and can augment it with
        information such as the export_version, or ignore the path by
        returning None. An example parser may extract the export version from
        a path such as "/tmp/exports/100"; another may extract from a full
        file name such as "/tmp/checkpoint-99.out".

    Returns:
      A list of Paths contained in the base directory with the parsing
      function applied.
      By default the following fields are populated,
        - Path.path
      The parsing function is responsible for populating,
        - Path.export_version
    """
    raw_paths = gfile.ListDirectory(base_dir)
    paths = []
    for r in raw_paths:
        p = parser(
            Path(
                os.path.join(compat.as_str_any(base_dir),
                             compat.as_str_any(r)),
                None))
        if p:
            paths.append(p)
    return sorted(paths)
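# A hedged sketch of a parser that could be passed to get_paths() above. The
# Path record is assumed to be a two-field namedtuple (path, export_version),
# mirroring how the snippet constructs Path(os.path.join(...), None); the
# "/tmp/exports/<version>" layout follows the docstring's example.
import collections
import os

Path = collections.namedtuple('Path', ['path', 'export_version'])

def export_version_parser(path):
    """Populates export_version from a numeric basename, e.g. ".../100"."""
    basename = os.path.basename(path.path)
    if not basename.isdigit():
        return None  # Ignore entries that are not export directories.
    return path._replace(export_version=int(basename))

# Example: get_paths("/tmp/exports", export_version_parser) would return the
# export Paths sorted, each with export_version filled in.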
def ListDirectoryAbsolute(directory):
    """Yields all files in the given directory. The paths are absolute."""
    if gcs.IsGCSPath(directory):
        return gcs.ListDirectory(directory)
    else:
        return (os.path.join(directory, path)
                for path in gfile.ListDirectory(directory))
def testModelFitTensorBoardEpochLevel(self, strategy):
    log_dir = self.get_temp_dir()
    callbacks = [callbacks_lib.TensorBoard(log_dir)]
    self._model_fit(strategy, callbacks=callbacks)
    self.assertTrue(gfile.Exists(log_dir))
    files = gfile.ListDirectory(log_dir)
    self.assertGreaterEqual(len(files), 1)
def start_ps():
    print(datetime.now(), "starting ps")
    processes = [multiprocessing.Process(target=ps, args=[i])
                 for i in range(FLAGS.ps_per_host)]
    [p.start() for p in processes]
    app_id = os.environ.get("APP_ID")
    if not gfile.Exists(FLAGS.log_dir):
        gfile.MakeDirs(FLAGS.log_dir)
    # write signal files
    signal_path = FLAGS.model_dir + "/" + get_app_id() + "/signals"
    signal_file = signal_path + "/ps_host_%d.ready" % FLAGS.task_index
    if not gfile.Exists(signal_path):
        gfile.MakeDirs(signal_path)
    print(datetime.now(), "start to write ps ready signal file")
    with gfile.Open(signal_file, "w") as f:
        f.write("ready")
    ps_ips, _ = get_machine_list()
    print(datetime.now(), "wait for all ps ready")
    while True:
        files = gfile.ListDirectory(signal_path)
        if len(files) != len(ps_ips):
            print(datetime.now(), "ready ps:", ",".join(files))
            time.sleep(30)
        else:
            print(datetime.now(), "all ps ready:", ",".join(files))
            break
    return processes
def _get_paths(base_dir, parser):
    """Gets a list of Paths in a given directory.

    Args:
      base_dir: directory.
      parser: a function which gets the raw Path and can augment it with
        information such as the export_version, or ignore the path by
        returning None. An example parser may extract the export version from
        a path such as "/tmp/exports/100"; another may extract from a full
        file name such as "/tmp/checkpoint-99.out".

    Returns:
      A list of Paths contained in the base directory with the parsing
      function applied.
      By default the following fields are populated,
        - Path.path
      The parsing function is responsible for populating,
        - Path.export_version
    """
    # We are mocking this in the test, hence we should not use the public API.
    raw_paths = gfile.ListDirectory(base_dir)
    paths = []
    for r in raw_paths:
        # ListDirectory() returns paths with a trailing "/" if base_dir was a
        # GCS URL.
        r = tf.compat.as_str_any(r)
        if r[-1] == '/':
            r = r[0:len(r) - 1]
        p = parser(Path(os.path.join(tf.compat.as_str_any(base_dir), r), None))
        if p:
            paths.append(p)
    return sorted(paths)
def create_image_lists(image_dir):
    if not tf.compat.v1.gfile.Exists(image_dir):
        tf.logging.error("Image directory '" + image_dir + "' not found.")
        return None
    result = collections.OrderedDict()
    sub_dirs = [
        os.path.join(image_dir, item)
        for item in gfile.ListDirectory(image_dir)]
    sub_dirs = sorted(item for item in sub_dirs if gfile.IsDirectory(item))
    for sub_dir in sub_dirs:
        extensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp']
        file_list = []
        dir_name = os.path.basename(sub_dir)
        if dir_name == image_dir:
            continue
        tf.logging.info("Looking for images in '" + dir_name + "'")
        for extension in extensions:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            file_list.extend(gfile.Glob(file_glob))
        if not file_list:
            tf.logging.warning('No files found')
            continue
        label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        result[label_name] = {
            'dir': dir_name,
            'test': file_list
        }
    return result
def create_image_lists(image_dir, testing_percentage, validation_percentage):
    if not gfile.Exists(image_dir):
        tf.logging.error("Image directory '" + image_dir + "' not found.")
        return None
    result = collections.OrderedDict()
    sub_dirs = [
        os.path.join(image_dir, item)
        for item in gfile.ListDirectory(image_dir)
    ]
    sub_dirs = sorted(item for item in sub_dirs if gfile.IsDirectory(item))
    for sub_dir in sub_dirs:
        extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
        file_list = []
        dir_name = os.path.basename(sub_dir)
        if dir_name == image_dir:
            continue
        tf.logging.info("Looking for images in '" + dir_name + "'")
        for extension in extensions:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            file_list.extend(gfile.Glob(file_glob))
        if not file_list:
            tf.logging.warning('No files found')
            continue
        if len(file_list) < 20:
            tf.logging.warning(
                'WARNING: Folder has less than 20 images, which may cause '
                'issues.')
        elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS:
            tf.logging.warning(
                'WARNING: Folder {} has more than {} images. Some images will '
                'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS))
        label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        training_images = []
        testing_images = []
        validation_images = []
        for file_name in file_list:
            base_name = os.path.basename(file_name)
            hash_name = re.sub(r'_nohash_.*$', '', file_name)
            hash_name_hashed = hashlib.sha1(
                compat.as_bytes(hash_name)).hexdigest()
            percentage_hash = ((int(hash_name_hashed, 16) %
                                (MAX_NUM_IMAGES_PER_CLASS + 1)) *
                               (100.0 / MAX_NUM_IMAGES_PER_CLASS))
            if percentage_hash < validation_percentage:
                validation_images.append(base_name)
            elif percentage_hash < (testing_percentage + validation_percentage):
                testing_images.append(base_name)
            else:
                training_images.append(base_name)
        result[label_name] = {
            'dir': dir_name,
            'training': training_images,
            'testing': testing_images,
            'validation': validation_images,
        }
    return result
def _GetNextPath(self):
    """Returns the path of the next file to use or None if no file exists."""
    sorted_paths = [os.path.join(self._directory, path)
                    for path in sorted(gfile.ListDirectory(self._directory))]
    # We filter here so the filter gets the full directory name.
    filtered_paths = (path
                      for path in sorted_paths
                      if self._path_filter(path) and path > self._path)
    return next(filtered_paths, None)
def generate_raw_data(self, data_source, partition_id, block_size,
                      shuffle_win_size, feat_key_fmt, feat_val_fmt):
    dbm = data_block_manager.DataBlockManager(data_source, partition_id)
    raw_data_dir = os.path.join(data_source.raw_data_dir,
                                'partition_{}'.format(partition_id))
    if gfile.Exists(raw_data_dir):
        gfile.DeleteRecursively(raw_data_dir)
    gfile.MakeDirs(raw_data_dir)
    useless_index = 0
    for block_index in range(self.total_index // block_size):
        builder = data_block_manager.DataBlockBuilder(
            data_source.raw_data_dir, partition_id, block_index, None)
        cands = list(range(block_index * block_size,
                           (block_index + 1) * block_size))
        start_index = cands[0]
        for i in range(len(cands)):
            if random.randint(1, 4) > 2:
                continue
            a = random.randint(i - shuffle_win_size, i + shuffle_win_size)
            b = random.randint(i - shuffle_win_size, i + shuffle_win_size)
            if a < 0:
                a = 0
            if a >= len(cands):
                a = len(cands) - 1
            if b < 0:
                b = 0
            if b >= len(cands):
                b = len(cands) - 1
            if (abs(cands[a] - i - start_index) <= shuffle_win_size and
                    abs(cands[b] - i - start_index) <= shuffle_win_size):
                cands[a], cands[b] = cands[b], cands[a]
        for example_idx in cands:
            feat = {}
            example_id = '{}'.format(example_idx).encode()
            feat['example_id'] = tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[example_id]))
            event_time = 150000000 + example_idx
            feat['event_time'] = tf.train.Feature(
                int64_list=tf.train.Int64List(value=[event_time]))
            feat[feat_key_fmt.format(example_idx)] = tf.train.Feature(
                bytes_list=tf.train.BytesList(
                    value=[feat_val_fmt.format(example_idx).encode()]))
            example = tf.train.Example(features=tf.train.Features(
                feature=feat))
            builder.append(example.SerializeToString(), example_id,
                           event_time, useless_index, useless_index)
            useless_index += 1
        builder.finish_data_block()
    fpaths = [
        os.path.join(raw_data_dir, f)
        for f in gfile.ListDirectory(raw_data_dir)
        if not gfile.IsDirectory(os.path.join(raw_data_dir, f))
    ]
    for fpath in fpaths:
        if not fpath.endswith(common.DataBlockSuffix):
            gfile.Remove(fpath)
def images_data_list(image_dir, testing_data_phantram, validation_data_phantram):
    if not gfile.Exists(image_dir):
        tf.logging.error("Image data directory '" + image_dir + "' not found.")
        return None
    result = collections.OrderedDict()
    sub_dirs = [
        os.path.join(image_dir, item)
        for item in gfile.ListDirectory(image_dir)]
    sub_dirs = sorted(item for item in sub_dirs if gfile.IsDirectory(item))
    for sub_dir in sub_dirs:
        # Image file formats that can be decoded here.
        dinh_dang_hinhanh = ['jpg', 'jpeg', 'JPG', 'JPEG']
        file_list = []
        dir_name = os.path.basename(sub_dir)
        if dir_name == image_dir:
            continue
        tf.logging.info("Looking for images in '" + dir_name + "'")
        for extension in dinh_dang_hinhanh:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            file_list.extend(gfile.Glob(file_glob))
        if not file_list:
            tf.logging.warning('No images found')
            continue
        if len(file_list) < 20:
            tf.logging.warning(
                'WARNING: Folder has less than 20 images; consider reviewing '
                'the data set.')
        elif len(file_list) > IMAGES_TOIDA:
            tf.logging.warning(
                'WARNING: Folder {} has more than {} images. Some images will '
                'never be selected.'.format(dir_name, IMAGES_TOIDA))
        ten_label = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        data_training = []
        data_testing = []
        data_valdiation = []
        for file_name in file_list:
            base_name = os.path.basename(file_name)
            hash_name = re.sub(r'_nohash_.*$', '', file_name)
            # Split the images into (validation + testing) and training sets.
            # Because all of the data lives in a single folder, hashing the
            # file name gives a stable, reasonable split for the model.
            hash_name_hashed = hashlib.sha1(
                compat.as_bytes(hash_name)).hexdigest()
            phantram_chia_nho = ((int(hash_name_hashed, 16) %
                                  (IMAGES_TOIDA + 1)) *
                                 (100.0 / IMAGES_TOIDA))
            if phantram_chia_nho < validation_data_phantram:
                data_valdiation.append(base_name)
            elif phantram_chia_nho < (testing_data_phantram +
                                      validation_data_phantram):
                data_testing.append(base_name)
            else:
                data_training.append(base_name)
        result[ten_label] = {
            'dir': dir_name,
            'training': data_training,
            'testing': data_testing,
            'validation': data_valdiation,
        }
    return result
def test_list_dir(self):
    for i in range(10):
        with gfile.GFile('ram://a/b/%d.txt' % i, 'w') as f:
            f.write('')
        with gfile.GFile('ram://c/b/%d.txt' % i, 'w') as f:
            f.write('')
    matches = ['%d.txt' % i for i in range(10)]
    self.assertEqual(gfile.ListDirectory('ram://a/b/'), matches)
def _list_data_block_dir(self):
    data_block_dir = self._data_block_dir()
    fpaths = [
        os.path.join(data_block_dir, f)
        for f in gfile.ListDirectory(data_block_dir)
        if not gfile.IsDirectory(os.path.join(data_block_dir, f))
    ]
    fpaths.sort()
    return fpaths
def _copy_dir(dir_in, dir_out):
    gfile.MakeDirs(dir_out)
    for name in gfile.ListDirectory(dir_in):
        name_in = os.path.join(dir_in, name)
        name_out = os.path.join(dir_out, name)
        if gfile.IsDirectory(name_in):
            gfile.MakeDirs(name_out)
            _copy_dir(name_in, name_out)
        else:
            gfile.Copy(name_in, name_out, overwrite=True)
def test_save_profile(self):
    logdir = self.get_temp_dir()
    profiler.start(logdir)
    with traceme.TraceMe('three_times_five'):
        three = constant_op.constant(3)
        five = constant_op.constant(5)
        product = three * five
    self.assertAllEqual(15, product)
    profiler.stop()

    file_list = gfile.ListDirectory(logdir)
    self.assertEqual(len(file_list), 2)
    for file_name in gfile.ListDirectory(logdir):
        if gfile.IsDirectory(os.path.join(logdir, file_name)):
            self.assertEqual(file_name, 'plugins')
        else:
            self.assertTrue(file_name.endswith('.profile-empty'))

    profile_dir = os.path.join(logdir, 'plugins', 'profile')
    run = gfile.ListDirectory(profile_dir)[0]
    hostname = socket.gethostname()
    overview_page = os.path.join(
        profile_dir, run, hostname + '.overview_page.pb')
    self.assertTrue(gfile.Exists(overview_page))
    input_pipeline = os.path.join(
        profile_dir, run, hostname + '.input_pipeline.pb')
    self.assertTrue(gfile.Exists(input_pipeline))
    tensorflow_stats = os.path.join(
        profile_dir, run, hostname + '.tensorflow_stats.pb')
    self.assertTrue(gfile.Exists(tensorflow_stats))
    trace_file = os.path.join(profile_dir, run, hostname + '.trace')
    self.assertTrue(gfile.Exists(trace_file))

    with gfile.Open(trace_file, 'rb') as f:
        profile_pb = trace_events_pb2.Trace()
        profile_pb.ParseFromString(f.read())
    devices = frozenset(device.name for device in profile_pb.devices.values())
    self.assertIn('/host:CPU', devices)
    if config.list_physical_devices('GPU'):
        self.assertIn('/device:GPU:0', devices)
    events = frozenset(event.name for event in profile_pb.trace_events)
    self.assertIn('three_times_five', events)
    self.assertIn('Mul:Mul', events)
def create_image_lists(image_dir, testing_percentage, validation_percentage):
    result = collections.OrderedDict()
    sub_dirs = [
        os.path.join(image_dir, item)
        for item in gfile.ListDirectory(image_dir)
    ]
    sub_dirs = sorted(item for item in sub_dirs if gfile.IsDirectory(item))
    for sub_dir in sub_dirs:
        extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
        file_list = []
        dir_name = os.path.basename(sub_dir)
        for extension in extensions:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            file_list.extend(gfile.Glob(file_glob))
        label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        training_images = []
        testing_images = []
        validation_images = []
        for file_name in file_list:
            base_name = os.path.basename(file_name)
            # We want to ignore anything after '_nohash_' in the file name
            # when deciding which set to put an image in; the data set creator
            # has a way of grouping photos that are close variations of each
            # other. For example this is used in the plant disease data set to
            # group multiple pictures of the same leaf.
            hash_name = re.sub(r'_nohash_.*$', '', file_name)
            # This looks a bit magical, but we need to decide whether this
            # file should go into the training, testing, or validation sets,
            # and we want to keep existing files in the same set even if more
            # files are subsequently added.
            # To do that, we need a stable way of deciding based on just the
            # file name itself, so we hash it and then use that to generate a
            # probability value that we use to assign it.
            hash_name_hashed = hashlib.sha1(
                compat.as_bytes(hash_name)).hexdigest()
            percentage_hash = ((int(hash_name_hashed, 16) %
                                (MAX_NUM_IMAGES_PER_CLASS + 1)) *
                               (100.0 / MAX_NUM_IMAGES_PER_CLASS))
            if percentage_hash < validation_percentage:
                validation_images.append(base_name)
            elif percentage_hash < (testing_percentage + validation_percentage):
                testing_images.append(base_name)
            else:
                training_images.append(base_name)
        result[label_name] = {
            'dir': dir_name,
            'training': training_images,
            'testing': testing_images,
            'validation': validation_images
        }
    return result
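# A small self-contained sketch of the hash-based split described in the
# comments above: the SHA-1 of the (nohash-stripped) file name is mapped to a
# stable percentage, so a file always lands in the same set even as more files
# are added later. The function name and the max_per_class default are
# illustrative assumptions, not part of the snippet above.
import hashlib
import re

def _assign_set(file_name, testing_percentage, validation_percentage,
                max_per_class=2 ** 27 - 1):
    """Returns 'training', 'testing', or 'validation' for a file name."""
    hash_name = re.sub(r'_nohash_.*$', '', file_name)
    hashed = hashlib.sha1(hash_name.encode('utf-8')).hexdigest()
    percentage_hash = ((int(hashed, 16) % (max_per_class + 1)) *
                       (100.0 / max_per_class))
    if percentage_hash < validation_percentage:
        return 'validation'
    if percentage_hash < testing_percentage + validation_percentage:
        return 'testing'
    return 'training'

# Example: _assign_set('leaf_0001.jpg', testing_percentage=10,
#                      validation_percentage=10) always returns the same set
# for the same file name.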
def testAutoTracingInDebugMode(self):
    ops.reset_default_graph()
    x = lib.BuildFullModel()

    with profile_context.ProfileContext(test.get_temp_dir(), debug=True):
        with session.Session() as sess:
            self.evaluate(variables.global_variables_initializer())
            for _ in range(10):
                self.evaluate(x)
            for f in gfile.ListDirectory(test.get_temp_dir()):
                # Warm up, no tracing.
                self.assertFalse("run_meta" in f)
            self.evaluate(x)
            self.assertTrue(
                gfile.Exists(os.path.join(test.get_temp_dir(), "run_meta_11")))
            gfile.Remove(os.path.join(test.get_temp_dir(), "run_meta_11"))
            # fetched already.
            self.evaluate(x)
            for f in gfile.ListDirectory(test.get_temp_dir()):
                self.assertFalse("run_meta" in f)
def _list_example_dumped_dir(self):
    example_dir = self._example_dumped_dir()
    if not gfile.Exists(example_dir):
        gfile.MakeDirs(example_dir)
    fpaths = [
        os.path.join(example_dir, f)
        for f in gfile.ListDirectory(example_dir)
        if not gfile.IsDirectory(os.path.join(example_dir, f))
    ]
    fpaths.sort()
    return fpaths
def _events_from_logdir(test_case, logdir):
    """Reads summary events from a log directory."""
    test_case.assertTrue(gfile.Exists(logdir))
    files = gfile.ListDirectory(logdir)
    test_case.assertLen(files, 1)
    records = list(
        tf_record.tf_record_iterator(os.path.join(logdir, files[0])))
    result = []
    for r in records:
        event = event_pb2.Event()
        event.ParseFromString(r)
        result.append(event)
    return result
def main(argv):
    # Parse task params: read the line corresponding to this task.
    with gfile.Open(FLAGS.task_params_file, 'r') as f:
        for _ in range(FLAGS.taskid + 1):
            line = f.readline()
    print(line)

    # Get task parameters by parsing the line.
    line_split = line.split(';')
    cells = gfile.ListDirectory(FLAGS.src_dir)
    cell_idx = line_split[0]
    cell_idx = cell_idx[1:-1].split(',')
    cell_idx = cell_idx[0]
    cell_idx = int(cell_idx)
    cell_string = cells[cell_idx]
    nsub = int(line_split[1])
    projection_type = line_split[2]
    lam_proj = float(line_split[3])
    ipartition = int(line_split[4][:-1])

    # Copy data for the corresponding task.
    dst = os.path.join(FLAGS.tmp_dir, cell_string)
    if not gfile.Exists(dst):
        print('Started Copy')
        src = os.path.join(FLAGS.src_dir, cell_string)
        if not gfile.IsDirectory(FLAGS.tmp_dir):
            gfile.MkDir(FLAGS.tmp_dir)
        gfile.Copy(src, dst)
        print('File copied to destination')
    else:
        print('File exists')

    # Load data.
    data = h5py.File(dst)
    stimulus = np.array(data.get('stimulus'))
    # Drop the last frame so that the stimulus is the same size as the binned
    # spike train.
    stimulus = stimulus[:-1, :]
    response = np.squeeze(np.array(data.get('response')))
    response = np.expand_dims(response, 1)
    mask_matrix = np.array(data.get('mask'))

    # Fit with a given number of subunits.
    print('Starting fitting')
    get_su_nsub(stimulus, response, mask_matrix, cell_string, nsub,
                projection_type, lam_proj, ipartition)
def _readLastEvent(self, logdir=None):
    if not logdir:
        logdir = self._tmp_logdir
    files = [
        f for f in gfile.ListDirectory(logdir)
        if not gfile.IsDirectory(os.path.join(logdir, f))
    ]
    file_path = os.path.join(logdir, files[0])
    records = list(tf_record.tf_record_iterator(file_path))
    event = event_pb2.Event()
    event.ParseFromString(records[-1])
    return event
def get_inputs(bucket_path, num_files):
    file_list = gfile.ListDirectory(bucket_path)
    X = {}
    sess = tf.Session()
    with sess.as_default():
        for filename in file_list:
            file_tensor = tf.read_file(bucket_path + '/' + filename)
            file_string = sess.run(file_tensor)
            X[filename] = file_string
    return X