@classmethod
def setUpClass(cls):
  cache_dir = tf.test.get_temp_dir()

  # Create a dummy file
  dummy_dir = os.path.join(cache_dir, 'dummy')
  dummy_filepath = os.path.join(dummy_dir, 'dummy.txt')
  gfile.MakeDirs(dummy_dir)
  dummy_file_contents = 'hello world'
  with gfile.Open(dummy_filepath, 'w') as f:
    f.write(dummy_file_contents)

  # File containing compressed archives
  input_dir = os.path.join(cache_dir, 'to_extract')
  gfile.MakeDirs(input_dir)

  dl_manager = download_manager.DownloadManager(
      cache_dir=cache_dir,
      mode=util.GenerateMode.REUSE_CACHE_IF_EXISTS,
  )

  cls.dummy_dir = dummy_dir
  cls.dummy_filepath = dummy_filepath
  cls.dummy_file_contents = dummy_file_contents
  cls.input_dir = input_dir
  cls.dl_manager = dl_manager
def save_model(model, history):
  if not gfile.Exists(MODEL_DIR):
    gfile.MakeDirs(MODEL_DIR)
  model.save(MODEL_FILE)

  if not gfile.Exists(HISTORY_DIR):
    gfile.MakeDirs(HISTORY_DIR)
  with open(HISTORY_FILE, 'wb') as f:
    pickle.dump(history.history, f)
def extract_holdout_model(model):
  game_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'games', model)
  move_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'moves', model)
  # Create the parent directories of the output files (dirname, not basename).
  gfile.MakeDirs(os.path.dirname(game_output_path))
  gfile.MakeDirs(os.path.dirname(move_output_path))

  with gfile.GFile(game_output_path, 'w') as game_f, \
      gfile.GFile(move_output_path, 'w') as move_f:
    for sgf_name in tqdm(get_sgf_names(model)):
      game_data, move_data = extract_data(sgf_name)
      game_f.write(json.dumps(game_data) + '\n')
      for move_datum in move_data:
        move_f.write(json.dumps(move_datum) + '\n')
def prepare_dirs(recreate=False):
  """Prepare config dirs.

  When recreate is True, remove the results of any previous execution and
  recreate the dirs. When recreate is False, keep the previous results.
  """
  experiment_dir = environment.EXPERIMENT_DIR
  tensorboard_dir = environment.TENSORBOARD_DIR
  checkpoints_dir = environment.CHECKPOINTS_DIR

  if recreate:
    message = """
Delete and recreate these dirs:
experiment_dir: {experiment_dir}
tensorboard_dir: {tensorboard_dir}
checkpoints_dir: {checkpoints_dir}
""".format(experiment_dir=experiment_dir,
           tensorboard_dir=tensorboard_dir,
           checkpoints_dir=checkpoints_dir)
  else:
    message = """
Create these dirs if they don't exist:
experiment_dir: {experiment_dir}
tensorboard_dir: {tensorboard_dir}
checkpoints_dir: {checkpoints_dir}
""".format(experiment_dir=experiment_dir,
           tensorboard_dir=tensorboard_dir,
           checkpoints_dir=checkpoints_dir)

  print(message)

  if recreate:
    if gfile.Exists(experiment_dir):
      gfile.DeleteRecursively(experiment_dir)
    if gfile.Exists(tensorboard_dir):
      gfile.DeleteRecursively(tensorboard_dir)
    if gfile.Exists(checkpoints_dir):
      gfile.DeleteRecursively(checkpoints_dir)

  if not gfile.Exists(experiment_dir):
    gfile.MakeDirs(experiment_dir)
  if not gfile.Exists(tensorboard_dir):
    gfile.MakeDirs(tensorboard_dir)
  if not gfile.Exists(checkpoints_dir):
    gfile.MakeDirs(checkpoints_dir)
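# Usage sketch (hypothetical): this only assumes that environment.EXPERIMENT_DIR
# and the other dirs are configured by the surrounding module, as prepare_dirs
# above requires.
prepare_dirs(recreate=True)   # delete and recreate the three dirs
prepare_dirs(recreate=False)  # only create dirs that are missing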
def after_run(self, _run_context, run_values):
  if not self.is_chief or self._done:
    return

  step_done = run_values.results
  if self._active:
    tf.logging.info("Captured full trace at step %s", step_done)
    # Create output directory
    gfile.MakeDirs(self._output_dir)

    # Save run metadata
    trace_path = os.path.join(self._output_dir, "run_meta")
    with gfile.GFile(trace_path, "wb") as trace_file:
      trace_file.write(run_values.run_metadata.SerializeToString())
      tf.logging.info("Saved run_metadata to %s", trace_path)

    # Save timeline
    timeline_path = os.path.join(self._output_dir, "timeline.json")
    with gfile.GFile(timeline_path, "w") as timeline_file:
      tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
      tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
      timeline_file.write(tl_chrome)
      tf.logging.info("Saved timeline to %s", timeline_path)

    # Save tfprof op log
    tf.contrib.tfprof.tfprof_logger.write_op_log(
        graph=tf.get_default_graph(),
        log_dir=self._output_dir,
        run_meta=run_values.run_metadata)
    tf.logging.info("Saved op log to %s", self._output_dir)

    self._active = False
    self._done = True

  self._active = (step_done >= self.params["step"])
def begin(self):
  self._iter_count = 0
  self._global_step = tf.train.get_global_step()
  self._pred_dict = graph_utils.get_dict_from_collection("predictions")
  # Create the sample directory
  if self._sample_dir is not None:
    gfile.MakeDirs(self._sample_dir)
def get_target_path(request, point_num):
  """Computes the output path for a specific point.

  Args:
    request: ResegmentationRequest proto
    point_num: index of the point of interest within the proto

  Returns:
    path to the output file where resegmentation results will be saved
  """
  # Prepare the output directory.
  output_dir = request.output_directory

  id_a = request.points[point_num].id_a
  id_b = request.points[point_num].id_b

  if request.subdir_digits > 1:
    m = hashlib.md5()
    m.update(str(id_a))
    m.update(str(id_b))
    output_dir = os.path.join(output_dir, m.hexdigest()[:request.subdir_digits])
  gfile.MakeDirs(output_dir)

  # Terminate early if the output already exists.
  dp = request.points[point_num].point
  target_path = os.path.join(
      output_dir,
      '%d-%d_at_%d_%d_%d.npz' % (id_a, id_b, dp.x, dp.y, dp.z))
  if gfile.Exists(target_path):
    logging.info('Output already exists: %s', target_path)
    return

  return target_path
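# Worked example (hypothetical values) of the filename scheme used above:
# id_a=12, id_b=34 and a point at x=100, y=200, z=300 produce
# '12-34_at_100_200_300.npz' inside the (optionally hash-prefixed) output_dir.
filename = '%d-%d_at_%d_%d_%d.npz' % (12, 34, 100, 200, 300)
assert filename == '12-34_at_100_200_300.npz'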
def dump_object(object_to_dump, output_path):
  if not tf.io.gfile.exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with tf.io.gfile.GFile(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf)
def _download(self, trial):
  """Downloads a single url given by the trial (thread safe).

  Args:
    trial (UriTrial): Object containing info about download.

  Raises:
    ValueError: If the destination dir is not empty
  """
  log = util.build_log(prefix=trial.id)

  # Check the download dir is empty
  if (gfile.Exists(trial.output_path) and
      gfile.ListDirectory(trial.output_path)):
    raise ValueError('Download dir {} should be empty'.format(
        trial.output_path))

  gfile.MakeDirs(trial.output_path)

  log('Start downloading...')
  self._backend.download(trial)

  # TODO(epot): Compute the checksum

  # Update the output path
  trial.output_path = get_download_filepath(trial)

  log('Download complete at {}', trial.output_path)
def dump_object(object_to_dump, output_path):
  if not gfile.Exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with gfile.Open(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf)
def _prepare(self):
  """Prepares for evaluation.

  Builds the model with reuse=True, mode=EVAL and preprocesses
  data file(s).
  """
  text_inputter = TextLineInputter(
      dataset=self._dataset,
      data_field_name="eval_features_file",
      batch_size=self._batch_size)
  self._eval_feeding_data = text_inputter.make_feeding_data()
  self._model_configs = update_infer_params(  # update inference parameters
      self._model_configs,
      beam_size=self._beam_size,
      maximum_labels_length=self._maximum_labels_length,
      length_penalty=self._length_penalty)
  estimator_spec = model_fn(model_configs=self._model_configs,
                            mode=ModeKeys.INFER,
                            dataset=self._dataset,
                            name=self._model_name,
                            reuse=True,
                            verbose=False)
  self._predict_ops = estimator_spec.predictions
  tmp_trans_dir = os.path.join(self._model_configs["model_dir"],
                               GlobalNames.TMP_TRANS_DIRNAME)
  if not gfile.Exists(tmp_trans_dir):
    gfile.MakeDirs(tmp_trans_dir)
  self._tmp_trans_file_prefix = os.path.join(
      tmp_trans_dir, GlobalNames.TMP_TRANS_FILENAME_PREFIX)
  self._read_ckpt_bleulog()
  self._eval_labels_file = self._dataset.eval_labels_file
  self._check_bleu_script()
  self._estop_patience = 0
  self._best_bleu_score = 0.
def main(_argv):
  """Main function. Runs all analyses."""
  # pylint: disable=W0212
  tfprof_logger._merge_default_with_oplog = merge_default_with_oplog

  FLAGS.model_dir = os.path.abspath(os.path.expanduser(FLAGS.model_dir))
  output_dir = os.path.join(FLAGS.model_dir, "profile")
  gfile.MakeDirs(output_dir)

  run_meta, graph, op_log = load_metadata(FLAGS.model_dir)

  param_arguments = [
      param_analysis_options(output_dir),
      micro_anaylsis_options(output_dir),
      flops_analysis_options(output_dir),
      device_analysis_options(output_dir),
  ]

  for tfprof_cmd, params in param_arguments:
    model_analyzer.print_model_analysis(
        graph=graph,
        run_meta=run_meta,
        op_log=op_log,
        tfprof_cmd=tfprof_cmd,
        tfprof_options=params)

    if params["dump_to_file"] != "":
      print("Wrote {}".format(params["dump_to_file"]))
def main(unused_argv):
  request = inference_flags.request_from_flags()

  if not gfile.Exists(request.segmentation_output_dir):
    gfile.MakeDirs(request.segmentation_output_dir)

  bbox = bounding_box_pb2.BoundingBox()
  text_format.Parse(FLAGS.bounding_box, bbox)

  runner = inference.Runner()
  runner.start(request, with_membrane=FLAGS.with_membrane)
  print('>>>>>>>>>>>>>>>>> FAKE RUN')
  runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
             (bbox.size.z, bbox.size.y, bbox.size.x),
             with_membrane=FLAGS.with_membrane,
             fake=True)
  print('>>>>>>>>>>>>>>>>> REAL RUN')
  runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
             (bbox.size.z, bbox.size.y, bbox.size.x),
             with_membrane=FLAGS.with_membrane)

  counter_path = os.path.join(request.segmentation_output_dir, 'counters.txt')
  if not gfile.Exists(counter_path):
    runner.counters.dump(counter_path)
def copy_latest_checkpoint(self):
  """Copy over the latest checkpoints to the target directory."""
  chkpt = get_latest_checkpoint(self.model_directory)
  logging.info('Got latest checkpoint: %s', chkpt)
  if chkpt is None:
    return None

  # Check if the evaluation meta graph has been copied.
  if self.has_checkpoint() is None:
    # Don't copy temp export folders, e.g. 'temp-01234567/saved_model.pb'
    export_file = gfile.Glob(
        os.path.join(self.model_directory,
                     'export/best_exporter/[0-9]*/saved_model.pb'))[0]
    logging.info('Copying eval export file: %s', export_file)
    target_export_dir = os.path.join(
        self.target_directory, 'export/best_exporter',
        os.path.basename(os.path.dirname(export_file)))
    gfile.MakeDirs(target_export_dir)
    verbose_copy(
        export_file,
        os.path.join(target_export_dir, os.path.basename(export_file)))

  files = gfile.Glob(os.path.join(self.model_directory, chkpt) + b'.*')
  logging.info('Copying files: %s', ', '.join(files))
  for fname in files:
    verbose_copy(
        fname, os.path.join(self.target_directory, os.path.basename(fname)))
  return chkpt
def save_flags():
  gfile.MakeDirs(FLAGS.train_dir)
  with gfile.Open(os.path.join(FLAGS.train_dir, 'flags.%d' % time.time()),
                  'w') as f:
    for mod, flag_list in FLAGS.flags_by_module_dict().items():
      if (mod.startswith('google3.research.neuromancer.tensorflow') or
          mod.startswith('/')):
        for flag in flag_list:
          f.write('%s\n' % flag.serialize())
def dump_object(object_to_dump, output_path):
  """Writes the trained model pipeline to GCS."""
  if not tf.io.gfile.exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with tf.io.gfile.GFile(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf)
def make_dir(dir_name: str) -> str:
  if gfile.Exists(dir_name):
    if gfile.IsDirectory(dir_name):
      return dir_name
    else:
      logging.fatal(
          'Trying to create directory "%s", but there '
          'is a file with the same name', dir_name)
  gfile.MakeDirs(dir_name)
  return dir_name
def verify_dirs_exist(dirname):
  '''Verify that the directory exists.

  Will recursively create directories as needed.

  Input:
    dirname: str; directory name to create
  '''
  if not exists(dirname):
    gfile.MakeDirs(dirname)
def save(saver, sess, logdir, step):
  model_name = 'model.ckpt'
  checkpoint_path = os.path.join(logdir, model_name)
  print('Storing checkpoint to {} ...'.format(logdir), end="")
  sys.stdout.flush()

  if not gfile.Exists(logdir):
    gfile.MakeDirs(logdir)

  saver.save(sess, checkpoint_path, global_step=step)
  print(' Done.')
def begin(self):
  self._global_step = tf.train.get_global_step()
  self._pred_dict = graph_utils.get_dict_from_collection("predictions")
  self._features = graph_utils.get_dict_from_collection("features")
  self._iter_count = 0
  self._eval_str = ""
  self._current_global_step = None
  # Create the sample directory
  if self._evalution_result_dir is not None:
    if not os.path.exists(self._evalution_result_dir):
      gfile.MakeDirs(self._evalution_result_dir)
    # Use an octal literal; the decimal 777 would set an unintended mode.
    os.chmod(self._evalution_result_dir, 0o777)
def dump(model_config, output_dir):
  """Dumps model configurations.

  Args:
    model_config: A dict.
    output_dir: A string, the output directory.
  """
  model_config_filename = os.path.join(
      output_dir, Constants.MODEL_CONFIG_YAML_FILENAME)
  if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)
  with open_file(model_config_filename, mode="w") as file:
    yaml.dump(model_config, file)
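# Usage sketch (hypothetical config and path): dump a small model configuration,
# assuming `Constants.MODEL_CONFIG_YAML_FILENAME` and `open_file` are provided
# by the surrounding module, as the `dump` helper above requires.
example_config = {"encoder": {"num_layers": 6}, "decoder": {"num_layers": 6}}
dump(example_config, "/tmp/example_model_dir")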
def main(unused_argv):
  request = inference_flags.request_from_flags()

  if not gfile.Exists(request.segmentation_output_dir):
    gfile.MakeDirs(request.segmentation_output_dir)

  bbox = bounding_box_pb2.BoundingBox()
  text_format.Parse(FLAGS.bounding_box, bbox)

  # start_pos = tuple([int(i) for i in FLAGS.start_pos])
  runner = inference.Runner()
  corner = (bbox.start.z, bbox.start.y, bbox.start.x)
  subvol_size = (bbox.size.z, bbox.size.y, bbox.size.x)
  start_pos = tuple([int(i) for i in FLAGS.start_pos])

  seg_path = storage.segmentation_path(
      request.segmentation_output_dir, corner)
  prob_path = storage.object_prob_path(
      request.segmentation_output_dir, corner)

  runner.start(request)
  canvas, alignment = runner.make_canvas(corner, subvol_size)
  num_iter = canvas.segment_at(start_pos)
  print('>>', num_iter)

  sel = [
      slice(max(s, 0), e + 1)
      for s, e in zip(canvas._min_pos - canvas._pred_size // 2,
                      canvas._max_pos + canvas._pred_size // 2)
  ]
  mask = canvas.seed[sel] >= canvas.options.segment_threshold
  raw_segmented_voxels = np.sum(mask)

  mask &= canvas.segmentation[sel] <= 0
  actual_segmented_voxels = np.sum(mask)
  canvas._max_id += 1
  canvas.segmentation[sel][mask] = canvas._max_id
  canvas.seg_prob[sel][mask] = storage.quantize_probability(
      expit(canvas.seed[sel][mask]))

  runner.save_segmentation(canvas, alignment, seg_path, prob_path)

  runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
             (bbox.size.z, bbox.size.y, bbox.size.x))

  counter_path = os.path.join(request.segmentation_output_dir, 'counters.txt')
  if not gfile.Exists(counter_path):
    runner.counters.dump(counter_path)
def create_captcha_dataset(size=100,
                           data_dir='./data/',
                           height=60,
                           width=160,
                           image_format='.png'):
  if gfile.Exists(data_dir):
    gfile.DeleteRecursively(data_dir)
  gfile.MakeDirs(data_dir)

  captcha = ImageCaptcha(width=width, height=height)
  for _ in range(size):
    text = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LENGTH)
    captcha.write(text, data_dir + text + image_format)

  return None
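# Usage sketch (hypothetical sizes/paths): generate a small captcha dataset.
# Assumes `gen_random_text`, `CAPTCHA_CHARSET` and `CAPTCHA_LENGTH` are defined
# in the surrounding module, as `create_captcha_dataset` above requires.
create_captcha_dataset(size=10, data_dir='./data/test/')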
def write_handle(path, mode=None):
  if _supports_make_dirs(path):
    gfile.MakeDirs(os.path.dirname(path))

  if mode is None:
    if _supports_binary_writing(path):
      mode = 'wb'
    else:
      mode = 'w'

  handle = gfile.Open(path, mode)
  yield handle
  handle.close()
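# Usage sketch: the yield/close pattern above suggests `write_handle` is meant
# to be used as a context manager (typically via `contextlib.contextmanager`).
# This wiring is an assumption, not part of the snippet above.
import contextlib

managed_write_handle = contextlib.contextmanager(write_handle)
with managed_write_handle('/tmp/example/output.txt') as f:
  f.write('hello world')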
def __init__(self, log_dir):
  """Create a new SummaryWriter.

  Args:
    log_dir: path to record tfevents files in.
  """
  # If needed, create log_dir directory as well as missing parent directories.
  if not gfile.IsDirectory(log_dir):
    gfile.MakeDirs(log_dir)

  self.writer = tf.summary.FileWriter(log_dir, graph=None)
  self.end_summaries = []
  self.step = 0
  self.closed = False
def save_yaml(output_dir, config):
  """Save two yaml files.

  1. 'config.yaml' is a duplicate of the python config file as yaml.
  2. 'meta.yaml' for application. Its keys are defined by `PARAMS_FOR_EXPORT`.
  """
  if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)

  config_yaml_path = _save_config_yaml(output_dir, config)
  meta_yaml_path = _save_meta_yaml(output_dir, config)

  return config_yaml_path, meta_yaml_path
def dump_object(object_to_dump, output_path):
  """Pickle the object and save to the output_path.

  Args:
    object_to_dump: Python object to be pickled
    output_path: (string) output path which can be Google Cloud Storage

  Returns:
    None
  """
  if not gfile.Exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with gfile.Open(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf)
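# Usage sketch (hypothetical model and bucket): persist a scikit-learn model
# with the `dump_object` helper above. The GCS path is a placeholder.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit([[0.0], [1.0]], [0, 1])
dump_object(model, 'gs://my-bucket/models/model.joblib')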
def dump(self, model_dir):
  """Dumps the options to a file in the model directory.

  Args:
    model_dir: Path to the model directory. The options will be dumped into
      a file in this directory.
  """
  gfile.MakeDirs(model_dir)
  options_dict = {
      "model_class": self.model_class,
      "model_params": self.model_params,
  }

  with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
    file.write(json.dumps(options_dict).encode("utf-8"))
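# Round-trip sketch (hypothetical model_dir): read back the options written
# above, assuming `TrainOptions.path(model_dir)` returns the same JSON file path
# that `dump` wrote to.
import json

with gfile.GFile(TrainOptions.path("/tmp/example_model_dir"), "rb") as f:
  options_dict = json.loads(f.read().decode("utf-8"))
print(options_dict["model_class"], options_dict["model_params"])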
def _prepare(self):
  """Prepares for evaluation.

  Builds the model with reuse=True, mode=EVAL and preprocesses
  data file(s).
  """
  features_file = self._dataset["features_file"]
  labels_file = self._dataset["labels_file"]
  vocab_source = self._dataset["vocab_source"]
  vocab_target = self._dataset["vocab_target"]
  self._model_configs = update_infer_params(  # update inference parameters
      self._model_configs,
      beam_size=self._beam_size,
      maximum_labels_length=self._maximum_labels_length,
      length_penalty=self._length_penalty)
  estimator_spec = model_fn(model_configs=self._model_configs,
                            mode=ModeKeys.INFER,
                            vocab_source=vocab_source,
                            vocab_target=vocab_target,
                            name=self._model_name,
                            reuse=True,
                            verbose=False)
  self._predict_ops = estimator_spec.predictions
  text_inputter = TextLineInputter(
      line_readers=LineReader(
          data=features_file,
          preprocessing_fn=lambda x: vocab_source.convert_to_idlist(x)),
      padding_id=vocab_source.pad_id,
      batch_size=self._batch_size)
  self._infer_data = text_inputter.make_feeding_data(
      input_fields=estimator_spec.input_fields)
  tmp_trans_dir = os.path.join(self._model_configs["model_dir"],
                               Constants.TMP_TRANS_DIRNAME)
  if not gfile.Exists(tmp_trans_dir):
    gfile.MakeDirs(tmp_trans_dir)
  self._tmp_trans_file_prefix = os.path.join(
      tmp_trans_dir, Constants.TMP_TRANS_FILENAME_PREFIX)
  self._read_ckpt_bleulog()
  # load references
  self._references = []
  for rfile in access_multiple_files(labels_file):
    with open_file(rfile) as fp:
      if self._char_level:
        self._references.append(to_chinese_char(fp.readlines()))
      else:
        self._references.append(fp.readlines())
  self._references = list(map(list, zip(*self._references)))
  with open_file(features_file) as fp:
    self._sources = fp.readlines()
  self._bad_count = 0
  self._best_bleu_score = 0.
def _extract(self, trial):
  """Extract a single file given by the trial.

  Args:
    trial (UriTrial): Object containing info about the extraction.

  Raises:
    ValueError: If the format is incorrect
  """
  log = util.build_log(prefix=trial.id)

  src = trial.extract_info.path
  dst = trial.output_path

  rar_format = ExtractFormat(ext=TAR_EXT, fn=self._backend.extract_tar)
  zip_format = ExtractFormat(ext=['.zip'], fn=self._backend.extract_zip)
  gz_format = ExtractFormat(ext=['.gz'], fn=self._backend.extract_gzip)

  # Order matters as '.tar.gz' will call _extract_tar while '.gz' will
  # call _extract_gzip
  extraction_fns = collections.OrderedDict([
      (download_pb2.ExtractInfo.RAR, rar_format),
      (download_pb2.ExtractInfo.ZIP, zip_format),
      (download_pb2.ExtractInfo.GZ, gz_format),
  ])

  # Filetype explicitly defined
  if trial.extract_info.filetype != download_pb2.ExtractInfo.UNKNOWN:
    extract_filetype = trial.extract_info.filetype
    extract_fn = extraction_fns[extract_filetype].fn
  # Try to infer the filetype from the name
  else:
    for extract_filetype, extract_format in extraction_fns.items():
      if any(src.lower().endswith(ext) for ext in extract_format.ext):
        extract_fn = extract_format.fn
        break
    else:  # No break (unrecognized archive)
      raise ValueError(
          'Unsupported archive file {} for trial {}. If you think this is an '
          'error, you can try to explicitly define the type in the '
          'ExtractFileType'.format(src, trial.id))

  log('Extract {} with {}...', src, extract_fn.__name__)
  gfile.MakeDirs(dst)
  extract_fn(src, dst)
  if extract_filetype == download_pb2.ExtractInfo.GZ:
    trial.output_path = get_download_filepath(trial)