Example No. 1
    def test_capture(self):
        global_step = tf.contrib.framework.get_or_create_global_step()
        # Some test computation
        some_weights = tf.get_variable("weigths", [2, 128])
        computation = tf.nn.softmax(some_weights)

        hook = hooks.MetadataCaptureHook(params={"step": 5},
                                         model_dir=self.model_dir)
        hook.begin()

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            #pylint: disable=W0212
            mon_sess = monitored_session._HookedSession(sess, [hook])
            # Should not trigger for step 0
            sess.run(tf.assign(global_step, 0))
            mon_sess.run(computation)
            self.assertEqual(gfile.ListDirectory(self.model_dir), [])
            # Should trigger *after* step 5
            sess.run(tf.assign(global_step, 5))
            mon_sess.run(computation)
            self.assertEqual(gfile.ListDirectory(self.model_dir), [])
            mon_sess.run(computation)
            self.assertEqual(set(gfile.ListDirectory(self.model_dir)),
                             set(["run_meta", "tfprof_log", "timeline.json"]))
Example No. 2
def validate_hourly(working_dir, validate_name=None):
    """ compiles a list of games based on the new hourly directory format. Then
    calls validate on it """

    holdout_files = (os.path.join(fsdb.holdout_dir(), d, f)
                     for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
                     if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
                     for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))
    holdout_files = list(itertools.islice(holdout_files, 20000))
    random.shuffle(holdout_files)
    dual_net.validate(holdout_files)
Example No. 3
def execute_job():
    config = JobExecutorConfig()
    make_needed_dirs(config)
    configure_logging(config)
    if config.prediction == 'BERT':
        match_predictor = BertMatchPredictor()
    elif config.prediction == 'KEYED_VECTORS':
        match_predictor = KeyedVectorsFormatPredictor()
    else:
        raise ValueError("Unknown prediction mode: {}".format(config.prediction))

    while True:
        logging.info("job iteration started")
        dir_in = config.dir_in
        files_names = [
            f for f in gfile.ListDirectory(dir_in)
            if not gfile.IsDirectory(join(dir_in, f))
        ]
        for file_name in files_names:
            logging.info(file_name)
            file_path = join(dir_in, file_name)
            try:
                match_predictor.predict(dir_in, file_name, config.dir_result)
                gfile.Rename(file_path, join(config.dir_success, file_name))
            except Exception:
                logging.error(traceback.format_exc())
                gfile.Rename(file_path, join(config.dir_error, file_name))
        logging.info("job iteration finished")
        time.sleep(config.interval)
Example No. 4
    def __init__(self,
                 img_name_to_token_ids,
                 img_feature_dir,
                 num_timesteps,
                 vocab,
                 deterministic=False):
        '''
        :param img_name_to_token_ids: dict mapping image names to caption token ids
        :param img_feature_dir: directory where the image feature files are stored
        :param num_timesteps: number of timesteps
        :param vocab: vocabulary
        :param deterministic: if True, do not shuffle the data
        '''
        self._vocab = vocab
        self._all_img_feature_filepaths = []  # full paths to the image feature files
        for filename in gfile.ListDirectory(img_feature_dir):
            self._all_img_feature_filepaths.append(
                os.path.join(img_feature_dir, filename))

        self._img_name_to_token_ids = img_name_to_token_ids
        self._num_timesteps = num_timesteps
        self._indicator = 0  # start index of the current batch
        self._deterministic = deterministic
        self._img_feature_filenames = []  # paths of all image features
        self._img_feature_data = []  # all image feature data
        self._load_img_feature_pickle()
        if not self._deterministic:
            self._random_shuffle()
Example No. 5
def produce_timeline_profile(profile_dir, resources_dir, profile_cnt, options):
  """Produces a timeline profile."""
  timeline_path = os.path.join(resources_dir, PROFILER_COMMON_PREFIX + 'timeline')
  if not os.path.isfile(timeline_path):
    profiles = {}
    log_path = os.path.join(PROFILER_TMP_DIR, PROFILER_TMP_NAME)
    options['output'] = 'timeline:outfile=' + log_path
    opts = model_analyzer._build_options(options)  # pylint: disable=protected-access
    for idx, prof in enumerate(gfile.ListDirectory(profile_dir)):
      prof_file = os.path.join(profile_dir, prof)
      if not os.path.isfile(prof_file):
        continue
      chosen_profile = os.path.join(resources_dir, PROFILER_COMMON_PREFIX + 'timeline_' + prof)
      profiles[prof] = chosen_profile
      if os.path.isfile(chosen_profile):
        if idx == 0:
          target_ts = get_timestamp(chosen_profile)
        continue
      tf.logging.info("Parse profile context %r" % prof_file)
      remove_tmp_files()
      # Parse profile context
      ProfilerFromFile(prof_file.encode('utf-8'))
      pwtf.Profile(options['view'].encode('utf-8'), opts.SerializeToString())
      DeleteProfiler()
      if idx == 0:
        prof_names = get_informative_profiles(PROFILER_TMP_DIR, profile_cnt)
        target_ts = get_timestamp(os.path.join(PROFILER_TMP_DIR, prof_names[0]))
      else:
        prof_names = get_profiles_by_timestamp(PROFILER_TMP_DIR, target_ts, profile_cnt)
      tf.logging.info("Choose %r as the most informative profile context for %r" % (prof_names, prof))
      gen_profile([os.path.join(PROFILER_TMP_DIR, name) for name in prof_names], chosen_profile)
    merge_profiles(profiles, timeline_path)
  return load_profile(timeline_path)
Example No. 6
def load_config(config_path, config=None):
    """Loads configs from (possibly multiple) file(s).

    Args:
        config_path: Paths to configuration files. This can be a `list` of
            config file names, or a path to a directory in which all files
            are loaded, or a string of multiple file names separated by commas.
        config (dict, optional): A config dict to which new configurations are
            added. If `None`, a new config dict is created.

    Returns:
        A `dict` of configurations.
    """
    fnames = []
    if isinstance(config_path, (list, tuple)):
        fnames = list(config_path)
    elif gfile.IsDirectory(config_path):
        for fname in gfile.ListDirectory(config_path):
            fname = os.path.join(config_path, fname)
            if not gfile.IsDirectory(fname):
                fnames.append(fname)
    else:
        for fname in config_path.split(","):
            fname = fname.strip()
            if not fname:
                continue
            fnames.append(fname)

    if config is None:
        config = {}

    for fname in fnames:
        config = load_config_single(fname, config)

    return config
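A minimal usage sketch, assuming `load_config_single` can parse the referenced files; the paths below are hypothetical and only illustrate the three accepted forms of `config_path`:

config = load_config(["model.yml", "data.yml"])              # explicit list of files
config = load_config("configs/")                             # every non-directory entry in a directory
config = load_config("model.yml, data.yml", config=config)   # comma-separated string, merged into an existing dict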
Example No. 7
    def _download(self, trial):
        """Downloads a single url given by the trial (thread safe).

    Args:
      trial (UriTrial): Object containing info about download.

    Raises:
      ValueError: If the destination dir is not empty
    """
        log = util.build_log(prefix=trial.id)

        # Check the download dir is empty
        if (gfile.Exists(trial.output_path)
                and gfile.ListDirectory(trial.output_path)):
            raise ValueError('Download dir {} should be empty'.format(
                trial.output_path))

        gfile.MakeDirs(trial.output_path)

        log('Start downloading...')
        self._backend.download(trial)

        # TODO(epot): Compute the checksum

        # Update the output path
        trial.output_path = get_download_filepath(trial)

        log('Download complete at {}', trial.output_path)
Example No. 8
def validate_holdout_selfplay():
    """Validate on held-out selfplay data."""
    holdout_dirs = (
        os.path.join(fsdb.holdout_dir(), d)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))

    # This is a roundabout way of computing how many hourly directories we need
    # to read in order to encompass 20,000 holdout games.
    holdout_dirs = set(itertools.islice(holdout_dirs, 20000))
    cmd = ['python3', 'validate.py'] + list(holdout_dirs) + [
        '--use_tpu', '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags', '--expand_validation_dirs'
    ]
    mask_flags.run(cmd)
Example No. 9
def CopyRecursively(src, dst, overwrite=False):
    entries = gfile.ListDirectory(src)
    for entry in entries:
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if gfile.IsDirectory(src_path):
            gfile.MkDir(dst_path)
            CopyRecursively(src_path, dst_path, overwrite)
        else:
            gfile.Copy(src_path, dst_path, overwrite)
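A hedged usage sketch: the recursion creates each subdirectory with `gfile.MkDir` (one level at a time), so the destination root itself is assumed to already exist. Paths are hypothetical.

gfile.MakeDirs("/tmp/ckpt_backup")   # make sure the destination root exists
CopyRecursively("/tmp/checkpoints", "/tmp/ckpt_backup", overwrite=True)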
Example No. 10
def get_download_filepath(trial):
    """Extract the downloaded file from the completed trial."""
    # Get the downloaded file:
    files = list(gfile.ListDirectory(trial.output_path))
    if not files:  # Should have been caught before, but just in case
        raise ValueError('Download {} failed!'.format(trial.id))
    elif len(files) > 1:
        raise ValueError('Multiple files detected for {}.'.format(trial.id))

    # Update the output path
    filename = files[0]
    return os.path.join(trial.output_path, filename)
Example No. 11
def gather(
    input_directory: 'where to look for games' = 'data/selfplay/',
    output_directory: 'where to put collected games' = 'data/training_chunks/',
    examples_per_record:
    'how many tf.examples to gather in each chunk' = EXAMPLES_PER_RECORD):
    qmeas.start_time('gather')
    _ensure_dir_exists(output_directory)
    models = [
        model_dir.strip('/')
        for model_dir in sorted(gfile.ListDirectory(input_directory))[-50:]
    ]
    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model:
            gfile.Glob(os.path.join(input_directory, model, '*.tfrecord.zz'))
            for model in models
        }
    print("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        print("    %s: %s files" % (model_name, len(record_files)))

    meta_file = os.path.join(output_directory, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(
                    preprocessing.shuffle_tf_examples(examples_per_record,
                                                      record_files))):
            output_record = os.path.join(
                output_directory,
                '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(output_record,
                                            example_batch,
                                            serialize=False)
        already_processed.update(record_files)

    print("Processed %s new files" %
          (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
    qmeas.stop_time('gather')
Example No. 12
 def _load_data(self):
     dataset = np.zeros((24 * 4 * 183, 64, 64, 3))
     all_files = [x for x in gfile.ListDirectory('/home/adarsh/Desktop/Disentangled-Representation-Learning-Playground/data/cars/') if ".mat" in x]
     for i, filename in enumerate(all_files):
         data_mesh = _load_mesh(filename)
         factor1 = np.array(list(range(4)))
         factor2 = np.array(list(range(24)))
         all_factors = np.transpose([
           np.tile(factor1, len(factor2)),
           np.repeat(factor2, len(factor1)),
           np.tile(i,len(factor1) * len(factor2))])
         indexes = self.index.features_to_index(all_factors)
         dataset[indexes] = data_mesh
     return dataset
Example No. 13
def aggregate():
    logger.info("Gathering game results")

    os.makedirs(PATHS.TRAINING_CHUNK_DIR, exist_ok=True)
    os.makedirs(PATHS.SELFPLAY_DIR, exist_ok=True)
    models = [
        model_dir.strip('/')
        for model_dir in sorted(gfile.ListDirectory(PATHS.SELFPLAY_DIR))[-50:]
    ]

    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model: gfile.Glob(os.path.join(PATHS.SELFPLAY_DIR, model, '*.zz'))
            for model in models
        }
    logger.info("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        logger.info("    %s: %s files" % (model_name, len(record_files)))

    meta_file = os.path.join(PATHS.TRAINING_CHUNK_DIR, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        logger.info("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(
                    preprocessing.shuffle_tf_examples(
                        GLOBAL_PARAMETER_STORE.EXAMPLES_PER_RECORD,
                        record_files))):
            output_record = os.path.join(
                PATHS.TRAINING_CHUNK_DIR,
                '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(output_record,
                                            example_batch,
                                            serialize=False)
        already_processed.update(record_files)

    logger.info("Processed %s new files" %
                (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
Example No. 14
def produce_timeline_profile():
  """Produces a timeline profile."""
  # Find the largest profile, since every step is profiled for the "graph"
  # view, and the largest step tends to be the most informative.
  # TODO: Optimize backend to only process largest step in this scenario.
  largest_profile_size = 0
  for file_name in gfile.ListDirectory(PROFILER_LOG_DIR):
    if 'profiler-ui.log_' in file_name:
      file_path = os.path.join(PROFILER_LOG_DIR, file_name)
      with gfile.GFile(file_path, 'rb') as profile:
        file_size = profile.size()
        if largest_profile_size < file_size:
          largest_profile_size = file_size
          path = os.path.join(PROFILER_LOG_DIR, file_name)
  return load_profile(path)
Example No. 15
 def _load_data(self):
   dataset = np.zeros((24 * 4 * 183, 64, 64, 3))
   all_files = [x for x in gfile.ListDirectory(CARS3D_PATH) if ".mat" in x]
   for i, filename in enumerate(all_files):
     data_mesh = _load_mesh(filename)
     factor1 = np.array(list(range(4)))
     factor2 = np.array(list(range(24)))
     all_factors = np.transpose([
         np.tile(factor1, len(factor2)),
         np.repeat(factor2, len(factor1)),
         np.tile(i,
                 len(factor1) * len(factor2))
     ])
     indexes = self.index.features_to_index(all_factors)
     dataset[indexes] = data_mesh
   return dataset
Example No. 16
def format_input(input_path, size):
    """Reads input path, randomly selects a sub-sample and concatenates them.

  Args:
    input_path: `str`, directory to read files from.
    size: `int`, number of files to read.

  Returns:
    List of `str` containing independent text reviews.
  """

    files = [
        path for path in gfile.ListDirectory(input_path)
        if path.endswith(constants.FILE_EXTENSION)
    ]
    files = np.random.choice(files, size, replace=False)
    files = [os.path.join(input_path, filename) for filename in files]
    return get_prediction_input(files)
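A small call sketch, assuming `constants.FILE_EXTENSION` is a suffix such as ".txt" and that `get_prediction_input` loads the selected files; the directory name is hypothetical.

reviews = format_input("reviews/", size=16)   # 16 randomly chosen review files, passed on to get_prediction_input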
Example No. 17
def aggregate_json_results(base_path):
    """Aggregates all the result files in a directory into a namespaced dict.

  Args:
    base_path: String with the directory containing JSON files that only contain
      dictionaries.

  Returns:
    Namespaced dictionary with the results.
  """
    result = {}
    compiled_pattern = re.compile(r"(.*)\.json")
    for filename in gfile.ListDirectory(base_path):
        match = compiled_pattern.match(filename)
        if match:
            path = os.path.join(base_path, filename)
            with tf.gfile.GFile(path, "r") as f:
                result[match.group(1)] = json.load(f)
    return namespaced_dict(**result)
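A sketch of the expected mapping, with hypothetical files and on the assumption that `namespaced_dict` exposes keys as attributes:

# base_path/lr.json      -> {"value": 0.001}
# base_path/scores.json  -> {"accuracy": 0.9}
results = aggregate_json_results("base_path")
print(results.lr["value"], results.scores["accuracy"])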
Example No. 18
    def __init__(self,
                 img_name_to_token_ids,
                 img_feature_dir,
                 num_timesteps,
                 vocab,
                 deterministic=False):
        self._vocab = vocab
        self._all_img_feature_filepaths = []
        for filename in gfile.ListDirectory(img_feature_dir):
            self._all_img_feature_filepaths.append(
                os.path.join(img_feature_dir, filename))
        # pprint.pprint(self._all_img_feature_filepaths)

        self._img_name_to_token_ids = img_name_to_token_ids
        self._num_timesteps = num_timesteps
        self._indicator = 0
        self._deterministic = deterministic
        self._img_feature_filenames = []
        self._img_feature_data = []
        self._load_img_feature_pickle()
        if not self._deterministic:
            self._random_shuffle()
Example No. 19
 def __init__(self, img_feature_dir, valid_percent=0.1, test_percent=0.1, is_shuffle=True):
     self._all_img_feature_filepaths = []
     for filename in gfile.ListDirectory(img_feature_dir):
         self._all_img_feature_filepaths.append(os.path.join(img_feature_dir, filename))
     self.is_shuffle = is_shuffle
     self._img_feature_data = []
     self._img_feature_labels = []
     self._load_img_feature_pickle()
     if self.is_shuffle:
         self._random_shuffle()
     # split the dataset into training, validation, and test sets
     valid_len = int(self.size() * valid_percent)
     test_len = int(self.size() * test_percent)
     self._valid_data = self._img_feature_data[:valid_len, :]
     self._valid_labels = self._img_feature_labels[:valid_len]
     self._test_data = self._img_feature_data[valid_len:valid_len + test_len, :]
     self._test_labels = self._img_feature_labels[valid_len:valid_len + test_len]
     self._train_data = self._img_feature_data[valid_len + test_len:, :]
     self._train_labels = self._img_feature_labels[valid_len + test_len:]
     self._train_indicator = 0
     print(self._valid_data.shape, self._valid_labels.shape)
     print(self._test_data.shape, self._test_labels.shape)
     print(self._train_data.shape, self._train_labels.shape)
Example No. 20
def get_sgf_names(model):
    game_dir = HOLDOUT_PATH.format(FLAGS.base_dir, model)
    tf_records = map(os.path.basename, gfile.ListDirectory(game_dir))
    sgfs = [record.replace('.tfrecord.zz', '.sgf') for record in tf_records]
    return [PATH_TEMPLATE.format(FLAGS.base_dir, model, sgf) for sgf in sgfs]
Example No. 21
def remove_tmp_files():
    """Removes temporary files created by the profiler."""
    for file_name in gfile.ListDirectory(PROFILER_LOG_DIR):
        if 'profiler-ui.' in file_name:
            gfile.Remove(os.path.join(PROFILER_LOG_DIR, file_name))
Example No. 22
def get_hour_dirs(root=None):
    """Gets the directories under selfplay_dir that match YYYY-MM-DD-HH."""
    root = root or selfplay_dir()
    return list(
        filter(lambda s: re.match(r"\d{4}-\d{2}-\d{2}-\d{2}", s),
               gfile.ListDirectory(root)))
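A quick check of the pattern with the standard `re` module; `re.match` only anchors at the start of the string, so a name with a trailing suffix after the hour also passes the filter.

import re
pattern = r"\d{4}-\d{2}-\d{2}-\d{2}"
print(bool(re.match(pattern, "2019-06-01-17")))         # True
print(bool(re.match(pattern, "models")))                # False
print(bool(re.match(pattern, "2019-06-01-17-backup")))  # True as well; only the prefix is checked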