Example #1
    def run(self, corner, subvol_size):
        """Runs FFN inference over a subvolume.

        Args:
          corner: start of the subvolume (z, y, x)
          subvol_size: size of the subvolume (z, y, x)

        Returns:
          Canvas object with the segmentation or None if the canvas could not
          be created or the segmentation subvolume already exists.
        """
        self.counters.reset()

        seg_path = storage.segmentation_path(
            self.request.segmentation_output_dir, corner)
        prob_path = storage.object_prob_path(
            self.request.segmentation_output_dir, corner)
        cpoint_path = storage.checkpoint_path(
            self.request.segmentation_output_dir, corner)

        if gfile.Exists(seg_path):
            if pyborgletinfo.RunningUnderBorglet():
                pywrapborgletlib.BorgletLib.SetStatusMsg(
                    'Segmentation already complete; exiting.')
            return None

        canvas, alignment = self.make_canvas(corner, subvol_size)
        if canvas is None:
            return None

        if gfile.Exists(cpoint_path):
            canvas.restore_checkpoint(cpoint_path)

        if self.request.alignment_options.save_raw:
            image_path = storage.subvolume_path(
                self.request.segmentation_output_dir, corner, 'align')
            with storage.atomic_file(image_path) as fd:
                np.savez_compressed(fd, im=canvas.image)

        canvas.segment_all(
            seed_policy=self.get_seed_policy(corner, subvol_size))
        self.save_segmentation(canvas, alignment, seg_path, prob_path)

        # Attempt to remove the checkpoint file now that we no longer need it.
        try:
            gfile.Remove(cpoint_path)
        except:  # pylint: disable=bare-except
            pass

        return canvas
Example #2
def threshold_segmentation(segmentation_dir, corner, labels, threshold):
  prob_path = object_prob_path(segmentation_dir, corner)
  if not gfile.Exists(prob_path):
    prob_path = legacy_object_prob_path(segmentation_dir, corner)
    if not gfile.Exists(prob_path):
      raise ValueError('Cannot find probability map %s' % prob_path)

  with gfile.Open(prob_path, 'rb') as f:
    data = np.load(f)
    if 'qprob' not in data:
      raise ValueError('Invalid FFN probability map.')

    prob = dequantize_probability(data['qprob'])
    labels[prob < threshold] = 0
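
A hypothetical usage sketch of the helper above; the segmentation path, the .npz key, and the threshold value are illustrative assumptions, not part of the original code.

import numpy as np

# Hypothetical usage: zero out voxels whose FFN object probability is < 0.5.
# The file layout is assumed; `labels` is modified in place.
labels = np.load('/tmp/seg/seg-0_0_0.npz')['segmentation']
threshold_segmentation('/tmp/seg', (0, 0, 0), labels, threshold=0.5)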
Example #3
def get_mldata(dataset):
    # Use scikit-learn to grab datasets and save them to save_dir.
    save_dir = FLAGS.save_dir
    filename = os.path.join(save_dir, dataset[1] + '.pkl')

    if not gfile.Exists(save_dir):
        gfile.MkDir(save_dir)
    if not gfile.Exists(filename):
        if dataset[0][-3:] == 'csv':
            data = get_csv_data(dataset[0])
        elif dataset[0] == 'breast_cancer':
            data = load_breast_cancer()
        elif dataset[0] == 'iris':
            data = load_iris()
        elif dataset[0] == 'newsgroup':
            # Removing header information to make sure that no newsgroup identifying
            # information is included in data
            data = fetch_20newsgroups_vectorized(subset='all',
                                                 remove=('headers',))
            tfidf = TfidfTransformer(norm='l2')
            X = tfidf.fit_transform(data.data)
            data.data = X
        elif dataset[0] == 'rcv1':
            sklearn.datasets.rcv1.URL = (
                'http://www.ai.mit.edu/projects/jmlr/papers/'
                'volume5/lewis04a/a13-vector-files/lyrl2004_vectors')
            sklearn.datasets.rcv1.URL_topics = (
                'http://www.ai.mit.edu/projects/jmlr/papers/'
                'volume5/lewis04a/a08-topic-qrels/rcv1-v2.topics.qrels.gz')
            data = sklearn.datasets.fetch_rcv1(data_home='/tmp')
        elif dataset[0] == 'wikipedia_attack':
            data = get_wikipedia_talk_data()
        elif dataset[0] == 'cifar10':
            data = get_cifar10()
        elif 'keras' in dataset[0]:
            data = get_keras_data(dataset[0])
        else:
            try:
                data = fetch_mldata(dataset[0])
            except:
                raise Exception('ERROR: failed to fetch data from mldata.org')
        X = data.data
        y = data.target
        if X.shape[0] != y.shape[0]:
            X = np.transpose(X)
        assert X.shape[0] == y.shape[0]

        data = {'data': X, 'target': y}
        pickle.dump(data, gfile.GFile(filename, 'w'))
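
A hypothetical usage sketch: each dataset is described by a (source, save-name) pair, and the result is pickled to FLAGS.save_dir/<save-name>.pkl. The tuples below are illustrative.

# Hypothetical usage: (source identifier, file name to save under).
get_mldata(('iris', 'iris'))
get_mldata(('wikipedia_attack', 'wikipedia_attack'))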
Example #4
def train(working_dir):
    model_num, model_name = fsdb.get_latest_model()

    games = gfile.Glob(os.path.join(fsdb.selfplay_dir(), model_name, '*.zz'))
    if len(games) < MIN_GAMES_PER_GENERATION:
        print("{} doesn't have enough games to train a new model yet ({})".
              format(model_name, len(games)))
        print("Sleeping...")
        time.sleep(10 * 60)
        print("Done...")
        sys.exit(1)

    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(fsdb.golden_chunk_dir(),
                                 str(new_model_num) + '.tfrecord.zz')
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    save_file = os.path.join(fsdb.models_dir(), new_model_name)
    try:
        main.train(working_dir, [training_file],
                   save_file,
                   generation_num=model_num + 1)
    except:
        logging.exception("Train error")
Example #5
def preprocess(source, chunksize):
    reader = read_csv(source, header=0, chunksize=chunksize)
    if gfile.Exists(prep_data_path):
        gfile.Remove(prep_data_path)

    for data in reader:
        data = data.fillna(0)
        data.replace(('yes', 'no'), (1, 0), inplace=True)
        product_type, sub_area, ecology = \
          data['product_type'].values, data['sub_area'].values, data['ecology'].values
        data['product_type'] = np.reshape([
            one_hot(x, n=np.unique(product_type).shape[0] + 1, filters='')
            for x in product_type
        ], product_type.shape)
        sub_area = np.array([
            s.replace(' ', '').replace('-', '').replace('\'',
                                                        '').replace(',', '')
            for s in sub_area
        ])
        data['sub_area'] = np.reshape(
            [one_hot(x, n=np.unique(sub_area).shape[0] + 1) for x in sub_area],
            sub_area.shape)
        ecology = np.array([
            s.replace(' ', '').replace('-', '').replace('\'',
                                                        '').replace(',', '')
            for s in ecology
        ])
        data['ecology'] = np.reshape(
            [one_hot(x, n=np.unique(ecology).shape[0] + 1) for x in ecology],
            ecology.shape)
        if not isfile(prep_data_path):
            data.to_csv(prep_data_path)
        else:
            data.to_csv(prep_data_path, mode='a', header=False)
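
The one_hot used above is presumably keras.preprocessing.text.one_hot, which hashes each word of a string into an integer in [1, n); a minimal sketch of that assumption:

# Assumption: `one_hot` is the hashing-based Keras text helper, so distinct
# strings can collide onto the same integer.
from tensorflow.keras.preprocessing.text import one_hot
print(one_hot('Investment', n=50))  # e.g. [23] -- value depends on the hash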
Example #6
def inference(reader, checkpoint_file, train_dir, data_pattern, out_file_location, batch_size, top_k):
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess, gfile.Open(out_file_location, "w+") as out_file:
    video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
    if checkpoint_file:
      if not gfile.Exists(checkpoint_file + ".meta"):
        logging.fatal("Unable to find checkpoint file at provided location '%s'" % checkpoint_file)
      latest_checkpoint = checkpoint_file
    else:
      latest_checkpoint = tf.train.latest_checkpoint(train_dir)
    if latest_checkpoint is None:
      raise Exception("unable to find a checkpoint at location: %s" % train_dir)
    else:
      meta_graph_location = latest_checkpoint + ".meta"
      logging.info("loading meta-graph: " + meta_graph_location)
    saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
    logging.info("restoring variables from " + latest_checkpoint)
    saver.restore(sess, latest_checkpoint)
    input_tensor = tf.get_collection("input_batch_raw")[0]
    num_frames_tensor = tf.get_collection("num_frames")[0]
    predictions_tensor = tf.get_collection("predictions")[0]

    # Workaround for num_epochs issue.
    def set_up_init_ops(variables):
      init_op_list = []
      for variable in list(variables):
        if "train_input" in variable.name:
          init_op_list.append(tf.assign(variable, 1))
          variables.remove(variable)
      init_op_list.append(tf.variables_initializer(variables))
      return init_op_list

    sess.run(set_up_init_ops(tf.get_collection_ref(
        tf.GraphKeys.LOCAL_VARIABLES)))

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    num_examples_processed = 0
    start_time = time.time()
    out_file.write("VideoId,LabelConfidencePairs\n")

    try:
      while not coord.should_stop():
          video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run(
              [video_id_batch, video_batch, num_frames_batch])
          predictions_val, = sess.run(
              [predictions_tensor],
              feed_dict={input_tensor: video_batch_val,
                         num_frames_tensor: num_frames_batch_val})
          now = time.time()
          num_examples_processed += len(video_batch_val)
          num_classes = predictions_val.shape[1]
          logging.info("num examples processed: " + str(num_examples_processed) +
                       " elapsed seconds: " + "{0:.2f}".format(now - start_time))
          for line in format_lines(video_id_batch_val, predictions_val, top_k):
            out_file.write(line)
          out_file.flush()


    except tf.errors.OutOfRangeError:
        logging.info('Done with inference. The output file was written to ' + out_file_location)
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
Example #7
def load_from_config_path(config_paths, default_model_configs=None):
    """ Loads configurations from files of yaml format.

    Args:
        config_paths: A string (each file name is separated by ",") or
          a list of strings (file names).
        default_model_configs: A dictionary of model configurations
          or None.

    Returns: A dictionary of model configurations, parsed from config files.
    """
    if isinstance(config_paths, six.string_types):
        config_paths = config_paths.strip().split(",")
    assert isinstance(config_paths, list) or isinstance(config_paths, tuple)
    model_configs = default_model_configs if default_model_configs else dict()
    for config_path in config_paths:
        config_path = config_path.strip()
        if not config_path:
            continue
        if not gfile.Exists(config_path):
            raise OSError("config file does not exist: {}".format(config_path))
        config_path = os.path.abspath(config_path)
        tf.logging.info("loading configurations from {}".format(config_path))
        with gfile.GFile(config_path, "r") as config_file:
            config_flags = yaml.load(config_file)
            model_configs = deep_merge_dict(model_configs, config_flags)
    return model_configs
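
A hypothetical usage sketch; the YAML file names are illustrative, and deep_merge_dict presumably gives values from later files precedence over the defaults.

# Hypothetical usage: comma-separated paths are split and merged in order.
defaults = {"optimizer": {"name": "adam", "learning_rate": 0.001}}
model_configs = load_from_config_path("model.yml,data.yml",
                                      default_model_configs=defaults)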
Example #8
    def __init__(self, filename, bpe_codes_file=None, reverse_seq=False):
        """ Initializes the object.

        Args:
            filename: Path to a vocabulary file containing one word per line.
              Each word is mapped to its line number (starting from 0).
            bpe_codes_file: Path to a BPE code file. If provided, do BPE
              before feature mapping.
            reverse_seq: Whether to reverse the sequence when encoding the words
              to ids.

        Raises:
            ValueError: if `filename` or `bpe_codes_file` does not exist.
        """
        self.vocab_dict, self.vocab_r_dict, _ = create_vocabulary_lookup_table_numpy(
            filename)
        self._sos_id = self.vocab_dict[Constants.SEQUENCE_START]
        self._eos_id = self.vocab_dict[Constants.SEQUENCE_END]
        self._unk_id = self.vocab_dict[Constants.UNKOWN]
        self._vocab_size = len(self.vocab_dict)
        self._reverse_seq = reverse_seq
        self._bpe = None
        if bpe_codes_file and not bpe_codes_file == "":
            if not gfile.Exists(bpe_codes_file):
                raise ValueError(
                    "bpe_codes_file: {} not exists".format(bpe_codes_file))
            self._bpe = BPE(bpe_codes_file, vocab=filename)
Example #9
  def _get_or_create_trial(self, uri, uri_info=None):
    """Create a new trial or get the previous one.

    The previous trials are recreated by looking at the content of the cached
    dir.

    Args:
      uri (str): Uri to create the trial for.
      uri_info (obj): Object containing additional info about the download
        or extraction (UrlInfo or ExtractInfo).

    Returns:
      trial (UriTrial): Result of the trial containing the destination, status,
        timestamp,...
    """
    # The generation is deterministic, so generating keys for the same uri will
    # always give the same result.
    trial_id = '{}_{}'.format(
        util.escape_uri(uri),
        util.hash_uri(uri),
    )

    log = util.build_log(prefix=trial_id)

    # Generate a new trial to eventually use
    trial = download_pb2.UriTrial(
        id=trial_id,
        status=download_pb2.UriTrial.IN_PROGRESS,
        output_path=os.path.join(self._cache_dir, trial_id),
    )
    add_uri_info(trial, uri, uri_info)

    if gfile.Exists(trial.output_path):

      # If the directory exists, the previous trial was complete (as it was
      # renamed successfully from ".incomplete")
      if self._mode == util.GenerateMode.FORCE_REDOWNLOAD:
        log('Cleanup previous trial: {}', trial.output_path)
        gfile.DeleteRecursively(trial.output_path)
      else:
        log('Reusing previously cached data...')
        # Try to reuse the previous download
        trial.status = download_pb2.UriTrial.COMPLETED

        # For the downloads, the output_path contains the file
        # TODO(epot): Should instead write the meta-data on disk (in a
        # ._trial.json) and replace ListDirectory() by a version which filter
        # the metadata file.
        is_dl = not any(uri.startswith(p) for p in ('local://', 'extract://'))
        is_gz = (
            uri.startswith('extract://') and
            uri.endswith('.gz') and
            not uri.endswith('tar.gz')
        )
        if is_dl or is_gz:
          trial.output_path = get_download_filepath(trial)
    else:
      log('No cached value found.')

    return trial
Example #10
    def get_meta_filename(self, start_new_model, train_dir):
        if start_new_model:
            logging.info(
                "%s: Flag 'start_new_model' is set. Building a new model.",
                task_as_string(self.task))
            return None

        if FLAGS.checkpoint_file == '':
            latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        else:
            latest_checkpoint = os.path.join(FLAGS.train_dir,
                                             FLAGS.checkpoint_file)

        if not latest_checkpoint:
            logging.info("%s: No checkpoint file found. Building a new model.",
                         task_as_string(self.task))
            return None

        meta_filename = latest_checkpoint + ".meta"
        if not gfile.Exists(meta_filename):
            logging.info("%s: No meta graph file found. Building a new model.",
                         task_as_string(self.task))
            return None
        else:
            return meta_filename
Example #11
def create_vocabulary_lookup_table(filename, default_value=None):
    """Creates a lookup table for a vocabulary file.

    Args:
        filename: Path to a vocabulary file containing one word per line.
            Each word is mapped to its line number.
        default_value: UNK tokens will be mapped to this id.
            If None, UNK tokens will be mapped to [vocab_size].

    Returns:
        A tuple (vocab_to_id_table, id_to_vocab_table,
        word_to_count_table, vocab_size). The vocab size does not include
        the UNK token.
    """
    if not gfile.Exists(filename):
        raise ValueError("File does not exist: {}".format(filename))

    # Load vocabulary into memory
    with gfile.GFile(filename) as file:
        vocab = list(line.strip("\n") for line in file)
    vocab_size = len(vocab)

    has_counts = len(vocab[0].split("\t")) == 2
    if has_counts:
        vocab, counts = zip(*[_.split("\t") for _ in vocab])
        counts = [float(_) for _ in counts]
        vocab = list(vocab)
    else:
        counts = [-1. for _ in vocab]

    # Add special vocabulary items
    special_vocab = get_special_vocab(vocab_size)
    vocab += list(special_vocab._fields)
    vocab_size += len(special_vocab)
    counts += [-1. for _ in list(special_vocab._fields)]

    if default_value is None:
        default_value = special_vocab.UNK

    tf.logging.info("Creating vocabulary lookup table of size %d", vocab_size)

    vocab_tensor = tf.constant(vocab)
    count_tensor = tf.constant(counts, dtype=tf.float32)
    vocab_idx_tensor = tf.range(vocab_size, dtype=tf.int64)

    # Create ID -> word mapping
    id_to_vocab_init = tf.contrib.lookup.KeyValueTensorInitializer(
        vocab_idx_tensor, vocab_tensor, tf.int64, tf.string)
    id_to_vocab_table = tf.contrib.lookup.HashTable(id_to_vocab_init, "UNK")

    # Create word -> id mapping
    vocab_to_id_init = tf.contrib.lookup.KeyValueTensorInitializer(
        vocab_tensor, vocab_idx_tensor, tf.string, tf.int64)
    vocab_to_id_table = tf.contrib.lookup.HashTable(vocab_to_id_init,
                                                    default_value)

    # Create word -> count mapping
    word_to_count_init = tf.contrib.lookup.KeyValueTensorInitializer(
        vocab_tensor, count_tensor, tf.string, tf.float32)
    word_to_count_table = tf.contrib.lookup.HashTable(word_to_count_init, -1)

    return vocab_to_id_table, id_to_vocab_table, word_to_count_table, vocab_size
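
A hypothetical usage sketch (TF 1.x graph mode); vocab.txt is an illustrative path, and the lookup tables must be initialized before use.

vocab_to_id, id_to_vocab, _, vocab_size = create_vocabulary_lookup_table("vocab.txt")
ids = vocab_to_id.lookup(tf.constant(["dog", "cat", "never-seen-before"]))
with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(ids))  # unknown words map to the UNK id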
Example #12
def main(argv):
  del argv  # Unused.
  if not gfile.Exists(FLAGS.save_dir):
    gfile.MkDir(FLAGS.save_dir)
  charting_filepath = os.path.join(FLAGS.save_dir,
                                   FLAGS.dataset + '_charts.pdf')
  sampling_methods = FLAGS.sampling_methods.split(',')
  scoring_methods = FLAGS.scoring_methods.split(',')
  files = gfile.Glob(
      os.path.join(FLAGS.source_dir, FLAGS.dataset + '*/results*.pkl'))
  files = [
      f for f in files
      if (get_sampling_method(FLAGS.dataset, f) in sampling_methods and
          get_scoring_method(f) in scoring_methods and
          get_normalize(f) == FLAGS.normalize and
          get_standardize(f) == FLAGS.standardize)
  ]

  print('Reading in %d files...' % len(files))
  all_results = combine_results(files)
  pdf = PdfPages(charting_filepath)

  print('Plotting charts...')
  plt.style.use('ggplot')
  for m in scoring_methods:
    plot_results(
        all_results,
        m,
        FLAGS.normalize,
        FLAGS.standardize,
        sampler_filter=sampling_methods)
    plt.title('Dataset: %s, Score Method: %s' % (FLAGS.dataset, m))
    pdf.savefig()
    plt.close()
  pdf.close()
Example #13
def train(load_dir=MODELS_DIR, save_dir=MODELS_DIR):
    model_num, model_name = get_latest_model()

    games = gfile.Glob(os.path.join(SELFPLAY_DIR, model_name, '*.zz'))
    if len(games) < MIN_GAMES_PER_GENERATION:
        print("{} doesn't have enough games to train a new model yet ({})".
              format(model_name, len(games)))
        print("Sleeping...")
        time.sleep(10 * 60)
        print("Done...")
        sys.exit(1)

    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(GOLDEN_CHUNK_DIR,
                                 str(new_model_num) + '.tfrecord.zz')
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    load_file = os.path.join(load_dir, model_name)
    save_file = os.path.join(save_dir, new_model_name)
    try:
        main.train(ESTIMATOR_WORKING_DIR, [training_file],
                   save_file,
                   generation_num=model_num + 1)
    except:
        logging.exception("Train error")
Example #14
    def get_meta_filename(self, start_new_model, train_dir):
        if start_new_model:
            logging.info(
                "%s: Flag 'start_new_model' is set. Building a new model.",
                task_as_string(self.task))
            return None

        latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        # latest_checkpoint = "model.ckpt-137964"
        print("...............................", latest_checkpoint)

        if not latest_checkpoint:
            logging.info("%s: No checkpoint file found. Building a new model.",
                         task_as_string(self.task))
            return None

        meta_filename = latest_checkpoint + ".meta"
        print("metafile:...........................................",
              meta_filename)
        if not gfile.Exists(meta_filename):
            logging.info("%s: No meta graph file found. Building a new model.",
                         task_as_string(self.task))
            return None
        else:
            return meta_filename
Example #15
    def get_meta_filename(self, start_new_model, train_dir):
        task_str = task_as_string(self.task)

        if start_new_model:
            logging.info(
                "{}: Flag 'start_new_model' is set. Building a new model.".
                format(task_str))
            return None
        """
        Finds the filename of latest saved checkpoint file.
        Returns: The FULL path to the latest checkpoint or None if no checkpoint was found.
        """
        latest_checkpoint = tf.train.latest_checkpoint(train_dir)
        if not latest_checkpoint:
            logging.info(
                "{}: No checkpoint file found. Building a new model.".format(
                    task_str))
            return None

        meta_filename = latest_checkpoint + ".meta"
        if not gfile.Exists(meta_filename):
            logging.info(
                "{}: No meta graph file found. Building a new model.".format(
                    task_str))
            return None
        else:
            return meta_filename
Example #16
def data_to_token_ids(data_path, target_path, vocabulary_path,
                      tokenizer=None, normalize_digits=True):
    """Tokenize data file and turn into token-ids using given vocabulary file.

    This function loads data line-by-line from data_path, calls the above
    sentence_to_token_ids, and saves the result to target_path. See comment
    for sentence_to_token_ids on the details of token-ids format.

    Args:
        data_path: path to the data file in one-sentence-per-line format.
        target_path: path where the file with token-ids will be created.
        vocabulary_path: path to the vocabulary file.
        tokenizer: a function to use to tokenize each sentence;
            if None, basic_tokenizer will be used.
        normalize_digits: Boolean; if true, all digits are replaced by 0s.
    """
    if not gfile.Exists(target_path):
        print("Tokenizing data in %s" % data_path)
        vocab, _ = initialize_vocabulary(vocabulary_path)
        with gfile.GFile(data_path, mode="rb") as data_file:
            with gfile.GFile(target_path, mode="w") as tokens_file:
                counter = 0
                for line in data_file:
                    counter += 1
                    if counter % 100000 == 0:
                        print("  tokenizing line %d" % counter)
                    token_ids = sentence_to_token_ids(tf.compat.as_bytes(line), vocab,
                                                      tokenizer, normalize_digits)
                    tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
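
A hypothetical usage sketch; the file paths are illustrative. The function is a no-op when target_path already exists.

# Hypothetical usage: convert a corpus to token ids using an existing vocabulary.
data_to_token_ids("data/train.en", "data/train.ids.en", "data/vocab.en")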
Example #17
def initialize_vocabulary(vocabulary_path):
    """Initialize vocabulary from file.

    We assume the vocabulary is stored one-item-per-line, so a file:
        dog
        cat
    will result in a vocabulary {"dog": 0, "cat": 1}, and this function will
    also return the reversed-vocabulary ["dog", "cat"].

    Args:
        vocabulary_path: path to the file containing the vocabulary.

    Returns:
        a pair: the vocabulary (a dictionary mapping string to integers), and
        the reversed vocabulary (a list, which reverses the vocabulary mapping).

    Raises:
        ValueError: if the provided vocabulary_path does not exist.
    """
    if gfile.Exists(vocabulary_path):
        rev_vocab = []
        with gfile.GFile(vocabulary_path, mode="rb") as f:
            rev_vocab.extend(f.readlines())
        rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab]
        vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)])
        return vocab, rev_vocab
    raise ValueError("Vocabulary file %s not found." % vocabulary_path)
Example #18
    def prover_pipeline(self, tasks: List[proof_assistant_pb2.ProverTask],
                        root):
        """Make a prover pipeline for the given task and this round."""
        current_round = self.loop_meta.status.current_round
        prover_options = deephol_pb2.ProverOptions()
        prover_options.CopyFrom(self.config.prover_options)
        prover_options.action_generator_options.random_tactic_probability = (
            get_random_tactic_probability(self.config, current_round))
        checkpoint = self.checkpoint_monitor.get_checkpoint()
        assert checkpoint, 'Model checkpoint is not present.'
        # Update prover options to utilize the latest checkpoint present. We also
        # make sure to utilize the embedding store as well.
        prover_options.path_model_prefix = checkpoint
        prover_options.theorem_embeddings = checkpoint + '.npy'
        assert gfile.Exists(prover_options.theorem_embeddings), (
            'Missing embeddings file "%s".' %
            prover_options.theorem_embeddings)
        output_dir = self.loop_meta.make_proof_logs_dir(current_round)
        output_prefix = os.path.join(output_dir, 'logs')
        logging.info('Prover options:\n%s',
                     text_format.MessageToString(prover_options))
        io_util.write_text_proto(
            str(os.path.join(output_dir, 'prover_options.pbtxt')),
            prover_options)

        return prover_runner.make_pipeline(tasks, prover_options,
                                           output_prefix)(root)
Example #19
def _find_conf(conf, save_path):
    if conf.startswith('s3'):
        if not gfile.Exists(conf):
            conf = conf.strip('/')
            conf_name = conf.rsplit('/', 1)[1]
            save_path = os.path.join(save_path, conf_name) + '/'
            gfile.MakeDirs(save_path)
            check = os.system('aws s3 cp {} {} --recursive'.format(conf+'/', save_path))
            conf = save_path
            if check:
                assert False, "can't find conf in: {}".format(conf)
        return conf
    conf_path = os.path.abspath(conf)
    if not gfile.Exists(conf_path):
        conf_path = os.path.join(_project_path, 'conf', conf)
    return conf_path
Example #20
def dump_object(object_to_dump, output_path):

    if not gfile.Exists(output_path):
        gfile.MakeDirs(os.path.dirname(output_path))

    with gfile.Open(output_path, 'w') as wf:
        joblib.dump(object_to_dump, wf)
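
A hypothetical usage sketch; the GCS destination is illustrative, and joblib and gfile are assumed to be imported as in the snippet above.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
model = LogisticRegression(max_iter=200).fit(X, y)
dump_object(model, 'gs://my-bucket/models/model.joblib')  # path is illustrative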
Example #21
def save_config_file(config_file, dest_dir):
    if not gfile.Exists(dest_dir):
        gfile.MakeDirs(dest_dir)

    config_file_dest = os.path.join(dest_dir, 'blueoil_config.yaml')

    # HACK: This is for tensorflow bug workaround.
    # We can remove following 2 lines once it's been resolved in tensorflow
    # issue link: https://github.com/tensorflow/tensorflow/issues/28508
    if gfile.Exists(config_file_dest):
        gfile.Remove(config_file_dest)

    return gfile.Copy(
        config_file,
        config_file_dest
    )
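
A minimal alternative sketch under the assumption that the linked TensorFlow issue is resolved: tf.io.gfile.copy accepts overwrite=True, which makes the pre-delete workaround unnecessary. The name save_config_file_v2 is hypothetical.

import os
import tensorflow as tf

def save_config_file_v2(config_file, dest_dir):
    # Sketch only: relies on overwrite=True behaving correctly on the target
    # filesystem, which is exactly what the workaround above guards against.
    tf.io.gfile.makedirs(dest_dir)
    config_file_dest = os.path.join(dest_dir, 'blueoil_config.yaml')
    tf.io.gfile.copy(config_file, config_file_dest, overwrite=True)
    return config_file_dest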
Example #22
    def manual_dir(self):
        """Returns the directory containing the manually extracted data."""
        if not gfile.Exists(self._manual_dir):
            raise AssertionError(
                'Manual directory {} does not exist. Create it and '
                'download/extract dataset artifacts in there.'.format(
                    self._manual_dir))
        return self._manual_dir
Example #23
def train(working_dir):
    model_num, model_name = fsdb.get_latest_model()

    print("Training on gathered game data, initializing from {}".format(model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(
        fsdb.golden_chunk_dir(), str(new_model_num) + '.tfrecord.zz')
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1*60)
    print("Using Golden File:", training_file)

    try:
        save_file = os.path.join(fsdb.models_dir(), new_model_name)
        print("Training model")
        dual_net.train(training_file)
        print("Exporting model to ", save_file)
        dual_net.export_model(working_dir, save_file)
    except Exception as e:
        import traceback
        logging.error(traceback.format_exc())
        print(traceback.format_exc())
        logging.exception("Train error")
        sys.exit(1)
Example #24
    def _download(self, trial):
        """Downloads a single url given by the trial (thread safe).

        Args:
          trial (UriTrial): Object containing info about download.

        Raises:
          ValueError: If the destination dir is not empty.
        """
        log = util.build_log(prefix=trial.id)

        # Check the download dir is empty
        if (gfile.Exists(trial.output_path)
                and gfile.ListDirectory(trial.output_path)):
            raise ValueError('Download dir {} should be empty'.format(
                trial.output_path))

        gfile.MakeDirs(trial.output_path)

        log('Start downloading...')
        self._backend.download(trial)

        # TODO(epot): Compute the checksum

        # Update the output path
        trial.output_path = get_download_filepath(trial)

        log('Download complete at {}', trial.output_path)
Example #25
def main(unused_argv):
    logging.set_verbosity(tf.logging.INFO)
    print("tensorflow version: %s" % tf.__version__)
    is_chief = (FLAGS.task == 0)

    # Recover session
    saver = None
    latest_checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
    if FLAGS.start_new_model:
        logging.info(
            "'start_new_model' flag is set. Removing existing train dir.")
        try:
            gfile.DeleteRecursively(FLAGS.train_dir)
        except:
            logging.error(
                "Failed to delete directory " + FLAGS.train_dir +
                " when starting a new model. Please delete it manually and" +
                " try again.")
    elif not latest_checkpoint:
        logging.info("No checkpoint file found. Building a new model.")
    else:
        meta_filename = latest_checkpoint + ".meta"
        if not gfile.Exists(meta_filename):
            logging.info("No meta graph file found. Building a new model.")
        else:
            logging.info("Restoring from meta graph file %s", meta_filename)
            saver = tf.train.import_meta_graph(meta_filename)

    if not saver:
        # convert feature_names and feature_sizes to lists of values
        feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
            FLAGS.feature_names, FLAGS.feature_sizes)

        if FLAGS.frame_features:
            reader = readers.YT8MFrameFeatureReader(
                feature_names=feature_names, feature_sizes=feature_sizes)
        else:
            reader = readers.YT8MAggregatedFeatureReader(
                feature_names=feature_names, feature_sizes=feature_sizes)

        model = find_class_by_name(FLAGS.model,
                                   [frame_level_models, video_level_models])()
        label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])()
        optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train])
        build_graph(reader=reader,
                    model=model,
                    optimizer_class=optimizer_class,
                    train_data_pattern=FLAGS.train_data_pattern,
                    label_loss_fn=label_loss_fn,
                    base_learning_rate=FLAGS.base_learning_rate,
                    regularization_penalty=FLAGS.regularization_penalty,
                    num_readers=FLAGS.num_readers,
                    batch_size=FLAGS.batch_size)
        logging.info("built graph")
        saver = tf.train.Saver()

    train_loop(is_chief=is_chief,
               train_dir=FLAGS.train_dir,
               saver=saver,
               master=FLAGS.master)
Example #26
def get_target_path(request, point_num):
    """Computes the output path for a specific point.

    Args:
      request: ResegmentationRequest proto
      point_num: index of the point of interest within the proto

    Returns:
      path to the output file where resegmentation results will be saved
    """
    # Prepare the output directory.
    output_dir = request.output_directory

    id_a = request.points[point_num].id_a
    id_b = request.points[point_num].id_b

    if request.subdir_digits > 1:
        m = hashlib.md5()
        m.update(str(id_a))
        m.update(str(id_b))
        output_dir = os.path.join(output_dir,
                                  m.hexdigest()[:request.subdir_digits])
    gfile.MakeDirs(output_dir)

    # Terminate early if the output already exists.
    dp = request.points[point_num].point
    target_path = os.path.join(
        output_dir, '%d-%d_at_%d_%d_%d.npz' % (id_a, id_b, dp.x, dp.y, dp.z))
    if gfile.Exists(target_path):
        logging.info('Output already exists: %s', target_path)
        return

    return target_path
Example #27
    def _prepare(self):
        """ Prepares for evaluation.

        Builds the model with reuse=True, mode=EVAL and preprocesses
        data file(s).
        """
        text_inputter = TextLineInputter(dataset=self._dataset,
                                         data_field_name="eval_features_file",
                                         batch_size=self._batch_size)
        self._eval_feeding_data = text_inputter.make_feeding_data()
        self._model_configs = update_infer_params(  # update inference parameters
            self._model_configs,
            beam_size=self._beam_size,
            maximum_labels_length=self._maximum_labels_length,
            length_penalty=self._length_penalty)
        estimator_spec = model_fn(model_configs=self._model_configs,
                                  mode=ModeKeys.INFER,
                                  dataset=self._dataset,
                                  name=self._model_name,
                                  reuse=True,
                                  verbose=False)
        self._predict_ops = estimator_spec.predictions
        tmp_trans_dir = os.path.join(self._model_configs["model_dir"],
                                     GlobalNames.TMP_TRANS_DIRNAME)
        if not gfile.Exists(tmp_trans_dir):
            gfile.MakeDirs(tmp_trans_dir)
        self._tmp_trans_file_prefix = os.path.join(
            tmp_trans_dir, GlobalNames.TMP_TRANS_FILENAME_PREFIX)
        self._read_ckpt_bleulog()
        self._eval_labels_file = self._dataset.eval_labels_file
        self._check_bleu_script()
        self._estop_patience = 0
        self._best_bleu_score = 0.
Example #28
def set_latest_checkpoint(dirname: str, chkpt: str):
    """Set the latest checkpoint in the checkpoint file.

    Args:
      dirname: Directory in which the checkpoint is located.
      chkpt: Checkpoint prefix.
    """
    chkpt_file = os.path.join(dirname, 'checkpoint')
    lines = []
    if gfile.Exists(chkpt_file):
        logging.info('Loading preexisting checkpoint file "%s"', chkpt_file)
        with gfile.Open(chkpt_file) as f:
            lines = [
                l.strip() for l in f.readlines()
                if l.startswith('all_model_checkpoint_paths:')
            ]
    else:
        logging.info('No preexisting checkpoint file "%s"', chkpt_file)
    with gfile.Open(chkpt_file, 'w') as f:
        lines = [
            '%s\n' % l.strip() for l in ([
                'model_checkpoint_path: "%s"' % chkpt,
                'all_model_checkpoint_paths: "%s"' % chkpt
            ] + lines)
        ]
        f.writelines(lines)
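
A hypothetical usage sketch; the directory and checkpoint prefix are illustrative.

# Hypothetical usage: point the checkpoint state file at a specific prefix.
set_latest_checkpoint('/tmp/train_dir', 'model.ckpt-1000')
# tf.train.latest_checkpoint('/tmp/train_dir') now resolves to that prefix,
# provided the corresponding checkpoint files actually exist on disk.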
Example #29
def create_vocabulary_lookup_table_numpy(filename):
    """Creates a lookup table from a vocabulary file.

    Args:
        filename: Path to a vocabulary file containing one word per line.
          Each word is mapped to its line number (starting from 0).

    Returns: A tuple `(word_to_id_mapping, id_to_word_mapping, special_fields)`

    """
    if not gfile.Exists(filename):
        raise ValueError("File does not exist: {}".format(filename))

    # Load vocabulary into memory
    with open_file(filename, encoding="utf-8") as file:
        vocab = list(line.strip("\n") for line in file)
    vocab_size = len(vocab)

    has_counts = len(vocab[0].split("\t")) == 2
    if has_counts:
        vocab, counts = zip(*[_.split("\t") for _ in vocab])
        counts = [float(_) for _ in counts]
        vocab = list(vocab)
    else:
        counts = [-1. for _ in vocab]

    # Add special vocabulary items
    special_vocab = get_special_vocab(vocab_size)
    vocab += list(special_vocab._fields)
    vocab_size += len(special_vocab)
    counts += [-1. for _ in list(special_vocab._fields)]

    return {v: k for k, v in enumerate(vocab)}, \
           {k: v for k, v in enumerate(vocab)}, \
           special_vocab._fields
Example #30
def get_mldata(data_dir, name):
    """Loads data from data_dir.

    Looks for the file in data_dir.
    Assumes that data is in pickle format with dictionary fields data and target.

    Args:
      data_dir: directory to look in
      name: dataset name, assumes data is saved in the save_dir with filename
        <name>.pkl

    Returns:
      data and targets

    Raises:
      NameError: dataset not found in data folder.
    """
    dataname = name
    if dataname == "checkerboard":
        X, y = create_checker_unbalanced(split=[1. / 5, 4. / 5],
                                         n=10000,
                                         grid_size=4)
    else:
        filename = os.path.join(data_dir, dataname + ".pkl")
        if not gfile.Exists(filename):
            raise NameError("ERROR: dataset not available")
        data = pickle.load(gfile.GFile(filename, "r"))
        X = data["data"]
        y = data["target"]
        if "keras" in dataname:
            X = X / 255
            y = y.flatten()
    return X, y
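
A hypothetical usage sketch; the data directory is illustrative and is expected to contain a <name>.pkl file written by the saver in Example #3.

X, y = get_mldata('/tmp/data', 'iris')
print(X.shape, y.shape)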