def load_vocab(vocab_file):
  """Returns a lookup table and the vocabulary size."""
  vocab_size = count_lines(vocab_file) + 1  # Add UNK.
  vocab = tf.contrib.lookup.index_table_from_file(
      vocab_file,
      vocab_size=vocab_size - 1,
      num_oov_buckets=1)
  return vocab, vocab_size
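A minimal usage sketch (TF1 style) for the function above; the vocabulary file name and the tokens are made up for illustration, and the lookup table has to be initialized before use.

import tensorflow as tf
from opennmt.utils.misc import count_lines  # needed by load_vocab above

vocab, vocab_size = load_vocab("vocab.txt")  # hypothetical vocabulary file, one token per line
tokens = tf.constant(["Hello", "world", "some-unknown-token"])
ids = vocab.lookup(tokens)  # out-of-vocabulary tokens fall into the single OOV (UNK) bucket

with tf.Session() as sess:
  sess.run(tf.tables_initializer())  # index_table_from_file tables must be initialized
  print(sess.run(ids))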
def _initialize(self, metadata):
  self.inputter.initialize(metadata)
  self.labels_vocabulary_file = metadata[self.labels_vocabulary_file_key]
  self.num_labels = count_lines(self.labels_vocabulary_file)
"length": tf.shape(x)[0]}) dataset = dataset.padded_batch(64, { "ids": [None], "length": []}) return dataset.make_initializable_iterator() if args.direction == 1: src_file, tgt_file = args.src, args.tgt src_vocab_file, tgt_vocab_file = args.src_vocab, args.tgt_vocab else: src_file, tgt_file = args.tgt, args.src src_vocab_file, tgt_vocab_file = args.tgt_vocab, args.src_vocab from opennmt.utils.misc import count_lines tgt_vocab_size = count_lines(tgt_vocab_file) + 1 src_vocab_size = count_lines(src_vocab_file) + 1 src_vocab = tf.contrib.lookup.index_table_from_file( src_vocab_file, vocab_size=src_vocab_size - 1, num_oov_buckets=1) with tf.device("cpu:0"): src_iterator = load_data(src_file, src_vocab) src = src_iterator.get_next() # Step 2
# (tail of load_data: map tokens to ids, attach lengths, and batch with padding)
dataset = dataset.map(input_vocab.lookup)
dataset = dataset.map(lambda x: {"ids": x, "length": tf.shape(x)[0]})
dataset = dataset.padded_batch(64, {"ids": [None], "length": []})
return dataset.make_initializable_iterator()

if args.direction == 1:
  src_file, tgt_file = args.src, args.tgt
  src_vocab_file, tgt_vocab_file = args.src_vocab, args.tgt_vocab
else:
  src_file, tgt_file = args.tgt, args.src
  src_vocab_file, tgt_vocab_file = args.tgt_vocab, args.src_vocab

from opennmt.utils.misc import count_lines

tgt_vocab_size = count_lines(tgt_vocab_file) + 1
src_vocab_size = count_lines(src_vocab_file) + 1

from tensorflow.contrib import lookup

src_vocab = lookup.index_table_from_file(
    src_vocab_file,
    vocab_size=src_vocab_size - 1,
    num_oov_buckets=1)

with tf.device("cpu:0"):
  src_iterator = load_data(src_file, src_vocab)

src = src_iterator.get_next()

# Step 2
hidden_size = 768

from bert.modeling import BertModel, BertConfig
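The script stops right after the BERT imports. Below is a hedged sketch of how Step 2 could continue, encoding the batched source ids with a BERT encoder; the config path, the int32 cast, and the mask construction are assumptions added for illustration, not part of the original.

bert_config = BertConfig.from_json_file("bert_config.json")  # hypothetical path to a BERT-Base config

# Lookup tables return int64 ids; BERT expects int32 input ids.
input_ids = tf.cast(src["ids"], tf.int32)
input_mask = tf.sequence_mask(
    src["length"], maxlen=tf.shape(src["ids"])[1], dtype=tf.int32)

bert = BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    use_one_hot_embeddings=False)

# [batch, time, hidden_size] contextual representations of the source sentences.
encoder_output = bert.get_sequence_output()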
def get_dataset_size(self, data_file):
  return count_lines(data_file)
def _initialize(self, metadata, asset_dir=None):
  assets = super(SequenceClassifier, self)._initialize(metadata, asset_dir=asset_dir)
  self.labels_vocabulary_file = metadata[self.labels_vocabulary_file_key]
  self.num_labels = count_lines(self.labels_vocabulary_file)
  return assets
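For context, the labels vocabulary counted here is typically exposed to the graph through a lookup table; a minimal sketch under that assumption, using a hypothetical labels.txt file with one label per line.

import tensorflow as tf
from opennmt.utils.misc import count_lines

labels_vocabulary_file = "labels.txt"  # hypothetical: one label per line
num_labels = count_lines(labels_vocabulary_file)

# Map label strings to ids; no OOV bucket, since every label is expected to be known.
labels_vocab = tf.contrib.lookup.index_table_from_file(
    labels_vocabulary_file, vocab_size=num_labels)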