def read_records():
    """Build the batched reader ops for ``FLAGS.input`` and drive them once.

    The reader factory and decode function come from ``input.get_decodes``;
    every step handed out by ``tf_flow`` is forwarded to ``read_once``.
    Prints the ops, the module-level ``max_index`` and the elapsed time.
    """
    reader, decode_fn = input.get_decodes(
        FLAGS.shuffle_then_decode,
        FLAGS.dynamic_batch_length,
    )

    # Alternative reader kwargs kept disabled here:
    #   num_threads=1, fix_random=True, fix_sequence=True, no_random=True
    batch_ops = reader(
        FLAGS.input,
        decode=decode_fn,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle=FLAGS.shuffle,
        allow_smaller_final_batch=True,
    )
    print(batch_ops)

    stopwatch = Timer()

    def _step(sess, step):
        return read_once(sess, step, batch_ops)

    tf_flow(_step)

    # max_index is a module-level name; presumably maintained by read_once.
    print('max_index:', max_index)
    print(stopwatch.elapsed())
def read_records():
    """Read ``FLAGS.input`` in fixed-size batches and time the pass.

    Uses the reader factory and decode function returned by
    ``input.get_decodes()``; each step from ``tf_flow`` is forwarded to
    ``read_once``. Prints the ops, the module-level ``max_index`` and the
    elapsed time.
    """
    reader, decode_fn = input.get_decodes()

    # TODO: a single thread looks faster, but more threads may give better
    # randomness?
    #
    # NOTE: the batch size is pinned to 10 here instead of FLAGS.batch_size,
    # and fix_sequence is switched on. Alternative kwargs kept disabled:
    #   num_threads=1, fix_random=True, no_random=True
    batch_ops = reader(
        FLAGS.input,
        decode_fn=decode_fn,
        batch_size=10,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle_files=FLAGS.shuffle_files,
        fix_sequence=True,
        allow_smaller_final_batch=True,
    )
    print(batch_ops)

    stopwatch = Timer()

    def _step(sess, step):
        return read_once(sess, step, batch_ops)

    tf_flow(_step)

    # max_index is a module-level name; presumably maintained by read_once.
    print('max_index:', max_index)
    print(stopwatch.elapsed())
def train():
    """Set up vocabulary, loss, model, optimizer and input ops, then train.

    Side effects: rebinds the module globals ``vocab_size`` and
    ``criterion``. Each step handed out by ``tf_flow`` is forwarded to
    ``process_once`` together with the train/eval ops, the model and the
    optimizer. Prints the train ops and the total elapsed time.
    """
    global vocab_size, criterion

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()

    def seq2seq_criterion(num_tokens):
        # Summed (not averaged) NLL loss; class index 0 gets zero weight so it
        # never contributes to the loss (presumably the padding id - confirm).
        class_weights = torch.ones(num_tokens)
        class_weights[0] = 0
        loss_fn = nn.NLLLoss(class_weights, size_average=False)
        if torch.cuda.is_available():
            loss_fn.cuda()
        return loss_fn

    criterion = seq2seq_criterion(vocab_size)

    model = seq2seq.Seq2Seq(
        vocab_size,
        FLAGS.emb_dim,
        FLAGS.rnn_hidden_size,
        FLAGS.batch_size,
    )
    if torch.cuda.is_available():
        model.cuda()

    init_range = 0.08
    model.init_weights(init_range)

    optimizer = optim.Adagrad(model.parameters(), lr=FLAGS.learning_rate)

    reader, decode_fn = input.get_decodes(
        FLAGS.shuffle_then_decode,
        FLAGS.dynamic_batch_length,
    )
    # Bind everything train and validation readers share; only the input path
    # and the batch size are supplied per call.
    make_ops = functools.partial(
        reader,
        decode=decode_fn,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle=FLAGS.shuffle,
        allow_smaller_final_batch=True,
    )

    train_ops = make_ops(FLAGS.input, batch_size=FLAGS.batch_size)
    print(train_ops)

    # Validation is optional. A 10x larger eval batch
    # (FLAGS.batch_size * 10) is a disabled alternative.
    if FLAGS.valid_input:
        eval_ops = make_ops(FLAGS.valid_input, batch_size=FLAGS.batch_size)
    else:
        eval_ops = None

    stopwatch = Timer()

    def _step(sess, step):
        return process_once(sess, step, train_ops, eval_ops, model, optimizer)

    tf_flow(_step)
    print(stopwatch.elapsed())
def read_records():
    """Read positive (and optionally negative) batches from ``FLAGS.input``.

    ``input.get_decodes`` supplies the reader factory plus separate decode
    functions for positive and negative examples. Negative ops are built only
    when ``FLAGS.num_negs`` is truthy; otherwise ``None`` is passed on. Each
    step from ``tf_flow`` is forwarded to ``read_once``. Prints the positive
    ops, the module-level ``max_index`` and the elapsed time.
    """
    reader, decode_fn, decode_neg_fn = input.get_decodes(
        FLAGS.shuffle_then_decode,
        FLAGS.dynamic_batch_length,
    )

    # TODO: a single thread looks faster, but more threads may give better
    # randomness?
    # Alternative reader kwargs kept disabled here:
    #   num_threads=1, fix_random=True, fix_sequence=True, no_random=True
    pos_ops = reader(
        FLAGS.input,
        decode=decode_fn,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle=FLAGS.shuffle,
        allow_smaller_final_batch=True,
    )
    print(pos_ops)

    neg_ops = None
    if FLAGS.num_negs:
        # Pull batch_size * num_negs negatives in one flat batch, then let
        # reshape_neg_tensors regroup them using (batch_size, num_negs).
        flat_neg_ops = reader(
            FLAGS.input,
            decode=decode_neg_fn,
            batch_size=FLAGS.batch_size * FLAGS.num_negs,
            num_epochs=FLAGS.num_epochs,
            num_threads=FLAGS.num_threads,
            batch_join=FLAGS.batch_join,
            shuffle=FLAGS.shuffle,
        )
        neg_ops = list(
            input.reshape_neg_tensors(
                flat_neg_ops, FLAGS.batch_size, FLAGS.num_negs
            )
        )

    stopwatch = Timer()

    def _step(sess, step):
        return read_once(sess, step, pos_ops, neg_ops)

    tf_flow(_step)

    # max_index is a module-level name; presumably maintained by read_once.
    print('max_index:', max_index)
    print(stopwatch.elapsed())
def gen_input(self, train_only=False):
    """Create the train (and optionally validation) input ops.

    Args:
        train_only: when true, skip every validation input; in that case
            ``self.train_with_validation`` is not assigned by this call.

    Returns:
        dict mapping input names to their ops. The train, train-negative and
        validation names are always present (``None`` when not built); the
        fixed-validation and validation-negative names are added only when
        they are actually built.

    Raises:
        AssertionError: if ``FLAGS.shuffle_then_decode`` is falsy.
    """
    # Pre-register the core slots so callers can look them up unconditionally.
    input_results = dict.fromkeys([
        self.input_train_name,
        self.input_train_neg_name,
        self.input_valid_name,
    ])

    assert FLAGS.shuffle_then_decode, "since use sparse data for text, must shuffle then decode"

    wants_negatives = FLAGS.num_negs > 0
    inputs, decode_fn, decode_neg_fn = input.get_decodes(use_neg=wants_negatives)

    # ---------------------- train
    train_ops, trainset = self.gen_train_input(inputs, decode_fn)
    input_results[self.input_train_name] = train_ops

    if decode_neg_fn is not None:
        input_results[self.input_train_neg_name] = self.gen_train_neg_input(
            inputs, decode_neg_fn, trainset
        )

    # ---------------------- valid
    if not train_only:
        has_validation = bool(FLAGS.valid_input)
        self.train_with_validation = has_validation
        print('train_with_validation:', has_validation)

        if has_validation:
            valid_ops, fixed_valid_ops, eval_batch_size = self.gen_valid_input(
                inputs, decode_fn
            )
            input_results[self.input_valid_name] = valid_ops
            input_results[self.fixed_input_valid_name] = fixed_valid_ops

            # Validation negatives need the eval batch size, so they can only
            # be built once the validation input exists.
            if decode_neg_fn is not None:
                input_results[self.input_valid_neg_name] = self.gen_valid_neg_input(
                    inputs, decode_neg_fn, trainset, eval_batch_size
                )

    print_input_results(input_results)
    return input_results