def train_eval(x_train, x_test, is_peeky):
    """Train a (Peeky)Seq2seq model and return per-epoch validation accuracy.

    Relies on module-level data and hyperparameters (t_train, t_test,
    vocab_size, wordvec_size, hidden_size, max_epoch, batch_size, max_grad,
    id_to_char).
    """
    model_cls = PeekySeq2seq if is_peeky else Seq2seq
    model = model_cls(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())
    acc_list = []
    for _ in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)
        n_correct = 0
        for idx in range(len(x_test)):
            question, answer = x_test[[idx]], t_test[[idx]]
            # Only echo the first few samples to keep the log short.
            n_correct += eval_seq2seq(model, question, answer,
                                      id_to_char, idx < 10)
        acc = float(n_correct) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
    return acc_list
def experiment_fn(run_config, params):
    """Assemble a tf.contrib.learn.Experiment for the Seq2seq model.

    Feed-dict based train/eval pipelines are attached via FeedFnHook, and a
    LoggingTensorHook prints decoded source/prediction pairs during training.
    """
    data = Data(FLAGS)
    data.initialize_word_vectors()
    model = Seq2seq(data.vocab_size, FLAGS, data.embeddings_mat)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        # model_dir=FLAGS.model_dir,
        config=run_config,
        params=FLAGS)
    train_input_fn, train_feed_fn = data.make_input_fn('train')
    eval_input_fn, eval_feed_fn = data.make_input_fn('test')
    # Tensors (by name) to periodically print while training.
    print_vars = [
        'source',
        'predict'
        # 'decoder_output',
        # 'actual'
    ]
    print_inputs = tf.train.LoggingTensorHook(
        print_vars,
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'predict']))
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.iterations,
        min_eval_frequency=FLAGS.print_every,
        train_monitors=[tf.train.FeedFnHook(train_feed_fn), print_inputs],
        eval_hooks=[tf.train.FeedFnHook(eval_feed_fn)],
        eval_steps=10)
    return experiment
def main() -> None:
    """Train Seq2seq on the addition dataset and report per-epoch accuracy."""
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Hyperparameters.
    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 128
    batch_size = 128
    max_epoch = 25
    max_grad = 5.0

    model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())

    acc_list = []
    for epoch in range(1, max_epoch + 1):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)
        n_correct = 0
        for sample_idx in range(len(x_test)):
            question, answer = x_test[[sample_idx]], t_test[[sample_idx]]
            # Echo only the first few evaluation samples.
            n_correct += eval_seq2seq(model, question, answer,
                                      id_to_char, sample_idx < 10)
        acc = float(n_correct) / len(x_test)
        acc_list.append(acc)
        print(f'val acc {acc*100}%')
    print('DONE')
def __init__(self, trainable=True):
    # Wrap a Seq2seq graph in its own TF session and initialize it.
    # NOTE(review): `trainable` is stored but the inner Seq2seq is always
    # built with trainable=False — confirm this asymmetry is intended.
    self.trainable = trainable
    self.seq2seq = Seq2seq(trainable=False)
    self.seq2seq.build()
    init_op = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init_op)
    # Let the model attach itself to the live session.
    self.seq2seq.init(self.sess)
def __init__(self):
    # Build the Seq2seq graph, start a session, and restore saved weights.
    self.seq2seq = Seq2seq()
    self.seq2seq.build()
    init_op = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init_op)
    # id -> token mapping used to decode model output.
    self.reverse_vocab = preprocessor.load_reverse_vocab(
        Config.vocab_file_path)
    self.restore_variables(self.sess)
def evaluate(model, data, k=1):
    """Decode every example in *data* with top-k beam search.

    Returns a list of predicted target sentences (space-joined tokens).
    """
    searcher = Seq2seq(model.encoder, TopKDecoder(model.decoder, k))
    src_vocab = data.fields[GlobalNames.src_field_name].vocab
    tgt_vocab = data.fields[GlobalNames.tgt_field_name].vocab
    predictor = Predictor(searcher, src_vocab, tgt_vocab)
    predictions = []
    for example in data.examples:
        predictions.append(" ".join(predictor.predict(example.src)))
    return predictions
def main(args, load_exclude_set, restoreCallback):
    # Entry point: configure logging, load the dataset and word vectors
    # (optionally from cache), then run the requested train/test mode.
    # NOTE(review): filename=0 is an unusual logging target — confirm intent.
    logging.basicConfig(\
        filename=0,\
        level=logging.DEBUG,\
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',\
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(0, args.cuda)

    # `volatile` carries run-time-only state that is not checkpointed.
    volatile = Storage()
    volatile.load_exclude_set = load_exclude_set
    volatile.restoreCallback = restoreCallback

    data_class = SingleTurnDialog.load_class(args.dataset)
    data_arg = Storage()
    data_arg.file_id = args.datapath
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    def load_dataset(data_arg, wvpath, embedding_size):
        # Build the data manager and load embeddings for its vocabulary.
        wv = wordvec_class(wvpath)
        dm = data_class(**data_arg)
        return dm, wv.load(embedding_size, dm.vocab_list)

    if args.cache:
        dm, volatile.wordvec = try_cache(
            load_dataset, (data_arg, args.wvpath, args.embedding_size),
            args.cache_dir,
            data_class.__name__ + "_" + wordvec_class.__name__)
    else:
        dm, volatile.wordvec = load_dataset(data_arg, args.wvpath,
                                            args.embedding_size)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile
    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        test_res = model.test_process()
        # JSON cannot serialize bytes; coerce them to str first.
        for key, val in test_res.items():
            if isinstance(val, bytes):
                test_res[key] = str(val)
        json.dump(test_res, open("./result.json", "w"))
    else:
        raise ValueError("Unknown mode")
def main(args, load_exclude_set, restoreCallback):
    # BERT-tokenized variant of the training entry point: loads the dataset
    # with a pretrained BertTokenizer, then runs the requested
    # train/test mode.
    logging.basicConfig(\
        filename=0,\
        level=logging.DEBUG,\
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',\
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(0, args.cuda)

    # Run-time-only state, excluded from checkpoints.
    volatile = Storage()
    volatile.load_exclude_set = load_exclude_set
    volatile.restoreCallback = restoreCallback

    data_class = SingleTurnDialog.load_class(args.dataset)
    data_arg = Storage()
    # The "#OpenSubtitles" suffix presumably selects a sub-dataset within
    # the path — confirm against the data loader.
    data_arg.file_id = args.datapath + "#OpenSubtitles"
    data_arg.tokenizer = PretrainedTokenizer(
        BertTokenizer.from_pretrained(args.bert_vocab))
    data_arg.pretrained = "bert"
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    def load_dataset(data_arg, wvpath, embedding_size):
        # Build the data manager and load embeddings for its frequent vocab.
        wv = wordvec_class(wvpath)
        dm = data_class(**data_arg)
        return dm, wv.load_matrix(embedding_size, dm.frequent_vocab_list)

    if args.cache:
        dm, volatile.wordvec = try_cache(
            load_dataset, (data_arg, args.wvpath, args.embedding_size),
            args.cache_dir,
            data_class.__name__ + "_" + wordvec_class.__name__)
    else:
        dm, volatile.wordvec = load_dataset(data_arg, args.wvpath,
                                            args.embedding_size)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile
    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    else:
        raise ValueError("Unknown mode")
def train(train_loader, model: seq2seq.Seq2seq, criterion, optimizer, epoch,
          teacher_forcing_ratio):
    """Run one train epoch"""
    losses = AverageMeter()

    # Switch to train mode
    model.train()

    for i, batch in enumerate(train_loader):
        # data: seq_len, N
        # data_mask: seq_len, N
        # target: seq_len, N
        data, data_mask, target = batch
        target = target.cuda(non_blocking=True)
        data_mask = data_mask.cuda(non_blocking=True)
        data = data.cuda()
        batch_size = data.size(1)
        target_len = target.size(0)

        # Forward
        # Encoder
        source_hs, hidden = model.encoder(data)
        # Decoder
        ctx = None
        hidden = model.transformHidden(hidden)
        outputs = []
        # One coin flip per batch: feed ground truth vs. the model's own
        # previous prediction as the next decoder input.
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        x = target[0]
        for j in range(1, target_len):
            output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs,
                                                data_mask)
            outputs.append(output)
            # no_grad: choosing the next input must not join the graph.
            with torch.no_grad():
                if use_teacher_forcing:
                    x = target[j]
                else:
                    topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                    x = topi.squeeze()  # N
        outputs = torch.stack(outputs)  # seq_len, N, n_tokens
        # Outputs start at step 1, so the loss compares against target[1:]
        # (position 0 is presumably the start token fed as first input).
        loss = criterion(outputs, target[1:], batch_size)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        # Update (with gradient clipping)
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # Measure loss
        losses.update(loss.item(), batch_size)

        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
def load_model(model_path, device):
    """Restore a saved Transformer/Seq2seq checkpoint onto *device*.

    Returns (model, src_lang, tgt_lang, src_vocab, tgt_vocab).
    """
    checkpoint = torch.load(model_path, map_location=device)
    hparams = checkpoint['parameter']
    if hparams['name'] == 'Transformer':
        hparams.pop('name')
        model = Transformer(**hparams)
    else:
        # Non-Transformer checkpoints keep 'name' among the kwargs.
        model = Seq2seq(**hparams)
    model.to(device)
    model.load_state_dict(checkpoint['state_dict'])
    return (model, checkpoint['src_lang'], checkpoint['tgt_lang'],
            checkpoint['src_vocab'], checkpoint['tgt_vocab'])
def main():
    """Toy driver: teach a tiny Seq2seq to echo the first input symbol.

    Fix: the original used Python 2 print statements (`print 'x', y` and
    bare `print`), which are syntax errors under Python 3 — the rest of
    this file uses Python 3 (f-strings). Converted to print() calls.
    """
    seq2seq = Seq2seq(lr=0.3, init_range=0.3)
    for i in range(1000):
        cost = seq2seq.train([2, 1], [2])
        cost += seq2seq.train([1], [1])
        cost += seq2seq.train([3, 1], [3])
        if i % 100 == 0:
            print('Epoch:', i)
            # Average over the three training examples above.
            print('training cost:', cost / 3)
            print([2, 1], '->', seq2seq.predict([2, 1]))
            print([1], '->', seq2seq.predict([1]))
            print([3, 1], '->', seq2seq.predict([3, 1]))
            print()
def main(args):
    """Train the Seq2seq estimator with a feed-dict input pipeline."""
    tf.logging.set_verbosity(logging.INFO)
    data = Data(FLAGS)
    model = Seq2seq(data.vocab_size, FLAGS)
    input_fn, feed_fn = data.make_input_fn()
    # Periodically print decoded source/target/prediction triples.
    logged_tensors = ['source', 'target', 'predict']
    logging_hook = tf.train.LoggingTensorHook(
        logged_tensors,
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(logged_tensors))
    estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                       model_dir=FLAGS.model_dir)
    estimator.train(input_fn=input_fn,
                    hooks=[tf.train.FeedFnHook(feed_fn), logging_hook],
                    steps=FLAGS.iterations)
def __init__(self):
    """Build a cached predictor for the Seq2seq estimator graph.

    Fix: the serving feature mapping was named `dict`, shadowing the
    builtin; renamed to `features` (local only — interface unchanged).
    """
    self.data = Data(FLAGS)
    model = Seq2seq(self.data.vocab_size, FLAGS)
    estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                       model_dir=FLAGS.model_dir)

    def input_fn():
        # Serving receiver: raw id matrices for encoder input and target.
        inp = tf.placeholder(tf.int64, shape=[None, None], name='input')
        output = tf.placeholder(tf.int64, shape=[None, None], name='output')
        # Expose the first row of each batch under names logging hooks use.
        tf.identity(inp[0], 'source')
        tf.identity(output[0], 'target')
        features = {'input': inp, 'output': output}
        return tf.estimator.export.ServingInputReceiver(features, features)

    self.predictor = tf.contrib.predictor.from_estimator(
        estimator, input_fn)
def main():
    # Toy driver: repeatedly trains a Seq2seq on fixed [digit, tag] -> [tag]
    # pairs and periodically prints cost and a sample prediction.
    seq2seq = Seq2seq(lr=0.3, init_range=0.3)
    for i in range(1000):
        import random
        cost = 0
        for t in range(10):
            #a=random.randrange(9)
            #b=random.randrange(9)
            # 22 training pairs per inner step; `cost` is reset by the
            # first call and accumulated by the rest.
            cost = seq2seq.train([1, 1], [1])
            cost += seq2seq.train([8, 1], [1])
            cost += seq2seq.train([7, 1], [1])
            cost += seq2seq.train([9, 1], [1])
            cost += seq2seq.train([4, 1], [1])
            cost += seq2seq.train([3, 1], [1])
            cost += seq2seq.train([1, 1], [1])
            cost += seq2seq.train([4, 1], [1])
            cost += seq2seq.train([0, 2], [2])
            cost += seq2seq.train([3, 2], [2])
            cost += seq2seq.train([5, 2], [2])
            cost += seq2seq.train([6, 2], [2])
            cost += seq2seq.train([1, 2], [2])
            cost += seq2seq.train([9, 2], [2])
            cost += seq2seq.train([8, 2], [2])
            cost += seq2seq.train([7, 2], [2])
            cost += seq2seq.train([6, 2], [2])
            cost += seq2seq.train([5, 2], [2])
            cost += seq2seq.train([4, 2], [2])
            cost += seq2seq.train([3, 2], [2])
            cost += seq2seq.train([2, 2], [2])
            cost += seq2seq.train([1, 2], [2])
            print('training cost:', cost / 22)
        if i % 100 == 0:
            print('Epoch:', i)
            # NOTE(review): divisor 3 looks stale — 22 examples are summed
            # into `cost` above; confirm whether this should be / 22.
            print('training cost:', cost / 3)
            # NOTE(review): a and b are computed but never used.
            a = random.randrange(9)
            b = random.randrange(9)
            print([5, 2], '->', seq2seq.predict([5, 2]))
def main(args):
    # Entry point: load the dataset and word vectors (optionally via cache),
    # then train or test the Seq2seq model.
    logging.basicConfig(\
        filename=0,\
        level=logging.DEBUG,\
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',\
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(0, args.cuda)

    # Run-time-only state, not checkpointed.
    volatile = Storage()
    data_class = SingleTurnDialog.load_class(args.dataset)
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove
    if args.cache:
        # Both the data manager and the loaded embeddings are disk-cached.
        dm = try_cache(data_class, (args.datapath, ), args.cache_dir)
        volatile.wordvec = try_cache(\
            lambda wv, ez, vl: wordvec_class(wv).load(ez, vl), \
            (args.wvpath, args.embedding_size, dm.vocab_list),
            args.cache_dir, wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load(args.embedding_size, dm.vocab_list)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile
    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    else:
        raise ValueError("Unknown mode")
def __init__(self, checkpoint='checkpoint', directory='coco'):
    """Build a cached predictor over a trained Seq2seq checkpoint.

    Fix: the serving feature mapping was named `dict`, shadowing the
    builtin; renamed to `features` (local only — interface unchanged).
    """
    self.data = Data(directory + '/train_source.txt',
                     directory + '/train_target.txt',
                     directory + '/train_vocab.txt')
    model = Seq2seq(self.data.vocab_size)
    estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                       model_dir=checkpoint)

    def input_fn():
        # Serving receiver: raw id matrices for encoder input and target.
        inp = tf.placeholder(tf.int64, shape=[None, None], name='input')
        output = tf.placeholder(tf.int64, shape=[None, None], name='output')
        # Expose the first row of each batch under names logging hooks use.
        tf.identity(inp[0], 'source')
        tf.identity(output[0], 'target')
        features = {'input': inp, 'output': output}
        return tf.estimator.export.ServingInputReceiver(features, features)

    self.predictor = tf.contrib.predictor.from_estimator(
        estimator, input_fn)
def build_model(encoder_vocab, decoder_vocab):
    """Instantiate a Seq2seq from the module-level `opt` configuration,
    print it, and move it to the module-level `device`."""
    config = dict(
        encoder_vocab_size=encoder_vocab.get_vocab_size(),
        encoder_embedding_size=opt.encoder_embedding_size,
        encoder_hidden_size=opt.encoder_hidden_size,
        encoder_num_layers=opt.encoder_num_layers,
        encoder_bidirectional=opt.encoder_bidirectional,
        decoder_vocab_size=decoder_vocab.get_vocab_size(),
        decoder_embedding_size=opt.decoder_embedding_size,
        decoder_hidden_size=opt.decoder_hidden_size,
        decoder_num_layers=opt.decoder_num_layers,
        decoder_attn_type=opt.decoder_attn_type,
        dropout_ratio=opt.dropout_ratio,
        padding_idx=PAD_id,
        tied=opt.tied,
        device=device,
    )
    model = Seq2seq(**config)
    print(model)
    model.to(device=device)
    return model
def vectorize_nn(word_index, embedding_matrix, sentences, max_num_vectors=200,
                 num_features=200, batch_size=32, latent_dim=200,
                 timesteps=200, epochs=5):
    """Embed sentences, train a Seq2seq autoencoder on them, and return the
    autoencoder's output for every description (saving the encoder to disk).

    Fix: the reshape and the Seq2seq constructor referenced `num_vectors`,
    which is not defined anywhere in this function (NameError at runtime);
    the intended value is the `max_num_vectors` parameter used when
    converting each sentence with seqWords2seqVec.
    """
    print("Substituting words in descriptions by their vector representetion")
    vec_sentences = []
    sec_count = 0
    for sentence in sentences:
        # Progress report every 5000 descriptions.
        sec_count += 1
        if sec_count % 5000 == 0:
            print('{} descriptions have been preprocessed.'.format(sec_count))
        vector = seqWords2seqVec(sentence, word_index, embedding_matrix,
                                 max_num_vectors, num_features)
        vec_sentences.append(np.asarray(vector))
    print("All words have been subtituted by their vector representation")
    vec_sentences = np.asarray(vec_sentences)
    # BUG FIX: was `num_vectors` (undefined).
    vec_sentences = np.reshape(
        vec_sentences, (len(sentences), max_num_vectors, num_features))
    # Create and train the autoencoder.
    s2s = Seq2seq(max_num_vectors, latent_dim, timesteps, batch_size,
                  word_index, embedding_matrix)
    print("Training autoencoder...")
    s2s.fit(vec_sentences, epochs)
    print("Getting vector representation of each description...")
    predictions = s2s.predict(vec_sentences)
    print("Saving neural network...")
    s2s.encoder.save('model/encoder.h5')
    return predictions
def main():
    """Train a toy Seq2seq to filter the 1s out of random 1/2 sequences.

    Fix: the original used Python 2 print statements (`print i, ...`),
    which are syntax errors under Python 3; converted to print() calls.
    Also removed the unused `last_seq` local.
    """
    seq2seq = Seq2seq()
    cost = 0
    for i in range(100000):
        X = [randint(1, 2) for _ in range(randint(1, 10))]
        Y = [x for x in X if x == 1]
        cost += seq2seq.train(X, Y)
        if i % 1000 == 0:
            # Report average cost over the last 1000 steps, then show a
            # fresh sample prediction and halve the learning rate.
            print(i, '\t', cost / 1000)
            cost = 0
            X = [randint(1, 2) for _ in range(randint(1, 10))]
            Y = seq2seq.predict(X)
            print(X, '->', Y)
            seq2seq.lr /= 2
# Model hyperparameters for the encoder/decoder pair.
hidden_size = 128
bidirectional = True
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                     bidirectional=bidirectional, variable_lengths=True)
# Decoder hidden size doubles when the encoder is bidirectional.
decoder = DecoderRNN(len(tgt.vocab), max_len,
                     hidden_size * 2 if bidirectional else hidden_size,
                     dropout_p=0.2, use_attention=True,
                     bidirectional=bidirectional,
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()
# Small uniform init for all weights.
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and pass to the trainer.
#
# NOTE(review): the Optimizer construction below is commented out, yet
# `optimizer` is used on the next lines — this raises NameError unless
# `optimizer` is defined elsewhere in the file; confirm.
# optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
scheduler = StepLR(optimizer.optimizer, 1)
optimizer.set_scheduler(scheduler)

# train
def __init__(self, config):
    """Compose the embedding layer and the seq2seq core from `config`."""
    super(Model, self).__init__()
    self.config = config          # kept for later access by other methods
    self.embedding = Embedding(config)
    self.seq2seq = Seq2seq(config)
# Reverse input? ================================================= is_reverse = False # True if is_reverse: x_train, x_test = x_train[:, ::-1], x_test[:, ::-1] # ================================================================ # ハイパーパラメータの設定 vocab_size = len(char_to_id) wordvec_size = 16 hideen_size = 128 batch_size = 128 max_epoch = 25 max_grad = 5.0 # Normal or Peeky? ============================================== model = Seq2seq(vocab_size, wordvec_size, hideen_size) # model = PeekySeq2seq(vocab_size, wordvec_size, hideen_size) # ================================================================ optimizer = Adam() trainer = Trainer(model, optimizer) acc_list = [] for epoch in range(max_epoch): trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size, max_grad=max_grad) correct_num = 0 for i in range(len(x_test)):
def main(args):
    # Knowledge-grounded dialog entry point: pick the dataset class, set up
    # per-dataset cache/output/model directories, load embeddings, then run
    # train/test/dev.
    logging.basicConfig(
        filename=0,
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(args.cuda_num, args.cuda)

    # Run-time-only state, not checkpointed.
    volatile = Storage()
    volatile.load_exclude_set = args.load_exclude_set
    volatile.restoreCallback = args.restoreCallback

    if args.dataset == 'WizardOfWiki':
        data_class = WizardOfWiki
    elif args.dataset == 'HollE':
        data_class = HollE
    else:
        raise ValueError
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    # Create per-dataset subdirectories for cache, output and checkpoints.
    if not os.path.exists(args.cache_dir):
        os.mkdir(args.cache_dir)
    args.cache_dir = os.path.join(args.cache_dir, args.dataset)
    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.join(args.out_dir, args.dataset)
    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if args.dataset not in args.model_dir:
        args.model_dir = os.path.join(args.model_dir, args.dataset)

    if args.cache:
        # Both the data manager and the embeddings are disk-cached.
        dm = try_cache(data_class, (args.datapath, ), args.cache_dir)
        volatile.wordvec = try_cache(
            lambda wv, ez, vl: wordvec_class(wv).load_matrix(ez, vl),
            (args.wvpath, args.embedding_size, dm.vocab_list),
            args.cache_dir, wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load_matrix(args.embedding_size, dm.vocab_list)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile
    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    elif args.mode == 'dev':
        model.test_dev()
    else:
        raise ValueError("Unknown mode")
# Register explicit start/end tokens in both vocabulary directions.
word2idx.update({'start_id': start_id})
word2idx.update({'end_id': end_id})
idx2word = idx2word + ['start_id', 'end_id']

# The two extra tokens enlarge the (shared) source/target vocabulary.
src_vocab_size = tgt_vocab_size = src_vocab_size + 2

num_epochs = 10
vocabulary_size = src_vocab_size
decoder_seq_length = 25
model_ = Seq2seq(
    decoder_seq_length=decoder_seq_length,
    cell_enc=tf.keras.layers.GRUCell,
    cell_dec=tf.keras.layers.GRUCell,
    n_layer=3,
    n_units=1024,
    embedding_layer=tl.layers.Embedding(vocabulary_size=vocabulary_size,
                                        embedding_size=emb_dim),
)

# Uncomment below statements if you have already saved the model
# NOTE(review): the comment above says "uncomment", yet the two lines below
# are already active — they will fail if no saved weights exist; confirm.
load_weights = tl.files.load_npz(name='WinterMute_rms1024_cornell.npz')
tl.files.assign_weights(load_weights, model_)

optimizer = tf.optimizers.Adam(learning_rate=0.001)
#optimizer = tf.optimizers.RMSprop(learning_rate=0.001)

# Switch the model into training mode.
model_.train()

# NOTE(review): loop body continues beyond this chunk.
for epoch in range(num_epochs):
# Bidirectional LSTM encoder over the source vocabulary.
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                     bidirectional=bidirectional, rnn_cell='lstm',
                     variable_lengths=True)
# Decoder hidden size is doubled to match the bidirectional encoder output.
decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2,
                     dropout_p=0.2, use_attention=True,
                     bidirectional=bidirectional, rnn_cell='lstm',
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

# Small uniform weight init for all parameters.
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=32, checkpoint_every=50,
                      print_every=10, expt_dir=opt.expt_dir)
# NOTE(review): this chunk ends mid-call; the remaining arguments to
# t.train(...) lie outside this view.
seq2seq = t.train(seq2seq, train,
def main(): args = args_set('big') # Create save dir create_dirs(args.save_dir) # Check CUDA if torch.cuda.is_available(): args.cuda = True args.device = torch.device("cuda" if args.cuda else "cpu") print("Using CUDA: {}".format(args.cuda)) # Set seeds set_seeds(seed=1234, cuda=args.cuda) # load state model_spatial = SpatialModel(num_input_channels=5, out_num=1053, dropout_p=args.dropout_p) model_time = Seq2seq(num_features=1053, hidden_size=512, input_seq_len=args.input_seq_len, pred_seq_len=args.pred_seq_len, batch_size=1) # model_time = Seq2seq_attn(num_features=1053, # input_seq_len=args.input_seq_len, # pred_seq_len=args.pred_seq_len, # batch_size=1, # dropout=args.dropout_p) # model_time = Seq2seq_mlp(num_features=1053, # input_seq_len=args.input_seq_len, # pred_seq_len=args.pred_seq_len, # batch_size=1, device=args.device) resume = os.path.join(args.save_dir, 'check_point_{}'.format(40)) print('Resuming model check point from {}\n'.format(40)) check_point = torch.load(resume) model_spatial.load_state_dict(check_point['model_spatial']) model_spatial.to(args.device) model_time.load_state_dict(check_point['model_time']) model_time.to(args.device) # data = DataPrepare(save_dir=args.save_dir, data_folder=args.data_folder, # train_size=args.train_size, # val_size=args.val_size, # test_size=args.test_size, # input_seq_len=args.input_seq_len, # pred_seq_len=args.pred_seq_len, shuffle=True) # data.create_data() test_exps = np.load('exp_list.npy') scales = np.load('scales.npy') tester = Tester(test_exps=test_exps, data_folder=args.data_folder, scales=scales, input_seq_len=args.input_seq_len, pred_seq_len=args.pred_seq_len, model_spatial=model_spatial, model_time=model_time, extract_num=4, save_dir=args.save_dir, save_sample_path=args.save_sample_path, device='cuda') tester.run_test_loop()
# NOTE(review): this chunk begins mid-expression — the opening of the
# encoder constructor call is outside this view.
embedding_size=512, num_layers=1, dropout=0, is_training=True)
conv_decoder = ConvDecoder(len(word_to_index), max_target_length + 2,
                           hidden_size=128, embedding_size=512,
                           num_layers=1, dropout=0, is_training=True)
examples = np.array(examples)
examples_target = np.array(examples_target)
seq2seq = Seq2seq(conv_encoder, conv_decoder, len(word_to_index))
# Teacher-forced forward pass over the whole training batch.
seq_output = seq2seq(examples, examples_target)
seq_output = seq_output.data.numpy()
sentences = [index_to_word_sentence(seq) for seq in seq_output]
print(sentences)
# Interactive loop: tokenize user text, append token id 1 (presumably an
# end-of-sequence marker — confirm), and decode the model's prediction.
while True:
    new_text = input('type in text to predict:')
    new_text_token = np.array(
        [[word_to_index[token] for token in new_text.lower().split()]])
    new_text_token = np.concatenate([new_text_token, [[1]]], axis=1)
    outputs = seq2seq(new_text_token, is_training=False)
    outputs = outputs.data.numpy()
    sentences = [index_to_word_sentence(seq) for seq in outputs]
# Configuration for the seq2seq model.
mode = 'train'
rnn_size = 1024
num_layers = 1
max_encoder_steps = 30
max_decoder_steps = 30
embedding_size = 256

data_processor = DataProcessor(mode)
idx2word_dict = data_processor.get_dictionary()
vocab_size = len(idx2word_dict)

# NOTE(review): `batch_size` and `epochs` are not defined in this chunk —
# presumably module-level names defined elsewhere; confirm.
model = Seq2seq(rnn_size=rnn_size, num_layers=num_layers,
                batch_size=batch_size, vocab_size=vocab_size, mode=mode,
                max_encoder_steps=max_encoder_steps,
                max_decoder_steps=max_decoder_steps,
                embedding_size=embedding_size)


# TODO: sampling probability for each epoch
def func(x):
    # Sigmoid-shaped schedule in [0, 2) centered at 2*epochs.
    return 2 - 2 / (1 + np.exp(0.3 * (x - 2 * epochs)))


sampling_prob = func(np.arange(epochs))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
def main():
    # Full training pipeline for the spatial CNN + temporal seq2seq pair:
    # optional checkpoint resume, train/val epochs with checkpointing and
    # early stopping, then a final test pass.
    # Arguments
    args = args_set('big')
    # Create save dir
    create_dirs(args.save_dir)
    # Check CUDA
    if torch.cuda.is_available():
        args.cuda = True
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))
    # Set seeds
    set_seeds(seed=1234, cuda=args.cuda)
    dataset = SpatialTimeDataset(args.save_sample_path)
    # create model
    model_spatial = SpatialModel(num_input_channels=dataset[0][0].shape[1],
                                 out_num=1053, dropout_p=args.dropout_p)
    # model_time = Seq2seq_mlp(num_features=1053,
    #                          input_seq_len=args.input_seq_len,
    #                          pred_seq_len=args.pred_seq_len,
    #                          batch_size=args.batch_size, device=args.device)
    model_time = Seq2seq(num_features=1053, hidden_size=512,
                         input_seq_len=args.input_seq_len,
                         pred_seq_len=args.pred_seq_len,
                         batch_size=args.batch_size)
    # model_time = Seq2seq_attn(num_features=data.targets_time['train'].shape[2],
    #                           input_seq_len=args.input_seq_len,
    #                           pred_seq_len=args.pred_seq_len,
    #                           batch_size=args.batch_size,
    #                           dropout=args.dropout_p)
    # One optimizer over both sub-models' parameters.
    optimizer = optim.Adam([{
        'params': model_spatial.parameters()
    }, {
        'params': model_time.parameters()
    }], lr=args.learning_rate, weight_decay=1e-4)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
    #                                            milestones=[12, 25, 37],
    #                                            gamma=0.1,
    #                                            last_epoch = start_epoch-1)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                     mode='min', factor=0.5,
                                                     patience=1)
    start_epoch = args.resume
    train_state = args_train_state(
        early_stopping_criteria=args.early_stopping_criteria,
        learning_rate=args.learning_rate)
    if args.resume:
        # Restore both models, the optimizer, the scheduler's internal
        # counters, and the persisted training state.
        resume = os.path.join(args.save_dir,
                              'check_point_{}'.format(args.resume))
        print('Resuming model check point from {}\n'.format(resume))
        check_point = torch.load(resume)
        start_epoch = check_point['epoch']
        model_spatial.load_state_dict(check_point['model_spatial'])
        model_spatial.to(args.device)
        model_time.load_state_dict(check_point['model_time'])
        model_time.to(args.device)
        optimizer.load_state_dict(check_point['optimizer'])
        train_state = check_point['train_state']
        scheduler.optimizer = optimizer
        scheduler.last_epoch = start_epoch - 1
        scheduler.cooldown_counter = check_point['lr']['cooldown_counter']
        scheduler.best = check_point['lr']['best']
        scheduler.num_bad_epochs = check_point['lr']['num_bad_epochs']
        scheduler.mode_worse = check_point['lr']['mode_worse']
        scheduler.is_better = check_point['lr']['is_better']
    # define train class
    trainer = Trainer(dataset=dataset, model_spatial=model_spatial,
                      model_time=model_time, optimizer=optimizer,
                      scheduler=scheduler, device=args.device,
                      teacher_forcing_ratio=args.teacher_forcing_ratio,
                      train_state=train_state)
    # train & validation
    print('start train29 training...')
    for epoch_index in range(start_epoch, args.num_epochs):
        epoch_start = time.time()
        trainer.train_state['epoch_index'] = epoch_index + 1
        dataset.set_split('train')
        batch_generator_train = dataset.generate_batches(
            batch_size=args.batch_size, collate_fn=collate_fn,
            shuffle=args.shuffle, device=args.device)
        trainer.run_train_loop(batch_generator_train, args.alpha,
                               device=args.device)
        epoch_end = time.time()
        print('\nEntire epoch train time cost: {:.2f} min'.format(
            (epoch_end - epoch_start) / 60))
        dataset.set_split('val')
        batch_generator_val = dataset.generate_batches(
            batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False,
            device=args.device)
        trainer.run_val_loop(batch_generator_val, device=args.device)
        # check point
        save_name = os.path.join(
            args.save_dir,
            'check_point_{}'.format(trainer.train_state['epoch_index']))
        check_point = {
            'epoch': trainer.train_state['epoch_index'],
            'model_spatial': trainer.model_spatial.state_dict(),
            'model_time': trainer.model_time.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
            'train_state': trainer.train_state,
            'lr': {
                'cooldown_counter': trainer.scheduler.cooldown_counter,
                # NOTE(review): 'best' stores cooldown_counter here — this
                # looks like a copy-paste bug; it should probably be
                # trainer.scheduler.best. Confirm before changing (the
                # resume path above reads check_point['lr']['best']).
                'best': trainer.scheduler.cooldown_counter,
                'num_bad_epochs': trainer.scheduler.num_bad_epochs,
                'mode_worse': trainer.scheduler.mode_worse,
                'is_better': trainer.scheduler.is_better
            }
        }
        torch.save(check_point, save_name)
        if trainer.train_state['stop_early']:
            break
    #plot loss
    plot_performance(trainer.train_state['train_loss'],
                     trainer.train_state['val_loss'], args.save_dir)
    print('start testing...')
    test_exps = np.load('exp_list.npy', allow_pickle=True)
    scales = np.load('scales.npy', allow_pickle=True)
    # test
    tester = Tester(test_exps=test_exps, data_folder=args.data_folder,
                    scales=scales, input_seq_len=args.input_seq_len,
                    pred_seq_len=args.pred_seq_len,
                    model_spatial=model_spatial, model_time=model_time,
                    extract_num=4, save_dir=args.save_dir,
                    save_sample_path=args.save_sample_path, device='cuda')
    tester.run_test_loop()
from seq2seq import Seq2seq as seq # 训练 import tensorflow as tf import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' char_inputs = [[2,1],[1,2],[2,3],[3,4],[4,0]] used = tf.sign(tf.abs(char_inputs)) length = tf.reduce_sum(used, reduction_indices=0) lengths = tf.cast(length, tf.int32) sess = tf.Session() print(sess.run(lengths)) # 训练 seq.train() # 预测 seq.predict("天气") # 重新训练 seq.retrain()