def setup():
    global model, vocab, reverse_vocab
    sess = tf.Session()
    with open(os.path.join(PATH, 'vocab.json'), 'r') as fp:
        vocab = json.load(fp)
    reverse_vocab = dict()
    for key, value in vocab.items():
        reverse_vocab[value] = key
    vocab_size = len(vocab)
    model = seq2seq(sess,
                    encoder_vocab_size=vocab_size,
                    decoder_vocab_size=vocab_size,
                    max_step=50)
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(PATH + '/models'))
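# A hypothetical helper showing how the globals initialised by setup() might be
# used for inference. sentence_to_char_index, model.inference and the index-0
# stop token mirror the other inference scripts in this collection and are
# assumed, not taken from this file.
def respond(text):
    speak = sentence_to_char_index([text], vocab, False)
    result = model.inference([speak])
    response = ''
    for index in result[0]:
        if index == 0:  # index 0 is assumed to mark end-of-sequence
            break
        response += reverse_vocab[index]
    return response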
def train_seq2seq(self):
    print("Input sequence read, starting training")
    s2s = seq2seq(self.vocab_size + 3, self.maxlen + 2,
                  self.vocab_size + 3)
    model = s2s.seq2seq_plain()
    for e in range(10000):
        print("epoch %d \n" % e)
        for ind, (X, Y) in enumerate(self.proproces.gen_batch()):
            loss, acc = model.train_on_batch(X, Y)  #, batch_size=64, nb_epoch=1)
            #print("Loss is %f, accuracy is %f " % (loss, acc), end='\r')
            # Every 100 batches, decode one sentence as a sanity check
            if ind % 100 == 0:
                testX = X[0, :].reshape(1, self.maxlen + 2)
                testY = Y[0]
                pred = model.predict(testX, batch_size=1)
                self.decode(testX, pred)
import tensorflow as tf
import model as m
import process_data
import matplotlib.pyplot as plt

num_samples = 7260  # Number of samples to train on.

encoder_input_data, decoder_input_data, decoder_target_data = process_data.process()

model = m.seq2seq()

callbacks = [
    # If 'val_loss' does not improve over 2 epochs, the training stops.
    tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
    # Record logs for displaying on TensorBoard
    tf.keras.callbacks.TensorBoard(log_dir='./tensor_board')
]

history = model.fit([encoder_input_data, decoder_input_data],
                    decoder_target_data,
                    callbacks=callbacks,
                    batch_size=m.batch_size,
                    epochs=m.epochs,
                    validation_split=0.2)

# Save model
model.save_weights('./pretrained_weights/t1_savedModel', save_format='tf')

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
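# The script above ends after the two plt.plot calls; a minimal, assumed
# completion that labels and displays the loss curves would be:
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()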
from model import seq2seq
import tensorflow as tf
import re, json
from data_process import *

if __name__ == "__main__":
    PATH = "models"
    sess = tf.Session()

    with open('vocab.json', 'r') as fp:
        vocab = json.load(fp)
    reverse_vocab = dict()
    for key, value in vocab.items():
        reverse_vocab[value] = key
    vocab_size = len(vocab)

    model = seq2seq(sess,
                    encoder_vocab_size=vocab_size,
                    decoder_vocab_size=vocab_size,
                    max_step=50)
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(PATH))

    while True:
        test = input("User >> ")
        if test == "exit":
            break
        speak = sentence_to_char_index([test], vocab, False)
        result = model.inference([speak])
        for sentence in result:
            response = ''
            for index in sentence:
                if index == 0:
                    break
val_encoder, val_decoder, val_target = transform(
    val_tokens, maxlen, error_rate=error_rate, shuffle=False)
print(val_encoder[:10])
print(val_decoder[:10])
print(val_target[:10])
print('Number of non-unique validation tokens =', len(val_tokens))
print('Max sequence length in the validation set:', val_maxlen)

# Define training and evaluation configuration.
input_ctable = CharacterTable(input_chars)
target_ctable = CharacterTable(target_chars)
train_steps = len(vocab) // train_batch_size
val_steps = len(val_tokens) // val_batch_size

# Compile the model.
model, encoder_model, decoder_model = seq2seq(
    hidden_size, nb_input_chars, nb_target_chars)
print(model.summary())

# Train and evaluate.
for epoch in range(nb_epochs):
    print('Main Epoch {:d}/{:d}'.format(epoch + 1, nb_epochs))

    train_encoder, train_decoder, train_target = transform(
        vocab, maxlen, error_rate=error_rate, shuffle=True)

    train_encoder_batch = batch(train_encoder, maxlen, input_ctable,
                                train_batch_size, reverse)
    train_decoder_batch = batch(train_decoder, maxlen, target_ctable,
                                train_batch_size)
    train_target_batch = batch(train_target, maxlen, target_ctable,
                               train_batch_size)
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument(
        "--data_dir",
        default='./nmt/en-cn/',
        type=str,
        required=False,
        help="The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parse.add_argument("--batch_size", default=16, type=int)
    parse.add_argument("--do_train", default=True, action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_test", default=True, action="store_true",
                       help="Whether to run test.")
    parse.add_argument("--do_translate", default=True, action="store_true",
                       help="Whether to run interactive translation.")
    parse.add_argument("--learnning_rate", default=5e-4, type=float)
    parse.add_argument("--dropout", default=0.2, type=float)
    parse.add_argument("--num_epoch", default=10, type=int)
    parse.add_argument("--max_vocab_size", default=50000, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--enc_hidden_size", default=512, type=int)
    parse.add_argument("--dec_hidden_size", default=512, type=int)
    parse.add_argument("--warmup_steps", default=0, type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    parse.add_argument("--UNK_IDX", default=1, type=int)
    parse.add_argument("--PAD_IDX", default=0, type=int)
    parse.add_argument("--beam_size", default=5, type=int)
    parse.add_argument("--max_beam_search_length", default=100, type=int)
    args = parse.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    setseed()

    processor = DataProcessor(args)
    encoder = Encoder(processor.en_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    decoder = Decoder(processor.cn_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    model = seq2seq(encoder, decoder)
    if os.path.exists("translate-best.th"):
        model.load_state_dict(torch.load("translate-best.th"))
    model.to(device)
    loss_fn = LanguageModelCriterion().to(device)

    train_data = processor.get_train_examples(args)
    eval_data = processor.get_dev_examples(args)

    if args.do_train:
        train(args, model, train_data, loss_fn, eval_data)

    if args.do_test:
        test(args, model, processor)

    if args.do_translate:
        model.load_state_dict(torch.load("translate-best.th"))
        model.to(device)
        while True:
            title = input("Enter an English sentence to translate:\n")
            if len(title.strip()) == 0:
                continue
            title = ['BOS'] + nltk.word_tokenize(title.lower()) + ['EOS']
            title_num = [
                processor.en_tokenizer.word2idx.get(word, 1) for word in title
            ]
            mb_x = torch.from_numpy(
                np.array(title_num).reshape(1, -1)).long().to(device)
            mb_x_len = torch.from_numpy(
                np.array([len(title_num)])).long().to(device)
            bos = torch.Tensor([[processor.cn_tokenizer.word2idx['BOS']]
                                ]).long().to(device)
            completed_hypotheses = model.beam_search(
                mb_x,
                mb_x_len,
                bos,
                processor.cn_tokenizer.word2idx['EOS'],
                topk=args.beam_size,
                max_length=args.max_beam_search_length)
            for hypothes in completed_hypotheses:
                result = "".join([
                    processor.cn_tokenizer.id2word[id] for id in hypothes.value
                ])
                score = hypothes.score
                print("Translated Chinese result: {}, score: {}".format(
                    result, score))
def main():
    char2idx = load_vocab()
    train_data, test_data = load_dataset(char2idx)
    model = seq2seq(len(char2idx), EMB_SIZE, HIDDEN_SIZE, 0.1, RNN_LAYERS,
                    TEACHER_FORCING_RATIO)
    #model = torch.load('./model20.pkl')
    if USE_CUDA:
        model = model.cuda()
    loss_function = nn.CrossEntropyLoss(size_average=True, ignore_index=0)
    optimizer = optim.Adam(params=model.parameters(), lr=LR)

    losses = []
    for epoch in range(EPOCH):
        for _, batch in enumerate(getBatch(BATCH_SIZE, train_data)):
            batch_x, batch_y = pad_to_batch(batch, char2idx[PADDING])
            batch_x, batch_y = Variable(torch.LongTensor(batch_x)), Variable(
                torch.LongTensor(batch_y))
            start_decode = Variable(
                torch.LongTensor([[char2idx[SOS]]] * batch_x.size(0)))
            if USE_CUDA:
                batch_x, batch_y, start_decode = batch_x.cuda(), batch_y.cuda(
                ), start_decode.cuda()
            preds = model(batch_x, start_decode, batch_y.size(1), batch_y)
            loss = loss_function(preds, batch_y.view(-1))
            losses.append(loss.data.item())
            model.zero_grad()
            loss.backward()
            optimizer.step()
            if len(losses) == LOSSES_NUM:
                print('loss:', np.mean(losses))
                losses = []

        # Eval
        bleu_scores = []
        for _, batch in enumerate(getBatch(BATCH_SIZE, test_data)):
            test_x, test_y = pad_to_batch(batch, char2idx[PADDING])
            test_x, test_y = Variable(torch.LongTensor(test_x)), Variable(
                torch.LongTensor(test_y))
            start_decode = Variable(
                torch.LongTensor([[char2idx[SOS]]] * test_x.size(0)))
            if USE_CUDA:
                test_x, test_y, start_decode = test_x.cuda(), test_y.cuda(
                ), start_decode.cuda()
            #output, hidden = encoder(test_x)
            #preds = decoder(start_decode, hidden, test_y.size(1), output)
            preds = model(test_x, start_decode, test_y.size(1), None, False)
            preds = torch.max(preds, 1)[1].view(test_y.size(0), test_y.size(1))
            bleu_scores.append(cal_bleu(preds, test_y, char2idx[EOS]))
        print('Epoch.', epoch, ':mean_bleu_score:', np.mean(bleu_scores))
        print(bleu_scores)
        torch.save(model, './model/model' + str(epoch + 1) + '.pkl')
    torch.save(model, './model/model.pkl')
if __name__ == "__main__":
    PATH = "models"

    # load vocab, reverse_vocab, vocab_size
    with open('vocab.json', 'r') as fp:
        vocab = json.load(fp)
    reverse_vocab = dict()
    for key, value in vocab.items():
        reverse_vocab[value] = key
    vocab_size = len(vocab)

    config = tf.ConfigProto()  # GPU/CPU usage
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)  # Creating a session

    model = seq2seq(sess,
                    encoder_vocab_size=vocab_size,
                    decoder_vocab_size=vocab_size,
                    max_step=50)  # Starting the seq-to-seq learning
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(PATH))

    while True:
        test = input("User >> ")
        if test == "exit":
            break
        speak = sentence_to_char_index([test], vocab, False)
        result = model.inference([speak])
        response = ''
        for index in result[0]:
            if index == 0:
                break
            response += reverse_vocab[index]
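        # The original snippet stops after assembling `response`; presumably the
        # reply is then shown to the user, e.g. (assumed line):
        print("Bot >> " + response)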
# read and build dataset
data = read_txt('./data/dialog.txt')
vocab, reverse_vocab, vocab_size = build_character(data)

# save vocab
with open('vocab.json', 'w') as fp:
    json.dump(vocab, fp)

# open session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

# make model instance
model = seq2seq(sess, encoder_vocab_size=vocab_size, decoder_vocab_size=vocab_size)

# make train batches
input, target = make_dataset(data)
batches = batch_iter(list(zip(input, target)), batch_size=64, num_epochs=1001)

# model saver
saver = tf.train.Saver(max_to_keep=3, keep_checkpoint_every_n_hours=0.5)

# train model
print('Starting model training.')  # original message was in Korean
avgLoss = []
for step, batch in enumerate(batches):
#train_x = train_x / 255.
train_x = normalize(train_x)
tEnd = time.time()
print('cost %s' % tfmt(tEnd - tStart))
'''
if args.valid_ratio != 0:
    valid_size = int(len(train_x) * args.valid_ratio)
    print('Split %d/%d validation data...' % (valid_size, len(train_x)))
    train_x = shuffle(train_x)
    valid_x = train_x[-valid_size:]
    train_x = train_x[:-valid_size]
'''

print('Select %s Model' % args.model)
if args.model == 'seq2seq':
    model = seq2seq(train_x.shape[1], args.latent, verbose=1)

adam = Adam(lr=1e-3)
csvlogger = CSVLogger(logger)
earlystopping = EarlyStopping(monitor='loss', patience=20, verbose=1, mode='min')
checkpoint = ModelCheckpoint(params, monitor='loss', save_best_only=True,
                             verbose=0, mode='min')
model.compile(loss='mse', optimizer=adam)

print('Start Training...')
model.fit(train_x,
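# NOTE: the fit call above is truncated in the source. A hypothetical completion,
# assuming an autoencoder-style target (the 'mse' loss reconstructs the input) and
# the callbacks defined above; the batch_size and epochs values are illustrative only:
#
#     model.fit(train_x, train_x,
#               batch_size=128, epochs=1000,
#               callbacks=[csvlogger, earlystopping, checkpoint])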
def main(args):
    source_dataset, target_dataset, vocab, vocab_inv = read_data_and_vocab(
        args.source_train,
        args.target_train,
        args.source_dev,
        args.target_dev,
        args.source_test,
        args.target_test,
        reverse_source=True)
    save_vocab(args.model_dir, vocab, vocab_inv)

    source_dataset_train, source_dataset_dev, source_dataset_test = source_dataset
    target_dataset_train, target_dataset_dev, target_dataset_test = target_dataset
    vocab_source, vocab_target = vocab
    vocab_inv_source, vocab_inv_target = vocab_inv

    # split into buckets
    source_buckets_train, target_buckets_train = make_buckets(
        source_dataset_train, target_dataset_train)
    if args.buckets_slice is not None:
        source_buckets_train = source_buckets_train[:args.buckets_slice + 1]
        target_buckets_train = target_buckets_train[:args.buckets_slice + 1]

    # development dataset
    source_buckets_dev = None
    if len(source_dataset_dev) > 0:
        source_buckets_dev, target_buckets_dev = make_buckets(
            source_dataset_dev, target_dataset_dev)
        if args.buckets_slice is not None:
            source_buckets_dev = source_buckets_dev[:args.buckets_slice + 1]
            target_buckets_dev = target_buckets_dev[:args.buckets_slice + 1]

    # test dataset
    source_buckets_test = None
    if len(source_dataset_test) > 0:
        source_buckets_test, target_buckets_test = make_buckets(
            source_dataset_test, target_dataset_test)
        if args.buckets_slice is not None:
            source_buckets_test = source_buckets_test[:args.buckets_slice + 1]
            target_buckets_test = target_buckets_test[:args.buckets_slice + 1]

    # show log
    dump_dataset(
        source_dataset, vocab,
        (source_buckets_train, source_buckets_dev, source_buckets_test))

    # to maintain equilibrium
    required_interations = []
    for data in source_buckets_train:
        itr = len(data) // args.batchsize + 1
        required_interations.append(itr)
    total_iterations = sum(required_interations)
    buckets_distribution = np.asarray(required_interations,
                                      dtype=float) / total_iterations

    # init
    model = load_model(args.model_dir)
    if model is None:
        model = seq2seq(len(vocab_source),
                        len(vocab_target),
                        args.ndim_embedding,
                        args.ndim_h,
                        args.num_layers,
                        pooling=args.pooling,
                        dropout=args.dropout,
                        zoneout=args.zoneout,
                        weightnorm=args.weightnorm,
                        wgain=args.wgain,
                        densely_connected=args.densely_connected,
                        attention=args.attention)

    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()

    # setup an optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate,
                              args.momentum)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    final_learning_rate = 1e-5
    total_time = 0

    indices_train = []
    for bucket_idx, bucket in enumerate(source_buckets_train):
        indices = np.arange(len(bucket))
        np.random.shuffle(indices)
        indices_train.append(indices)

    def mean(l):
        return sum(l) / len(l)

    # training
    for epoch in range(1, args.epoch + 1):
        print("Epoch", epoch)
        start_time = time.time()
        with chainer.using_config("train", True):
            for itr in range(total_iterations):
                bucket_idx = int(
                    np.random.choice(np.arange(len(source_buckets_train)),
                                     size=1,
                                     p=buckets_distribution))
                source_bucket = source_buckets_train[bucket_idx]
                target_bucket = target_buckets_train[bucket_idx]

                # sample minibatch
                source_batch = source_bucket[:args.batchsize]
                target_batch = target_bucket[:args.batchsize]
                skip_mask = source_batch != ID_PAD
                target_batch_input, target_batch_output = make_source_target_pair(
                    target_batch)

                # to gpu
                if args.gpu_device >= 0:
                    skip_mask = cuda.to_gpu(skip_mask)
                    source_batch = cuda.to_gpu(source_batch)
                    target_batch_input = cuda.to_gpu(target_batch_input)
                    target_batch_output = cuda.to_gpu(target_batch_output)

                # compute loss
                model.reset_state()
                if args.attention:
                    last_hidden_states, last_layer_outputs = model.encode(
                        source_batch, skip_mask)
                    y_batch = model.decode(target_batch_input,
                                           last_hidden_states,
                                           last_layer_outputs, skip_mask)
                else:
                    last_hidden_states = model.encode(source_batch, skip_mask)
                    y_batch = model.decode(target_batch_input,
                                           last_hidden_states)
                loss = softmax_cross_entropy(y_batch,
                                             target_batch_output,
                                             ignore_label=ID_PAD)

                # update parameters
                optimizer.update(lossfun=lambda: loss)

                # show log
                printr("iteration {}/{}".format(itr + 1, total_iterations))

                source_buckets_train[bucket_idx] = np.roll(source_bucket,
                                                           -args.batchsize,
                                                           axis=0)  # shift
                target_buckets_train[bucket_idx] = np.roll(target_bucket,
                                                           -args.batchsize,
                                                           axis=0)  # shift

        # shuffle
        for bucket_idx in range(len(source_buckets_train)):
            indices = indices_train[bucket_idx]
            np.random.shuffle(indices)
            source_buckets_train[bucket_idx] = source_buckets_train[
                bucket_idx][indices]
            target_buckets_train[bucket_idx] = target_buckets_train[
                bucket_idx][indices]

        # serialize
        save_model(args.model_dir, model)

        # clear console
        printr("")

        # show log
        with chainer.using_config("train", False):
            if epoch % args.interval == 0:
                printb("translate (train)")
                dump_random_source_target_translation(model,
                                                      source_buckets_train,
                                                      target_buckets_train,
                                                      vocab_inv_source,
                                                      vocab_inv_target,
                                                      num_translate=5,
                                                      beam_width=1)

                if source_buckets_dev is not None:
                    printb("translate (dev)")
                    dump_random_source_target_translation(model,
                                                          source_buckets_dev,
                                                          target_buckets_dev,
                                                          vocab_inv_source,
                                                          vocab_inv_target,
                                                          num_translate=5,
                                                          beam_width=1)

            if source_buckets_dev is not None:
                printb("WER (dev)")
                wer_dev = compute_error_rate_buckets(model,
                                                     source_buckets_dev,
                                                     target_buckets_dev,
                                                     len(vocab_inv_target),
                                                     beam_width=1)
                print(mean(wer_dev), wer_dev)

        elapsed_time = (time.time() - start_time) / 60.
        total_time += elapsed_time
        print("done in {} min, lr = {:.4f}, total {} min".format(
            int(elapsed_time), get_current_learning_rate(optimizer),
            int(total_time)))

        # decay learning rate
        decay_learning_rate(optimizer, args.lr_decay_factor,
                            final_learning_rate)
def main(args):
    # load textfile
    source_dataset, target_dataset, vocab, vocab_inv = read_data(
        args.source_filename,
        args.target_filename,
        train_split_ratio=args.train_split,
        dev_split_ratio=args.dev_split,
        seed=args.seed)
    save_vocab(args.model_dir, vocab, vocab_inv)

    source_dataset_train, source_dataset_dev, source_dataset_test = source_dataset
    target_dataset_train, target_dataset_dev, target_dataset_test = target_dataset
    print_bold("data #")
    print("train {}".format(len(source_dataset_train)))
    print("dev {}".format(len(source_dataset_dev)))
    print("test {}".format(len(source_dataset_test)))

    vocab_source, vocab_target = vocab
    vocab_inv_source, vocab_inv_target = vocab_inv
    print("vocab {} (source)".format(len(vocab_source)))
    print("vocab {} (target)".format(len(vocab_target)))

    # split into buckets
    source_buckets_train, target_buckets_train = make_buckets(
        source_dataset_train, target_dataset_train)
    if args.buckets_limit is not None:
        source_buckets_train = source_buckets_train[:args.buckets_limit + 1]
        target_buckets_train = target_buckets_train[:args.buckets_limit + 1]
    print_bold("buckets #data (train)")
    for size, data in zip(bucket_sizes, source_buckets_train):
        print("{} {}".format(size, len(data)))

    print_bold("buckets #data (dev)")
    source_buckets_dev, target_buckets_dev = make_buckets(
        source_dataset_dev, target_dataset_dev)
    if args.buckets_limit is not None:
        source_buckets_dev = source_buckets_dev[:args.buckets_limit + 1]
        target_buckets_dev = target_buckets_dev[:args.buckets_limit + 1]
    for size, data in zip(bucket_sizes, source_buckets_dev):
        print("{} {}".format(size, len(data)))

    print_bold("buckets #data (test)")
    source_buckets_test, target_buckets_test = make_buckets(
        source_dataset_test, target_dataset_test)
    if args.buckets_limit is not None:
        source_buckets_test = source_buckets_test[:args.buckets_limit + 1]
        target_buckets_test = target_buckets_test[:args.buckets_limit + 1]
    for size, data in zip(bucket_sizes, source_buckets_test):
        print("{} {}".format(size, len(data)))

    # to maintain equilibrium
    min_num_data = 0
    for data in source_buckets_train:
        if min_num_data == 0 or len(data) < min_num_data:
            min_num_data = len(data)
    repeats = []
    for data in source_buckets_train:
        repeats.append(len(data) // min_num_data + 1)

    num_updates_per_iteration = 0
    for repeat, data in zip(repeats, source_buckets_train):
        num_updates_per_iteration += repeat * args.batchsize
    num_iteration = len(source_dataset_train) // num_updates_per_iteration + 1

    # init
    model = load_model(args.model_dir)
    if model is None:
        model = seq2seq(len(vocab_source),
                        len(vocab_target),
                        args.ndim_embedding,
                        args.num_layers,
                        ndim_h=args.ndim_h,
                        pooling=args.pooling,
                        dropout=args.dropout,
                        zoneout=args.zoneout,
                        wgain=args.wgain,
                        densely_connected=args.densely_connected,
                        attention=args.attention)

    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()

    # setup an optimizer
    if args.eve:
        optimizer = Eve(alpha=args.learning_rate, beta1=0.9)
    else:
        optimizer = optimizers.Adam(alpha=args.learning_rate, beta1=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    min_learning_rate = 1e-7
    prev_wer = None
    total_time = 0

    def mean(l):
        return sum(l) / len(l)

    # training
    for epoch in xrange(1, args.epoch + 1):
        print("Epoch", epoch)
        start_time = time.time()
        for itr in xrange(1, num_iteration + 1):
            for repeat, source_bucket, target_bucket in zip(
                    repeats, source_buckets_train, target_buckets_train):
                for r in xrange(repeat):
                    # sample minibatch
                    source_batch, target_batch = sample_batch_from_bucket(
                        source_bucket, target_bucket, args.batchsize)
                    skip_mask = source_batch != ID_PAD
                    target_batch_input, target_batch_output = make_source_target_pair(
                        target_batch)

                    # to gpu
                    if model.xp is cuda.cupy:
                        skip_mask = cuda.to_gpu(skip_mask)
                        source_batch = cuda.to_gpu(source_batch)
                        target_batch_input = cuda.to_gpu(target_batch_input)
                        target_batch_output = cuda.to_gpu(target_batch_output)

                    # compute loss
                    model.reset_state()
                    if args.attention:
                        last_hidden_states, last_layer_outputs = model.encode(
                            source_batch, skip_mask)
                        Y = model.decode(target_batch_input,
                                         last_hidden_states,
                                         last_layer_outputs, skip_mask)
                    else:
                        last_hidden_states = model.encode(
                            source_batch, skip_mask)
                        Y = model.decode(target_batch_input,
                                         last_hidden_states)
                    loss = softmax_cross_entropy(Y,
                                                 target_batch_output,
                                                 ignore_label=ID_PAD)

                    optimizer.update(lossfun=lambda: loss)

            sys.stdout.write("\r{} / {}".format(itr, num_iteration))
            sys.stdout.flush()

            if itr % args.interval == 0 or itr == num_iteration:
                save_model(args.model_dir, model)

        # show log
        sys.stdout.write("\r" + stdout.CLEAR)
        sys.stdout.flush()
        print_bold("translate (train)")
        show_random_source_target_translation(model,
                                              source_buckets_train,
                                              target_buckets_train,
                                              vocab_inv_source,
                                              vocab_inv_target,
                                              num_translate=5,
                                              argmax=True)
        print_bold("translate (dev)")
        show_random_source_target_translation(model,
                                              source_buckets_dev,
                                              target_buckets_dev,
                                              vocab_inv_source,
                                              vocab_inv_target,
                                              num_translate=5,
                                              argmax=True)
        print_bold("WER (sampled train)")
        wer_train = compute_random_mean_wer(model,
                                            source_buckets_train,
                                            target_buckets_train,
                                            len(vocab_inv_target),
                                            sample_size=args.batchsize,
                                            argmax=True)
        print(mean(wer_train), wer_train)
        print_bold("WER (dev)")
        wer_dev = compute_mean_wer(model,
                                   source_buckets_dev,
                                   target_buckets_dev,
                                   len(vocab_inv_target),
                                   batchsize=args.batchsize,
                                   argmax=True)
        mean_wer_dev = mean(wer_dev)
        print(mean_wer_dev, wer_dev)

        elapsed_time = (time.time() - start_time) / 60.
        total_time += elapsed_time
        print("done in {} min, lr = {}, total {} min".format(
            int(elapsed_time), optimizer.alpha, int(total_time)))

        # decay learning rate
        if prev_wer is not None and mean_wer_dev >= prev_wer and optimizer.alpha > min_learning_rate:
            optimizer.alpha *= 0.5
        prev_wer = mean_wer_dev
from config import (VOCAB_SIZE, MAXLEN, EPOCHS, SAVE_AT, LEARNING_RATE,
                    BATCH_SIZE, VERBOSE, LOSS)

tokenizer = Tokenizer()

# parsing the dataset and creating conversation pairs
encoder_input_data, decoder_input_data, decoder_output_data = create_training_data()

# tokenizing and padding those pairs
encoder_input_data, decoder_input_data, decoder_output_data = tokenizer.tokenize_and_pad_training_data(
    encoder_input_data, decoder_input_data, decoder_output_data)

# saving tokenizer for later use
tokenizer.save_tokenizer(f'tokenizer-vocab_size-{VOCAB_SIZE}')

# creating the seq2seq model
Seq2SeqModel = seq2seq()

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE,
                                     clipnorm=1.0,
                                     clipvalue=0.5)
Seq2SeqModel.compile(optimizer=optimizer, loss=LOSS, metrics=['accuracy'])
Seq2SeqModel.summary()


def train(model, encoder_input_data, decoder_input_data, decoder_output_data,
          epochs, batch_size, verbose, save_at):
    with tf.device('/device:GPU:0' if utils.check_cuda else '/cpu:0'):
        for epoch in range(1, epochs + 1):
            print(f'Epochs {epoch}/{epochs}')
            model.fit([encoder_input_data, decoder_input_data],
                      decoder_output_data,
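# (The fit call above is cut off in the source.) Once train() is complete, a
# hypothetical invocation using the values imported from config, matching the
# train() signature above, might look like:
#
#     train(Seq2SeqModel, encoder_input_data, decoder_input_data,
#           decoder_output_data, EPOCHS, BATCH_SIZE, VERBOSE, SAVE_AT)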
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument(
        "--data_dir",
        default='./nmt/en-cn/',
        type=str,
        required=False,
        help="The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parse.add_argument("--batch_size", default=16, type=int)
    parse.add_argument("--do_train", action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_translate", default=True, action="store_true",
                       help="Whether to run interactive translation.")
    parse.add_argument("--learnning_rate", default=5e-4, type=float)
    parse.add_argument("--dropout", default=0.2, type=float)
    parse.add_argument("--num_epoch", default=10, type=int)
    parse.add_argument("--max_vocab_size", default=50000, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--enc_hidden_size", default=512, type=int)
    parse.add_argument("--dec_hidden_size", default=512, type=int)
    parse.add_argument("--warmup_steps", default=0, type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    parse.add_argument("--UNK_IDX", default=1, type=int)
    parse.add_argument("--PAD_IDX", default=0, type=int)
    args = parse.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    setseed()

    processor = DataProcessor(args)
    encoder = Encoder(processor.en_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    decoder = Decoder(processor.cn_tokenizer.vocab_size, args.embed_size,
                      args.enc_hidden_size, args.dec_hidden_size, args.dropout)
    model = seq2seq(encoder, decoder)
    if os.path.exists("translate-best.th"):
        model.load_state_dict(torch.load("translate-best.th"))
    model.to(device)
    loss_fn = LanguageModelCriterion().to(device)

    train_data = processor.get_train_examples(args)
    eval_data = processor.get_dev_examples(args)

    if args.do_train:
        train(args, model, train_data, loss_fn, eval_data)

    if args.do_translate:
        model.load_state_dict(torch.load("translate-best.th"))
        model.to(device)
        while True:
            title = input("Enter an English sentence to translate:\n")
            if len(title.strip()) == 0:
                continue
            title = ['BOS'] + nltk.word_tokenize(title.lower()) + ['EOS']
            title_num = [
                processor.en_tokenizer.word2idx.get(word, 1) for word in title
            ]
            mb_x = torch.from_numpy(
                np.array(title_num).reshape(1, -1)).long().to(device)
            mb_x_len = torch.from_numpy(
                np.array([len(title_num)])).long().to(device)
            bos = torch.Tensor([[processor.cn_tokenizer.word2idx['BOS']]
                                ]).long().to(device)

            # pass BOS as the first decoder input
            translation, attn = model.translate(mb_x, mb_x_len, bos)
            # e.g. translation = tensor([[ 8, 6, 11, 25, 22, 57, 10, 5, 6, 4]], device='cuda:0')
            translation = [
                processor.cn_tokenizer.id2word[i]
                for i in translation.data.cpu().numpy().reshape(-1)
            ]
            trans = []
            for word in translation:
                if word != "EOS":  # map ids back to word form
                    trans.append(word)
                # else: break
            print("Translated Chinese result: {}".format("".join(trans)))
from model import seq2seq
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.ops import variable_scope
from tensorflow.python.framework import dtypes
import numpy as np
import pandas as pd

X, y, predict, global_step = seq2seq()

aq = pd.read_csv('/home/duanchx/KDDCup2018/beijing_201802_201803_aq.csv')
aq[['PM2.5', 'PM10', 'O3']] = aq[['PM2.5', 'PM10', 'O3']].fillna(
    aq[['PM2.5', 'PM10', 'O3']].mean())
fs = np.array(aq[aq['stationId'] == 'fangshan_aq']['PM2.5'])

# first 120 time steps as encoder input, next 48 as the held-out target
x_ = np.expand_dims(fs[0:120], axis=0)
y_ = fs[120:168]

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(session, './save/iteraction1650')

    # feed each of the 120 encoder inputs; the 48 decoder inputs are zero placeholders
    feed = {
        X[t]: x_.reshape((-1, 120))[:, t].reshape((-1, 1))
        for t in range(120)
    }
    feed.update({y[t]: np.array([0.0]).reshape((-1, 1)) for t in range(48)})
    p = session.run(predict, feed_dict=feed)
    p = [np.expand_dims(p_, axis=1) for p_ in p]
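    # A hypothetical follow-up, assuming the 48 per-step outputs in `p` can be
    # stacked into a single forecast and compared against the held-out y_:
    pred = np.concatenate(p, axis=1).reshape(-1)  # flatten to one 48-step series
    mae = np.mean(np.abs(pred[:48] - y_))         # mean absolute error vs. ground truth
    print('MAE over the 48-step horizon:', mae)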
X, y = get_pair(5, 2, 50)
print(X.shape, y.shape)
print('X=%s, y=%s' % (one_hot_decode(X[0]), one_hot_decode(y[0])))

# Baseline without attention
# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2

# Create different models & compare
simple_lstm = lstm(lstm_cells=150,
                   n_timesteps_in=n_timesteps_in,
                   n_features=n_features)
seq2seq_model = seq2seq(lstm_cells=150,
                        n_timesteps_in=n_timesteps_in,
                        n_features=n_features)
attention_model = attention(lstm_cells=150,
                            n_timesteps_in=n_timesteps_in,
                            n_features=n_features)

for model in simple_lstm, seq2seq_model, attention_model:
    # train
    for epoch in range(5000):
        # generate new random sequence
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        # fit model for one epoch on this sequence
        model.fit(X, y, epochs=1, verbose=0)
    # evaluate
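    # The evaluation step is not shown in the source. A hypothetical version in
    # the style of the training loop above: score each model on freshly generated
    # pairs by comparing the decoded prediction against the decoded target.
    total, correct = 100, 0
    for _ in range(total):
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        yhat = model.predict(X, verbose=0)
        if one_hot_decode(yhat[0]) == one_hot_decode(y[0]):
            correct += 1
    print('Accuracy: %.2f%%' % (float(correct) / float(total) * 100.0))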