def evaluate(input_sentences, output_sentences, input_vocab, output_vocab,
             input_reverse, output_reverse, hy, writer):
    """Score every saved epoch checkpoint and report the best accuracy.

    For each epoch in ``1..hy.num_epochs`` the encoder/decoder weights are
    loaded from ``saved_runs/`` and passed to ``compute_model_accuracy``.

    Args:
        input_sentences, output_sentences: parallel corpora handed to
            ``NMTDataset``.
        input_vocab, output_vocab: mappings whose sizes define the model
            vocabulary sizes.
        input_reverse, output_reverse: reverse vocabularies handed to
            ``NMTDataset``.
        hy: hyper-parameter object; reads ``batch_size``, ``embedding_size``,
            ``hidden_size``, ``rnn_layers``, ``bidirectional``, ``num_epochs``.
        writer: forwarded unchanged to ``compute_model_accuracy``.

    Returns:
        List with one accuracy value per epoch, in epoch order.
    """
    dataset = NMTDataset(input_sentences, output_sentences, input_vocab,
                         output_vocab, input_reverse, output_reverse)
    loader = DataLoader(dataset, batch_size=hy.batch_size, shuffle=True,
                        drop_last=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    input_vocab_size = len(input_vocab)
    output_vocab_size = len(output_vocab)

    encoder = EncoderRNN(input_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)
    decoder = DecoderRNN(output_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)

    accuracies = []
    for epoch in range(1, hy.num_epochs + 1):
        # map_location lets checkpoints written on a GPU machine be restored
        # on a CPU-only host instead of failing during deserialisation.
        encoder.load_state_dict(
            torch.load("saved_runs/encoder_{}_weights.pt".format(epoch),
                       map_location=device))
        decoder.load_state_dict(
            torch.load("saved_runs/decoder_{}_weights.pt".format(epoch),
                       map_location=device))

        accuracy = compute_model_accuracy(encoder, decoder, loader, device,
                                          epoch, writer)
        accuracies.append(accuracy)

    print("=" * 80)
    print("Final Accuracy = {:.1f}".format(100. * np.max(accuracies)))
    print("=" * 80)

    return accuracies
def build_model(self):
    """Build the 'v' and 's' encoders, the text decoder and the latent projection.

    One encoder/optimizer pair is created per modality key. When
    ``self.modality == 'ss-vv'`` each pair is restored from the checkpoint
    named in ``self.pretrained_modality``. The decoder is always restored
    from the 't' checkpoint.
    """
    # Word embedding: pretrained weights when requested, otherwise a new table.
    self.embedding = (
        nn.Embedding.from_pretrained(self.embedding_wts)
        if self.use_embeddings
        else nn.Embedding(self.vocab.n_words, self.embedding_dim)
    )

    self.encoders = []
    self.encoder_optimizers = []
    restore_encoders = self.modality == 'ss-vv'

    # Note: No embeddings used in the encoders
    for mod in ('v', 's'):
        enc = EncoderRNN(self.enc_input_dim[mod], self.hidden_size,
                         self.enc_n_layers, self.dropout, self.unit,
                         mod).to(self.device)
        enc_opt = optim.Adam(enc.parameters(), lr=self.lr)

        if restore_encoders:
            state = torch.load(self.pretrained_modality[mod],
                               map_location=self.device)
            enc.load_state_dict(state['en'])
            enc_opt.load_state_dict(state['en_op'])

        self.encoders.append(enc)
        self.encoder_optimizers.append(enc_opt)

    decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                         self.hidden_size, self.vocab.n_words, self.unit,
                         self.dec_n_layers, self.dropout, self.embedding)
    self.decoder = decoder.to(self.device)
    text_state = torch.load(self.pretrained_modality['t'],
                            map_location=self.device)
    self.decoder.load_state_dict(text_state['de'])

    # The projection width follows the first encoder's project_factor.
    self.project_factor = self.encoders[0].project_factor
    projected_size = self.hidden_size * self.project_factor
    self.latent2hidden = nn.Linear(self.latent_dim,
                                   projected_size).to(self.device)
    self.epoch = 0
def build_model(self):
    """Create the encoder and its Adam optimizer, and reset the epoch counter."""
    # Note: no embedding used here
    encoder = EncoderRNN(
        self.enc_input_dim,
        self.hidden_size,
        self.enc_n_layers,
        self.dropout,
        self.unit,
        self.modality,
    )
    self.encoder = encoder.to(self.device)
    self.encoder_optimizer = optim.Adam(self.encoder.parameters(), lr=self.lr)
    # define here to add resume training feature
    self.epoch = 0
def main():
    """Command-line entry point: reload a trained model and run prediction.

    Reads the pickled training arguments from ``<load_path>/args.pkl``,
    rebuilds the embedding, encoder and generator, restores their weights for
    ``--load_epoch`` and calls ``predict`` on ``--test_query_file``.

    Raises:
        RuntimeError: if ``args.pkl`` is missing from ``--load_path``.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--test_query_file', '-i', type=str, required=True)
    argparser.add_argument('--load_path', '-p', type=str, required=True)
    # TODO: load epoch -> load best model
    argparser.add_argument('--load_epoch', '-e', type=int, required=True)
    argparser.add_argument('--output_file', '-o', type=str)
    argparser.add_argument('--dec_algorithm', '-algo', type=str, default='greedy')
    new_args = argparser.parse_args()

    arg_file = os.path.join(new_args.load_path, 'args.pkl')
    if not os.path.exists(arg_file):
        raise RuntimeError('No default arguments file to load')
    # NOTE: pickle can execute arbitrary code while loading; only point
    # --load_path at directories produced by a trusted training run.
    with open(arg_file, 'rb') as f:
        args = pickle.load(f)

    # The original assigned USE_CUDA only when args.use_cuda was truthy. That
    # assignment made the name function-local, so the later `if USE_CUDA`
    # raised UnboundLocalError for CPU runs. Always bind a local flag instead.
    use_cuda = bool(args.use_cuda)

    vocab, rev_vocab = load_vocab(args.vocab_file, max_vocab=args.max_vocab_size)
    vocab_size = len(vocab)

    word_embeddings = nn.Embedding(vocab_size, args.emb_dim, padding_idx=SYM_PAD)
    E = EncoderRNN(vocab_size, args.emb_dim, args.hidden_dim, args.n_layers,
                   bidirectional=True, variable_lengths=True)
    # The encoder is bidirectional, so the generator's hidden size is doubled.
    G = Generator(vocab_size, args.response_max_len, args.emb_dim,
                  2 * args.hidden_dim, args.n_layers)

    if use_cuda:
        word_embeddings.cuda()
        E.cuda()
        G.cuda()

    reload_model(new_args.load_path, new_args.load_epoch, word_embeddings, E, G)
    predict(new_args.test_query_file, args.response_max_len, vocab, rev_vocab,
            word_embeddings, E, G, new_args.output_file)
# parrot_initialization_rgc: pretrains an EncoderRNN + DDDQN pair to repeat its input.
#   - Builds and prepares a DataContainer when `dc` is None, then batches
#     dc.x / dc.y_parrot_padded / dc.sl with u.to_batch.
#   - Asks interactively (input()) whether to load pretrained RNN-cell weights,
#     whether to load a pretrained 'RGC/Encoder' + 'RGC/DDDQN', and whether to train.
#   - Training runs for at most 300 epochs with tf.train.AdamOptimizer, saves both
#     models and stops once pu.get_acc_full_dataset(...) exceeds 0.95.
#   - Returns the tuple (encoder, dddqn, dc).
# An empty answer to any prompt is treated the same as 'y'.
# NOTE(review): this definition is stored on collapsed lines, so the nesting of the
# if/else/for bodies cannot be read from indentation — confirm against the original
# layout before restructuring.
def parrot_initialization_rgc(dataset, emb_path, dc=None, encoder=None, dddqn=None): ''' Trains the rgc to repeat the input ''' # TODO save optimizer if dc is None: dc = DataContainer(dataset, emb_path) dc.prepare_data() x_batch, y_parrot_batch, sl_batch = u.to_batch(dc.x, dc.y_parrot_padded, dc.sl, batch_size=dc.batch_size) # initialize rnn cell of the encoder and the dddqn rep = input('Load RNN cell pretrained for the encoder & dddqn? (y or n): ') if encoder is None: encoder = EncoderRNN(num_units=256) if rep == 'y' or rep == '': encoder.load(name='EncoderRNN-0') else: choose_best_rnn_pretrained(encoder, encoder.encoder_cell, dc, search_size=1, multiprocessed=False) # we do not need to train the dddqn rnn layer since we already trained the encoder rnn layer # we just have to initialize the dddqn rnn layer weights with the ones from the encoder if dddqn is None: dddqn = DDDQN(dc.word2idx, dc.idx2word, dc.idx2emb) u.init_rnn_layer(dddqn.lstm) u.update_layer(dddqn.lstm, encoder.encoder_cell) # define the loss function used to pretrain the rgc def get_loss(encoder, dddqn, epoch, x, y, sl, sos, max_steps, verbose=True): preds, logits, _, _, _ = pu.full_encoder_dddqn_pass(x, sl, encoder, dddqn, sos, max_steps, training=True) logits = tf.nn.softmax(logits) # normalize logits between 0 & 1 to allow training through cross-entropy sl = [end_idx + 1 for end_idx in sl] # sl = [len(sequence)-1, ...] => +1 to get the len loss = u.cross_entropy_cost(logits, y, sequence_lengths=sl) if verbose: acc_words, acc_sentences = u.get_acc_word_seq(logits, y, sl) logging.info('Epoch {} -> loss = {} | acc_words = {} | acc_sentences = {}'.format(epoch, loss, acc_words, acc_sentences)) return loss rep = input('Load pretrained RGC-ENCODER-DDDQN? (y or n): ') if rep == 'y' or rep == '': encoder.load('RGC/Encoder') dddqn.load('RGC/DDDQN') rep = input('Train RGC-ENCODER-DDDQN? 
(y or n): ') if rep == 'y' or rep == '': optimizer = tf.train.AdamOptimizer() # training loop over epoch and batchs for epoch in range(300): verbose = True for x, y, sl in zip(x_batch, y_parrot_batch, sl_batch): sos = dc.get_sos_batch_size(len(x)) optimizer.minimize(lambda: get_loss(encoder, dddqn, epoch, x, y, sl, sos, dc.max_tokens, verbose=verbose)) verbose = False encoder.save(name='RGC/Encoder') dddqn.save(name='RGC/DDDQN') acc = pu.get_acc_full_dataset(dc, encoder, dddqn) logging.info('Validation accuracy = {}'.format(acc)) if acc > 0.95: logging.info('Stopping criteria on validation accuracy raised') break return encoder, dddqn, dc
def __init__(self, input_size, output_size, hidden_size, learning_rate,
             teacher_forcing_ratio, device):
    """Assemble the encoder, attention decoder, their SGD optimizers and the loss.

    Args:
        input_size: first argument passed to ``EncoderRNN``.
        output_size: second argument passed to ``AttnDecoderRNN``.
        hidden_size: hidden width shared by encoder and decoder.
        learning_rate: learning rate used for both SGD optimizers.
        teacher_forcing_ratio: stored on the instance unchanged.
        device: stored on the instance unchanged.
    """
    super(Seq2Seq, self).__init__()

    self.device = device
    self.teacher_forcing_ratio = teacher_forcing_ratio

    # Sub-modules (encoder first, then decoder).
    encoder = EncoderRNN(input_size, hidden_size)
    self.encoder = encoder
    decoder = AttnDecoderRNN(hidden_size, output_size)
    self.decoder = decoder

    # One plain SGD optimizer per sub-module, sharing the same learning rate.
    self.encoder_optimizer = optim.SGD(self.encoder.parameters(),
                                       lr=learning_rate)
    self.decoder_optimizer = optim.SGD(self.decoder.parameters(),
                                       lr=learning_rate)

    self.criterion = nn.NLLLoss()
def init_network(self):
    """Create the encoder and attention decoder, then hand over to ``self.train``."""
    print("Initializing Network...")

    n_hidden = 256
    source_vocab_size = self.src.n_words
    target_vocab_size = self.target.n_words

    self.encoder = EncoderRNN(source_vocab_size, n_hidden)
    self.attn_decoder = AttnDecoderRNN(n_hidden, target_vocab_size,
                                       dropout_p=0.1)

    self.next(self.train)
def __init__(self, dataset, emb_path, name='RGC', dc=None, bbc=None, split_size=0.5):
    """Store configuration and build the encoder, DDDQN and black-box classifier.

    Args:
        dataset: stored as ``self.dataset``.
        emb_path: stored as ``self.emb_path``.
        name: stored as ``self.name``.
        dc: forwarded to ``self.get_dc`` together with ``split_size``.
        bbc: classifier to reuse; a new ``BlackBoxClassifier`` is built from
            ``self.dc`` when this is None.
        split_size: forwarded to ``self.get_dc``.
    """
    self.name = name
    self.dataset = dataset
    self.emb_path = emb_path
    self.get_dc(dc, split_size)

    self.encoder = EncoderRNN(num_units=256)

    container = self.dc
    self.dddqn = DDDQN(container.word2idx, container.idx2word,
                       container.idx2emb, max_tokens=container.max_tokens)

    # Only build a classifier when the caller did not supply one.
    if bbc is None:
        bbc = BlackBoxClassifier(dc=self.dc, prepare_data=True)
    self.bbc = bbc
def trainDemo(lang, dataSet, nlVocab, codeVocab, train_variables):
    """Build a new encoder / attention decoder pair and run ``trainIters`` on it.

    The encoder vocabulary comes from ``codeVocab`` and the decoder vocabulary
    from ``nlVocab``. Both models are moved to the GPU when
    ``setting.USE_CUDA`` is set.
    """
    print("Training...")

    encoder = EncoderRNN(codeVocab.n_words, setting.HIDDDEN_SIAZE)
    decoder = AttnDecoderRNN(setting.HIDDDEN_SIAZE, nlVocab.n_words, 1,
                             dropout_p=0.1)

    if setting.USE_CUDA:
        encoder, decoder = encoder.cuda(), decoder.cuda()

    trainIters(lang, dataSet, train_variables, encoder, decoder, 2000000,
               print_every=5000)
def initialize_models(voc):
    """Create the shared embedding, encoder and Luong-attention decoder.

    When ``c.LOAD_FILENAME`` is set, the state dicts ``embedding_sd``,
    ``encoder_sd`` and ``decoder_sd`` (read from the enclosing scope) are
    loaded into the corresponding modules.

    Returns:
        Tuple ``(embedding, encoder, decoder)``, with the encoder and decoder
        already moved to ``c.DEVICE``.
    """
    print('Building encoder and decoder ...')

    # initialize word embeddings
    embedding = nn.Embedding(voc.num_words, c.HIDDEN_SIZE)
    if c.LOAD_FILENAME:
        embedding.load_state_dict(embedding_sd)

    # initialize encoder & decoder models
    encoder = EncoderRNN(c.HIDDEN_SIZE, embedding, c.ENCODER_N_LAYERS,
                         c.DROPOUT)
    decoder = LuongAttnDecoderRNN(c.ATTN_MODEL, embedding, c.HIDDEN_SIZE,
                                  voc.num_words, c.DECODER_N_LAYERS,
                                  c.DROPOUT)
    if c.LOAD_FILENAME:
        for module, state in ((encoder, encoder_sd), (decoder, decoder_sd)):
            module.load_state_dict(state)

    # Use appropriate device
    target_device = c.DEVICE
    encoder = encoder.to(target_device)
    decoder = decoder.to(target_device)

    print('Models built and ready to go!')
    return embedding, encoder, decoder
def build_model(self):
    """Build the embedding, encoder, decoder, optimizers and latent projection.

    For modality 't' the encoder takes embedding-sized inputs and receives the
    embedding layer. For any other modality it takes ``self.enc_input_dim``
    features and no embedding.
    """
    if self.use_embeddings:
        embedding = nn.Embedding.from_pretrained(self.embedding_wts)
    else:
        embedding = nn.Embedding(self.vocab.n_words, self.embedding_dim)
    self.embedding = embedding

    text_mode = self.modality == 't'
    encoder_args = [
        self.embedding_dim if text_mode else self.enc_input_dim,
        self.hidden_size,
        self.enc_n_layers,
        self.dropout,
        self.unit,
        self.modality,
    ]
    if text_mode:
        # Need embedding only for t2t mode
        encoder_args.append(self.embedding)
    # (otherwise: no embedding used here)
    self.encoder = EncoderRNN(*encoder_args,
                              fusion_or_unimodal=True).to(self.device)

    self.decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                              self.hidden_size, self.vocab.n_words,
                              self.unit, self.dec_n_layers, self.dropout,
                              self.embedding).to(self.device)

    self.encoder_optimizer = optim.Adam(self.encoder.parameters(),
                                        lr=self.lr)
    decoder_lr = self.lr * self.dec_learning_ratio
    self.decoder_optimizer = optim.Adam(self.decoder.parameters(),
                                        lr=decoder_lr)

    # define here to add resume training feature
    self.epoch = 0

    self.project_factor = self.encoder.project_factor
    self.latent2hidden = nn.Linear(
        self.latent_dim, self.hidden_size * self.project_factor
    ).to(self.device)
def eval():
    """Load the saved model and vocabularies, evaluate, then start an interactive loop.

    Loads the language objects and test set from ``model/``, rebuilds the
    encoder / attention decoder, restores their weights and runs
    ``evaluateRandomly`` plus one attention visualisation on a random test
    sentence. It then reads sentences from stdin until Ctrl+C and shows the
    attention for each.
    """
    # Alternatively, these objects could be imported from train.py
    input_lang = torch.load('model/input_lang')
    output_lang = torch.load('model/output_lang')
    eval_set = torch.load('model/test_set')

    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                   MAX_LENGTH, dropout_p=0.1).to(device)

    # map_location lets weights saved on a GPU be restored on a CPU-only host.
    encoder1.load_state_dict(torch.load('model/encoder', map_location=device))
    attn_decoder1.load_state_dict(torch.load('model/decoder',
                                             map_location=device))

    evaluateRandomly(encoder1, attn_decoder1, eval_set, input_lang, output_lang)
    evaluateAndShowAttention(encoder1, attn_decoder1, input_lang, output_lang,
                             random.choice(eval_set)[0])

    try:
        while True:
            in_text = input("input " + input_lang.name + ": press ctrl+c to exit\n")
            # The original called evaluateAndShowAttention(in_text, False),
            # which does not match the five-argument call above. Pass the same
            # models and languages with the typed sentence.
            evaluateAndShowAttention(encoder1, attn_decoder1, input_lang,
                                     output_lang, in_text)
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to leave the interactive loop.
        pass
def choose_coders(dc, attention, search_size=8):
    """Train ``search_size`` encoders and decoders and return the best of each.

    Args:
        dc: data container; supplies ``word2idx``, ``idx2word``, ``idx2emb``
            and ``max_tokens`` for the decoder and is forwarded to
            ``pretrain_rnn_layer``.
        attention: forwarded to ``DecoderRNN``.
        search_size: number of pretraining processes launched per model.

    Returns:
        Tuple ``(encoder, decoder)`` with the best-scoring weights loaded.
    """
    encoder = EncoderRNN()
    decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb,
                         max_tokens=dc.max_tokens, attention=attention)

    logging.info('Choosing coders...')
    logger = logging.getLogger()
    # Silence the root logger while the candidates are pretrained. The finally
    # clause re-enables it even if a launch raises, so a failure cannot leave
    # logging switched off for the rest of the process.
    logger.disabled = True
    try:
        results_encoder = u.multiples_launch(
            pretrain_rnn_layer, [encoder, encoder.encoder_cell, dc],
            num_process=search_size)
        results_decoder = u.multiples_launch(
            pretrain_rnn_layer, [decoder, decoder.decoder_cell, dc],
            num_process=search_size)
    finally:
        logger.disabled = False

    # Each result is indexed as [0] = accuracy, [1] = suffix of the saved
    # model name; sort so the best accuracy comes first.
    results_encoder.sort(key=lambda x: x[0], reverse=True)
    results_decoder.sort(key=lambda x: x[0], reverse=True)

    logging.info('Accuracy of the best encoder = {}'.format(results_encoder[0][0]))
    encoder.load(name='{}-{}'.format(encoder.name, results_encoder[0][1]))
    logging.info('Accuracy of the best decoder = {}'.format(results_decoder[0][0]))
    decoder.load(name='{}-{}'.format(decoder.name, results_decoder[0][1]),
                 only_lstm=True)
    return encoder, decoder
def init_model(self, wd, hidden_size, e_layers, d_layers, base_rnn,
               pretrained_embeddings=None, dropout_p=0.1):
    """Build the sentence encoder and the 3-way MLP classifier head.

    Args:
        wd: word dictionary; reads ``n_words`` and, when GloVe is requested,
            ``word2index``.
        hidden_size: encoder hidden width; the MLP input is ``hidden_size * 8``.
        e_layers: number of encoder layers.
        d_layers: not used in this method.
        base_rnn: RNN type forwarded to ``EncoderRNN``.
        pretrained_embeddings: ``True`` to load GloVe vectors; any other value
            is passed to ``EncoderRNN`` unchanged.
        dropout_p: dropout probability inside the MLP.

    Returns:
        ``self``, to allow chaining.
    """
    self.base_rnn = base_rnn
    self.wd = wd
    self.dropout_p = dropout_p

    if pretrained_embeddings is True:
        print("Loading GloVe Embeddings ...")
        pretrained_embeddings = load_glove_embeddings(wd.word2index,
                                                      hidden_size)

    self.encoder = EncoderRNN(wd.n_words, hidden_size, n_layers=e_layers,
                              base_rnn=base_rnn,
                              pretrained_embeddings=pretrained_embeddings)

    mlp_in = int(hidden_size * 8)
    mlp_hidden = int(hidden_size)
    mlp_layers = [
        torch.nn.Linear(mlp_in, mlp_hidden),
        torch.nn.ReLU(),
        torch.nn.Dropout(dropout_p),
        torch.nn.Linear(mlp_hidden, 3),
        torch.nn.Softmax(dim=1),
    ]
    self.mlp = torch.nn.Sequential(*mlp_layers)

    self.parameter_list = [module.parameters()
                           for module in (self.encoder, self.mlp)]

    if USE_CUDA:
        self.encoder = self.encoder.cuda()
        self.mlp = self.mlp.cuda()

    return self
def __init__(self, **kwargs):
    """Prepare the data, read hyper-parameters from ``kwargs`` and build the models.

    Recognised keyword arguments (with defaults): ``pretrained_embedding``
    (False), ``pretrained_embedding_file`` (None), ``pretrained_enc_dec``
    (False), ``pretrained_enc_file`` (None), ``pretrained_dec_file`` (None),
    ``model_name`` ('cb_model'), ``attention_type`` ('dot'), ``hidden_size``
    (500), ``enc_nr_layers`` (2), ``dec_nr_layers`` (2), ``dropout`` (0.1),
    ``batch_size`` (64), ``clip`` (50.0), ``teacher_forcing_ratio`` (1.0),
    ``lr`` (0.0001), ``decoder_learning_ratio`` (5.0), ``nr_iterations``
    (4000), ``print_every`` (1).
    """
    # Data pipeline: write the formatted file, clean it, drop rare words.
    dp = DataPreprocessor()
    dc = DataCleaner(dp.write_to_file())
    dc.clean_data_pipeline().trim_rare_words()

    self.data_loader = DataLoader(dc.vocabulary, dc.pairs)
    self.dp = dp
    self.dc = dc

    option = kwargs.get

    # Options that are only needed while constructing the models.
    load_embedding = option('pretrained_embedding', False)
    embedding_file = option('pretrained_embedding_file', None)
    load_enc_dec = option('pretrained_enc_dec', False)
    load_enc_file = option('pretrained_enc_file', None)
    load_dec_file = option('pretrained_dec_file', None)
    attn_model = option('attention_type', 'dot')
    dropout = option('dropout', 0.1)

    # Options kept on the instance for the training loop.
    self.model_name = option('model_name', 'cb_model')
    self.hidden_size = option('hidden_size', 500)
    self.encoder_nr_layers = option('enc_nr_layers', 2)
    self.decoder_nr_layers = option('dec_nr_layers', 2)
    self.batch_size = option('batch_size', 64)
    self.clip = option('clip', 50.0)
    self.teacher_forcing_ratio = option('teacher_forcing_ratio', 1.0)
    self.learning_rate = option('lr', 0.0001)
    self.decoder_learning_ratio = option('decoder_learning_ratio', 5.0)
    self.nr_iteration = option('nr_iterations', 4000)
    self.print_every = option('print_every', 1)
    self.save_every = 500

    vocab_size = self.dc.vocabulary.num_words
    self.embedding = nn.Embedding(vocab_size, self.hidden_size)
    if load_embedding:
        self.embedding.load_state_dict(embedding_file)

    # Initialize encoder & decoder models
    encoder = EncoderRNN(self.hidden_size, self.embedding,
                         self.encoder_nr_layers, dropout)
    decoder = DecoderRNN(attn_model, self.embedding, self.hidden_size,
                         self.dc.vocabulary.num_words,
                         self.decoder_nr_layers, dropout)
    if load_enc_dec:
        encoder.load_state_dict(load_enc_file)
        decoder.load_state_dict(load_dec_file)

    # Use appropriate device
    self.encoder = encoder = encoder.to(device)
    self.decoder = decoder = decoder.to(device)

    decoder_lr = self.learning_rate * self.decoder_learning_ratio
    self.encoder_optimizer = optim.Adam(encoder.parameters(),
                                        lr=self.learning_rate)
    self.decoder_optimizer = optim.Adam(decoder.parameters(), lr=decoder_lr)
    return
def main():
    """Primary entry point: build the models and optimizers, then start training."""
    voc, pairs = loadPreparedData()

    print('Building encoder and decoder ...')
    # Initialize word embeddings (sized by params.embedding_size rather than
    # params.hidden_size)
    embedding = nn.Embedding(voc.num_words, params.embedding_size)

    # Initialize encoder & decoder models
    encoder = EncoderRNN(embedding, params.hidden_size,
                         params.encoder_n_layers, params.dropout)
    decoder = LuongAttnDecoderRNN(params.attn_model, embedding,
                                  params.hidden_size, voc.num_words,
                                  params.decoder_n_layers, params.dropout)

    # Use appropriate device
    encoder, decoder = encoder.to(params.device), decoder.to(params.device)
    print('Models built and ready to go!')

    # Ensure dropout layers are in train mode
    for model in (encoder, decoder):
        model.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_lr = params.learning_rate
    decoder_lr = params.learning_rate * params.decoder_learning_ratio
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=encoder_lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=decoder_lr)

    # Run training iterations
    print("Starting Training!")
    trainIters(
        voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
        embedding, params.encoder_n_layers, params.decoder_n_layers,
        params.save_dir, params.n_iteration, params.batch_size,
        params.print_every, params.save_every, params.clip,
        params.corpus_name, load_filename=None,
    )
def get_data():
    """Load the data named by $INPUT / $EMB and build models, optimizer and batches.

    The training batches are flattened and concatenated with the test split,
    then batched again with ``dc.batch_size``.

    Returns:
        Tuple ``(dc, x_a, sl_a, y_parrot_a, sos, encoder, decoder, optimizer,
        x_batch, y_parrot_batch, sl_batch)``.
    """
    dc = DataContainer(os.environ['INPUT'], os.environ['EMB'])
    dc.prepare_data()

    def _flatten(batches):
        # Collapse a list of batches into one flat list of samples.
        flat = []
        for batch in batches:
            for sample in batch:
                flat.append(sample)
        return flat

    x_a = _flatten(dc.x_train) + dc.x_te
    sl_a = _flatten(dc.sl_train) + dc.sl_te
    y_parrot_a = _flatten(dc.y_parrot_padded_batch) + dc.y_p_p_te

    sos = dc.get_sos_batch_size(len(x_a))

    encoder = EncoderRNN()
    decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb,
                         max_tokens=dc.max_tokens, attention=False)
    optimizer = tf.train.AdamOptimizer()

    x_batch = u.create_batch(x_a, batch_size=dc.batch_size)
    y_parrot_batch = u.create_batch(y_parrot_a, batch_size=dc.batch_size)
    sl_batch = u.create_batch(sl_a, batch_size=dc.batch_size)

    return (dc, x_a, sl_a, y_parrot_a, sos, encoder, decoder, optimizer,
            x_batch, y_parrot_batch, sl_batch)
# parrot_initialization_encoder_decoder: trains an encoder/decoder pair to reproduce its input.
#   - Builds and prepares a DataContainer and batches dc.x / dc.y_parrot_padded / dc.sl.
#   - If 'models/Encoder-Decoder' exists, asks (input()) whether to load the saved pair;
#     an empty answer counts as 'y'. Otherwise the pair comes from
#     choose_coders(dc, attention, search_size=5).
#   - Trains for at most 300 epochs with tf.train.AdamOptimizer, calling
#     see_parrot_results on the full dataset when epoch % 30 == 0, and breaks when
#     decoder.parrot_stopping is truthy.
#   - Returns the tuple (encoder, decoder, dc).
# NOTE(review): stored on collapsed lines — it cannot be read from here whether the
# save() calls and the parrot_stopping check sit inside `if epoch % 30 == 0` or run
# every epoch; confirm against the original layout before restructuring.
def parrot_initialization_encoder_decoder(dataset, emb_path, attention): ''' Trains the encoder-decoder to reproduce the input ''' dc = DataContainer(dataset, emb_path) dc.prepare_data() x_batch, y_parrot_batch, sl_batch = u.to_batch(dc.x, dc.y_parrot_padded, dc.sl, batch_size=dc.batch_size) def get_loss(encoder, decoder, epoch, x, y, sl, sos): output, cell_state = encoder.forward(x, sl) loss = decoder.get_loss(epoch, sos, (cell_state, output), y, sl, x, encoder.outputs) return loss if os.path.isdir('models/Encoder-Decoder'): rep = input('Load previously trained Encoder-Decoder? (y or n): ') if rep == 'y' or rep == '': encoder = EncoderRNN() decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb, max_tokens=dc.max_tokens, attention=attention) encoder.load(name='Encoder-Decoder/Encoder') decoder.load(name='Encoder-Decoder/Decoder') sos = dc.get_sos_batch_size(len(dc.x)) see_parrot_results(encoder, decoder, 'final', dc.x, dc.y_parrot_padded, dc.sl, sos, greedy=True) else: encoder, decoder = choose_coders(dc, attention, search_size=5) else: encoder, decoder = choose_coders(dc, attention, search_size=5) optimizer = tf.train.AdamOptimizer() for epoch in range(300): for x, y, sl in zip(x_batch, y_parrot_batch, sl_batch): sos = dc.get_sos_batch_size(len(x)) # grad_n_vars = optimizer.compute_gradients(lambda: get_loss(encoder, decoder, epoch, x, y, sl, sos)) # optimizer.apply_gradients(grad_n_vars) optimizer.minimize(lambda: get_loss(encoder, decoder, epoch, x, y, sl, sos)) if epoch % 30 == 0: # to reduce training time, compute global accuracy only every 30 epochs sos = dc.get_sos_batch_size(len(dc.x)) see_parrot_results(encoder, decoder, epoch, dc.x, dc.y_parrot_padded, dc.sl, sos, greedy=True) # see_parrot_results(encoder, decoder, epoch, dc.x, dc.y_parrot_padded, dc.sl, sos) encoder.save(name='Encoder-Decoder/Encoder') decoder.save(name='Encoder-Decoder/Decoder') if decoder.parrot_stopping: break # x_batch, y_parrot_batch, sl_batch = u.shuffle_data(x_batch, 
y_parrot_batch, sl_batch) # strangely, shuffle data between epoch make the training realy noisy return encoder, decoder, dc
def __init__(self, hidden_size, cond_embed_size, output_size, target_path,
             criterion, epoch, train_or_not, lr, input_embed_size,
             teacher_forcing_ratio, ratio_kind):
    """Store the hyper-parameters and assemble the CVAE with its SGD optimizer.

    Every argument is kept on the instance (``lr`` as ``learning_rate``). The
    weight file name is derived from ``self.get_bleuname()`` by dropping
    '.csv' and wrapping the result in 'CVAE_' ... '.pt'.
    """
    # initialize variable
    self.hidden_size = hidden_size
    self.cond_embed_size = cond_embed_size
    self.output_size = output_size
    self.input_embed_size = input_embed_size
    self.target_path = target_path
    self.criterion = criterion
    self.epoch = epoch
    self.train_or_not = train_or_not
    self.learning_rate = lr
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.ratio_kind = ratio_kind

    # get_bleuname() runs only after all attributes above are set.
    bleu_file = self.get_bleuname()
    stem = bleu_file.replace('.csv', '')
    self.weight_name = 'CVAE_' + stem + '.pt'

    # initialize using class
    self.C2D = Char2Dict(cond_embed_size)
    self.DataLoader = Data(target_path)
    self.Encoder = EncoderRNN(input_embed_size, hidden_size,
                              cond_embed_size).to(device)
    self.Decoder = DecoderRNN(input_embed_size, hidden_size,
                              output_size).to(device)

    cvae_kwargs = dict(
        encoder=self.Encoder,
        decoder=self.Decoder,
        hidden_size=self.hidden_size,
        cond_embed_size=self.cond_embed_size,
        C2D=self.C2D,
        Train=self.train_or_not,
        output_size=self.output_size,
        teacher_forcing_ratio=self.teacher_forcing_ratio,
        input_embed_size=self.input_embed_size,
    )
    self.CVAE = CVAE(**cvae_kwargs)
    self.CVAE_optimizer = optim.SGD(self.CVAE.parameters(),
                                    lr=self.learning_rate, momentum=0.9)
# Fragment: the tail of a training-step function (gradient clipping, optimizer steps,
# per-token loss return) followed by top-level model, optimizer and criterion setup.
# NOTE(review): `torch.nn.utils.clip_grad_norm` is deprecated in favour of the in-place
# `clip_grad_norm_`, and `loss.data[0]` fails on 0-dim loss tensors in current PyTorch
# (use `loss.item()`) — confirm the targeted PyTorch version before changing.
# NOTE(review): `encoder.cuda()` / `decoder.cuda()` are unconditional, so this setup
# requires a CUDA device.
torch.nn.utils.clip_grad_norm(decoder.parameters(), clip) encoder_opt.step() decoder_opt.step() return loss.data[0].item() / target_length input_lang, output_lang, pairs = etl.prepare_data(args.language) attn_model = 'general' hidden_size = 500 n_layers = 2 dropout_p = 0.05 # Initialize models encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers) decoder = AttentionDecoderRNN(attn_model, hidden_size, output_lang.n_words, n_layers, dropout_p=dropout_p) # Move models to GPU encoder.cuda() decoder.cuda() # Initialize optimizers and criterion learning_rate = 0.0001 encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate) decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate) criterion = nn.NLLLoss()
class FeatureAutoEncoderNetwork(Sequence2SequenceNetwork):
    """Sequence autoencoder for video and speech feature vectors.

    This autoencoder is to be used only for video and speech vectors. Use the
    base Sequence2SequenceNetwork class for autoencoding text.
    """

    def build_model(self):
        """Create the encoder and its Adam optimizer; no decoder is built."""
        # Note: no embedding used here
        self.encoder = EncoderRNN(self.enc_input_dim, self.hidden_size,
                                  self.enc_n_layers, self.dropout, self.unit,
                                  self.modality).to(self.device)
        self.encoder_optimizer = optim.Adam(self.encoder.parameters(),
                                            lr=self.lr)
        self.epoch = 0  # define here to add resume training feature

    def load_pretrained_model(self):
        """Restore epoch, encoder and optimizer state from ``self.load_model_name``.

        Does nothing when ``self.load_model_name`` is falsy.
        """
        if self.load_model_name:
            checkpoint = torch.load(self.load_model_name,
                                    map_location=self.device)
            print('Loaded {}'.format(self.load_model_name))
            self.epoch = checkpoint['epoch']
            self.encoder.load_state_dict(checkpoint['en'])
            self.encoder_optimizer.load_state_dict(checkpoint['en_op'])

    def train_model(self):
        """Run the epoch loop: train on every full batch, score, save, adjust lr.

        A checkpoint is written whenever the epoch score exceeds the best score
        seen so far. With ``self.use_scheduler`` set, ``self.lr`` is halved
        (down to ``self.lr_lower_bound``) once the skip counter exceeds 3.
        """
        best_score = 1e-200
        print_loss_total = 0  # Reset every epoch
        saving_skipped = 0

        for epoch in range(self.epoch, self.n_epochs):
            random.shuffle(self.pairs)
            for batch_start in range(0, self.n_iters, self.batch_size):
                training_batch = batch2TrainData(
                    self.vocab,
                    self.pairs[batch_start:batch_start + self.batch_size],
                    self.modality)

                # train() reshapes with self.batch_size, so short trailing
                # batches cannot be used.
                if len(training_batch[1]) < self.batch_size:
                    print('skipped a batch..')
                    continue

                # Extract fields from batch
                input_variable, lengths, target_variable, \
                    tar_lengths = training_batch

                # Run a training iteration with the current batch
                loss = self.train(input_variable, lengths, target_variable,
                                  batch_start)
                self.writer.add_scalar('{}loss'.format(self.data_dir), loss,
                                       batch_start)

                print_loss_total += loss

            print_loss_avg = print_loss_total * self.batch_size / self.n_iters
            print_loss_total = 0
            print('Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, self.n_epochs,
                                                       print_loss_avg))

            if self.modality == 'tt':
                # evaluate and save the model
                curr_score = self.evaluate_all()
            else:  # ss, vv
                # NOTE(review): here the score is the average loss, yet a
                # *higher* value triggers a save below — confirm this is the
                # intended criterion.
                curr_score = print_loss_avg

            if curr_score > best_score:
                saving_skipped = 0
                best_score = curr_score
                self.save_model(epoch)

            saving_skipped += 1

            if self.use_scheduler and saving_skipped > 3:
                saving_skipped = 0
                new_lr = self.lr * 0.5
                print('Entered the dungeon...')
                if new_lr > self.lr_lower_bound:  # lower bound on lr
                    # NOTE(review): only self.lr changes here; the existing
                    # optimizer's param groups are not updated in this class.
                    self.lr = new_lr
                    print('lr decreased to => {}'.format(self.lr))

    def train(self, input_variable, lengths, target_variable, iter):
        """Run one optimisation step on a batch and return the scalar MSE loss.

        Args:
            input_variable: input tensor; its first dimension is used as the
                sequence length.
            lengths: sequence lengths passed to the encoder.
            target_variable: reconstruction target compared with the output.
            iter: batch offset supplied by ``train_model`` (not used here).
        """
        input_variable = input_variable.to(self.device)
        lengths = lengths.to(self.device)
        target_variable = target_variable.to(self.device)

        # Clear gradients left over from the previous batch. Without this the
        # encoder gradients accumulate across every call.
        self.encoder_optimizer.zero_grad()

        # Forward pass through encoder
        _, encoder_hidden = self.encoder(input_variable, lengths)
        if self.unit == 'gru':
            latent = encoder_hidden
        else:
            # Non-GRU units return a (hidden, cell) pair; only hidden is used.
            latent, _ = encoder_hidden

        # reconstruct input from latent vector
        seq_len = input_variable.shape[0]
        # NOTE(review): this projection is re-created with fresh random
        # weights on every call and is not registered with any optimizer.
        self.latent2output = nn.Linear(
            self.latent_dim, self.enc_input_dim * seq_len).to(self.device)
        output = self.latent2output(latent)
        reconstructed_input = output.view(seq_len, self.batch_size,
                                          self.enc_input_dim)

        loss = self.mean_square_error(reconstructed_input, target_variable)
        loss.backward()

        # Clip gradients: gradients are modified in place
        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), self.clip)
        self.encoder_optimizer.step()
        return loss.item()

    def mean_square_error(self, inp, target):
        """Return the MSE between ``inp`` and ``target`` after swapping dims 0 and 1."""
        criterion = nn.MSELoss()
        inp = inp.permute(1, 0, 2)
        target = target.permute(1, 0, 2)
        return criterion(inp, target)

    def save_model(self, epoch):
        """Write epoch, encoder and optimizer state to ``self.save_dir``.

        The directory is created when missing. The file name is built from
        ``model_code``, ``modality``, ``langs`` and ``epoch``.
        """
        directory = self.save_dir
        if not os.path.exists(directory):
            os.makedirs(directory)
        torch.save(
            {
                'epoch': epoch,
                'en': self.encoder.state_dict(),
                'en_op': self.encoder_optimizer.state_dict()
            },
            '{}{}-{}-{}-{}.pth'.format(directory, self.model_code,
                                       self.modality, self.langs, epoch))
# adversarial: command-line entry point for adversarial (GAN-style) training of a
# conversation model.
#   - Parses CLI overrides, loads the pickled default arguments from
#     <load_path>/args.pkl and merges the new values into them.
#   - Creates a timestamped experiment directory under args.exp_dir/adversarial and
#     configures the "lan2720" logger to write 'adversarial.log' there.
#   - Builds the embedding, encoder E, generator G and discriminator D, restores the
#     embedding/E/G weights with reload_model, and dumps the merged args to the
#     experiment directory.
#   - opt_G optimises only G.rnn.parameters(); opt_D optimises all of D.
#   - Each epoch iterates batcher(...) until StopIteration, then saves the model.
# NOTE(review): `eval(args.filter_sizes)` evaluates a command-line string and
# `pickle.load` runs on a file from --load_path — both execute arbitrary code on
# untrusted input; consider ast.literal_eval for the filter sizes.
# NOTE(review): `train_data_generator.next()` is the Python 2 iterator spelling; unless
# batcher() returns an object that defines .next(), Python 3 needs next(...).
# NOTE(review): `D_loss.cpu().data.numpy()[0]` indexes the result of torch.mean, which is
# 0-dimensional in current PyTorch — confirm the targeted version.
# NOTE(review): stored on collapsed lines — it cannot be read from here whether the
# opt_G update runs every step or only inside `if step % args.training_ratio == 0`.
def adversarial(): # user the root logger logger = logging.getLogger("lan2720") argparser = argparse.ArgumentParser(add_help=False) argparser.add_argument('--load_path', '-p', type=str, required=True) # TODO: load best argparser.add_argument('--load_epoch', '-e', type=int, required=True) argparser.add_argument('--filter_num', type=int, required=True) argparser.add_argument('--filter_sizes', type=str, required=True) argparser.add_argument('--training_ratio', type=int, default=2) argparser.add_argument('--g_learning_rate', '-glr', type=float, default=0.001) argparser.add_argument('--d_learning_rate', '-dlr', type=float, default=0.001) argparser.add_argument('--batch_size', '-b', type=int, default=168) # new arguments used in adversarial new_args = argparser.parse_args() # load default arguments default_arg_file = os.path.join(new_args.load_path, 'args.pkl') if not os.path.exists(default_arg_file): raise RuntimeError('No default argument file in %s' % new_args.load_path) else: with open(default_arg_file, 'rb') as f: args = pickle.load(f) args.mode = 'adversarial' #args.d_learning_rate = 0.0001 args.print_every = 1 args.g_learning_rate = new_args.g_learning_rate args.d_learning_rate = new_args.d_learning_rate args.batch_size = new_args.batch_size # add new arguments args.load_path = new_args.load_path args.load_epoch = new_args.load_epoch args.filter_num = new_args.filter_num args.filter_sizes = new_args.filter_sizes args.training_ratio = new_args.training_ratio # set up the output directory exp_dirname = os.path.join(args.exp_dir, args.mode, time.strftime("%Y-%m-%d-%H-%M-%S")) os.makedirs(exp_dirname) # set up the logger tqdm_logging.config(logger, os.path.join(exp_dirname, 'adversarial.log'), mode='w', silent=False, debug=True) # load vocabulary vocab, rev_vocab = load_vocab(args.vocab_file, max_vocab=args.max_vocab_size) vocab_size = len(vocab) word_embeddings = nn.Embedding(vocab_size, args.emb_dim, padding_idx=SYM_PAD) E = EncoderRNN(vocab_size, args.emb_dim, 
args.hidden_dim, args.n_layers, args.dropout_rate, bidirectional=True, variable_lengths=True) G = Generator(vocab_size, args.response_max_len, args.emb_dim, 2*args.hidden_dim, args.n_layers, dropout_p=args.dropout_rate) D = Discriminator(args.emb_dim, args.filter_num, eval(args.filter_sizes)) if args.use_cuda: word_embeddings.cuda() E.cuda() G.cuda() D.cuda() # define optimizer opt_G = torch.optim.Adam(G.rnn.parameters(), lr=args.g_learning_rate) opt_D = torch.optim.Adam(D.parameters(), lr=args.d_learning_rate) logger.info('----------------------------------') logger.info('Adversarial a neural conversation model') logger.info('----------------------------------') logger.info('Args:') logger.info(str(args)) logger.info('Vocabulary from ' + args.vocab_file) logger.info('vocabulary size: %d' % vocab_size) logger.info('Loading text data from ' + args.train_query_file + ' and ' + args.train_response_file) reload_model(args.load_path, args.load_epoch, word_embeddings, E, G) # start_epoch = args.resume_epoch + 1 #else: # start_epoch = 0 # dump args with open(os.path.join(exp_dirname, 'args.pkl'), 'wb') as f: pickle.dump(args, f) # TODO: num_epoch is old one for e in range(args.num_epoch): train_data_generator = batcher(args.batch_size, args.train_query_file, args.train_response_file) logger.info("Epoch: %d/%d" % (e, args.num_epoch)) step = 0 cur_time = time.time() while True: try: post_sentences, response_sentences = train_data_generator.next() except StopIteration: # save model save_model(exp_dirname, e, word_embeddings, E, G, D) ## evaluation #eval(args.valid_query_file, args.valid_response_file, args.batch_size, # word_embeddings, E, G, loss_func, args.use_cuda, vocab, args.response_max_len) break # prepare data post_ids = [sentence2id(sent, vocab) for sent in post_sentences] response_ids = [sentence2id(sent, vocab) for sent in response_sentences] posts_var, posts_length = padding_inputs(post_ids, None) responses_var, responses_length = padding_inputs(response_ids, 
args.response_max_len) # sort by post length posts_length, perms_idx = posts_length.sort(0, descending=True) posts_var = posts_var[perms_idx] responses_var = responses_var[perms_idx] responses_length = responses_length[perms_idx] if args.use_cuda: posts_var = posts_var.cuda() responses_var = responses_var.cuda() embedded_post = word_embeddings(posts_var) real_responses = word_embeddings(responses_var) # forward _, dec_init_state = E(embedded_post, input_lengths=posts_length.numpy()) fake_responses = G(dec_init_state, word_embeddings) # [B, T, emb_size] prob_real = D(embedded_post, real_responses) prob_fake = D(embedded_post, fake_responses) # loss D_loss = - torch.mean(torch.log(prob_real) + torch.log(1. - prob_fake)) G_loss = torch.mean(torch.log(1. - prob_fake)) if step % args.training_ratio == 0: opt_D.zero_grad() D_loss.backward(retain_graph=True) opt_D.step() opt_G.zero_grad() G_loss.backward() opt_G.step() if step % args.print_every == 0: logger.info('Step %5d: D accuracy=%.2f (0.5 for D to converge) D score=%.2f (-1.38 for G to converge) (%.1f iters/sec)' % ( step, prob_real.cpu().data.numpy().mean(), -D_loss.cpu().data.numpy()[0], args.print_every/(time.time()-cur_time))) cur_time = time.time() step = step + 1
# Command-line parsing is disabled; the language pair is fixed below.
# args = parser.parse_args()
language = 'spa-eng'
helpers.validate_language_params(language)

input_lang, output_lang, pairs = etl.prepare_data(language)

# Model and training hyper-parameters
attn_model, hidden_size, n_layers = 'general', 500, 2
dropout_p = 0.05
teacher_forcing_ratio, clip = .5, 5.
criterion = nn.NLLLoss()

# Initialize models
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers)
decoder = AttentionDecoderRNN(
    attn_model,
    hidden_size,
    output_lang.n_words,
    n_layers,
    dropout_p=dropout_p,
)

# Both optimizers share one learning rate
learning_rate = 1
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

# Load model parameters
encoder.load_state_dict(
    torch.load('./data/encoder_params_{}'.format(language)))
decoder.load_state_dict(
    torch.load('./data/decoder_params_{}'.format(language)))
# Fragment: the first statements (random pair selection, evaluate, prints) appear to be
# the body of a function or loop whose header lies outside this view. They are followed
# by top-level code that builds the vocabulary from data/data_clean.txt, trains an
# encoder / attention decoder for 75000 iterations, saves both modules to
# encoder3.pth / decoder3.pth, reloads them and evaluates on the CPU.
# NOTE(review): torch.save() is given the whole module objects (not state dicts), so
# reloading requires the same class definitions to be importable.
pair = random.choice(pairs) print(pair) print('>', pair[2]) print('=', pair[0]) output_words, attentions = evaluate(encoder, decoder, pair[2]) output_sentence = ' '.join(output_words) print('<', output_sentence) print('') voc_path = abs_file_path + "/data/data_clean.txt" voc = Voc("total") voc.initVoc(voc_path) pairs = prepareData(abs_file_path) print(len(pairs)) hidden_size = 256 encoder1 = EncoderRNN(voc.num_words, hidden_size).to(device) attn_decoder1 = AttnDecoderRNN(hidden_size, voc.num_words, dropout=0.1).to(device) trainIters(encoder1, attn_decoder1, 75000) encoder_save_path = "encoder3.pth" decoder_save_path = "decoder3.pth" torch.save(encoder1, current_dir + '/' + encoder_save_path) torch.save(attn_decoder1, current_dir + "/" + decoder_save_path) model1 = torch.load(current_dir + "/" + encoder_save_path) model2 = torch.load(current_dir + "/" + decoder_save_path) evaluateRandomly(model1.to(torch.device("cpu")), model2.to(torch.device("cpu")))
# Rebuild the vocabulary object, then restore everything else from the checkpoint.
voc, _ = loadPrepareData(corpus_name, datafile, MAX_LENGTH)

checkpoint = torch.load(ckpt)
encoder_sd, decoder_sd = checkpoint['en'], checkpoint['de']
encoder_optimizer_sd = checkpoint['en_opt']
decoder_optimizer_sd = checkpoint['de_opt']
embedding_sd = checkpoint['embedding']
# The vocabulary's attributes are replaced wholesale by the saved ones.
voc.__dict__ = checkpoint['voc_dict']

# Initialize word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
embedding.load_state_dict(embedding_sd)

# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(
    attn_model,
    embedding,
    hidden_size,
    voc.num_words,
    decoder_n_layers,
    dropout,
)
encoder.load_state_dict(encoder_sd)
decoder.load_state_dict(decoder_sd)

encoder, decoder = encoder.to(device), decoder.to(device)

# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)
# Fragment: when `model == True`, loads a previously saved encoder, decoder and combiner
# from date-stamped paths under model/. Otherwise builds a new EncoderRNN, AttnDecoderRNN,
# doubleCombineEncoderRNN, GCN and a second EncoderRNN for the GCN output. It then calls
# trainIters and saves the encoder, decoder and combiner with torch.save.
# NOTE(review): `GCN = GCN(...)` rebinds the name GCN from the class to an instance, so
# the class can no longer be instantiated under that name afterwards.
# NOTE(review): `nowTime` is assigned only in the `if` branch here but is used for the
# save paths; stored on a collapsed line, so it cannot be read from here whether
# trainIters and the save calls belong to the `else` branch — confirm `nowTime` is
# defined on every path that reaches them.
# NOTE(review): `gcn_save_path` is built but not used in this fragment, and the GCN
# module is neither loaded nor saved here.
if model == True: day = 19 hour = "01" nowTime = '2018-12-'+str(day)+'-'+str(hour) encoder_save_path = "model/combineEncoder+" +nowTime+ "+.pth" decoder_save_path = "model/combineDecoder+" +nowTime+ "+.pth" combiner_save_path = "model/combineCombiner+" +nowTime+ "+.pth" gcn_save_path= "model/combinegcn+" +nowTime+ "+.pth" encoder1 = torch.load(current_dir + "/" + encoder_save_path) attn_decoder1 = torch.load(current_dir + "/" + decoder_save_path) CombineEncoder = torch.load(current_dir + "/" + combiner_save_path) else: encoder1 = EncoderRNN(voc.num_words, hidden_size,embedding=embedding_layer, embedded=True).to(device) attn_decoder1 = AttnDecoderRNN(hidden_size, voc.num_words).to(device) CombineEncoder = doubleCombineEncoderRNN(hidden_size, hidden_size).to(device) GCN = GCN(voc.num_words, hidden_size, hidden=[12897], dropouts=[0.5]).to(device) GcnEncoder = EncoderRNN(hidden_size, hidden_size,embedding=embedding_layer, embedded=True).to(device) trainIters(encoder1, attn_decoder1, CombineEncoder, GCN, GcnEncoder ,trainpairs,10) encoder_save_path = "model/GCNcombineEncoder+" +nowTime+ "hidden"+str(hidden_size)+ "+.pth" decoder_save_path = "model/GCNcombineDecoder+" +nowTime+ "hidden"+str(hidden_size)+ "+.pth" combiner_save_path = "model/GCNcombineCombiner+" +nowTime+ "hidden"+str(hidden_size)+ "+.pth" torch.save(encoder1, current_dir + '/' + encoder_save_path) torch.save(attn_decoder1, current_dir + "/" + decoder_save_path) torch.save(CombineEncoder, current_dir + "/" + combiner_save_path)
# Fragment: unpacks a checkpoint dict (keys 'en', 'de', 'en_opt', 'de_opt', 'embedding',
# 'voc_dict'), builds the shared embedding, EncoderRNN and LuongAttnDecoderRNN, loads
# the saved state dicts when `loadFilename` is set, moves both models to `device` and
# puts them in train mode.
# Translated inline comments: "初始化embedding" = initialise the embedding;
# "初始化encoder和decoder参数" = initialise the encoder and decoder parameters;
# "选择device" = select the device.
# NOTE(review): the statement that assigns `checkpoint` lies outside this view (only a
# commented-out CPU-mapping variant is visible), and the checkpoint[...] reads are
# presumably guarded by an enclosing `if loadFilename:` — confirm.
# checkpoint=torch.load(loadFilename,map_location=torch.device('cpu') encoder_sd=checkpoint['en'] decoder_sd=checkpoint['de'] encoder_optimizer_sd=checkpoint['en_opt'] decoder_optimizer_sd=checkpoint['de_opt'] embedding_sd=checkpoint['embedding'] voc.__dict__=checkpoint['voc_dict'] print('Building encoder and decoder...') # 初始化embedding embedding=nn.Embedding(voc.num_words,hidden_size) if loadFilename: embedding.load_state_dict(embedding_sd) # 初始化encoder和decoder参数 encoder=EncoderRNN(hidden_size,embedding,encoder_n_layers,dropout) decoder=LuongAttnDecoderRNN(attn_model,embedding,hidden_size,voc.num_words,decoder_n_layers,dropout) if loadFilename: encoder.load_state_dict(encoder_sd) decoder.load_state_dict(decoder_sd) # 选择device encoder=encoder.to(device) decoder=decoder.to(device) print("Models built and ready to go !") encoder.train() decoder.train()
from evaluation import *
from lang import Lang

# Choose the compute device once and report which one was picked.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using CUDA.")
else:
    print("CUDA not available. Switching to CPU.")

encoder_file_location = 'data/encoder.dictionary'
decoder_file_location = 'data/decoder.dictionary'

# Load previously stored models from disk
encoder, decoder = (
    loadObjectFromFile(location)
    for location in (encoder_file_location, decoder_file_location)
)

# Only evaluate when both objects came back truthy.
if encoder and decoder:
    evaluateRandomly(encoder, decoder)
    print(evaluate(encoder, decoder, 'c est un jeune directeur plein')[0])

# Create word embeddings
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))
print(input_lang)

# Create and train a new model
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(
    hidden_size, output_lang.n_words, dropout_p=0.1
).to(device)
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)
# it makes it easier to run multiple experiments) we can actually
# initialize a network and start training.
#
# Remember that the input sentences were heavily filtered. For this small
# dataset we can use relatively small networks of 256 hidden nodes and a
# single GRU layer. After about 40 minutes on a MacBook CPU we'll get some
# reasonable results.
#
# .. Note::
#    If you run this notebook you can train, interrupt the kernel,
#    evaluate, and continue training later. Comment out the lines where the
#    encoder and decoder are initialized and run ``trainIters`` again.
#

hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

# Command-line switches: "-t" anywhere on the command line sets TRAIN, and
# when exactly two arguments follow the script name the second one is parsed
# as the iteration count (7500 otherwise).
TRAIN = "-t" in sys.argv
TRAIN_ITER = int(sys.argv[2]) if len(sys.argv) == 3 else 7500

if use_cuda:
    encoder1, attn_decoder1 = encoder1.cuda(), attn_decoder1.cuda()

# Taken only when both weight files exist and TRAIN is not set.
if os.path.exists("encoder.pt") and os.path.exists("decoder.pt") and not TRAIN:
    pass  # NOTE(review): the body of this branch lies outside the excerpt
# Settings shared by both loaders; only the dataset and shuffling differ.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, collate_fn=collate_fn)
train_iter = DataLoader(dataset=train_dataset, shuffle=True, **loader_kwargs)
valid_iter = DataLoader(dataset=valid_dataset, shuffle=False, **loader_kwargs)

### Initialize model
encoder = EncoderRNN(
    embedding=src_embedding,
    rnn_type=opts.rnn_type,
    hidden_size=opts.hidden_size,
    num_layers=opts.num_layers,
    dropout=opts.dropout,
    bidirectional=opts.bidirectional,
)

# Extra feature embeddings handed to the decoder by keyword.
# NOTE(review): `ext_rate_emebdding` is kept exactly as spelled in the
# original; confirm it matches the name at its definition.
ext_embeddings = dict(
    ext_rate_embedding=ext_rate_emebdding,
    ext_appcate_embedding=ext_appcate_embedding,
    ext_seqlen_embedding=ext_seqlen_embedding,
    ext_senti_embedding=ext_senti_embedding,
)
# The decoder receives the encoder object itself as its first argument.
decoder = LuongAttnDecoderRNN(
    encoder,
    embedding=tgt_embedding,
    attention=opts.attention,
    tie_embeddings=opts.tie_embeddings,
    dropout=opts.dropout,
    tie_ext_feature=opts.tie_ext_feature,
    **ext_embeddings
)

print("emb start")
if opts.pretrained_embeddings:
    if opts.use_keyword:
        pass  # NOTE(review): the body of this branch lies outside the excerpt