# Build the seq2seq training setup: preprocess the dataset, load pre-trained
# word embeddings, and construct the encoder/decoder (moved to GPU if available).
# NOTE(review): relies on `args`, `Data_Preprocess`, `Get_Embedding`,
# `Encoder_RNN`, `Decoder_RNN`, and `use_cuda` defined elsewhere in this file.
print('Reading input data.')
data_p = Data_Preprocess(args.dataset, min_length=args.min_length, max_length=args.max_length)

print("Number of training Samples :", len(data_p.x_train))
print("Number of validation Samples :", len(data_p.x_val))

print('Creating Word Embedding.')

''' Use pre-trained word embeddings '''
# Embedding matrix is looked up by the dataset's word->index mapping.
embedding = Get_Embedding(data_p.word2index, args.embedding_file)

# Both networks share the same pre-trained matrix; train_embedding=True means
# the embedding weights are fine-tuned during training.
encoder = Encoder_RNN(args.hidden_size, embedding.embedding_matrix,
                      batch_size=args.batch_size, num_layers=args.num_layers,
                      use_embedding=True, train_embedding=True)
decoder = Decoder_RNN(args.hidden_size, embedding.embedding_matrix,
                      num_layers=args.num_layers, use_embedding=True,
                      train_embedding=True, dropout_p=args.dropout)

# Delete embedding object post weight initialization in encoder and decoder
# (the networks keep their own copies of the weights, so this frees memory).
del embedding

if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()
# Build the adversarial (GAN-style) translation setup: prepare the eng-fra
# data, load pre-trained embeddings for both languages, and construct the
# encoder/decoder plus the two discriminators, then optionally pre-train.
# NOTE(review): relies on `data_preprocess`, `Helper`, `Get_Embedding`,
# `Encoder_RNN`, `Decoder_RNN`, `Discriminator_CNN`, `Discriminator_Dense`,
# `Train_Network`, `pre_train_iters`, and the hyperparameter globals
# (`hidden_size`, `num_layers`, `batch_size`, `max_length`, `use_cuda`,
# `pre_train`) defined elsewhere in this file.
input_lang, output_lang, pairs = data_preprocess.prepare_data('eng', 'fra', True)

# One random pair is tracked across training to show qualitative progress.
tracking_pair = random.choice(pairs)
print(tracking_pair)

helpFn = Helper()

embedding_src = Get_Embedding(input_lang.word2index, input_lang.word2count, "./Embeddings/")
# BUG FIX: the destination embedding was built with input_lang.word2count;
# the output language's own word counts must be used instead.
embedding_dest = Get_Embedding(output_lang.word2index, output_lang.word2count, "./Embeddings/")

# Convert each numpy embedding matrix to a FloatTensor once and reuse it
# (the original repeated this conversion at every use site).
src_matrix = torch.from_numpy(embedding_src.embedding_matrix).type(torch.FloatTensor)
dest_matrix = torch.from_numpy(embedding_dest.embedding_matrix).type(torch.FloatTensor)

input_emb_shape = src_matrix.shape
output_emb_shape = dest_matrix.shape

# Vocabulary size is the first dimension (rows) of each embedding matrix.
input_vocab_size = input_emb_shape[0]
output_vocab_size = output_emb_shape[0]

# Pre-trained embeddings are frozen here (train_embedding=False).
encoder = Encoder_RNN(hidden_size, src_matrix,
                      num_layers=num_layers, batch_size=batch_size,
                      use_embedding=True, train_embedding=False)
decoder = Decoder_RNN(hidden_size, dest_matrix,
                      num_layers=num_layers, use_embedding=True,
                      train_embedding=False, dropout_p=0.1)
discriminator_cnn = Discriminator_CNN(input_vocab_size, output_vocab_size)
discriminator_dense = Discriminator_Dense(max_length)

if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    discriminator_cnn = discriminator_cnn.cuda()
    discriminator_dense = discriminator_dense.cuda()

train_network = Train_Network(encoder, decoder, discriminator_cnn, discriminator_dense,
                              output_lang, max_length, dest_matrix,
                              input_vocab_size, batch_size=batch_size, num_layers=num_layers)

if pre_train:
    print("######################################### Pre Training #########################################")
    pre_train_iters(train_network, input_lang, output_lang, pairs, max_length,
                    batch_size=batch_size, tracking_pair=tracking_pair, n_iters=1)
# Log the run's hyperparameters, prepare the data, and build an encoder/decoder
# pair that learns its own embeddings (no pre-trained matrix is passed).
# NOTE(review): relies on `print_file`, `Data_Preprocess`, `Encoder_RNN`,
# `Decoder_RNN`, `Train_Network`, `use_cuda`, and the hyperparameter globals
# defined elsewhere in this file. The final Train_Network(...) call is
# truncated in this chunk and continues past the visible source.
print_file('Batch Size :' + str(batch_size))
print_file('Number of Layers :' + str(num_layers))
print_file('Embedding Size :' + str(embedding_size))
print_file('Max. input length :' + str(max_length))
print_file('Max. vocab size :' + str(vocab_size))
print_file('Num epochs :' + str(num_iters))
print_file('--------------------------------------')

# NOTE(review): vocab_size is passed twice — presumably the input and output
# vocabulary caps are intentionally equal here; confirm against Data_Preprocess.
data_preprocess = Data_Preprocess(max_length, vocab_size, vocab_size)
input_lang, output_lang, pairs = data_preprocess.prepare_data()

# Random sample pair used to show qualitative progress during training.
tracking_pair = random.choice(pairs)
print(tracking_pair)

''' Generate and learn embeddings '''
print("input_l")
# A (vocab_size, embedding_size) tuple — rather than a matrix — asks the
# networks to create and learn their own embedding layers.
encoder = Encoder_RNN(hidden_size, (len(input_lang.word2index), embedding_size),
                      batch_size=batch_size, num_layers=num_layers)
decoder = Decoder_RNN(hidden_size, (len(output_lang.word2index), embedding_size),
                      num_layers=num_layers)

if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

print("Training Network.")
train_network = Train_Network(encoder, decoder, output_lang, max_length, batch_size=batch_size,
# Build the persona-aware chatbot setup using pre-trained word embeddings:
# pull vocabulary/persona info from the preprocessor, then construct the
# encoder and a persona-conditioned decoder (moved to GPU if available).
# NOTE(review): relies on `data_preprocess`, `word2index`, `embedding_file`,
# `train_in_seq`, `dev_in_seq`, `Get_Embedding`, `Encoder_RNN`, `Decoder_RNN`,
# `persona_size`, and the hyperparameter globals defined elsewhere in this file.
index2word = data_preprocess.index2word
word2count = data_preprocess.word2count
vocab_size = data_preprocess.vocab_size
# +1 reserves an extra persona slot — presumably for an "unknown" persona;
# TODO confirm against Decoder_RNN.
personas = len(data_preprocess.people) + 1

print("Number of training Samples :", len(train_in_seq))
print("Number of validation Samples :", len(dev_in_seq))
print("Number of Personas :", personas)

print('Creating Word Embedding.')

''' Use pre-trained word embeddings '''
embedding = Get_Embedding(word2index, word2count, embedding_file)

# Word embeddings are frozen (train_embedding=False); the decoder additionally
# receives a (personas, persona_size) spec for its learned persona embedding.
encoder = Encoder_RNN(hidden_size, embedding.embedding_matrix,
                      batch_size=batch_size, num_layers=num_layers,
                      use_embedding=True, train_embedding=False)
decoder = Decoder_RNN(hidden_size, embedding.embedding_matrix, (personas, persona_size),
                      num_layers=num_layers, use_embedding=True,
                      train_embedding=False, dropout_p=0.1)

if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

print("Training Network.")
# Build the persona-aware chatbot setup with learned (not pre-trained) word
# embeddings, then normalize the checkpoint filenames for loading pre-trained
# encoder/decoder parameters.
# NOTE(review): relies on `args`, `Data_Preprocess`, `Encoder_RNN`,
# `Decoder_RNN`, and `path` (presumably os.path) defined elsewhere in this
# file. The final path.join(...) call is truncated in this chunk and
# continues past the visible source.
print('Number of Epochs :', args.num_iters)
print('--------------------------------------------\n')

print('Reading input data.')
data_p = Data_Preprocess(args.dataset, min_length=args.min_length, max_length=args.max_length)
# +1 reserves an extra persona slot — presumably for an "unknown" persona;
# TODO confirm against Decoder_RNN.
personas = len(data_p.people) + 1

print("Number of training Samples :", len(data_p.x_train))
print("Number of validation Samples :", len(data_p.x_val))
print("Number of Personas :", personas)

# (vocab_size, 300) tuples ask the networks to create and learn their own
# 300-dimensional word embeddings (use_embedding=False, train_embedding=True).
encoder = Encoder_RNN(args.hidden_size, (len(data_p.word2index), 300),
                      batch_size=args.batch_size, num_layers=args.num_layers,
                      use_embedding=False, train_embedding=True)
decoder = Decoder_RNN(args.hidden_size, (len(data_p.word2index), 300), (personas, args.persona_size),
                      num_layers=args.num_layers, use_embedding=False,
                      train_embedding=True, dropout_p=args.dropout)

# Ensure both checkpoint filenames carry the .pt extension before resolving
# their full paths.
if not args.encoder_parameters.endswith('.pt'):
    args.encoder_parameters += '.pt'
if not args.decoder_parameters.endswith('.pt'):
    args.decoder_parameters += '.pt'

args.encoder_parameters = path.join('../Pre_Train/',