def predict(id_2_word, seq_length=35, batch_size=20, load_GRU=True):
    """Sample word sequences from a saved GRU or RNN model and save them.

    Args:
        id_2_word: Indexable mapping from an integer token id to its word.
        seq_length: Number of tokens to generate per sequence.
        batch_size: Number of sequences to generate.
        load_GRU: If true, use the saved GRU weights; otherwise the RNN ones.

    Returns:
        A list of ``batch_size`` lists, each holding ``seq_length`` words.
        The same nested list is also handed to ``save_prediction``.
    """
    # Local import so this block does not depend on the file's import header.
    import os

    vocab_size = 10000
    model_cls, tag = (GRU, "GRU") if load_GRU else (RNN, "RNN")

    # Positional order follows the original inline note:
    # emb_size, hidden_size, seq_len, batch_size, vocab_size, num_layers,
    # dp_keep_prob.  The requested seq_length / batch_size are forwarded
    # instead of the previously hard-coded 35 / 20.
    # NOTE(review): presumably init_hidden() sizes the state from the
    # constructor's batch_size, which is why a non-default batch_size used to
    # mismatch random_input -- confirm against the model classes.
    model = model_cls(200, 1500, seq_length, batch_size, vocab_size, 2, 0.35)

    # os.path.join keeps the same locations on Windows and makes them valid
    # on POSIX, where a literal backslash is not a path separator.
    weights_path = os.path.join("model", "best_" + tag + ".pt")
    model.load_state_dict(torch.load(weights_path))
    model.eval()
    filename = os.path.join("predictions", tag + "_" + str(seq_length) + ".txt")

    hidden = model.init_hidden()
    random_input = torch.randint(vocab_size, (batch_size,))

    # Sampling needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        samples = model.generate(random_input, hidden, seq_length, batch_size)

    # samples is indexed [time step, batch element]; transpose it into one
    # word list per batch element.
    sequence = [
        [id_2_word[samples[step, item].item()] for step in range(seq_length)]
        for item in range(batch_size)
    ]

    save_prediction(sequence, seq_length, filename)
    return sequence
# NOTE(review): this excerpt starts mid-scope; the statement below presumably
# belongs to a preceding CUDA guard that is not visible here -- confirm.
label = label.cuda()

# One Adam optimizer per network, all sharing the same hyper-parameters.
lr = 0.0002
betas = (0.5, 0.999)
optim_Di, optim_Dv, optim_Gi, optim_GRU = (
    optim.Adam(_net.parameters(), lr=lr, betas=betas)
    for _net in (dis_i, dis_v, gen_i, gru)
)

# Restore network weights first, then optimizer states, from trained_path.
# The comparison is kept as `== True` because pre_train may be an integer
# flag, for which plain truthiness would behave differently.
if pre_train == True:
    for _target, _suffix in (
        (dis_i, '/Discriminator_I.model'),
        (dis_v, '/Discriminator_V.model'),
        (gen_i, '/Generator_I.model'),
        (gru, '/GRU.model'),
        (optim_Di, '/Discriminator_I.state'),
        (optim_Dv, '/Discriminator_V.state'),
        (optim_Gi, '/Generator_I.state'),
        (optim_GRU, '/GRU.state'),
    ):
        _target.load_state_dict(torch.load(trained_path + _suffix))


def bp_i(inputs, y, retain=False):
    """Back-propagate the image discriminator's loss for one batch.

    The shared ``label`` buffer is resized in place to the batch size and
    filled with ``y``; ``dis_i`` is then scored against it with ``criterion``
    and the gradients are accumulated.

    Args:
        inputs: Batch fed to ``dis_i``; its first dimension is the batch size.
        y: Target value written into every label entry.
        retain: Forwarded as ``retain_graph`` to ``backward``.
    """
    n_items = inputs.size(0)
    label.resize_(n_items).fill_(y)
    target = Variable(label)
    loss = criterion(dis_i(inputs), target)
    loss.backward(retain_graph=retain)
    # NOTE(review): the visible excerpt ends here; any statements that follow
    # in the full file (e.g. a return) are not shown.
# NOTE(review): this excerpt starts mid-scope; the two statements below
# presumably close a video-saving helper whose header is not visible.
file_path = os.path.join(dir_path, 'Video_epoch-%d.mp4' % epoch)
skvideo.io.vwrite(file_path, outputdata)

trained_path = os.path.join(current_path, 'trained_models')

# Move the generator side to the GPU when requested.  `== True` is kept
# verbatim because the flag's type is not visible here.
if cuda == True:
    gen_i.cuda()
    gru.cuda()
    criterion.cuda()
    label = label.cuda()

# Restore the generator and GRU weights saved for model number nModel.
for _net, _stem in ((gen_i, '/Generator_I_epoch-'), (gru, '/GRU_epoch-')):
    _net.load_state_dict(
        torch.load(trained_path + _stem + str(nModel) + '.model'))


def gen_z(n_frames):
    """Build the latent input for one batch of fake videos.

    One vector of size ``d_C`` is drawn per batch element and repeated for
    every frame; a second vector of size ``d_E`` seeds ``gru``, whose output
    is transposed and concatenated with the repeated vector along dim 2.

    Args:
        n_frames: Number of frames to produce latent codes for.
    """
    # Draw (batch_size, d_C), then repeat to (batch_size, n_frames, d_C).
    content = Variable(torch.randn(batch_size, d_C))
    content = content.unsqueeze(1).repeat(1, n_frames, 1)

    noise = Variable(torch.randn(batch_size, d_E))
    if cuda == True:
        content = content.cuda()
        noise = noise.cuda()

    gru.initHidden(batch_size)
    # The original comment states the GRU output is (seq_len, batch_size, ...);
    # swap the first two dims so batch comes first.
    motion = gru(noise, n_frames).transpose(1, 0)

    z = torch.cat((motion, content), 2)  # z.size() => (batch_size, n_frames, nz)
    # NOTE(review): the visible excerpt ends here; any statements that follow
    # in the full file (e.g. a return of z) are not shown.
batch_size=argsdict["batch_size"], vocab_size=vocab_size, num_layers=argsdict["RNN_num_layers"], dp_keep_prob=1) gru = GRU(emb_size=argsdict["GRU_emb_size"], hidden_size=argsdict["GRU_hidden_size"], seq_len=argsdict["seq_len"], batch_size=argsdict["batch_size"], vocab_size=vocab_size, num_layers=argsdict["GRU_num_layers"], dp_keep_prob=1) # Load the model weight rnn.load_state_dict(torch.load(args.RNN_path)) gru.load_state_dict(torch.load(args.GRU_path)) rnn.eval() gru.eval() # Initialize the hidden state hidden = [rnn.init_hidden(), gru.init_hidden()] # Set the random seed manually for reproducibility. torch.manual_seed(args.seed) # Generate the word seed using random words # in the first 100 most common words. input = torch.randint(0, 100, (args.batch_size, 1)).squeeze() for name_model, model, init_hidden in zip(["RNN", "GRU"], [rnn, gru], hidden):
def _dump_samples(net, weights_path, file_fmt, header_fmt, seq_len):
    """Reload *net*'s best weights, generate samples and write them to disk.

    Sets ``net.seq_len``, loads the state dict stored at *weights_path*, runs
    ``generation`` and writes one header line plus one line per generated
    sentence into ``OUTPUTPATH``.  Returns whatever ``generation`` returned.
    """
    net.seq_len = seq_len
    net.load_state_dict(torch.load(weights_path, map_location=device))
    sentences = generation(net, train_data, valid_data, test_data,
                           word_to_id, id_2_word, seq_len, BatchSize)

    target = os.path.join(OUTPUTPATH, file_fmt % (seq_len))
    with open(target, 'w') as f:
        f.write(header_fmt % (seq_len))
        f.writelines(
            "Sentence %s: %s\n" % (index, sentence)
            for index, sentence in enumerate(sentences)
        )
    return sentences


# For every requested length, sample from the RNN first and then the GRU,
# each restored from its "best params" checkpoint.
for seq_len in seq_lens:
    print("Sequence length: ", seq_len)

    RNN_generation = _dump_samples(
        RNN, RNN_bestparams_path,
        'RNN_%s_samples.txt', "Model RNN. Sequence length: %s\n", seq_len)

    GRU_generation = _dump_samples(
        GRU, GRU_bestparams_path,
        'GRU_%s_samples.txt', "Model GRU. Sequence length: %s\n", seq_len)
def _build_arg_parser():
    """Return the command-line parser for the MoCoGAN training script."""
    parser = argparse.ArgumentParser(description='Start trainning MoCoGAN.....')
    # (flag, type, default, help) in the order they appear in --help.
    options = (
        ('--batch-size', int, 16, 'set batch_size'),
        ('--epochs', int, 60000, 'set num of iterations'),
        ('--pre-train', int, -1, 'set 1 when you use pre-trained models'),
        ('--img_size', int, 96, 'set the input image size of frame'),
        ('--data', str, 'data',
         'set the path for the direcotry containing dataset'),
        ('--channel', int, 3, 'set the no. of channel of the frame'),
        ('--hidden', int, 100, 'set the hidden layer size for gru'),
        ('--dc', int, 50, 'set the size of motion vector'),
        ('--de', int, 10, 'set the size of randomly generated epsilon'),
        # Both values are fractional: with type=int argparse rejected every
        # value supplied on the command line (e.g. "--lr 0.001").
        ('--lr', float, 0.0002, 'set the learning rate'),
        ('--beta', float, 0.5, 'set the beta for the optimizer'),
        ('--trained_path', str, 'trained_models',
         'set the path were to trained models are saved'),
        ('--T', int, 16, 'set the no. of frames to be selected'),
    )
    for flag, kind, default, text in options:
        parser.add_argument(flag, type=kind, default=default, help=text)
    return parser


def main():
    """Train the MoCoGAN networks from the command line.

    Parses the arguments, builds the image/video discriminators, the frame
    generator and the GRU, optionally resumes from the files stored under
    ``--trained_path`` (when ``--pre-train 1``), then runs the adversarial
    loop for ``--epochs`` iterations.  Networks, optimizer states and the
    loss histories are checkpointed every 5000 iterations, a sample video is
    written every 1000 iterations, and the loss curves are saved to
    ``plot.png`` at the end.

    Any ``--pre-train`` value other than 1 starts training from scratch.
    """
    args = _build_arg_parser().parse_args()
    batch_size = args.batch_size
    pre_train = args.pre_train
    img_size = args.img_size
    channel = args.channel
    d_E = args.de
    hidden_size = args.hidden
    d_C = args.dc
    T = args.T

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    args.device = torch.device('cuda:0') if torch.cuda.is_available() else 'cpu'
    # The GRU constructor receives the flag as 1 / -1, so keep that encoding
    # and derive a plain boolean for the checks in this function.
    cuda = 1 if torch.cuda.is_available() else -1
    use_cuda = cuda == 1

    # Output folders; exist_ok avoids the check-then-create race.
    for folder in ('./generated_videos', './trained_models', './resized_data'):
        os.makedirs(folder, exist_ok=True)

    seed = 0
    np.random.seed(seed)
    if use_cuda:
        torch.cuda.set_device(0)

    videos, current_path = preprocess(args)
    num_vid = len(videos)
    d_M = d_E
    nz = d_C + d_M
    criterion = nn.BCELoss()

    # Networks.
    dis_i = Image_Discriminator(channel)
    dis_v = Video_Discriminator()
    gen_i = Generator(channel, nz)
    gru = GRU(d_E, hidden_size, gpu=cuda)
    gru.initWeight()

    # Optimizers (created before the networks are moved, as before).
    lr = args.lr
    adam_betas = (args.beta, 0.999)
    optim_Di = optim.Adam(dis_i.parameters(), lr=lr, betas=adam_betas)
    optim_Dv = optim.Adam(dis_v.parameters(), lr=lr, betas=adam_betas)
    optim_Gi = optim.Adam(gen_i.parameters(), lr=lr, betas=adam_betas)
    optim_GRU = optim.Adam(gru.parameters(), lr=lr, betas=adam_betas)
    trainables = (
        (dis_i, optim_Di),
        (dis_v, optim_Dv),
        (gen_i, optim_Gi),
        (gru, optim_GRU),
    )

    if use_cuda:
        dis_i.cuda()
        dis_v.cuda()
        gen_i.cuda()
        gru.cuda()
        criterion.cuda()

    trained_path = os.path.join(current_path, args.trained_path)
    state_path = os.path.join(trained_path, 'last_state')
    video_lengths = [video.shape[1] for video in videos]

    def checkpoint_stem(model):
        # Files are keyed by class name, so saving and loading always agree.
        return os.path.join(trained_path, '%s' % (model.__class__.__name__))

    # Loss histories always exist; a resume replaces them with the saved ones.
    # Previously they were only created for pre_train == -1, so any other
    # non-1 value crashed with NameError at the first append.
    start_epoch = 1
    Gi_loss, Gv_loss, Di_loss, Dv_loss = [], [], [], []

    if pre_train == 1:
        # map_location lets a checkpoint written on GPU be resumed on CPU.
        last_state = torch.load(state_path, map_location=args.device)
        start_epoch = last_state['epoch']
        Gi_loss = last_state['Gi']
        Gv_loss = last_state['Gv']
        Dv_loss = last_state['Dv']
        Di_loss = last_state['Di']
        for model, _ in trainables:
            model.load_state_dict(torch.load(
                checkpoint_stem(model) + '.model', map_location=args.device))
        for model, optimizer in trainables:
            optimizer.load_state_dict(torch.load(
                checkpoint_stem(model) + '.state', map_location=args.device))
        print("Using Pre-trained model")

    def save_checkpoint(epoch):
        """Write the loss histories once, then every network and optimizer."""
        state = {'epoch': epoch + 1,  # resume at the following iteration
                 'Gi': Gi_loss, 'Gv': Gv_loss, 'Dv': Dv_loss, 'Di': Di_loss}
        torch.save(state, state_path)
        for model, optimizer in trainables:
            stem = checkpoint_stem(model)
            torch.save(model.state_dict(), stem + '.model')
            torch.save(optimizer.state_dict(), stem + '.state')

    def generate_z(num_frame):
        """Return a latent tensor of shape (batch_size, num_frame, nz)."""
        eps = torch.randn(batch_size, d_E)
        # One vector per video, repeated for every frame.
        z_c = torch.randn(batch_size, 1, d_C).repeat(1, num_frame, 1)
        if use_cuda:
            z_c, eps = z_c.cuda(), eps.cuda()
        gru.initHidden(batch_size)
        # The transpose swaps the first two dims so that batch comes first,
        # matching z_c for the concatenation below.
        z_m = gru(eps, num_frame).transpose(1, 0)
        return torch.cat((z_m, z_c), 2)

    # Targets are constant, so build them once: 0.9 for real, 0.0 for fake.
    r_label = torch.FloatTensor(batch_size, 1).fill_(0.9).to(args.device)
    f_label = torch.FloatTensor(batch_size, 1).fill_(0.0).to(args.device)

    for epoch in range(start_epoch, args.epochs + 1):
        start_time = time.time()

        # (batch_size, channel, T, img_size, img_size)
        real_videos = randomVideo(videos, batch_size, T)
        if use_cuda:
            real_videos = real_videos.cuda()
        real_imgs = real_videos[:, :, np.random.randint(0, T), :, :]

        # Generate latent codes for a randomly chosen video length, then
        # reduce them to T frames with sample().
        num_frame = video_lengths[np.random.randint(0, num_vid)]
        Z = generate_z(num_frame).view(batch_size, num_frame, nz, 1, 1)
        Z = sample(Z, T).contiguous().view(batch_size * T, nz, 1, 1)

        # The generator maps each (nz, 1, 1) code to one
        # (channel, img_size, img_size) frame.
        fake_vid = gen_i(Z).view(batch_size, T, channel, img_size, img_size)
        fake_vid = fake_vid.transpose(2, 1)
        fake_img = fake_vid[:, :, np.random.randint(0, T), :, :]

        # --- Video discriminator ---
        dis_v.zero_grad()
        real_loss = criterion(dis_v(real_videos), r_label)
        real_loss.backward()
        # detach(): discriminator updates must not reach the generator.
        fake_loss = criterion(dis_v(fake_vid.detach()), f_label)
        fake_loss.backward()
        dv_loss = real_loss + fake_loss
        optim_Dv.step()

        # --- Image discriminator ---
        dis_i.zero_grad()
        real_lossi = criterion(dis_i(real_imgs), r_label)
        real_lossi.backward()
        fake_lossi = criterion(dis_i(fake_img.detach()), f_label)
        fake_lossi.backward()
        di_loss = real_lossi + fake_lossi
        optim_Di.step()

        # --- Generator and GRU ---
        gen_i.zero_grad()
        gru.zero_grad()
        gv_loss = criterion(dis_v(fake_vid), r_label)
        # fake_img is a slice of fake_vid, so the graph is needed again below.
        gv_loss.backward(retain_graph=True)
        gi_loss = criterion(dis_i(fake_img), r_label)
        gi_loss.backward()
        optim_Gi.step()
        optim_GRU.step()

        gi_value, gv_value = gi_loss.item(), gv_loss.item()
        di_value, dv_value = di_loss.item(), dv_loss.item()
        Gi_loss.append(gi_value)
        Gv_loss.append(gv_value)
        Dv_loss.append(dv_value)
        Di_loss.append(di_value)

        end_time = time.time()

        if epoch % 100 == 0:
            print('[%d/%d] Time_taken: %f || Gi loss: %.3f || Gv loss: %.3f || Di loss: %.3f || Dv loss: %.3f'%(epoch, args.epochs, end_time-start_time, gi_value, gv_value, di_value, dv_value))

        if epoch % 5000 == 0:
            save_checkpoint(epoch)

        if epoch % 1000 == 0:
            save_video(fake_vid[0].data.cpu().numpy().transpose(1, 2, 3, 0),
                       epoch, current_path)

    # Plot the four loss histories.
    plt.plot(Gi_loss, label='Image Generator')
    plt.plot(Gv_loss, label='Video Generator')
    plt.plot(Di_loss, label='Image Discriminator')
    plt.plot(Dv_loss, label='Video Discriminator')
    plt.legend()
    plt.savefig("plot.png")