def train():
    """Run the actor-critic training loop for sentence simplification.

    Per epoch: (1) roll the actor over every training sentence and store
    (state, action, next_state, reward, prob, sentence) transitions in a
    replay buffer; (2) fit the critic (Q-function) on a random sample of
    those transitions against a one-step TD target; (3) update the actor
    via `shared_loss` using the critic's Q-estimates; then clear the
    buffer.  A checkpoint is written every 100 epochs via `save_model`.

    NOTE(review): the original source was collapsed onto a single line, so
    the loop nesting below (critic/actor updates once per epoch, after the
    whole data pass) is a reconstruction — confirm against project history.
    """
    experiences_buffer = deque(maxlen=config.MAX_EXPERIENCES_SIZE)
    word2vec = LightWord2Vec()
    lang = Lang(word2vec.get_vocab())
    actor = ActorCopy(config.EMBEDDING_SIZE, config.STATE_SIZE, lang, word2vec)
    critic = Critic(config.STATE_SIZE, config.EMBEDDING_SIZE,
                    config.CRITIC_HIDDEN_SIZE)
    reader = DataSetReader('train')
    critic_optimizer = torch.optim.Adam(critic.parameters())
    critic_criterion = torch.nn.MSELoss()
    actor_optimizer = torch.optim.Adam(actor.parameters())
    if LOAD_INDEX > -1:
        # Resume from a checkpoint: the freshly built modules/optimizers
        # above are replaced wholesale by the saved ones.
        (actor, critic, critic_optimizer, critic_criterion,
         actor_optimizer, lang) = load_model(LOAD_INDEX)
    if torch.cuda.is_available():
        actor.cuda()
        critic.cuda()
    for epoch in range(LOAD_INDEX + 1, config.EPOCHS):
        # ---- rollout: collect experiences with the current actor ----
        for x, y in reader.read(config.TRAIN_BATCH_SIZE):
            for sentence, target_sentence in zip(x, y):
                states, actions, probs = actor(
                    sentence, get_possible_actions(lang, sentence))
                predicted_sentence = actions[:-1]  # drop the trailing None
                # Per-prefix SARI rewards; the appended 0 is the reward of
                # the terminal transition.
                rewards = [
                    sari_reward(sentence[:i + 1], predicted_sentence[:i + 1],
                                target_sentence[:i + 1])
                    for i in range(max(len(target_sentence),
                                       len(predicted_sentence)))
                ] + [0]
                for i in range(len(states) - 1):
                    experiences_buffer.insert(
                        0,
                        Experience(states[i], actions[i], states[i + 1],
                                   rewards[i], probs[i], sentence))

        # ---- critic (Q-function) update on a random sample ----
        exp_length = min(len(experiences_buffer), config.Q_BATCH_SIZE)
        # BUG FIX: the TD-target tensor was previously allocated as
        # torch.zeros(config.Q_BATCH_SIZE, 1) even when fewer experiences
        # were available, so MSELoss saw mismatched input/target shapes
        # whenever exp_length < Q_BATCH_SIZE.  Size it by exp_length.
        q_s = torch.zeros(exp_length, 1)
        q_estimated = []
        for idx in range(exp_length):
            # Sample with replacement from the newest exp_length entries
            # (insert(0, ...) above puts the newest experiences first).
            exp = experiences_buffer[random.randint(0, exp_length - 1)]
            action_emb = word2vec[exp.action]
            q_estimated.append(critic(exp.state, action_emb)[0, 0])
            q_s[idx] = exp.reward
            if exp.next_state is not None:
                # One-step TD target: r + gamma * max_a' Q(s', a'), with the
                # bootstrap term detached from the autograd graph.
                with torch.no_grad():
                    q_s[idx] += (config.GAMMA * max([
                        critic(exp.next_state, word2vec[action])
                        for action in get_possible_actions(lang, exp.sentence)
                    ]))[0][0]
        q_estimated = torch.cat(q_estimated).view(-1, 1)
        critic_optimizer.zero_grad()
        # MSELoss is symmetric in value and gradient, but pass arguments in
        # the conventional (input, target) order: predictions first.
        loss = critic_criterion(q_estimated, q_s)
        # retain_graph: the actor update below backpropagates through parts
        # of the same graph (q_estimated feeds shared_loss).
        loss.backward(retain_graph=True)
        critic_optimizer.step()

        # ---- actor (seq2seq) update ----
        actor_optimizer.zero_grad()
        loss = shared_loss(experiences_buffer, q_estimated[:exp_length])
        loss.backward()
        actor_optimizer.step()

        experiences_buffer.clear()
        with torch.no_grad():
            actor.zero_grad()
            critic.zero_grad()
        if epoch % 100 == 0:
            save_model(epoch, actor, critic, critic_optimizer,
                       critic_criterion, actor_optimizer, lang)
        print("Finished epoch:", epoch, " loss is ", torch.sum(loss))
                               drop_last=True)
# NOTE(review): the line above is the tail of a data.DataLoader(...) call
# whose opening arguments lie outside this chunk — presumably the SVHN
# training loader; confirm against the full file.
svhntestloader = data.DataLoader(svhn_testset, shuffle=True, batch_size=m,
                                 drop_last=True)
# Standard-normal prior for the generator's latent vectors.
latent_distr = torch.distributions.normal.Normal(0, 1)
# Networks
crit = Critic()
gen = Generator(latent_size)
classifier = models.vgg13(pretrained=False)
# adjust final layer to handle 10 classes
classifier.classifier._modules['6'] = torch.nn.Linear(4096, 10)
classifier.train()
# Move all three networks to the GPU (assumes CUDA is available — no
# torch.cuda.is_available() guard here).
crit.cuda()
gen.cuda()
classifier.cuda()
# GAN adversarial loss and classifier negative log-likelihood.
adversarial_loss = torch.nn.BCELoss()
neg_logl = torch.nn.NLLLoss()
# Adam with betas=(0.5, 0.999): the usual DCGAN-style optimizer settings,
# one optimizer per network.
optimizer = torch.optim.Adam(crit.parameters(), lr=0.0001,
                             betas=(0.5, 0.999))
optimizer_gen = torch.optim.Adam(gen.parameters(), lr=0.0001,
                                 betas=(0.5, 0.999))
optimizer_classifier = torch.optim.Adam(classifier.parameters(), lr=0.0001,
                                        betas=(0.5, 0.999))
# Exponential decay of the critic's learning rate (gamma=0.999 per step).
scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.999)