Example #1

import random
import torch
from collections import deque

def train():
    experiences_buffer = deque(maxlen=config.MAX_EXPERIENCES_SIZE)
    word2vec = LightWord2Vec()
    lang = Lang(word2vec.get_vocab())
    actor = ActorCopy(config.EMBEDDING_SIZE, config.STATE_SIZE, lang, word2vec)
    critic = Critic(config.STATE_SIZE, config.EMBEDDING_SIZE,
                    config.CRITIC_HIDDEN_SIZE)
    reader = DataSetReader('train')
    critic_optimizer = torch.optim.Adam(critic.parameters())
    critic_criterion = torch.nn.MSELoss()
    actor_optimizer = torch.optim.Adam(actor.parameters())

    if LOAD_INDEX > -1:
        actor, critic, critic_optimizer, critic_criterion, actor_optimizer, lang = load_model(
            LOAD_INDEX)

    if torch.cuda.is_available():
        actor.cuda()
        critic.cuda()

    for epoch in range(LOAD_INDEX + 1, config.EPOCHS):
        # training actor
        for x, y in reader.read(config.TRAIN_BATCH_SIZE):
            for sentence, target_sentence in zip(x, y):
                states, actions, probs = actor(
                    sentence, get_possible_actions(lang, sentence))
                predicted_sentence = actions[:-1]  # Skip None

                # Per-prefix SARI reward for each decoding step; the
                # trailing 0 is the terminal-state reward.
                rewards = [
                    sari_reward(sentence[:i + 1], predicted_sentence[:i + 1],
                                target_sentence[:i + 1])
                    for i in range(
                        max(len(target_sentence), len(predicted_sentence)))
                ] + [0]

                for i in range(len(states) - 1):
                    # appendleft: a bounded deque raises IndexError on
                    # insert(0, ...) once full, but appendleft simply evicts
                    # the oldest experiences from the other end.
                    experiences_buffer.appendleft(
                        Experience(states[i], actions[i], states[i + 1],
                                   rewards[i], probs[i], sentence))

        # training q function
        exp_length = min(len(experiences_buffer), config.Q_BATCH_SIZE)
        q_estimated = []
        # Bellman targets; sized to the actual number of sampled experiences
        # so the loss shapes match even when the buffer is small.
        q_s = torch.zeros(exp_length, 1)

        for idx in range(exp_length):
            # sample uniformly from the whole replay buffer
            exp = experiences_buffer[random.randrange(len(experiences_buffer))]
            action_emb = word2vec[exp.action]
            q_estimated.append(critic(exp.state, action_emb)[0, 0])
            q_s[idx] = exp.reward
            if exp.next_state is not None:
                # Bellman target: r + GAMMA * max_a' Q(s', a')
                with torch.no_grad():
                    q_s[idx] += (config.GAMMA * max([
                        critic(exp.next_state, word2vec[action])
                        for action in get_possible_actions(lang, exp.sentence)
                    ]))[0][0]

        # torch.cat cannot concatenate 0-dim tensors; stack them into a column
        q_estimated = torch.stack(q_estimated).view(-1, 1)

        critic_optimizer.zero_grad()
        loss = critic_criterion(q_estimated, q_s)  # (input, target) order

        # keep the graph alive: shared_loss below backpropagates through the
        # same Q estimates
        loss.backward(retain_graph=True)
        critic_optimizer.step()

        # updating seq2seq model
        actor_optimizer.zero_grad()
        loss = shared_loss(experiences_buffer, q_estimated)
        loss.backward()
        actor_optimizer.step()

        experiences_buffer.clear()
        actor.zero_grad()
        critic.zero_grad()

        if epoch % 100 == 0:
            save_model(epoch, actor, critic, critic_optimizer,
                       critic_criterion, actor_optimizer, lang)

        print("Finished epoch:", epoch, " loss is ", torch.sum(loss))

Example #2

import torch
from torch.utils import data
from torchvision import models

svhntrainloader = data.DataLoader(svhn_trainset,
                                  shuffle=True,
                                  batch_size=m,
                                  drop_last=True)
svhntestloader = data.DataLoader(svhn_testset,
                                 shuffle=True,
                                 batch_size=m,
                                 drop_last=True)

latent_distr = torch.distributions.normal.Normal(0, 1)

# Networks
crit = Critic()
gen = Generator(latent_size)
classifier = models.vgg13(pretrained=False)
# adjust final layer to handle 10 classes
classifier.classifier[6] = torch.nn.Linear(4096, 10)
classifier.train()
crit.cuda()
gen.cuda()
classifier.cuda()

adversarial_loss = torch.nn.BCELoss()
neg_logl = torch.nn.NLLLoss()

# critic (discriminator) optimizer; scheduler_d below decays its LR
optimizer = torch.optim.Adam(crit.parameters(), lr=0.0001, betas=(0.5, 0.999))
optimizer_gen = torch.optim.Adam(gen.parameters(),
                                 lr=0.0001,
                                 betas=(0.5, 0.999))
optimizer_classifier = torch.optim.Adam(classifier.parameters(),
                                        lr=0.0001,
                                        betas=(0.5, 0.999))

scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.999)
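
The fragment ends after the optimizer setup. One plausible shape for the update step these pieces feed into, as a sketch: crit ending in a sigmoid (required by BCELoss), log_softmax on the VGG logits (required by NLLLoss), and num_epochs are all assumptions here, not the original loop:

import torch
import torch.nn.functional as F

for epoch in range(num_epochs):  # num_epochs assumed defined elsewhere
    for x, y in svhntrainloader:
        x, y = x.cuda(), y.cuda()

        # Discriminator step: push real batches toward 1, generated toward 0.
        optimizer.zero_grad()
        z = latent_distr.sample((m, latent_size)).cuda()
        fake = gen(z)
        real_labels = torch.ones(m, 1, device=x.device)
        fake_labels = torch.zeros(m, 1, device=x.device)
        loss_d = (adversarial_loss(crit(x), real_labels) +
                  adversarial_loss(crit(fake.detach()), fake_labels))
        loss_d.backward()
        optimizer.step()

        # Generator step: make the critic score fakes as real.
        optimizer_gen.zero_grad()
        loss_g = adversarial_loss(crit(fake), real_labels)
        loss_g.backward()
        optimizer_gen.step()

        # Classifier step: NLLLoss expects log-probabilities, so apply
        # log_softmax to the VGG logits.
        optimizer_classifier.zero_grad()
        loss_c = neg_logl(F.log_softmax(classifier(x), dim=1), y)
        loss_c.backward()
        optimizer_classifier.step()

    scheduler_d.step()  # decay the discriminator LR once per epoch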