示例#1
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] #+ (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    model = SentenceVAE(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
        )

    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join('./',args.logdir, expierment_name(args,ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join('./',args.save_model_path,'VAE', ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx)
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))
        
        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)
        NLL_w_avg = NLL_loss/torch.sum(length).float()

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight,NLL_w_avg
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split=='train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            tracker = defaultdict(tensor)
 
            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight,NLL_w_avg = loss_fn(logp, batch['target'],
                    batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss)/batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1


                # bookkeepeing
		# Avoid the .cat error !!!
                #print(loss.data)
                #print(tracker['ELBO'])
                loss_data = torch.tensor([loss.data.item()])
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss_data)) #Orig: tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data),1)

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, NLL-word-Loss %9.4f"
                        %(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight,NLL_w_avg))
                
                #split = 'invalid' #JUST TO DEBUG!!!
                
                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx) #ERROR HERE!!!
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
                if not os.path.exists(os.path.join('./dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('./dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
                    json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train' and epoch %10 ==0 :
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s"%checkpoint_path)
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    curBest = 1000000
    for split in splits:
        datasets[split] = Mixed(data_dir=args.data_dir,
                                split=split,
                                create_data=args.create_data,
                                max_sequence_length=args.max_sequence_length,
                                min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, totalIterations, split):
        if (split != 'train'):
            return 1
        elif anneal_function == 'identity':
            return 1
        elif anneal_function == 'linear':
            return 1.005 * float(step) / totalIterations
        elif anneal_function == 'sigmoid':
            return (1 / (1 + math.exp(-8 * (float(step) / totalIterations))))
        elif anneal_function == 'tanh':
            return math.tanh(4 * (float(step) / totalIterations))
        elif anneal_function == 'linear_capped':
            #print(float(step)*30/totalIterations)
            return min(1.0, float(step) * 5 / totalIterations)
        elif anneal_function == 'cyclic':
            quantile = int(totalIterations / 5)
            remainder = int(step % quantile)
            midPoint = int(quantile / 2)
            if (remainder > midPoint):
                return 1
            else:
                return float(remainder) / midPoint
        else:
            return 1

    ReconLoss = torch.nn.NLLLoss(size_average=False,
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step,
                totalIterations, split):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        #print((1 + logv - mean.pow(2) - logv.exp()).size())

        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        #print(KL_loss.size())
        KL_weight = kl_anneal_function(anneal_function, step, totalIterations,
                                       split)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    tensor2 = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    tensor3 = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    tensor4 = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor

    step = 0
    stop = False
    Z = []
    L = []
    for epoch in range(args.epochs):
        if (stop):
            break
        for split in splits:
            if (split == 'test'):
                z_data = []
                domain_label = []
                z_bool = False
                domain_label_bool = False
            if (stop):
                break
            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            totalIterations = (int(len(datasets[split]) / args.batch_size) +
                               1) * args.epochs

            tracker = defaultdict(tensor)
            tracker2 = defaultdict(tensor2)
            tracker3 = defaultdict(tensor3)
            tracker4 = defaultdict(tensor4)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                #                 if(iteration > 400):
                #                     break
                batch_size = batch['input'].size(0)
                labels = batch['label']

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])
                if (split == 'test'):
                    if (z_bool == False):
                        z_bool = True
                        domain_label = labels.tolist()
                        z_data = z
                    else:
                        domain_label += labels.tolist()
                        #print(domain_label)
                        z_data = torch.cat((z_data, z), 0)

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, totalIterations, split)

                if split == 'train':
                    #KL_loss_thresholded = torch.clamp(KL_loss, min=6.0)
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data))
                tracker2['KL_loss'] = torch.cat(
                    (tracker2['KL_loss'], KL_loss.data))
                tracker3['Recon_loss'] = torch.cat(
                    (tracker3['Recon_loss'], recon_loss.data))
                tracker4['Perplexity'] = torch.cat(
                    (tracker4['Perplexity'],
                     torch.exp(recon_loss.data / batch_size)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data[0], recon_loss.data[0] / batch_size,
                           KL_loss.data[0] / batch_size, KL_weight))

                if (split == 'test'):
                    Z = z_data
                    L = domain_label

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split.upper(), epoch, args.epochs,
                         torch.mean(tracker['negELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  torch.mean(tracker['negELBO']), epoch)
                writer.add_scalar("%s-Epoch/KL_loss" % split.upper(),
                                  torch.mean(tracker2['KL_loss']) / batch_size,
                                  epoch)
                writer.add_scalar(
                    "%s-Epoch/Recon_loss" % split.upper(),
                    torch.mean(tracker3['Recon_loss']) / batch_size, epoch)
                writer.add_scalar("%s-Epoch/Perplexity" % split.upper(),
                                  torch.mean(tracker4['Perplexity']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                if (torch.mean(tracker['negELBO']) < curBest):
                    curBest = torch.mean(tracker['negELBO'])
                else:
                    stop = True
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps_32_0', ts)):
                    os.makedirs('dumps_32_0/' + ts)
                with open(
                        os.path.join('dumps_32_0/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            # if split == 'train':
            #     checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
            #     torch.save(model.state_dict(), checkpoint_path)
            #     logger.info("Model saved at %s"%checkpoint_path)

    Z = Z.data.cpu().numpy()
    print(Z.shape)
    beforeTSNE = TSNE(random_state=20150101).fit_transform(Z)
    scatter(beforeTSNE, L, [0, 1, 2], (5, 5), 'latent discoveries')
    plt.savefig('mixed_tsne' + args.anneal_function + '.png', dpi=120)
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def sigmoid(step):
        x = step - 6569.5
        if x < 0:
            a = np.exp(x)
            res = (a / (1 + a))
        else:
            res = (1 / (1 + np.exp(-x)))
        return float(res)

    def frange_cycle_linear(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5):
        L = np.ones(n_iter) * stop
        period = n_iter / n_cycle
        step = (stop - start) / (period * ratio)  # linear schedule

        for c in range(n_cycle):
            v, i = start, 0
            while v <= stop and (int(i + c * period) < n_iter):
                L[int(i + c * period)] = v
                v += step
                i += 1
        return L

    n_iter = 0
    for epoch in range(args.epochs):
        split = 'train'
        data_loader = DataLoader(dataset=datasets[split],
                                 batch_size=args.batch_size,
                                 shuffle=split == 'train',
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        for iteration, batch in enumerate(data_loader):
            n_iter += 1
    print("Total no of iterations = " + str(n_iter))

    L = frange_cycle_linear(n_iter)

    def kl_anneal_function(anneal_function, step):
        if anneal_function == 'identity':
            return 1

        if anneal_function == 'sigmoid':
            return sigmoid(step)

        if anneal_function == 'cyclic':
            return float(L[step])

    ReconLoss = torch.nn.NLLLoss(size_average=False,
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp,
                target,
                length,
                mean,
                logv,
                anneal_function,
                step,
                split='train'):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        if split == 'train':
            KL_weight = kl_anneal_function(anneal_function, step)
        else:
            KL_weight = 1

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, split)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data[0], recon_loss.data[0] / batch_size,
                           KL_loss.data[0] / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split.upper(), epoch, args.epochs,
                         torch.mean(tracker['negELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  torch.mean(tracker['negELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)
示例#4
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    log_file = open("res.txt", "a")
    log_file.write(expierment_name(args, ts))
    log_file.write("\n")
    graph_file = open("elbo-graph.txt", "a")
    graph_file.write(expierment_name(args, ts))
    graph_file.write("\n")

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)
        elif anneal_function == "softplus":
            return min(1, np.log(1 + np.exp(k * step)))
        elif anneal_function == "no":
            return 1

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    val_lowest_elbo = 5000
    val_accu_epoch = 0
    val_min_epoch = 0
    split_elbo = {"train": [], "valid": []}
    if args.test:
        split_elbo["test"] = []
    split_loss = {"train": [], "valid": []}
    if args.test:
        split_loss["test"] = []

    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                if split != 'train':
                    KL_weight = 1.0

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data[0], NLL_loss.data[0] / batch_size,
                           KL_loss.data[0] / batch_size, KL_weight))
                    split_loss[split].append([
                        loss.data[0], NLL_loss.data[0] / batch_size,
                        KL_loss.data[0] / batch_size
                    ])

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))
            split_elbo[split].append([torch.mean(tracker["ELBO"])])

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

            if split == 'valid':
                if torch.mean(tracker['ELBO']) < val_lowest_elbo:
                    val_lowest_elbo = torch.mean(tracker['ELBO'])
                    val_accu_epoch = 0
                    val_min_epoch = epoch
                else:
                    val_accu_epoch += 1
                    if val_accu_epoch >= 3:
                        if not args.test:
                            exp_str = ""
                            exp_str += "train_ELBO={}\n".format(
                                split_elbo["train"][val_min_epoch])
                            exp_str += "valid_ELBO={}\n".format(
                                split_elbo["valid"][val_min_epoch])
                            exp_str += "==========\n"
                            log_file.write(exp_str)
                            log_file.close()
                            print(exp_str)
                            graph_file.write("ELBO\n")
                            line = ""
                            for s in splits:
                                for i in split_loss[s]:
                                    line += "{},".format(i[0])
                                line += "\n"
                            graph_file.write(line)
                            graph_file.write("NLL\n")
                            line = ""
                            for s in splits:
                                for i in split_loss[s]:
                                    line += "{},".format(i[1])
                                line += "\n"
                            graph_file.write(line)
                            graph_file.write("KL\n")
                            line = ""
                            for s in splits:
                                for i in split_loss[s]:
                                    line += "{},".format(i[2])
                                line += "\n"
                            graph_file.write(line)
                            graph_file.close()
                            exit()
            elif split == 'test' and val_accu_epoch >= 3:
                exp_str = ""
                exp_str += "train_ELBO={}\n".format(
                    split_elbo["train"][val_min_epoch])
                exp_str += "valid_ELBO={}\n".format(
                    split_elbo["valid"][val_min_epoch])
                exp_str += "test_ELBO={}\n".format(
                    split_elbo["test"][val_min_epoch])
                exp_str += "==========\n"
                log_file.write(exp_str)
                log_file.close()
                print(exp_str)
                graph_file.write("ELBO\n")
                line = ""
                for s in splits:
                    for i in split_loss[s]:
                        line += "{},".format(i[0])
                    line += "\n"
                for s in splits:
                    for i in split_elbo[s]:
                        line += "{},".format(i[0])
                    line += "\n"
                graph_file.write(line)
                graph_file.write("NLL\n")
                line = ""
                for s in splits:
                    for i in split_loss[s]:
                        line += "{},".format(i[1])
                    line += "\n"
                graph_file.write(line)
                graph_file.write("KL\n")
                line = ""
                for s in splits:
                    for i in split_loss[s]:
                        line += "{},".format(i[2])
                    line += "\n"
                graph_file.write(line)
                graph_file.close()
                exit()

        if epoch == args.epochs - 1:
            exp_str = ""
            exp_str += "train_ELBO={}\n".format(
                split_elbo["train"][val_min_epoch])
            exp_str += "valid_ELBO={}\n".format(
                split_elbo["valid"][val_min_epoch])
            if args.test:
                exp_str += "test_ELBO={}\n".format(
                    split_elbo["test"][val_min_epoch])
            exp_str += "==========\n"
            log_file.write(exp_str)
            log_file.close()
            print(exp_str)
            graph_file.write("ELBO\n")
            line = ""
            for s in splits:
                for i in split_loss[s]:
                    line += "{},".format(i[0])
                line += "\n"
            graph_file.write(line)
            graph_file.write("NLL\n")
            line = ""
            for s in splits:
                for i in split_loss[s]:
                    line += "{},".format(i[1])
                line += "\n"
            graph_file.write(line)
            graph_file.write("KL\n")
            line = ""
            for s in splits:
                for i in split_loss[s]:
                    line += "{},".format(i[2])
                line += "\n"
            graph_file.write(line)
            graph_file.close()
            exit()
示例#5
0
def main(args):

    # Load the vocab
    with open(args.data_dir+'/ptb.vocab.json', 'r') as file:
        vocab = json.load(file)

    w2i, i2w = vocab['w2i'], vocab['i2w']

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    # Initialize semantic loss
    sl = Semantic_Loss()

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    params = dict(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        unk_idx=datasets['train'].unk_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    model = SentenceVAE(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    def perplexity_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/ 1+np.exp(-k*(step-x0)))
        elif anneal_function == 'linear':
            return min(1, (step/x0))

    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum')
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0, \
        batch_perplexity, perplexity_anneal_function):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        # Perplexity
        perp_loss = batch_perplexity
        perp_weight = perplexity_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight


    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        # Keep track of epoch loss
        epoch_loss = []

        for split in splits:

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split=='train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            batch_t_start = None

            for iteration, batch in enumerate(data_loader):

                if batch_t_start:
                    batch_run_time = time.time() - batch_t_start
                    # print("Batch run time: " + str(batch_run_time))
                batch_t_start = time.time()


                batch_size = batch['input_sequence'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Get the original sentences in this batch
                batch_sentences = idx2word(batch['input_sequence'], i2w=i2w, pad_idx=w2i['<pad>'])
                # Remove the first tag
                batch_sentences = [x.replace("<sos>", "") for x in batch_sentences]

                # Forward pass
                (logp, mean, logv, z), states = model(**batch)


                # Choose some random pairs of samples within the batch
                #  to get latent representations for
                batch_index_pairs = list(itertools.combinations(np.arange(batch_size), 2))
                random.shuffle(batch_index_pairs)
                batch_index_pairs = batch_index_pairs[:args.perplexity_samples_per_batch]

                batch_perplexity = []

                # If we start the perplexity
                start_perplexity = epoch > 10

                # If we should have perplexity loss
                if start_perplexity and args.perplexity_loss:
                    # For each pair, get the intermediate representations in the latent space
                    for index_pair in batch_index_pairs:

                        with torch.no_grad():
                            z1_hidden = states['z'][index_pair[0]].cpu()
                            z2_hidden = states['z'][index_pair[1]].cpu()

                        z_hidden = to_var(torch.from_numpy(interpolate(start=z1_hidden, end=z2_hidden, steps=1)).float())

                        if args.rnn_type == "lstm":

                            with torch.no_grad():
                                z1_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[0]]
                                z2_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[1]]

                            z_cell_states = \
                                to_var(torch.from_numpy(interpolate(start=z1_cell_state, end=z2_cell_state, steps=1)).float())

                            samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states)
                        else:
                            samples, _ = model.inference(z=z_hidden, z_cell_state=None)

                        # Check interpolated sentences
                        interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])
                        # For each sentence, get the perplexity and show it
                        perplexities = []
                        for sentence in interpolated_sentences:
                            perplexities.append(sl.get_perplexity(sentence))
                        avg_sample_perplexity = sum(perplexities) / len(perplexities)
                        batch_perplexity.append(avg_sample_perplexity)
                    # Calculate batch perplexity
                    avg_batch_perplexity = sum(batch_perplexity) / len(batch_perplexity)

                    # loss calculation
                    NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'],
                        batch['length'], mean, logv, args.anneal_function, step, \
                            args.k, args.x0, avg_batch_perplexity, perplexity_anneal_function)

                    loss = ((NLL_loss + KL_weight * KL_loss) / batch_size) + (perp_loss * perp_weight)

                else: # Epochs < X, so train without perplexity
                    # loss calculation
                    NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'],
                        batch['length'], mean, logv, args.anneal_function, step, \
                            args.k, args.x0, 0, perplexity_anneal_function)

                    loss = (NLL_loss + KL_weight * KL_loss) / batch_size


                # Turn model back into train, since inference changed to eval
                if split == 'train':
                    model.train()
                else:
                    model.eval()

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                    # Add loss
                    epoch_loss.append(loss.item())

                # bookkeepeing
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
                                      epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
                                      epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      epoch*len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, Perp-loss %9.4f, Perp-weight %6.3f"
                          % (split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size,
                          KL_loss.item()/batch_size, KL_weight, perp_loss, perp_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(),
                                                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('dumps/'+ts+'/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
示例#6
0
文件: train.py 项目: kaletap/Bert-VAE
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    RANDOM_SEED = 42

    dataset = load_dataset("yelp_polarity", split="train")
    TRAIN_SIZE = len(dataset) - 2_000
    VALID_SIZE = 1_000
    TEST_SIZE = 1_000

    train_test_split = dataset.train_test_split(train_size=TRAIN_SIZE,
                                                seed=RANDOM_SEED)
    train_dataset = train_test_split["train"]
    test_val_dataset = train_test_split["test"].train_test_split(
        train_size=VALID_SIZE, test_size=TEST_SIZE, seed=RANDOM_SEED)
    val_dataset, test_dataset = test_val_dataset["train"], test_val_dataset[
        "test"]

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
    datasets = OrderedDict()
    datasets['train'] = TextDataset(train_dataset, tokenizer,
                                    args.max_sequence_length,
                                    not args.disable_sent_tokenize)
    datasets['valid'] = TextDataset(val_dataset, tokenizer,
                                    args.max_sequence_length,
                                    not args.disable_sent_tokenize)
    if args.test:
        datasets['text'] = TextDataset(test_dataset, tokenizer,
                                       args.max_sequence_length,
                                       not args.disable_sent_tokenize)

    print(
        f"Loading {args.model_name} model. Setting {args.trainable_layers} trainable layers."
    )
    encoder = AutoModel.from_pretrained(args.model_name, return_dict=True)
    if not args.train_embeddings:
        for p in encoder.embeddings.parameters():
            p.requires_grad = False
    encoder_layers = encoder.encoder.layer
    if args.trainable_layers > len(encoder_layers):
        warnings.warn(
            f"You are asking to train {args.trainable_layers} layers, but this model has only {len(encoder_layers)}"
        )
    for layer in range(len(encoder_layers) - args.trainable_layers):
        for p in encoder_layers[layer].parameters():
            p.requires_grad = False
    params = dict(vocab_size=datasets['train'].vocab_size,
                  embedding_size=args.embedding_size,
                  rnn_type=args.rnn_type,
                  hidden_size=args.hidden_size,
                  word_dropout=args.word_dropout,
                  embedding_dropout=args.embedding_dropout,
                  latent_size=args.latent_size,
                  num_layers=args.num_layers,
                  bidirectional=args.bidirectional,
                  max_sequence_length=args.max_sequence_length)
    model = SentenceVAE(encoder=encoder, tokenizer=tokenizer, **params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)
    with open(os.path.join(save_model_path, 'train_args.json'), 'w') as f:
        json.dump(vars(args), f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        if step <= x0:
            return args.initial_kl_weight
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0 - 2500))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx,
                           reduction='sum')

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    params = [{
        'params': model.encoder.parameters(),
        'lr': args.encoder_learning_rate
    }, {
        'params': [
            *model.decoder_rnn.parameters(), *model.hidden2mean.parameters(),
            *model.hidden2logv.parameters(), *model.latent2hidden.parameters(),
            *model.outputs2vocab.parameters()
        ]
    }]
    optimizer = torch.optim.Adam(params,
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=(split == 'train'),
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available(),
                                     collate_fn=DataCollator(tokenizer))

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'],
                                            batch['attention_mask'],
                                            batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), NLL_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].tolist(), tokenizer=tokenizer)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences, the encoded latent space and generated sequences
            if split == 'valid':
                samples, _ = model.inference(z=tracker['z'])
                generated_sents = idx2word(samples.tolist(), tokenizer)
                sents = [{
                    'original': target,
                    'generated': generated
                } for target, generated in zip(tracker['target_sents'],
                                               generated_sents)]
                dump = {'sentences': sents, 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file, indent=3)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
示例#7
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid']

    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
        filename=os.path.join(args.logdir,
                              experiment_name(args, ts) + ".log"))
    logger = logging.getLogger(__name__)

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    total_step = int(args.epochs * 42000.0 / args.batch_size)

    def kl_anneal_function(anneal_function, step):
        if anneal_function == 'half':
            return 0.5
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'double':
            return 2
        if anneal_function == 'quadra':
            return 4

        if anneal_function == 'sigmoid':
            return 1 / (1 + np.exp((0.5 * total_step - step) / 200))

        if anneal_function == 'monotonic':
            beta = step * 4 / total_step
            if beta > 1:
                beta = 1.0
            return beta

        if anneal_function == 'cyclical':
            t = total_step / 4
            beta = 4 * (step % t) / t
            if beta > 1:
                beta = 1.0
            return beta

    ReconLoss = torch.nn.NLLLoss(reduction='sum',
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    train_loss = []
    test_loss = []
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                # tracker['negELBO'] = torch.cat((tracker['negELBO'], loss.data))
                tracker["negELBO"].append(loss.item())
                tracker["recon_loss"].append(recon_loss.item() / batch_size)
                tracker["KL_Loss"].append(KL_loss.item() / batch_size)
                tracker["KL_Weight"].append(KL_weight)

                if args.tensorboard_logging:
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    logger.info(
                        "\tStep\t%s\t%04d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" %
                        (split.upper(), iteration, len(data_loader) - 1,
                         loss.item(), recon_loss.item() / batch_size,
                         KL_loss.item() / batch_size, KL_weight))
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), recon_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'].append(z.data.tolist())

            logger.info(
                "\tEpoch\t%s\t%02d\t%i\t%9.4f\t%9.4f\t%9.4f\t%6.3f" %
                (split.upper(), epoch, args.epochs,
                 sum(tracker['negELBO']) / len(tracker['negELBO']),
                 1.0 * sum(tracker['recon_loss']) / len(tracker['recon_loss']),
                 1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss']),
                 1.0 * sum(tracker['KL_Weight']) / len(tracker['KL_Weight'])))
            print("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   sum(tracker['negELBO']) / len(tracker['negELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar(
                    "%s-Epoch/NegELBO" % split.upper(),
                    1.0 * sum(tracker['negELBO']) / len(tracker['negELBO']),
                    epoch)
                writer.add_scalar(
                    "%s-Epoch/recon_loss" % split.upper(), 1.0 *
                    sum(tracker['recon_loss']) / len(tracker['recon_loss']),
                    epoch)
                writer.add_scalar(
                    "%s-Epoch/KL_Loss" % split.upper(),
                    1.0 * sum(tracker['KL_Loss']) / len(tracker['KL_Loss']),
                    epoch)
                writer.add_scalar(
                    "%s-Epoch/KL_Weight" % split.upper(), 1.0 *
                    sum(tracker['KL_Weight']) / len(tracker['KL_Weight']),
                    epoch)

            if split == 'train':
                train_loss.append(1.0 * sum(tracker['negELBO']) /
                                  len(tracker['negELBO']))
            else:
                test_loss.append(1.0 * sum(tracker['negELBO']) /
                                 len(tracker['negELBO']))
            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

    sns.set(style="whitegrid")
    df = pd.DataFrame()
    df["train"] = train_loss
    df["test"] = test_loss
    ax = sns.lineplot(data=df, legend=False)
    ax.set(xlabel='Epoch', ylabel='Loss')
    plt.legend(title='Split', loc='upper right', labels=['Train', 'Test'])
    plt.savefig(os.path.join(args.logdir,
                             experiment_name(args, ts) + ".png"),
                transparent=True,
                dpi=300)
def main(args):

	ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

	splits = ['train', 'valid'] + (['test'] if args.test else [])

	datasets = OrderedDict()
	for split in splits:

		if args.dataset == 'ptb':
			Dataset = PTB
		elif args.dataset == 'twitter':
			Dataset = PoliticianTweets
		else:
			print("Invalid dataset. Exiting")
			exit()

		datasets[split] = Dataset(
			data_dir=args.data_dir,
			split=split,
			create_data=args.create_data,
			max_sequence_length=args.max_sequence_length,
			min_occ=args.min_occ
		)

	model = SentenceVAE(
		vocab_size=datasets['train'].vocab_size,
		sos_idx=datasets['train'].sos_idx,
		eos_idx=datasets['train'].eos_idx,
		pad_idx=datasets['train'].pad_idx,
		unk_idx=datasets['train'].unk_idx,
		max_sequence_length=args.max_sequence_length,
		embedding_size=args.embedding_size,
		rnn_type=args.rnn_type,
		hidden_size=args.hidden_size,
		word_dropout=args.word_dropout,
		embedding_dropout=args.embedding_dropout,
		latent_size=args.latent_size,
		num_layers=args.num_layers,
		bidirectional=args.bidirectional
		)

	# if args.from_file != "":
	# 	model = torch.load(args.from_file)
	#

	if torch.cuda.is_available():
		model = model.cuda()

	print(model)

	if args.tensorboard_logging:
		writer = SummaryWriter(os.path.join(args.logdir, experiment_name(args,ts)))
		writer.add_text("model", str(model))
		writer.add_text("args", str(args))
		writer.add_text("ts", ts)

	save_model_path = os.path.join(args.save_model_path, ts)
	os.makedirs(save_model_path)

	
	if 'sigmoid' in args.anneal_function and args.dataset=='ptb':
		linspace = np.linspace(-5,5,13160) # 13160 = number of training examples in ptb
	elif 'sigmoid' in args.anneal_function and args.dataset=='twitter':
		linspace = np.linspace(-5, 5, 25190) #6411/25190? = number of training examples in short version of twitter

	def kl_anneal_function(anneal_function, step, param_dict=None):
		if anneal_function == 'identity':
			return 1
		elif anneal_function == 'sigmoid' or anneal_function=='sigmoid_klt':
			s = 1/(len(linspace))
			return(float((1)/(1+np.exp(-param_dict['ag']*(linspace[step])))))

	NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx)
	def loss_fn(logp, target, length, mean, logv, anneal_function, step, param_dict=None):

		# cut-off unnecessary padding from target, and flatten
		target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
		logp = logp.view(-1, logp.size(2))
		
		# Negative Log Likelihood
		NLL_loss = NLL(logp, target)

		# KL Divergence
		KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
		if args.anneal_function == 'sigmoid_klt':
			if float(KL_loss)/args.batch_size < param_dict['kl_threshold']:
				# print("KL_loss of %s is below threshold %s. Returning this threshold instead"%(float(KL_loss)/args.batch_size,param_dict['kl_threshold']))
				KL_loss = to_var(torch.Tensor([param_dict['kl_threshold']*args.batch_size]))
		KL_weight = kl_anneal_function(anneal_function, step, {'ag': args.anneal_aggression})

		return NLL_loss, KL_loss, KL_weight

	optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

	tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
	step = 0
	for epoch in range(args.epochs):

		for split in splits:

			data_loader = DataLoader(
				dataset=datasets[split],
				batch_size=args.batch_size,
				shuffle=split=='train',
				num_workers=0,
				pin_memory=torch.cuda.is_available()
			)

			tracker = defaultdict(tensor)

			# Enable/Disable Dropout
			if split == 'train':
				model.train()
			else:
				model.eval()

			for iteration, batch in enumerate(data_loader):

				batch_size = batch['input'].size(0)
				if split == 'train' and batch_size != args.batch_size:
					print("WARNING: Found different batch size\nargs.batch_size= %s, input_size=%s"%(args.batch_size, batch_size))
					

				for k, v in batch.items():
					if torch.is_tensor(v):
						batch[k] = to_var(v)

				# Forward pass
				logp, mean, logv, z = model(batch['input'], batch['length'])

				# loss calculation
				NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
					batch['length'], mean, logv, args.anneal_function, step, {'kl_threshold': args.kl_threshold})

				loss = (NLL_loss + KL_weight * KL_loss)/batch_size

				# backward + optimization
				if split == 'train':
					optimizer.zero_grad()
					loss.backward()
					optimizer.step()
					step += 1
					# print(step)

				# bookkeepeing
				tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data))

				
				if args.tensorboard_logging:
					writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration)
					writer.add_scalar("%s/NLL_Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
					writer.add_scalar("%s/KL_Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
					# print("Step %s: %s"%(epoch*len(data_loader) + iteration, KL_weight))
					writer.add_scalar("%s/KL_Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration)

				if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
					logger.info("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
						%(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight))

				if split == 'valid':
					if 'target_sents' not in tracker:
						tracker['target_sents'] = list()
					tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
					tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

			logger.info("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO'])))

			if args.tensorboard_logging:
				writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch)

			# save a dump of all sentences and the encoded latent space
			if split == 'valid':
				dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
				if not os.path.exists(os.path.join('dumps', ts)):
					os.makedirs('dumps/'+ts)
				with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
					json.dump(dump,dump_file)

			# save checkpoint
			if split == 'train':
				checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
				torch.save(model.state_dict(), checkpoint_path)
				logger.info("Model saved at %s"%checkpoint_path)

	torch.save(model, f"model-{args.dataset}-{ts}.pickle")
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    total_steps = (len(datasets["train"]) // args.batch_size) * args.epochs
    print("Train dataset size", total_steps)

    def kl_anneal_function(anneal_function, step):
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'linear':
            if args.warmup is None:
                return 1 - (total_steps - step) / total_steps
            else:
                warmup_steps = (total_steps / args.epochs) * args.warmup
                return 1 - (warmup_steps - step
                            ) / warmup_steps if step < warmup_steps else 1.0

    ReconLoss = torch.nn.NLLLoss(size_average=False,
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['negELBO'] = torch.cat(
                    (tracker['negELBO'], loss.data.unsqueeze(0)))

                if args.tensorboard_logging:
                    neg_elbo = (recon_loss + KL_loss) / batch_size
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      neg_elbo.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data[0], recon_loss.data[0] / batch_size,
                           KL_loss.data[0] / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split.upper(), epoch, args.epochs,
                         torch.mean(tracker['negELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  torch.mean(tracker['negELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)

    if args.num_samples:
        torch.cuda.empty_cache()
        model.eval()
        with torch.no_grad():
            print(f"Generating {args.num_samples} samples")
            generations, _ = model.inference(n=args.num_samples)
            vocab = datasets["train"].i2w

            print(
                "Sampled latent codes from z ~ N(0, I), generated sentences:")
            for i, generation in enumerate(generations, start=1):
                sentence = [vocab[str(word.item())] for word in generation]
                print(f"{i}:", " ".join(sentence))
示例#10
0
文件: train.py 项目: pvijayak/RNNVaE
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, x1, x2):
        if anneal_function == 'identity':
            return 1
        elif anneal_function == 'linear':
            return min(1, step / x1)
        elif anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-x2 * (step - x1))))
        elif anneal_function == 'cyclic_log':
            return float(1 / (1 + np.exp(-x2 * ((step % (3 * x1)) - x1))))
        elif anneal_function == 'cyclic_lin':
            return min(1, (step % (3 * x1)) / x1)

    ReconLoss = torch.nn.NLLLoss(size_average=False,
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, x1,
                x2):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, x1, x2)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0

    early_stopping = EarlyStopping(history=10)
    for epoch in range(args.epochs):

        early_stopping_flag = False
        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            # tracker = defaultdict(tensor)
            tracker = defaultdict(list)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step, args.x1, args.x2)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['negELBO'].append(loss.item())

                if args.tensorboard_logging:
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    # print(step)
                    # logger.info("Step = %d"%step)
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), recon_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)
                    # print(z.data.shape)
                    tracker['z'].append(z.data.tolist())
            mean_loss = sum(tracker['negELBO']) / len(tracker['negELBO'])

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split.upper(), epoch, args.epochs, mean_loss))
            # print(mean_loss)

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  mean_loss, epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z']
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)
                if (args.early_stopping):
                    if (early_stopping.check(mean_loss)):
                        early_stopping_flag = True

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)

        if (early_stopping_flag):
            print("Early stopping trigerred. Training stopped...")
            break
示例#11
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        # datasets[split] = BGoogle(
        #     data_dir=args.data_dir,
        #     split=split,
        #     create_data=args.create_data,
        #     batch_size=args.batch_size ,
        #     max_sequence_length=args.max_sequence_length,
        #     min_occ=args.min_occ
        # )

        datasets[split] = Amazon(data_dir=args.data_dir,
                                 split=split,
                                 create_data=args.create_data,
                                 batch_size=args.batch_size,
                                 max_sequence_length=args.max_sequence_length,
                                 min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    tokenizer = TweetTokenizer(preserve_case=False)
    vocab_file = "amazon.vocab.json"
    with open(os.path.join(args.data_dir, vocab_file), 'r') as file:
        vocab = json.load(file)
        w2i, i2w = vocab['w2i'], vocab['i2w']

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    # save_model_path = os.path.join(args.save_model_path, ts)
    save_model_path = args.save_model_path

    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    save_mode = True
    last_ELBO = 1e32

    for epoch in range(args.epochs):
        print("+" * 20)

        # f_test_example(model, tokenizer, w2i, i2w)
        for split in splits:

            # data_loader = DataLoader(
            #     dataset=datasets[split],
            #     batch_size=args.batch_size,
            #     shuffle=split=='train',
            #     num_workers=cpu_count(),
            #     pin_memory=torch.cuda.is_available()
            # )
            batch_size = args.batch_size
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            # for iteration, batch in enumerate(data_loader):
            iteration = 0
            iteration_total = datasets[split].batch_num
            print("batch_num", iteration_total)
            for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets[
                    split]:

                if torch.is_tensor(input_batch_tensor):
                    input_batch_tensor = to_var(input_batch_tensor)

                if torch.is_tensor(target_batch_tensor):
                    target_batch_tensor = to_var(target_batch_tensor)

                if torch.is_tensor(length_batch_tensor):
                    length_batch_tensor = to_var(length_batch_tensor)

                # batch_size = batch['input'].size(0)

                # for k, v in batch.items():
                #     if torch.is_tensor(v):
                #         batch[k] = to_var(v)

                # Forward pass
                # logp, mean, logv, z = model(batch['input'], batch['length'])
                logp, mean, logv, z = model(input_batch_tensor,
                                            length_batch_tensor)

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(
                    logp, target_batch_tensor, length_batch_tensor, mean, logv,
                    args.anneal_function, step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                iteration += 1
                # bookkeepeing
                # print("elbo", tracker['ELBO'])
                # print("loss", loss)
                if iteration == 0:
                    tracker['ELBO'] = loss.data
                    tracker['ELBO'] = tracker['ELBO'].view(1)
                else:
                    tracker['ELBO'] = torch.cat(
                        (tracker['ELBO'], loss.view(1)))

                if args.tensorboard_logging:
                    # print(loss.data)
                    writer.add_scalar("%s/ELBO" % split.upper(),
                                      loss.data.item(),
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data.item() / batch_size,
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data.item() / batch_size,
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * iteration_total + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == iteration_total:
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, iteration_total - 1,
                           loss.data.item(), NLL_loss.data.item() / batch_size,
                           KL_loss.data.item() / batch_size, KL_weight))

                # if split == 'valid':
                # if 'target_sents' not in tracker:
                #     tracker['target_sents'] = list()
                # tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)

                # # print("z", tracker['z'], z)
                # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)
                # break

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))

            cur_ELBO = torch.mean(tracker['ELBO'])
            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), cur_ELBO,
                                  epoch)

            if split == "valid":
                if cur_ELBO < last_ELBO:
                    save_mode = True
                else:
                    save_mode = False
                last_ELBO = cur_ELBO

            # save a dump of all sentences and the encoded latent space
            # if split == 'valid':
            #     dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
            #     if not os.path.exists(os.path.join('dumps', ts)):
            #         os.makedirs('dumps/'+ts)
            #     with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
            #         json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train':
                # checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
                checkpoint_path = os.path.join(save_model_path, "best.pytorch")
                if save_mode == True:
                    torch.save(model.state_dict(), checkpoint_path)
                    print("Model saved at %s" % checkpoint_path)
示例#12
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.localtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ,
                              use_bert=args. False)

    model = SentenceVAE(alphabet_size=datasets['train'].alphabet_size,
                        vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)
    print("Saving model to directory: " + save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    def word_weight_function(step, k, x0):
        return float(1 / (1 + np.exp(-k * (step - x0))))

    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(def_logp, word_logp, def_target, def_length, word_target,
                word_length, mean, logv):

        # cut-off unnecessary padding from target definition, and flatten
        def_target = def_target[:, :torch.max(def_length).item()].contiguous(
        ).view(-1)
        def_logp = def_logp.view(-1, def_logp.size(2))

        # Negative Log Likelihood
        def_NLL_loss = NLL(def_logp, def_target)

        # cut off padding for words
        word_target = word_target[:, :torch.max(word_length).item(
        )].contiguous().view(-1)
        word_logp = word_logp.view(-1, word_logp.size(2))

        # Word NLL
        word_NLL_loss = NLL(word_logp, word_target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())

        return def_NLL_loss, word_NLL_loss, KL_loss

    def get_weights(anneal_function, step, k, x0):
        # for logistic function, k = growth rate
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)
        word_weight = word_weight_function(step, k, x0)

        return {'def': 1, 'word': word_weight, 'kl': KL_weight}

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model = model.train()
            else:
                model = model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                [def_logp,
                 word_logp], mean, logv, z = model(batch['input'],
                                                   batch['length'],
                                                   batch['word_length'])

                # loss calculation
                def_NLL_loss, word_NLL_loss, KL_loss = loss_fn(
                    def_logp, word_logp, batch['target'], batch['length'],
                    batch['word'], batch['word_length'], mean, logv)
                weights = get_weights(args.anneal_function, step, args.k,
                                      args.x0)

                loss = (weights['def'] * def_NLL_loss + weights['word'] *
                        word_NLL_loss + weights['kl'] * KL_loss) / batch_size

                mean_logv = torch.mean(logv)

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.detach().unsqueeze(0)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Def NLL Loss" % split.upper(),
                                      def_NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Word NLL Loss" % split.upper(),
                                      word_NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      weights['kl'],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Word Weight" % split.upper(),
                                      weights['word'],
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, Def NLL-Loss %9.4f, Word NLL-Loss %9.4f  Word-Weight %6.3f, KL-Loss %9.4f, KL-Weight %6.3f KL-VAL %9.4f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), def_NLL_loss.item() / batch_size,
                           word_NLL_loss.item() / batch_size, weights['word'],
                           KL_loss.item() / batch_size, weights['kl'],
                           mean_logv))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'],
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
示例#13
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    ptb = PTB(vocab_file=args.vocab_file,
              train_file=args.train_file,
              train_with_vocab=False,
              create_data=args.create_data,
              max_sequence_length=args.max_sequence_length,
              min_occ=args.min_occ)
    datasets = PTBDataset(ptb)
    print('done preprocessing data')

    model = SentenceVAE(vocab_size=datasets.vocab_size,
                        sos_idx=datasets.sos_idx,
                        eos_idx=datasets.eos_idx,
                        pad_idx=datasets.pad_idx,
                        unk_idx=datasets.unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    model.ptb = ptb

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets.pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        #target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        target = target[:, :torch.max(length).data].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    model.train()
    split = 'train'
    for epoch in range(args.epochs):
        data_loader = DataLoader(dataset=datasets,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        tracker = defaultdict(list)
        for iteration, batch in enumerate(data_loader):

            batch_size = batch['input'].size(0)

            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            # Forward pass
            #logp, mean, logv, z = model(batch['input'], batch['length'])
            logp, mean, logv, z, encoder_last = model(batch['input'],
                                                      batch['length'])

            # loss calculation
            NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                   batch['length'], mean, logv,
                                                   args.anneal_function, step,
                                                   args.k, args.x0)

            loss = (NLL_loss + KL_weight * KL_loss) / batch_size

            # backward + optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            step += 1

            # bookkeepeing
            tracker['ELBO'].append(loss.data.cpu().numpy().tolist())

            if args.tensorboard_logging:
                writer.add_scalar("%s/ELBO" % split.upper(), loss.data,
                                  epoch * len(data_loader) + iteration)
                writer.add_scalar("%s/NLL Loss" % split.upper(),
                                  NLL_loss.data / batch_size,
                                  epoch * len(data_loader) + iteration)
                writer.add_scalar("%s/KL Loss" % split.upper(),
                                  KL_loss.data / batch_size,
                                  epoch * len(data_loader) + iteration)
                writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                  epoch * len(data_loader) + iteration)

            if iteration % args.print_every == 0 or iteration + 1 == len(
                    data_loader):
                print(
                    "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                    % (split.upper(), iteration, len(data_loader) - 1,
                       loss.data, NLL_loss.data / batch_size,
                       KL_loss.data / batch_size, KL_weight))

        if split == 'valid':
            if 'target_sents' not in tracker:
                tracker['target_sents'] = list()
            tracker['target_sents'] += idx2word(batch['target'].data,
                                                i2w=datasets.get_i2w(),
                                                pad_idx=datasets.pad_idx)
            tracker['z'].append(z.data)

        print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
              (split.upper(), epoch, args.epochs, np.mean(tracker['ELBO'])))

        if args.tensorboard_logging:
            writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                              np.mean(tracker['ELBO']), epoch)
        '''
        # save a dump of all sentences and the encoded latent space
        if split == 'valid':
            dump = {'target_sents':tracker['target_sents'], 'z':tracker['z']}
            if not os.path.exists(os.path.join('dumps', ts)):
                os.makedirs('dumps/'+ts)
            with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
                json.dump(dump,dump_file)
        '''

        # save checkpoint
        if split == 'train':
            checkpoint_path = os.path.join(save_model_path,
                                           "E%i.pytorch" % (epoch))
            torch.save(model.state_dict(), checkpoint_path)
            joblib.dump(model.cpu(), checkpoint_path)
            print("Model saved at %s" % checkpoint_path)

        if torch.cuda.is_available():
            model.cuda()
示例#14
0
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.max_sequence_length,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.max_sequence_length, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    splits = ['train', 'test']
    data_loaders = {
        'train': train_data_loader,
        'test': test_data_loader
    }

    model = SentenceVAE(
        vocab_size=len(vocab),
        sos_idx=vocab.sos_index,
        eos_idx=vocab.eos_index,
        pad_idx=vocab.pad_index,
        unk_idx=vocab.unk_index,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
        )

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args,ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path)
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=vocab.pad_index)
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

        # cut-off unnecessary padding from target, and flatten
        
        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = data_loaders[split]

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            correct = 0
            close = 0
            total = 0
            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['raw_length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                    batch['raw_length'], mean, logv, args.anneal_function, step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss)/batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                correct += logp.argmax(dim=1).eq(batch['target']).sum().item()
                close += torch.mul(logp.argmax(dim=1).ge(batch["target"]-10), logp.argmax(dim=1).le(batch["target"]+10)).sum().item()
                total += batch['target'].nelement()


                # bookkeepeing
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.view(1,)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        %(split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size, KL_loss.item()/batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['raw'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f, acc %f, clo %f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']), correct/total, close/total))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
                    json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s"%checkpoint_path)
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid']

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PoetryDataset(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional,
                        condition_size=7)

    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=datasets['train'].pad_idx)

    def calculate_bleu_scores(original, decoded):
        reference = original.split(' ')
        hypothesis = decoded.split(' ')
        return nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length)].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        total_BLEU_score = 0

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=0,
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(
                    batch['input'],
                    batch['length'],
                    condition=batch['category'].float())
                # logp, mean, logv, z = model(batch['input'], batch['length'], condition=None)

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.unsqueeze(0)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data.item(), NLL_loss.data.item() / batch_size,
                           KL_loss.data.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)
                    # Calculate BLEU score
                    decoded = torch.argmax(logp, dim=-1)

                    for i in range(decoded.shape[0]):
                        decoded_poem = idx2word(
                            [decoded[i]],
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)[0]
                        original_poem = idx2word(
                            [batch['target'].data[i]],
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)[0]
                        total_BLEU_score += calculate_bleu_scores(
                            original_poem, decoded_poem)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))
            if split == 'valid':
                print("Average BLEU {}".format(total_BLEU_score /
                                               decoded.shape[0]))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)