示例#1
0
def main() -> None:
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)
    dataset = SentenceDataset(args.input_file, tokenizer=tokenizer.encode)
    loader = DataLoader(dataset,
                        args.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn,
                        drop_last=False)

    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    print('Generating sentence...')
    all_hypotheses = []
    with torch.no_grad():
        for s in tqdm(loader):
            s = s.to(device)
            length = torch.sum(s != tokenizer.pad_index, dim=-1)
            bsz = s.shape[0]

            mean, logvar = model.encode(s, length)
            # z = model.reparameterize(mean, logvar)
            z = mean

            hidden = model.fc_hidden(z)
            hidden = hidden.view(bsz, -1,
                                 model.dim_hidden).transpose(0,
                                                             1).contiguous()

            start_predictions = torch.zeros(bsz, device=device).fill_(
                tokenizer.bos_index).long()
            start_state = {'hidden': hidden.permute(1, 0, 2)}
            predictions, log_probabilities = searcher.search(
                start_predictions, start_state, model.step)

            for preds in predictions:
                tokens = preds[0]
                tokens = tokens[tokens != tokenizer.eos_index].tolist()
                all_hypotheses.append(tokenizer.decode(tokens))
    print('Done')

    with open(args.output_file, 'w') as f:
        f.write('\n'.join(all_hypotheses))
示例#2
0
def main() -> None:
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)

    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    sentence1 = input('Please input sentence1: ')
    sentence2 = input('Please input sentence2: ')

    s1 = [tokenizer.bos_index
          ] + tokenizer.encode(sentence1) + [tokenizer.eos_index]
    s2 = [tokenizer.bos_index
          ] + tokenizer.encode(sentence2) + [tokenizer.eos_index]

    z1, _ = model.encode(
        torch.tensor([s1]).to(device),
        torch.tensor([len(s1)]).to(device))
    z2, _ = model.encode(
        torch.tensor([s2]).to(device),
        torch.tensor([len(s2)]).to(device))

    print("\nGenerate intermediate sentences")
    print("      %s" % sentence1)
    for r in range(1, 10):
        z = (1 - 0.1 * r) * z1 + 0.1 * r * z2
        hidden = model.fc_hidden(z)
        hidden = hidden.view(1, -1,
                             model.dim_hidden).transpose(0, 1).contiguous()

        start_predictions = torch.zeros(1, device=device).fill_(
            tokenizer.bos_index).long()
        start_state = {'hidden': hidden.permute(1, 0, 2)}
        predictions, log_probabilities = searcher.search(
            start_predictions, start_state, model.step)

        tokens = predictions[0, 0]
        tokens = tokens[tokens != tokenizer.eos_index].tolist()
        print("[%d:%d] %s" % (10 - r, r, tokenizer.decode(tokens)))
    print("      %s" % sentence2)
示例#3
0
def main() -> None:
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)

    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    z = torch.randn(args.sample_size, args.dim_latent, device=device)
    hidden = model.fc_hidden(z)
    hidden = hidden.view(args.sample_size, -1,
                         model.dim_hidden).transpose(0, 1).contiguous()

    start_predictions = torch.zeros(args.sample_size, device=device).fill_(
        tokenizer.bos_index).long()
    start_state = {'hidden': hidden.permute(1, 0, 2)}
    predictions, log_probabilities = searcher.search(start_predictions,
                                                     start_state, model.step)

    for pred in predictions:
        tokens = pred[0]
        tokens = tokens[tokens != tokenizer.eos_index].tolist()
        print(tokenizer.decode(tokens))