Exemplo n.º 1
0
def chat_with_latest(savepath=SAVE_PATH):
    """Start an interactive chat using the most recent seq2seq checkpoint.

    The checkpoint returned by ``load_latest_state_dict(savepath)`` is used
    to rebuild the vocabulary, the encoder and the decoder, which are then
    handed to ``evaluateInput`` together with a ``GreedySearchDecoder``.

    Args:
        savepath: Location forwarded to ``load_latest_state_dict``.
    """
    checkpoint = load_latest_state_dict(savepath)

    # Architecture settings used to rebuild the networks. They have to be
    # compatible with the saved weights, otherwise load_state_dict() raises.
    attn_model = 'dot'  # alternatives kept from the original: 'general', 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1

    # Rebuild the vocabulary by restoring the attribute dict that was saved
    # alongside the weights.
    voc = Voc(checkpoint['voc_dict']['name'])
    voc.__dict__ = checkpoint['voc_dict']

    # NOTE(review): the embedding weights are not loaded explicitly here;
    # presumably they are restored through the encoder/decoder state dicts
    # because the embedding is passed into both modules -- confirm.
    embedding = nn.Embedding(voc.num_words, hidden_size)

    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    # Inference only: switch to evaluation mode so dropout is disabled and
    # replies are not perturbed by randomly dropped activations.
    encoder.eval()
    decoder.eval()

    searcher = GreedySearchDecoder(encoder, decoder)
    evaluateInput(encoder, decoder, searcher, voc)
Exemplo n.º 2
0
def train():
    """Train the adversarial discriminator against the latest seq2seq model.

    A pretrained encoder/decoder pair is restored from the newest checkpoint
    under ``SAVE_PATH_SEQ2SEQ`` and wrapped in an ``RLGreedySearchDecoder``.
    For each epoch the discriminator is first evaluated on the test loader,
    then trained on the train loader, and finally checkpointed to
    ``save_dir``.

    Relies on the module-level names ``device``, ``learning_rate`` and
    ``batch_size``.
    """
    N_EPOCHS = 5
    SAVE_EVERY = 1  # checkpoint interval, in epochs
    output_size = 1
    save_dir = 'data/save/Adversarial_Discriminator/'

    # Settings used to rebuild the pretrained seq2seq networks.
    attn_model = 'dot'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1

    # Restore the vocabulary saved with the seq2seq checkpoint.
    seq2seqModel = load_latest_state_dict(savepath=SAVE_PATH_SEQ2SEQ)
    voc = Voc('name')
    voc.__dict__ = seq2seqModel['voc_dict']

    # The same embedding module is handed to the discriminator and to the
    # encoder/decoder below.
    embedding = nn.Embedding(voc.num_words, hidden_size)
    model = Adversarial_Discriminator(hidden_size, output_size, embedding)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()

    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    encoder.load_state_dict(seq2seqModel['en'])
    decoder.load_state_dict(seq2seqModel['de'])
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    searcher = RLGreedySearchDecoder(encoder, decoder, voc)

    train_data = AlexaDataset('train.json',
                              rare_word_threshold=3)  # sorry cornell
    train_data.trimPairsToVocab(voc)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_data = AlexaDataset('test_freq.json', rare_word_threshold=3)
    test_data.trimPairsToVocab(voc)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    # torch.save() does not create missing directories; make sure the target
    # exists up front so a finished epoch is never lost to a failed save.
    os.makedirs(save_dir, exist_ok=True)

    for epoch in range(1, N_EPOCHS + 1):
        # Evaluate before training so the first report reflects the
        # discriminator's state at the start of the epoch.
        test_AdversarialDiscriminatorOnLatestSeq2Seq(model, searcher,
                                                     test_loader, voc)
        loss = trainAdversarialDiscriminatorOnLatestSeq2Seq(
            model, searcher, voc, train_loader, criterion, optimizer,
            embedding, save_dir, epoch)

        if epoch % SAVE_EVERY == 0:
            torch.save(
                {
                    'iteration': epoch,
                    'model': model.state_dict(),
                    'opt': optimizer.state_dict(),
                    'loss': loss,
                    'voc_dict': voc.__dict__,
                    'embedding': embedding.state_dict()
                }, os.path.join(save_dir, '{}_{}.tar'.format(epoch, 'epochs')))
Exemplo n.º 3
0
def load(file_path, dataset):
    """Restore a seq2seq checkpoint and wrap it as a policy/environment pair.

    Relies on the module-level names ``device``, ``hidden_size``,
    ``encoder_n_layers``, ``decoder_n_layers``, ``dropout`` and
    ``attn_model``.

    Args:
        file_path: Path of the checkpoint file passed to ``torch.load``.
        dataset: Object forwarded unchanged to ``Env``.

    Returns:
        A ``(policy, env)`` tuple where ``policy`` is an
        ``RLGreedySearchDecoder`` built from the restored encoder/decoder
        and ``env`` is ``Env(voc, dataset)``.
    """
    checkpoint = torch.load(file_path, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    embedding_sd = checkpoint['embedding']

    # Rebuild the vocabulary from the attribute dict saved in the checkpoint.
    voc = Voc(checkpoint['voc_dict']['name'])
    voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # A checkpoint is always read above, so its weights are always applied.
    # (Previously this was gated on an unrelated ``loadFilename`` name, which
    # could silently leave the networks randomly initialised.)
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)

    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Put both networks in evaluation mode before building the policy.
    encoder.eval()
    decoder.eval()

    policy = RLGreedySearchDecoder(encoder, decoder, voc)
    env = Env(voc, dataset)
    return policy, env
Exemplo n.º 4
0
def loadModel(hidden_size=hidden_size,
              encoder_n_layers=encoder_n_layers,
              decoder_n_layers=decoder_n_layers,
              dropout=dropout,
              attn_model=attn_model,
              learning_rate=learning_rate,
              decoder_learning_ratio=decoder_learning_ratio,
              directory=SAVE_PATH):
    """Restore models, optimizers and vocabulary from the latest checkpoint.

    Relies on the module-level name ``device``.

    Args:
        hidden_size: Embedding dimension, also passed to encoder and decoder.
        encoder_n_layers: Layer count passed to ``EncoderRNN``.
        decoder_n_layers: Layer count passed to ``LuongAttnDecoderRNN``.
        dropout: Dropout value passed to both networks.
        attn_model: Attention variant passed to ``LuongAttnDecoderRNN``.
        learning_rate: Learning rate of the encoder optimizer.
        decoder_learning_ratio: Multiplier applied to ``learning_rate`` for
            the decoder optimizer.
        directory: Location forwarded to ``load_latest_state_dict``.

    Returns:
        Tuple ``(episode, encoder, decoder, encoder_optimizer,
        decoder_optimizer, voc)`` where ``episode`` is the checkpoint's
        ``'iteration'`` entry.
    """
    state_dict = load_latest_state_dict(directory)
    episode = state_dict['iteration']
    encoder_sd = state_dict['en']
    decoder_sd = state_dict['de']
    encoder_optimizer_sd = state_dict['en_opt']
    decoder_optimizer_sd = state_dict['de_opt']
    embedding_sd = state_dict['embedding']

    # The constructor argument is irrelevant: every attribute is replaced by
    # the dict saved in the checkpoint.
    voc = Voc('placeholder_name')
    voc.__dict__ = state_dict['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)
    embedding.to(device)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # Keep the restored optimizer state on the same device as the models.
    # ``.to(device)`` is used instead of comparing ``device`` with the string
    # 'cuda': that comparison is False for torch.device objects and for
    # strings such as 'cuda:0', and ``.to`` is a no-op when nothing has to
    # move (e.g. on CPU).
    for optimizer in (encoder_optimizer, decoder_optimizer):
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)
    print('Optimizers built and ready to go!')

    return episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc
Exemplo n.º 5
0
def train():
    """Build (or resume) the seq2seq chatbot and run ``trainIters`` on it.

    Training pairs come from ``AlexaDataset`` and the vocabulary from
    ``Voc.from_dataset``. When ``loadFilename`` is set, model, optimizer,
    embedding and vocabulary state are restored from that checkpoint before
    training continues.

    Relies on the module-level names ``BASE_DIR`` and ``device``.
    """
    save_dir = os.path.join(BASE_DIR, "data", "save")
    corpus_name = "Alexa"

    dataset = AlexaDataset()
    pairs = dataset.data
    voc = Voc.from_dataset(dataset)

    # Example for validation: build one tiny batch and print its pieces.
    small_batch_size = 5
    batches = batch2TrainData(
        voc, [random.choice(pairs) for _ in range(small_batch_size)])
    input_variable, lengths, target_variable, mask, max_target_len = batches

    print("input_variable:", input_variable)
    print("lengths:", lengths)
    print("target_variable:", target_variable)
    print("mask:", mask)
    print("max_target_len:", max_target_len)

    # Configure models
    model_name = 'cb_model'
    attn_model = 'dot'  # alternatives kept from the original: 'general', 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch.
    # Example of resuming from iteration 4000:
    #   loadFilename = os.path.join(
    #       save_dir, model_name, corpus_name,
    #       '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #       '{}_checkpoint.tar'.format(4000))
    loadFilename = None

    # Load model if a loadFilename is provided
    checkpoint = None
    if loadFilename:
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored onto whatever ``device`` this run uses.
        checkpoint = torch.load(loadFilename, map_location=device)
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        # The saved vocabulary replaces the one built from the dataset.
        voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_iteration = 4000
    print_every = 1
    save_every = 500

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # Keep any restored optimizer state on the same device as the models.
    # ``.to(device)`` replaces the former unconditional ``.cuda()``, which
    # fails on machines without a GPU; it is a no-op when nothing must move.
    for optimizer in (encoder_optimizer, decoder_optimizer):
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

    # Run training iterations
    print("Starting Training!")
    trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding, encoder_n_layers,
               decoder_n_layers, save_dir, n_iteration, batch_size,
               print_every, save_every, clip, corpus_name, loadFilename,
               checkpoint, hidden_size)