Example #1
def generateResults(encoder_decoder: EncoderDecoder, data_loader,
                    resultFilename, input_tokens_list):
    idx_to_tok = encoder_decoder.lang.idx_to_tok
    all_output_seqs = []
    all_target_seqs = []

    for batch_idx, (input_idxs, target_idxs, _,
                    _) in enumerate(tqdm(data_loader)):
        input_lengths = (input_idxs != 0).long().sum(dim=1)

        sorted_lengths, order = torch.sort(input_lengths, descending=True)
        input_variable = Variable(input_idxs[order, :][:, :max(input_lengths)])
        target_variable = Variable(target_idxs[order, :])

        output_log_probs, output_seqs = encoder_decoder(
            input_variable, list(sorted_lengths))
        all_output_seqs.extend(trim_seqs(output_seqs))
        all_target_seqs.extend([list(seq[seq > 0])]
                               for seq in to_np(target_variable))

    with open(resultFilename, 'w') as fo:
        for seq, input_tokens in zip(all_output_seqs, input_tokens_list):
            # Truncate at the first EOS token (index 2), if present.
            eos_idx = seq.index(2) if 2 in seq else len(seq) - 1
            string = seq_to_string(seq[:eos_idx + 1],
                                   idx_to_tok,
                                   input_tokens=None)
            fo.write(string + '\n')

    return None
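Example #1 leans on a few helpers (trim_seqs, seq_to_string, to_np) that are imported elsewhere in the repo. A minimal sketch of what they plausibly look like, inferred from how they are called above (the real implementations may differ):

def to_np(x):
    # Variable/tensor -> numpy array.
    return x.data.cpu().numpy()

def trim_seqs(seqs):
    # (batch x len x 1) tensor of sampled indices -> list of Python lists,
    # each cut just after the first EOS token (index 2).
    trimmed = []
    for seq in seqs:
        trimmed_seq = []
        for idx in seq:
            idx = int(idx)
            trimmed_seq.append(idx)
            if idx == 2:  # EOS
                break
        trimmed.append(trimmed_seq)
    return trimmed

def seq_to_string(seq, idx_to_tok, input_tokens=None):
    # List of indices -> whitespace-joined token string.
    return ' '.join(idx_to_tok.get(int(idx), '<UNK>') for idx in seq)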
Example #2
def evaluate(encoder_decoder: EncoderDecoder, data_loader):

    # With reduce=False (the legacy spelling of reduction='none'), NLLLoss
    # returns one loss per position; positions whose target equals the
    # ignored index (0, i.e. padding) get a loss of exactly 0, so the output
    # keeps the same length as the flattened target.
    loss_function = torch.nn.NLLLoss(ignore_index=0, reduce=False)

    losses = []
    all_output_seqs = []
    all_target_seqs = []

    for batch_idx, (input_idxs, target_idxs, _,
                    _) in enumerate(tqdm(data_loader)):
        input_lengths = (input_idxs != 0).long().sum(dim=1)

        sorted_lengths, order = torch.sort(input_lengths, descending=True)

        # volatile=True is the pre-0.4 PyTorch equivalent of torch.no_grad()
        input_variable = Variable(input_idxs[order, :][:, :max(input_lengths)],
                                  volatile=True)
        target_variable = Variable(target_idxs[order, :], volatile=True)
        batch_size = input_variable.shape[0]

        output_log_probs, output_seqs = encoder_decoder(
            input_variable, list(sorted_lengths))
        all_output_seqs.extend(trim_seqs(output_seqs))
        all_target_seqs.extend([list(seq[seq > 0])]
                               for seq in to_np(target_variable))

        flattened_log_probs = output_log_probs.view(
            batch_size * encoder_decoder.decoder.max_length, -1)
        batch_losses = loss_function(flattened_log_probs,
                                     target_variable.contiguous().view(-1))
        losses.extend(list(to_np(batch_losses)))

    mean_loss = sum(losses) / len(losses)

    bleu_score = corpus_bleu(all_target_seqs,
                             all_output_seqs,
                             smoothing_function=SmoothingFunction().method2)
    print('BLEU SCORE: ' + str(bleu_score))
    return mean_loss, bleu_score
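A self-contained check of the loss behaviour questioned in the comment above (a sketch, not part of the original code): with per-position losses, ignored (padding) targets contribute exactly 0 and the output keeps its full length.

import torch

log_probs = torch.log_softmax(torch.randn(4, 10), dim=-1)
targets = torch.tensor([3, 0, 7, 0])  # 0 is the padding index
loss_fn = torch.nn.NLLLoss(ignore_index=0, reduction='none')
losses = loss_fn(log_probs, targets)
print(losses.shape)          # torch.Size([4]) -- same length as targets
print(losses[1], losses[3])  # tensor(0.) tensor(0.) -- padded positions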
Example #3
def validate(args, val_loader, model):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    val_logger = LogCollector()

    # switch to evaluate mode
    model.val_start()
    model.logger = val_logger
    end = time.time()
    max_length = 50
    for i, val_data in enumerate(val_loader):

        decoder_outputs, sampled_idxs, mean, logvar, z = model.forward_emb(*val_data)

        if torch.cuda.is_available():
            val_data[1] = val_data[1].cuda()

        batch_size = val_data[1].size(0)
        flattened_outputs = decoder_outputs.view(batch_size * max_length, -1)

        loss = model.loss_function(flattened_outputs, val_data[1].contiguous().view(-1))

        kl_loss = (-0.5 * torch.sum(logvar - torch.pow(mean, 2) - torch.exp(logvar) + 1, 1)).mean().squeeze()

        model.logger.update('KL Loss', kl_loss.item(), 1)
        model.logger.update('MLE Loss', loss.item(), 1)

        batch_outputs = trim_seqs(sampled_idxs)

        np_targets = trim_seqs(val_data[1].unsqueeze(-1))

        batch_targets = [[seq] for seq in np_targets]

        corpus_bleu_score = corpus_bleu(
            batch_targets, batch_outputs,
            smoothing_function=SmoothingFunction().method1)

        model.logger.update('C-BLEU', corpus_bleu_score, batch_size)

        batch_bleu_score = 0
        for j in range(batch_size):
            batch_bleu_score += sentence_bleu(
                batch_targets[j], batch_outputs[j],
                weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=SmoothingFunction().method1)
        batch_bleu_score = batch_bleu_score / batch_size

        model.logger.update('S-BLEU', batch_bleu_score, batch_size)

        batch_time.update(time.time() - end)
        end = time.time()
        # Print log info
        
        model.Eiters += 1
        if model.Eiters % args.logging_step == 0:
            print('Test: [{0}/{1}]\t'
                    '{e_log}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    .format(
                        i, len(val_loader), batch_time=batch_time,
                        e_log=str(model.logger)))
            
    print('Test: [{0}/{1}]\t'
            '{e_log}\t'
            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
            .format(
                i, len(val_loader), batch_time=batch_time,
                e_log=str(model.logger))) 
    
    # Note: this returns the sentence-level BLEU of the final batch only.
    return batch_bleu_score
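The kl_loss line implements the closed-form KL divergence between the encoder's diagonal Gaussian q(z|x) = N(mean, exp(logvar)) and a standard normal prior. A quick sanity check against torch.distributions (a sketch, not from the original code):

import torch
from torch.distributions import Normal, kl_divergence

mean, logvar = torch.randn(8, 16), torch.randn(8, 16)
closed_form = (-0.5 * torch.sum(logvar - mean.pow(2) - logvar.exp() + 1, 1)).mean()
reference = kl_divergence(Normal(mean, (0.5 * logvar).exp()),
                          Normal(0.0, 1.0)).sum(1).mean()
assert torch.allclose(closed_form, reference, atol=1e-5)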
Example #4
def validate(args, val_loader, model):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    val_logger = LogCollector()

    # switch to evaluate mode
    model.val_start()
    model.logger = val_logger
    end = time.time()
    for i, val_data in enumerate(val_loader):

        decoder_outputs, sampled_idxs, mean, logvar, z = model.forward_emb(*val_data)

        if torch.cuda.is_available():
            val_data[1] = val_data[1].cuda()

        batch_size = val_data[1].size(0)

        batch_outputs = trim_seqs(sampled_idxs)
        
        np_targets = trim_seqs(val_data[1].unsqueeze(-1))

        batch_targets = [[seq] for seq in np_targets]

        corpus_bleu_score = corpus_bleu(batch_targets, batch_outputs, smoothing_function=SmoothingFunction().method1)
        model.logger.update('C-BLEU', corpus_bleu_score, batch_size)
        
        corpus_nist_score = corpus_nist(batch_targets, batch_outputs, n=4)

        model.logger.update('C-NIST', corpus_nist_score, batch_size)

        corpus_meteor_score = 0
        rouge_scores = 0
        for j in range(batch_size):
            reference = []
            for tid in range(len(batch_targets[j][0])):
                tok = batch_targets[j][0][tid]
                reference.append(vocab_inv[str(tok)])
            ref = [str(' '.join(reference))]

            hypothesis = []
            for tid in range(len(batch_outputs[j])):
                tok = batch_outputs[j][tid]
                hypothesis.append(vocab_inv[str(tok)])
            hypo = str(' '.join(hypothesis))
            corpus_meteor_score += meteor_score(ref, hypo)
            rouge_scores += rouge.score(ref[0], hypo)['rougeL'][2]

        rouge_scores = rouge_scores / batch_size
        model.logger.update('ROUGE-L', rouge_scores, batch_size)
        corpus_meteor_score = corpus_meteor_score / batch_size
        model.logger.update('C-METEOR', corpus_meteor_score, batch_size)
        batch_time.update(time.time() - end)
        end = time.time()
        # Print log info
        
        model.Eiters += 1
        if model.Eiters % args.logging_step == 0:
            print('Test: [{0}/{1}]\t'
                    '{e_log}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    .format(
                        i, len(val_loader), batch_time=batch_time,
                        e_log=str(model.logger)))
            
    print('Test: [{0}/{1}]\t'
            '{e_log}\t'
            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
            .format(
                i, len(val_loader), batch_time=batch_time,
                e_log=str(model.logger))) 
    
    # Metrics are reported through model.logger; this always returns 0.
    return 0
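Example #4 additionally relies on two globals that are not defined in the snippet: rouge and vocab_inv. A plausible setup, inferred from the calls rouge.score(ref[0], hypo)['rougeL'][2] and vocab_inv[str(tok)] (an assumption, not the original code):

from rouge_score import rouge_scorer

# rouge.score(target, prediction)['rougeL'] is a (precision, recall,
# fmeasure) namedtuple, so the [2] above selects the F-measure.
rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

# vocab_inv maps stringified token indices back to surface tokens,
# e.g. built from a tok_to_idx vocabulary (hypothetical name):
# vocab_inv = {str(idx): tok for tok, idx in tok_to_idx.items()}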
Example #5
def train(encoder_decoder: EncoderDecoder, train_data_loader: DataLoader,
          model_name, val_data_loader: DataLoader, keep_prob,
          teacher_forcing_schedule, lr, max_length, use_decay, data_path):

    global_step = 0
    loss_function = torch.nn.NLLLoss(ignore_index=0)
    optimizer = optim.Adam(encoder_decoder.parameters(), lr=lr)
    model_path = './saved/' + model_name + '/'

    gamma = 0.5 if use_decay else 1.0
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    best_bleu = 0.0

    for epoch, teacher_forcing in enumerate(teacher_forcing_schedule):
        print('epoch %i' % (epoch), flush=True)
        print('lr: ' + str(scheduler.get_lr()))

        for batch_idx, (input_idxs, target_idxs, input_tokens,
                        target_tokens) in enumerate(tqdm(train_data_loader)):
            # input_idxs and target_idxs have dim (batch_size x max_len)
            # they are NOT sorted by length
            lengths = (input_idxs != 0).long().sum(dim=1)
            sorted_lengths, order = torch.sort(lengths, descending=True)

            input_variable = Variable(input_idxs[order, :][:, :max(lengths)])
            target_variable = Variable(target_idxs[order, :])

            optimizer.zero_grad()
            output_log_probs, output_seqs = encoder_decoder(
                input_variable,
                list(sorted_lengths),
                targets=target_variable,
                keep_prob=keep_prob,
                teacher_forcing=teacher_forcing)
            batch_size = input_variable.shape[0]

            flattened_outputs = output_log_probs.view(batch_size * max_length,
                                                      -1)

            batch_loss = loss_function(flattened_outputs,
                                       target_variable.contiguous().view(-1))

            batch_loss.backward()
            optimizer.step()
            batch_outputs = trim_seqs(output_seqs)

            batch_targets = [[list(seq[seq > 0])]
                             for seq in list(to_np(target_variable))]

            batch_bleu_score = corpus_bleu(batch_targets, batch_outputs)

            if global_step % 100 == 0:

                writer.add_scalar('train_batch_loss', batch_loss, global_step)
                writer.add_scalar('train_batch_bleu_score', batch_bleu_score,
                                  global_step)

                for tag, value in encoder_decoder.named_parameters():
                    tag = tag.replace('.', '/')
                    writer.add_histogram('weights/' + tag,
                                         value,
                                         global_step,
                                         bins='doane')
                    writer.add_histogram('grads/' + tag,
                                         to_np(value.grad),
                                         global_step,
                                         bins='doane')

            global_step += 1

            debug = False
            if debug and batch_idx == 5:
                break

        val_loss, val_bleu_score = evaluate(encoder_decoder, val_data_loader)

        writer.add_scalar('val_loss', val_loss, global_step=global_step)
        writer.add_scalar('val_bleu_score',
                          val_bleu_score,
                          global_step=global_step)

        encoder_embeddings = encoder_decoder.encoder.embedding.weight.data
        encoder_vocab = encoder_decoder.lang.tok_to_idx.keys()
        writer.add_embedding(encoder_embeddings,
                             metadata=encoder_vocab,
                             global_step=0,
                             tag='encoder_embeddings')

        decoder_embeddings = encoder_decoder.decoder.embedding.weight.data
        decoder_vocab = encoder_decoder.lang.tok_to_idx.keys()
        writer.add_embedding(decoder_embeddings,
                             metadata=decoder_vocab,
                             global_step=0,
                             tag='decoder_embeddings')

        calc_bleu_score = get_bleu(encoder_decoder, data_path, None, 'dev')
        print('val loss: %.5f, dev BLEU score: %.5f' %
              (val_loss, calc_bleu_score),
              flush=True)
        if calc_bleu_score > best_bleu:
            print("Best BLEU score! Saving model...")
            best_bleu = calc_bleu_score
            torch.save(
                encoder_decoder, "%s%s_%i_%.3f.pt" %
                (model_path, model_name, epoch, calc_bleu_score))

        print('-' * 100, flush=True)

        scheduler.step()
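train() expects teacher_forcing_schedule to provide one teacher-forcing probability per epoch (the loop enumerates it). A typical linear-decay construction (a sketch under that assumption, not from the original code):

import numpy as np

num_epochs = 50
teacher_forcing_schedule = np.arange(1.0, 0.0, -1.0 / num_epochs)
# Epoch 0 trains with full teacher forcing; later epochs feed the decoder
# its own predictions with increasing probability.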
Example #6
def train(encoder_decoder: EncoderDecoder, train_data_loader: DataLoader,
          model_name, val_data_loader: DataLoader, keep_prob,
          teacher_forcing_schedule, lr, max_length, device,
          test_data_loader: DataLoader):

    global_step = 0
    loss_function = torch.nn.NLLLoss(ignore_index=0)
    optimizer = optim.Adam(encoder_decoder.parameters(), lr=lr)
    model_path = './model/' + model_name + '/'
    trained_model = encoder_decoder

    for epoch, teacher_forcing in enumerate(teacher_forcing_schedule):
        print('epoch %i' % epoch, flush=True)
        correct_predictions = 0.0
        all_predictions = 0.0
        for batch_idx, (input_idxs, target_idxs, input_tokens,
                        target_tokens) in enumerate(tqdm(train_data_loader)):
            # Empty the cache at each batch
            torch.cuda.empty_cache()
            # input_idxs and target_idxs have dim (batch_size x max_len)
            # they are NOT sorted by length

            lengths = (input_idxs != 0).long().sum(dim=1)
            sorted_lengths, order = torch.sort(lengths, descending=True)

            input_variable = input_idxs[order, :][:, :max(lengths)]
            input_variable = input_variable.to(device)
            target_variable = target_idxs[order, :]
            target_variable = target_variable.to(device)

            optimizer.zero_grad()
            output_log_probs, output_seqs = encoder_decoder(
                input_variable,
                list(sorted_lengths),
                targets=target_variable,
                keep_prob=keep_prob,
                teacher_forcing=teacher_forcing)

            batch_size = input_variable.shape[0]

            flattened_outputs = output_log_probs.view(batch_size * max_length,
                                                      -1)

            batch_loss = loss_function(flattened_outputs,
                                       target_variable.contiguous().view(-1))
            batch_outputs = trim_seqs(output_seqs)

            batch_targets = [[list(seq[seq > 0])]
                             for seq in list(to_np(target_variable))]

            for i in range(len(batch_outputs)):
                y_i = batch_outputs[i]
                tgt_i = batch_targets[i][0]

                if y_i == tgt_i:
                    correct_predictions += 1.0

                all_predictions += 1.0

            batch_loss.backward()
            optimizer.step()

            batch_bleu_score = corpus_bleu(
                batch_targets,
                batch_outputs,
                smoothing_function=SmoothingFunction().method1)

            if global_step % 100 == 0:

                writer.add_scalar('train_batch_loss', batch_loss, global_step)
                writer.add_scalar('train_batch_bleu_score', batch_bleu_score,
                                  global_step)

                for tag, value in encoder_decoder.named_parameters():
                    tag = tag.replace('.', '/')
                    writer.add_histogram('weights/' + tag,
                                         value,
                                         global_step,
                                         bins='doane')
                    writer.add_histogram('grads/' + tag,
                                         to_np(value.grad),
                                         global_step,
                                         bins='doane')

            global_step += 1

        encoder_embeddings = encoder_decoder.encoder.embedding.weight.data
        encoder_vocab = encoder_decoder.lang.tok_to_idx.keys()
        writer.add_embedding(encoder_embeddings,
                             metadata=encoder_vocab,
                             global_step=0,
                             tag='encoder_embeddings')

        decoder_embeddings = encoder_decoder.decoder.embedding.weight.data
        decoder_vocab = encoder_decoder.lang.tok_to_idx.keys()
        writer.add_embedding(decoder_embeddings,
                             metadata=decoder_vocab,
                             global_step=0,
                             tag='decoder_embeddings')

        print('training accuracy %.5f' %
              (100.0 * (correct_predictions / all_predictions)))
        torch.save(encoder_decoder,
                   "%s%s_%i.pt" % (model_path, model_name, epoch))
        trained_model = encoder_decoder

        print('-' * 100, flush=True)

    torch.save(encoder_decoder, "%s%s_final.pt" % (model_path, model_name))
    return trained_model
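Both train() variants sort each batch by descending input length before calling the encoder. That is the standard preparation for pack_padded_sequence, which in older PyTorch required pre-sorted batches. A minimal sketch of the pattern (illustrative values only):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

batch = torch.tensor([[4, 5, 0, 0],
                      [7, 8, 9, 2]])              # 0 = padding
lengths = (batch != 0).long().sum(dim=1)          # tensor([2, 4])
sorted_lengths, order = torch.sort(lengths, descending=True)
sorted_batch = batch[order, :][:, :sorted_lengths.max()]
packed = pack_padded_sequence(sorted_batch.unsqueeze(-1).float(),
                              sorted_lengths, batch_first=True)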
Example #7
def test(encoder_decoder: EncoderDecoder,
         test_data_loader: DataLoader,
         max_length,
         device,
         log_files=None):

    correct_predictions = 0.0
    struct_correct_only = 0.0
    all_predictions = 0.0
    for batch_idx, (input_idxs, target_idxs, input_tokens,
                    target_tokens) in enumerate(tqdm(test_data_loader)):
        # input_idxs and target_idxs have dim (batch_size x max_len)
        # they are NOT sorted by length
        lengths = (input_idxs != 0).long().sum(dim=1)
        sorted_lengths, order = torch.sort(lengths, descending=True)

        input_variable = input_idxs[order, :][:, :max(lengths)]
        input_variable = input_variable.to(device)
        input_tokens = [input_tokens[o] for o in order]
        target_variable = target_idxs[order, :]
        target_variable = target_variable.to(device)
        target_tokens = [target_tokens[o] for o in order]

        output_log_probs, output_seqs = encoder_decoder(
            input_variable, list(sorted_lengths), targets=target_variable)

        batch_size = input_variable.shape[0]

        flattened_outputs = output_log_probs.view(batch_size * max_length, -1)

        batch_outputs = trim_seqs(output_seqs)

        batch_inputs = [[list(seq[seq > 0])]
                        for seq in list(to_np(input_variable))]

        batch_targets = [[list(seq[seq > 0])]
                         for seq in list(to_np(target_variable))]

        for i in range(len(batch_outputs)):
            # Get the input and output tokens
            y_i = batch_outputs[i]
            tgt_i = batch_targets[i][0]
            src_i = batch_inputs[i][0]

            # Make dictionaries of unknown words and their tokens
            src_token_list = input_tokens[i].split()
            tar_token_list = target_tokens[i].split()
            src_unk_to_tok = {
                src_i[j]: src_token_list[j]
                for j in range(len(src_i))
                if not src_i[j] in encoder_decoder.lang.idx_to_tok
            }
            tar_unk_to_tok = {
                tgt_i[j]: tar_token_list[j]
                for j in range(len(tgt_i))
                if not tgt_i[j] in encoder_decoder.lang.idx_to_tok
            }

            # Translate tokens to words
            correct_seq = [
                encoder_decoder.lang.idx_to_tok[n]
                if n in encoder_decoder.lang.idx_to_tok else tar_unk_to_tok[n]
                for n in tgt_i
            ]
            predicted_seq = [
                encoder_decoder.lang.idx_to_tok[n]
                if n in encoder_decoder.lang.idx_to_tok else src_unk_to_tok[n]
                for n in y_i
            ]
            input_seq = [
                encoder_decoder.lang.idx_to_tok[n]
                if n in encoder_decoder.lang.idx_to_tok else src_unk_to_tok[n]
                for n in src_i
            ]

            if y_i == tgt_i:
                correct_predictions += 1.0

                if log_files is not None:
                    log_files[1].write("INPUT PHRASE:      {}\n".format(
                        ' '.join(input_seq)))
                    log_files[1].write("CORRECT LABEL:     {}\n".format(
                        ' '.join(correct_seq)))
                    log_files[1].write("PREDICTED LABEL:   {}\n".format(
                        ' '.join(predicted_seq)))
                    log_files[1].write("\n\n")
            else:
                if log_files is not None:
                    log_files[0].write("INPUT PHRASE:      {}\n".format(
                        ' '.join(input_seq)))
                    log_files[0].write("CORRECT LABEL:     {}\n".format(
                        ' '.join(correct_seq)))
                    log_files[0].write("PREDICTED LABEL:   {}\n".format(
                        ' '.join(predicted_seq)))
                    log_files[0].write("\n\n")

            all_predictions += 1.0
    return 100.0 * (correct_predictions / all_predictions)
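The src_unk_to_tok / tar_unk_to_tok dictionaries implement a simple copy trick: an index outside the shared vocabulary is mapped back to the raw token at the same position of the original sentence. A toy illustration with hypothetical values:

idx_to_tok = {1: '<SOS>', 2: '<EOS>', 10: 'delete', 11: 'from'}
src_i = [10, 11, 907]                    # 907 is out-of-vocabulary
src_token_list = 'delete from employees'.split()
src_unk_to_tok = {src_i[j]: src_token_list[j]
                  for j in range(len(src_i))
                  if src_i[j] not in idx_to_tok}
print(src_unk_to_tok)                    # {907: 'employees'}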
Example #8
    def train_emb(self, sources, qs_tar, mask_tar, temp_tar, sqls_lens,
                  question_lens, mask_lens, template_lens, ids, *args):

        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        sources = Variable(sources)
        qs_tar = Variable(qs_tar)
        mask_tar = Variable(mask_tar)
        temp_tar = Variable(temp_tar)
        if torch.cuda.is_available():
            sources = sources.cuda()
            temp_tar = temp_tar.cuda()
            mask_tar = mask_tar.cuda()
            qs_tar = qs_tar.cuda()

        data = [
            sources, qs_tar, mask_tar, temp_tar, sqls_lens, question_lens,
            mask_lens, template_lens, ids
        ]
        # compute the embeddings
        decoder_outputs, sampled_idxs, mean, logvar, z = self.forward_emb(
            *data)

        self.optimizer.zero_grad()

        batch_size = sources.shape[0]
        max_length = 50

        flattened_outputs = decoder_outputs.view(batch_size * max_length, -1)

        # Remap token index 5 (the start-of-sequence marker, judging by the
        # variable name) to the padding index 0 so that the loss below,
        # which uses ignore_index=0, skips those positions.
        sos_tar = qs_tar.clone()
        sos_tar[sos_tar == 5] = 0

        loss = self.loss_function(flattened_outputs,
                                  sos_tar.contiguous().view(-1))

        kl_loss = (
            -0.5 *
            torch.sum(logvar - torch.pow(mean, 2) - torch.exp(logvar) + 1,
                      1)).mean().squeeze()

        self.logger.update('KL Loss', kl_loss.item(), 1)
        self.logger.update('CE Loss', loss.item(), 1)

        loss = loss + kl_loss

        loss.backward()

        self.optimizer.step()

        batch_outputs = trim_seqs(sampled_idxs)

        np_targets = trim_seqs(qs_tar.unsqueeze(-1))
        batch_targets = [[seq] for seq in np_targets]

        corpus_bleu_score = corpus_bleu(
            batch_targets,
            batch_outputs,
            smoothing_function=SmoothingFunction().method1)

        self.logger.update('C-BLEU', corpus_bleu_score, qs_tar.size(0))
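A runnable toy version of the sos_tar masking step above, showing how remapping the (presumed) SOS index 5 to the padding index lets NLLLoss(ignore_index=0) skip it:

import torch

qs_tar = torch.tensor([[5, 42, 17, 2, 0]])
sos_tar = qs_tar.clone()
sos_tar[sos_tar == 5] = 0
print(sos_tar)  # tensor([[ 0, 42, 17,  2,  0]])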