Пример #1
0
def run_epoch_test(data_iter,
                   model,
                   pad_idx,
                   vocab,
                   model_avg=False,
                   raw_prob=True):
    """Greedily decode every batch in *data_iter* and return the corpus BLEU.

    Args:
        data_iter: iterable of batches exposing ``.src`` and ``.trg``.
        model: a single model, or — when ``model_avg`` is True — the
            collection of models passed through to ``model_average``.
        pad_idx: padding token id used by ``Batch`` to build masks.
        vocab: vocabulary supporting ``vocab[word]`` and ``vocab.itos``.
        model_avg: decode with ``model_average`` instead of ``model.predict``.
        raw_prob: forwarded to ``model_average`` when averaging.

    Returns:
        The BLEU score computed by ``data.bleu_score``.
    """
    start = time.time()
    total_tokens = 0
    tokens_since_report = 0
    candidate_corpus = []
    reference_corpus = []

    def cut_at_eos(ids, first):
        # Map token ids to words, stopping before the first EOS token.
        words = []
        for pos in range(first, len(ids)):
            if ids[pos] == vocab[EOS_WORD]:
                break
            words.append(vocab.itos[ids[pos]])
        return words

    with torch.no_grad():
        for step, raw_batch in enumerate(data_iter):
            batch = Batch(raw_batch.src, raw_batch.trg, pad=pad_idx)
            # Let generated sequences exceed the source length by 20 tokens.
            max_len = batch.src.shape[1] + 20
            if model_avg:
                out = model_average(model,
                                    batch.src,
                                    batch.src_mask,
                                    max_len,
                                    vocab[BOS_WORD],
                                    raw_prob=raw_prob)
            else:
                out = model.predict(batch.src, batch.src_mask, max_len,
                                    vocab[BOS_WORD])

            for row in range(out.shape[0]):
                # Hypotheses skip index 0 (the BOS token); references start at 0.
                candidate_corpus.append(cut_at_eos(out[row], 1))
                reference_corpus.append([cut_at_eos(batch.trg_y[row], 0)])

            total_tokens += batch.ntokens
            tokens_since_report += batch.ntokens

            # Report throughput every 5 batches.
            if step % 5 == 0:
                elapsed = time.time() - start
                print("Step: %d     Tokens per Sec: %.2f    Time_used: %.2f" %
                      (step, tokens_since_report / elapsed, elapsed))
                start = time.time()
                tokens_since_report = 0

    bleu = data.bleu_score(candidate_corpus, reference_corpus)

    return bleu
Пример #2
0
def bleu(data, model, german, english, device):
    """Compute the corpus BLEU score of *model* over the *data* examples.

    Args:
        data: iterable of examples exposing ``src`` and ``trg`` attributes.
        model: the translation model passed to ``translate_sentence``.
        german: source-language field/vocab forwarded to ``translate_sentence``.
        english: target-language field/vocab forwarded to ``translate_sentence``.
        device: torch device forwarded to ``translate_sentence``.

    Returns:
        The score from ``bleu_score(outputs, targets)``.
    """
    targets = []
    outputs = []

    for example in data:
        # Direct attribute access instead of vars(example)[...] — same
        # fields, clearer intent and no dependency on __dict__ layout.
        src = example.src
        trg = example.trg

        prediction = translate_sentence(model, src, german, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        # bleu_score expects one hypothesis and a LIST of references per sample.
        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)
Пример #3
0
# Evaluate the trained az->en model on the test set and print BLEU plus a
# sample of actual vs. predicted sentences.
model = load_model('trained_model_az-en_50epochs_size_150k_max_length')

# NOTE: the original used testX[:].shape[0] — the extra [:] slice was a
# redundant copy; testX.shape[0] is equivalent.
preds = model.predict_classes(
    testX.reshape((testX.shape[0], testX.shape[1])))

preds_text = []
for pred_seq in preds:
    # Look each id up once; the original called get_word twice per position.
    words = [get_word(idx, target_tokenizer) for idx in pred_seq]
    sent = []
    for j, word in enumerate(words):
        # Blank out unknown ids (None) and immediate repeats of the
        # previous word, exactly as the original nested conditionals did.
        if word is None or (j > 0 and word == words[j - 1]):
            sent.append('')
        else:
            sent.append(word)
    preds_text.append(sent)

# Each reference wrapped in a list, as bleu_score expects.
targets = [[ref] for ref in test[:, 1]]
actuals = [' '.join(ref) for ref in test[:, 1]]
predicts = [' '.join(sent) for sent in preds_text]
print(bleu_score(preds_text, targets))
pred_df = pd.DataFrame({'actual': actuals, 'predicted': predicts})
print(pred_df.sample(15))
Пример #4
0
def run_epoch_test_with_beams(data_iter,
                              model,
                              pad_idx,
                              vocab,
                              beam_size=4,
                              n_best=4,
                              model_avg=False,
                              raw_prob=True):
    """Decode the test set with beam search and return the corpus BLEU score.

    Args:
        data_iter: iterable of batches exposing ``.src`` and ``.trg``.
        model: a single model, or — when ``model_avg`` is True — an
            iterable of models whose per-step predictions are averaged.
        pad_idx: padding token id used by ``Batch`` to build masks.
        vocab: vocabulary supporting ``vocab[word]`` and ``vocab.itos``.
        beam_size: number of beams kept alive per sample.
        n_best: number of finished hypotheses requested from ``BeamSearch``.
        model_avg: average an ensemble of models instead of using one model.
        raw_prob: when averaging, exponentiate before the mean and take the
            log afterwards (i.e. average in probability space).

    Returns:
        The BLEU score computed by ``data.bleu_score``.
    """
    start = time.time()
    total_tokens = 0
    tokens = 0  # token count since the last progress report
    candidate_corpus = []  # decoded hypotheses, one word-list per sample
    reference_corpus = []  # references, each wrapped in a one-element list

    with torch.no_grad():
        for i, batch_ in enumerate(data_iter):
            batch = Batch(batch_.src, batch_.trg, pad=pad_idx)
            # Let generated sequences exceed the source length by 20 tokens.
            max_len = batch.src.shape[1] + 20
            batch_size = batch.src.shape[0]
            beam_search = BeamSearch(beam_size,
                                     batch_size,
                                     vocab,
                                     n_best,
                                     min_length=1,
                                     max_length=max_len,
                                     block_ngram_repeat=0)

            # Encode once, then replicate the encoder memory beam_size times
            # so every beam decodes against its own copy.
            if model_avg:
                memory = [m.encode(batch.src, batch.src_mask) for m in model]
                memory = [tile(x, beam_size) for x in memory]
            else:
                memory = model.encode(batch.src, batch.src_mask)
                memory = tile(memory, beam_size)

            batch.src_mask = tile(batch.src_mask, beam_size)

            probs = []
            attns = []
            for step in range(max_len):
                pred = beam_search.current_predictions
                if model_avg:
                    # Decode with every ensemble member and average the
                    # per-step distributions (and attention weights).
                    for m, mem in zip(model, memory):
                        out, attn_temp = m.decode(
                            mem, pred, batch.src_mask,
                            subsequent_mask(pred.shape[1]).type_as(
                                batch.src.data))
                        prob = m.generator(out[:, -1])
                        if raw_prob:
                            # exp() so the mean is taken in probability space
                            # (generator presumably emits log-probs — the
                            # exp/mean/log round-trip below relies on that).
                            prob = torch.exp(prob)
                        probs.append(prob)
                        attns.append(attn_temp)

                    probs, attns = torch.stack(probs), torch.stack(attns)
                    mean_prob, attn = probs.mean(dim=0), attns.mean(dim=0)
                    if raw_prob:
                        log_probs = mean_prob.log()
                    else:
                        log_probs = mean_prob
                    # Reset accumulators for the next decoding step.
                    probs = []
                    attns = []
                else:
                    out, attn = model.decode(
                        memory, pred, batch.src_mask,
                        subsequent_mask(pred.shape[1]).type_as(batch.src.data))
                    log_probs = model.generator(out[:, -1])

                beam_search.step(log_probs, attn)
                any_finished = beam_search.is_finished.any()
                if any_finished:
                    beam_search.update_finished()
                    if beam_search.done:
                        break
                    # Drop finished beams: keep only the still-active rows of
                    # the memory and the source mask.
                    select_indices = beam_search.select_indices

                    if model_avg:
                        memory = [
                            x.index_select(0, select_indices) for x in memory
                        ]
                    else:
                        memory = memory.index_select(0, select_indices)

                    batch.src_mask = batch.src_mask.index_select(
                        0, select_indices)

            # scores = beam_search.scores
            predictions = beam_search.predictions
            # Keep only the top-ranked hypothesis of every sample.
            best_preds = [p[0] for p in predictions]

            # Assert message (Chinese): "prediction count does not match
            # reference count".
            assert len(best_preds) == len(batch.trg_y), '预测样本与参考样本的数量不符'

            for j in range(batch_size):
                words_can = []
                words_ref = []

                # Hypothesis: skip index 0 (the BOS token) and stop at EOS.
                candidate = best_preds[j]
                for k in range(1, len(candidate)):
                    if candidate[k] == vocab[EOS_WORD]:
                        break
                    words_can.append(vocab.itos[candidate[k]])

                # Reference: read from index 0 and stop at EOS.
                reference = batch.trg_y[j]
                for v in range(len(reference)):
                    if reference[v] == vocab[EOS_WORD]:
                        break
                    words_ref.append(vocab.itos[reference[v]])

                candidate_corpus.append(words_can)
                reference_corpus.append([words_ref])

            total_tokens += batch.ntokens
            tokens += batch.ntokens

            # Report throughput every 5 batches.
            if i % 5 == 0:
                elapsed = time.time() - start
                print("Step: %d     Tokens per Sec: %.2f    Time_used: %.2f" %
                      (i, tokens / elapsed, elapsed))
                start = time.time()
                tokens = 0

    bleu = data.bleu_score(candidate_corpus, reference_corpus)

    return bleu
Пример #5
0
                # NOTE(review): this excerpt starts mid-function — `labels`,
                # `outputs`, `criterion`, `att_probs`, `bleu_1..4`, `epoch`,
                # etc. are defined in the enclosing scope not shown here.
                targets = labels[:, 1:unroll_steps].contiguous().view(-1)  # shifted by one because of BOS
                loss = criterion(outputs.contiguous().view(-1, outputs.shape[-1]), targets.long())
                if att_probs is not None:  # only with RecurrentDecoder, TransformerDecoder does not have attention
                    loss += 1. * ((1. - att_probs.sum(dim=1)) ** 2).mean()  # Doubly stochastic attention regularization
                loss_sum += loss.item()

                # Evaluate each beam size 1..len(bleu_1), accumulating
                # BLEU-1..4 per beam size across dev batches.
                for beam_size in range(1, len(bleu_1) + 1):
                    prediction, _ = model.predict(data_dev, inputs, data_dev.max_length, beam_size, decoder_type=decoder_type)
                    decoded_prediction = data_dev.corpus.vocab.arrays_to_sentences(prediction)

                    # All references available for each image in the batch.
                    decoded_references = []
                    for image_name in image_names:
                        decoded_references.append(data_dev.corpus.vocab.arrays_to_sentences(data_dev.get_all_references_for_image_name(image_name)))

                    idx = beam_size - 1  # bleu lists are 0-indexed by beam size
                    bleu_1[idx] += bleu_score(decoded_prediction, decoded_references, max_n=1, weights=[1])
                    bleu_2[idx] += bleu_score(decoded_prediction, decoded_references, max_n=2, weights=[0.5] * 2)
                    bleu_3[idx] += bleu_score(decoded_prediction, decoded_references, max_n=3, weights=[1 / 3] * 3)
                    bleu_4[idx] += bleu_score(decoded_prediction, decoded_references, max_n=4, weights=[0.25] * 4)

            global_step = epoch
            # Add bleu score to board
            # (accumulated sums are averaged over the number of dev batches)
            tensorboard.writer.add_scalars('loss', {"dev_loss": loss_sum / len(dataloader_dev)}, global_step)
            for idx in range(len(bleu_1)):
                tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-1', bleu_1[idx] / len(dataloader_dev), global_step)
                tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-2', bleu_2[idx] / len(dataloader_dev), global_step)
                tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-3', bleu_3[idx] / len(dataloader_dev), global_step)
                tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-4', bleu_4[idx] / len(dataloader_dev), global_step)
            # Add predicted text to board
            tensorboard.add_predicted_text(global_step, data_dev, model, data_dev.max_length, decoder_type=decoder_type)
            tensorboard.writer.flush()