Пример #1
0
def evaluate(in_session, in_model, in_dataset, batch_size=64):
    """Run a forward-only pass over ``in_dataset`` and report VAE losses.

    Prints 10 randomly chosen decoded sequences, then returns a dict with
    the mean nll/kl losses, the combined loss (nll + kl * kl_w), its
    perplexity, and the last batch's kl weight.
    """
    kl_losses, nll_losses, kl_weights, outputs = [], [], [], []
    for enc_in, dec_in, dec_out in batch_generator(in_dataset, batch_size):
        loss_dict, batch_outputs = in_model.step(enc_in,
                                                 dec_in,
                                                 dec_out,
                                                 in_session,
                                                 forward_only=True)
        kl_losses.append(loss_dict['kl_loss'])
        nll_losses.append(loss_dict['nll_loss'])
        kl_weights.append(loss_dict['kl_w'])
        outputs.extend(batch_outputs)
    print('10 random eval sequences:')
    for idx in np.random.choice(range(len(outputs)), size=10):
        seq = list(outputs[idx])
        # truncate the sequence at the first end-of-sequence token
        if EOS in seq:
            seq = seq[:seq.index(EOS)]
        print(' '.join(seq))
    loss = np.mean(
        np.array(kl_losses) * np.array(kl_weights) + np.array(nll_losses))
    return {
        'nll_loss': np.mean(nll_losses),
        'kl_loss': np.mean(kl_losses),
        'loss': loss,
        'perplexity': np.exp(loss),
        'kl_w': loss_dict['kl_w']
    }
Пример #2
0
def train(in_session, in_model, in_train, in_dev, in_dst_folder, nb_epoch,
          batch_size, early_stopping_threshold, **kwargs):
    """Train ``in_model`` for up to ``nb_epoch`` epochs.

    After each epoch, evaluates on ``in_dev``; saves a checkpoint to
    ``in_dst_folder`` whenever the dev loss improves, and stops early once
    more than ``early_stopping_threshold`` epochs pass without improvement.
    """
    best_dev_loss = np.inf
    stale_epochs = 0
    for epoch in range(nb_epoch):
        epoch_losses = []
        for enc_inp, dec_out in batch_generator(in_train, batch_size):
            epoch_losses.append(in_model.step(enc_inp, dec_out, in_session))
        print('Epoch {} out of {} results'.format(epoch, nb_epoch))
        print('train loss: {:.3f}'.format(np.mean(epoch_losses)))
        dev_eval = evaluate(in_session, in_model, in_dev)
        print('; '.join('dev {}: {:.3f}'.format(key, value)
                        for key, value in dev_eval.items()))
        if dev_eval['loss'] < best_dev_loss:
            best_dev_loss = dev_eval['loss']
            in_model.save(in_dst_folder, in_session)
            print('New best loss. Saving checkpoint')
            stale_epochs = 0
        else:
            stale_epochs += 1
        if early_stopping_threshold < stale_epochs:
            print('Early stopping after {} epochs'.format(epoch))
            break
    print('Optimization Finished!')
Пример #3
0
    def train(self):
        """Train ``self.net`` on ``self.data_train`` with per-epoch evaluation.

        With probability ``random_input_prob`` a turn's inputs are replaced
        by a randomly drawn entry from ``self.random_input`` and its label is
        rewritten to the UNK action.  Saves a checkpoint on every new best
        dev accuracy and stops early once ``early_stopping_threshold`` epochs
        pass without improvement.
        """
        print('\n:: training started\n')
        epochs = self.config['epochs']
        best_dev_accuracy = 0.0
        epochs_without_improvement = 0
        random_input_prob = self.config.get('random_input_prob', 0.0)
        unk_action_id = self.action_templates.index(UNK)
        for j in range(epochs):
            batch_gen = batch_generator(self.data_train, self.batch_size)
            for batch in batch_gen:
                # copy so the in-place random-input substitution below does
                # not mutate the underlying training data
                batch_copy = [np.copy(elem) for elem in batch]
                enc_inp, dec_inp, dec_out, bow_out, context_features, action_masks, y = batch_copy
                # number of real (non-zero-labelled) turns in this batch
                # (assumes label 0 marks padding -- TODO confirm)
                num_turns = np.sum(np.vectorize(lambda x: x != 0)(y))
                for idx in range(num_turns):
                    if np.random.random() < random_input_prob:
                        # draw one random (OOD) input and splice it into
                        # this turn's encoder/decoder/bow slots
                        random_input_idx = np.random.choice(
                            range(self.random_input[0].shape[0]))
                        random_input = [
                            random_input_i[random_input_idx]
                            for random_input_i in self.random_input
                        ]
                        enc_inp[0][idx], dec_inp[0][idx], dec_out[0][
                            idx], bow_out[0][idx] = random_input
                        # relabel the substituted turn as the UNK action
                        y[0][idx] = unk_action_id
                batch_loss_dict, lr = self.net.train_step(
                    enc_inp, dec_inp, dec_out, bow_out, context_features,
                    action_masks, y)
            # evaluate every epoch
            train_accuracy, train_mean_losses = evaluate(self.net,
                                                         self.data_train,
                                                         runs_number=1)
            train_loss_report = ' '.join([
                '{}: {:.3f}'.format(key, value)
                for key, value in train_mean_losses.items()
            ])
            dev_accuracy, dev_mean_losses = evaluate(self.net,
                                                     self.data_dev,
                                                     runs_number=1)
            dev_loss_report = ' '.join([
                '{}: {:.3f}'.format(key, value)
                for key, value in dev_mean_losses.items()
            ])
            print(
                ':: {}@lr={:.5f} || trn accuracy {:.3f} {} || dev accuracy {:.3f} {}'
                .format(j + 1, lr, train_accuracy, train_loss_report,
                        dev_accuracy, dev_loss_report))

            if best_dev_accuracy < dev_accuracy:
                print('New best dev accuracy. Saving checkpoint')
                self.net.save(self.model_folder)
                best_dev_accuracy = dev_accuracy
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
            if self.config[
                    'early_stopping_threshold'] < epochs_without_improvement:
                print(
                    'Finished after {} epochs due to early stopping'.format(j))
                break
Пример #4
0
def evaluate(in_session, in_model, in_dataset, batch_size=64):
    """Predict over (X, masks), print a confusion matrix, return accuracy."""
    X, masks, labels = in_dataset
    predictions = []
    for batch in batch_generator((X, masks), batch_size):
        predictions.extend(in_model.predict(batch, in_session).tolist())
    print(confusion_matrix(labels, predictions))
    return accuracy_score(labels, predictions)
Пример #5
0
def evaluate(in_session, in_model, in_dataset, batch_size=64):
    """Predict over eval_data, print the confusion matrix, return accuracy."""
    eval_data, labels = in_dataset
    predictions = []
    for batch_data in batch_generator(eval_data, batch_size):
        predictions.extend(in_model.predict(batch_data, in_session))
    print(confusion_matrix(labels, predictions))
    return accuracy_score(labels, predictions)
Пример #6
0
def evaluate(in_model, in_dataset, batch_size=64):
    """Predict over all-but-last dataset components (last one is labels);
    print the confusion matrix and return accuracy."""
    data, labels = in_dataset[:-1], in_dataset[-1]
    predictions = []
    for batch in batch_generator(data, batch_size):
        predictions.extend(in_model.predict(batch))
    print(confusion_matrix(labels, predictions))
    return accuracy_score(labels, predictions)
Пример #7
0
def predict(in_session, in_model, in_dataset, batch_size=64):
    """Return (losses, predictions) lists over (X, masks).

    The labels component of ``in_dataset`` is unpacked but unused here.
    """
    X, masks, labels = in_dataset
    losses = []
    predictions = []
    for batch_x, batch_masks in batch_generator((X, masks), batch_size):
        result = in_model.predict_loss(batch_x, batch_masks, in_session)
        losses.extend(result[0].tolist())
        predictions.extend(result[1].tolist())
    return losses, predictions
Пример #8
0
def evaluate_advanced(in_model,
                      in_dataset,
                      in_dialog_indices,
                      in_action_templates,
                      ignore_ood_accuracy=False):
    """Evaluate ``in_model`` per turn, tracking accuracy overall and after OOD.

    Returns a dict with the mean loss, correct/total turn counts, and the
    same counts restricted to turns occurring after the first OOD (backoff)
    turn of each dialog.  When ``ignore_ood_accuracy`` is True, backoff
    turns themselves are excluded from the counts.
    """
    if BABI_CONFIG['backoff_utterance'].lower() in in_action_templates:
        # BUG FIX: this previously read `babi_config` (undefined, lowercase);
        # the constant used in the membership test above is BABI_CONFIG.
        backoff_action = in_action_templates.index(
            BABI_CONFIG['backoff_utterance'].lower())
    else:
        backoff_action = UNK_ID

    X, action_masks, sequence_masks, y = in_dataset
    losses, predictions = [], []
    batch_gen = batch_generator([X, action_masks, sequence_masks, y],
                                64,
                                verbose=False)
    for batch_x, batch_action_masks, batch_sequence_masks, batch_y in batch_gen:
        batch_predictions, batch_losses = in_model.forward(
            batch_x, batch_action_masks, batch_sequence_masks, batch_y)
        losses += list(batch_losses)
        predictions += batch_predictions.tolist()

    # strip padding turns, keeping per-dialog alignment of true/pred labels
    y_true_dialog, y_pred_dialog = [], []
    for y_true_i, y_pred_i in zip(y, predictions):
        y_true_dialog_i, y_pred_dialog_i = [], []
        for y_true_i_j, y_pred_i_j in zip(y_true_i, y_pred_i):
            if y_true_i_j != PAD_ID:
                y_true_dialog_i.append(y_true_i_j)
                y_pred_dialog_i.append(y_pred_i_j)
        y_true_dialog.append(y_true_dialog_i)
        y_pred_dialog.append(y_pred_dialog_i)

    total_turns = 0
    correct_turns = 0
    total_turns_after_ood = 0
    correct_turns_after_ood = 0
    for y_true, y_pred in zip(y_true_dialog, y_pred_dialog):
        ood_occurred = False
        for y_i_true, y_i_pred in zip(y_true, y_pred):
            # a backoff turn is excluded from counting only when
            # ignore_ood_accuracy is set
            current_turn_counts = not (y_i_true == backoff_action
                                       and ignore_ood_accuracy)
            total_turns += int(current_turn_counts)
            correct_turns += int(y_i_true == y_i_pred and current_turn_counts)
            if ood_occurred:
                total_turns_after_ood += int(current_turn_counts)
                correct_turns_after_ood += int(y_i_true == y_i_pred
                                               and current_turn_counts)
            if y_i_true == backoff_action:
                ood_occurred = True
    return {
        'avg_loss': np.mean(losses),
        'correct_turns': correct_turns,
        'total_turns': total_turns,
        'correct_turns_after_ood': correct_turns_after_ood,
        'total_turns_after_ood': total_turns_after_ood
    }
    def train(self):
        """Train ``self.net`` with per-epoch train/dev/test-noisy evaluation.

        Applies ``self.drop_out_batch`` to each batch before the train step,
        saves a checkpoint on every new best dev accuracy, and stops early
        after ``early_stopping_threshold`` epochs without improvement.
        """
        print('\n:: training started\n')
        epochs = self.config['epochs']
        best_dev_accuracy = 0.0
        epochs_without_improvement = 0
        for j in range(epochs):
            losses = []  # NOTE(review): collected nowhere below -- appears unused
            batch_gen = batch_generator(self.data_train, self.batch_size)
            for batch in batch_gen:  # batch_x, batch_context_features, batch_action_masks, batch_y in batch_gen:
                # copy so drop_out_batch's mutations don't touch the dataset
                batch_copy = [np.copy(elem) for elem in batch]
                dropped_out_batch = self.drop_out_batch(batch_copy)
                batch_loss_dict, lr = self.net.train_step(*dropped_out_batch)

            # evaluate every epoch
            train_accuracy, train_loss_dict = evaluate(self.net, self.data_train)
            train_loss_report = ' '.join(['{}: {:.3f}'.format(key, value) for key, value in train_loss_dict.items()])
            dev_accuracy, dev_loss_dict = evaluate(self.net, self.data_dev, runs_number=3)
            dev_loss_report = ' '.join(['{}: {:.3f}'.format(key, value) for key, value in dev_loss_dict.items()])
            print(':: {}@lr={:.5f} || trn accuracy {:.3f} {} || dev accuracy {:.3f} {}'.format(j + 1, lr, train_accuracy, train_loss_report, dev_accuracy, dev_loss_report))

            # NOTE(review): this call's arguments (backoff utterance string,
            # post_ood_turns, runs_number) and the keys read below
            # (correct_post_ood_turns, correct_ood_turns, ...) do not match
            # every evaluate_advanced variant in this file -- verify which
            # implementation is in scope here.
            eval_stats_noisy = evaluate_advanced(self.net,
                                                 self.data_test,
                                                 self.action_templates,
                                                 BABI_CONFIG['backoff_utterance'].lower(),
                                                 post_ood_turns=self.post_ood_turns_noisy,
                                                 runs_number=1)
            print('\n\n')
            print('Noisy dataset: {} turns overall, {} turns after the first OOD'.format(eval_stats_noisy['total_turns'],
                                                                                         eval_stats_noisy['total_turns_after_ood']))
            print('Accuracy:')
            accuracy = eval_stats_noisy['correct_turns'] / eval_stats_noisy['total_turns']
            # guard each ratio against a zero denominator
            accuracy_after_ood = eval_stats_noisy['correct_turns_after_ood'] / eval_stats_noisy['total_turns_after_ood'] \
                if eval_stats_noisy['total_turns_after_ood'] != 0 \
                else 0
            accuracy_post_ood = eval_stats_noisy['correct_post_ood_turns'] / eval_stats_noisy['total_post_ood_turns'] \
                if eval_stats_noisy['total_post_ood_turns'] != 0 \
                else 0
            accuracy_ood = eval_stats_noisy['correct_ood_turns'] / eval_stats_noisy['total_ood_turns'] \
                if eval_stats_noisy['total_ood_turns'] != 0 \
                else 0
            print('overall: {:.3f}; after first OOD: {:.3f}, directly post-OOD: {:.3f}; OOD: {:.3f}'.format(accuracy,
                                                                                                            accuracy_after_ood,
                                                                                                            accuracy_post_ood,
                                                                                                            accuracy_ood))

            if best_dev_accuracy < dev_accuracy:
                print('New best dev accuracy. Saving checkpoint')
                self.net.save(self.model_folder)
                best_dev_accuracy = dev_accuracy
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
            if self.config['early_stopping_threshold'] < epochs_without_improvement:
                print('Finished after {} epochs due to early stopping'.format(j))
                break
Пример #10
0
def get_loss_stats(in_model, in_dataset, in_session, batch_size=64):
    """Forward-only pass over the dataset; return max/min/avg of the losses."""
    losses = []
    for batch in batch_generator(in_dataset, batch_size):
        batch_losses, _batch_outputs = in_model.step(*batch,
                                                     in_session,
                                                     forward_only=True)
        losses.extend(batch_losses)
    return {
        'max': np.max(losses),
        'min': np.min(losses),
        'avg': np.mean(losses)
    }
Пример #11
0
def train(in_session, in_model, in_train, in_dev, in_dst_folder, nb_epoch,
          batch_size, early_stopping_threshold, dropout_keep_prob, **kwargs):
    """Train a VAE seq2seq model with decoder-input word dropout.

    Each decoder-input token (except the first column of every sequence,
    presumably the start-of-sequence token -- TODO confirm) is kept with
    probability ``dropout_keep_prob`` and otherwise replaced by UNK_ID.
    Saves a checkpoint whenever the dev loss improves and stops early after
    ``early_stopping_threshold`` epochs without improvement.
    """
    best_dev_loss = np.inf
    epochs_without_improvement = 0
    for epoch_counter in range(nb_epoch):
        # shuffle each dataset component in place before batching
        # NOTE(review): components are shuffled independently -- assumes
        # batch_generator/ordering tolerates this; verify alignment.
        [random.shuffle(train_i) for train_i in in_train]
        batch_gen = batch_generator(in_train, batch_size, verbose=False)
        train_nll_losses = []
        train_kl_losses = []
        train_kl_w = []
        for idx, (batch_enc_input, batch_dec_input,
                  batch_dec_output) in enumerate(batch_gen):
            # word dropout on every decoder-input token after the first column
            dec_input_dropped = [
                token if random.random() < dropout_keep_prob else UNK_ID
                for token in batch_dec_input[:, 1:].flatten()
            ]
            # reattach the untouched first column and restore the batch shape
            batch_dec_input_dropped = np.concatenate([
                np.expand_dims(batch_dec_input[:, 0], axis=-1),
                np.array(dec_input_dropped).reshape(
                    batch_dec_input.shape[0], batch_dec_input.shape[1] - 1)
            ],
                                                     axis=1)
            loss_dict = in_model.step(batch_enc_input, batch_dec_input_dropped,
                                      batch_dec_output, in_session)
            train_nll_losses.append(loss_dict['nll_loss'])
            train_kl_losses.append(loss_dict['kl_loss'])
            train_kl_w.append(loss_dict['kl_w'])
        # combined loss: nll + kl * kl_weight, averaged over batches
        train_loss = np.mean(
            np.array(train_nll_losses) +
            np.array(train_kl_losses) * np.array(train_kl_w))
        print('Epoch {} out of {} results'.format(epoch_counter, nb_epoch))
        # loss_dict here is the last batch's dict; its kl_w is reported as
        # the current (annealed) weight
        print(
            'train loss: {:.3f} | nll_loss: {:.3f} | kl_loss: {:.3f} | kl_w: {:.5f}'
            .format(train_loss, np.mean(train_nll_losses),
                    np.mean(train_kl_losses), loss_dict['kl_w']))
        dev_eval = evaluate(in_session, in_model, in_dev)
        print(
            'dev loss: {:.3f} | nll_loss: {:.3f} | kl_loss: {:.3f} | kl_w: {:.5f}'
            .format(dev_eval['loss'], dev_eval['nll_loss'],
                    dev_eval['kl_loss'], dev_eval['kl_w']))
        if dev_eval['loss'] < best_dev_loss:
            best_dev_loss = dev_eval['loss']
            in_model.save(in_dst_folder, in_session)
            print('New best loss. Saving checkpoint')
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        if early_stopping_threshold < epochs_without_improvement:
            print('Early stopping after {} epochs'.format(epoch_counter))
            break
    print('Optimization Finished!')
Пример #12
0
def get_kl_loss_stats(in_model, in_dataset, in_session, batch_size=64):
    """Collect per-sample KL losses (forward-only) and return max/min/avg."""
    losses = []
    for enc_batch, dec_in_batch, dec_out_batch in batch_generator(
            in_dataset, batch_size):
        loss_dict, _outputs = in_model.step(enc_batch,
                                            dec_in_batch,
                                            dec_out_batch,
                                            in_session,
                                            forward_only=True)
        losses.extend(loss_dict['kl_loss'].tolist())
    return {
        'max': np.max(losses),
        'min': np.min(losses),
        'avg': np.mean(losses)
    }
Пример #13
0
def evaluate(in_model, in_dataset):
    """Return (accuracy over non-padding turns, mean loss).

    Accuracy counts a turn as correct when prediction equals gold and the
    gold label is not PAD_ID; the denominator is the number of non-zero
    gold labels (assumes PAD_ID == 0 -- as in the sibling
    ``evaluate_single_run``).
    """
    X, action_masks, sequence_masks, y = in_dataset
    losses, predictions = [], []
    batch_gen = batch_generator([X, action_masks, sequence_masks, y],
                                64,
                                verbose=False)
    for batch_x, batch_action_masks, batch_sequence_masks, batch_y in batch_gen:
        batch_predictions, batch_losses = in_model.forward(
            batch_x, batch_action_masks, batch_sequence_masks, batch_y)
        losses += batch_losses.tolist()
        predictions += batch_predictions.flatten().tolist()
    correct = sum([
        gold_i == pred_i and gold_i != PAD_ID
        for gold_i, pred_i in zip(y.flatten(), predictions)
    ])
    # BUG FIX: was np.sum(map(...)) -- np.sum over a bare map object wraps it
    # in a 0-d object array and returns the map itself, not the count.
    # Materialize the map first (matching evaluate_single_run in this file).
    non_pad_total = np.sum(list(map(lambda x: int(x != 0), y.flatten())))
    return (correct / non_pad_total, np.mean(losses))
Пример #14
0
def get_loss_stats(in_model,
                   in_dataset,
                   loss_components=('kl_loss', 'nll_loss'),
                   batch_size=64):
    """Return max/min/avg of the summed loss components over the dataset.

    For each batch the requested ``loss_components`` of the forward-only
    step are summed into one scalar; the stats are taken over those sums.
    FIX: the default for ``loss_components`` was a mutable list shared
    across calls; a tuple is a safe, equivalent default.
    """
    batch_gen = batch_generator(in_dataset, batch_size)
    losses = []
    for batch in batch_gen:
        loss_dict = in_model.step(*batch, forward_only=True)
        cumulative_loss = np.sum(
            [loss_dict[component] for component in loss_components])
        losses.append(cumulative_loss)
    return {
        'max': np.max(losses),
        'min': np.min(losses),
        'avg': np.mean(losses)
    }
Пример #15
0
def evaluate_single_run(in_model, in_dataset):
    """One evaluation pass: return (non-padding accuracy, mean losses dict)."""
    loss_dict = defaultdict(list)
    predictions = []
    for batch in batch_generator(in_dataset, 64):
        batch_predictions, batch_loss_dict = in_model.forward(*batch)
        for key, value in batch_loss_dict.items():
            loss_dict[key].extend(value.tolist())
        predictions.extend(batch_predictions.flatten().tolist())
    # the gold labels are the second-to-last dataset component
    y = in_dataset[-2]
    correct = sum([
        gold_i == pred_i and gold_i != PAD_ID
        for gold_i, pred_i in zip(y.flatten(), predictions)
    ])
    non_pad_total = np.sum(list(map(lambda x: int(x != 0), y.flatten())))
    mean_losses = {key: np.mean(value) for key, value in loss_dict.items()}
    return (correct / non_pad_total, mean_losses)
Пример #16
0
def evaluate(in_model, in_dataset, batch_size=64):
    """Forward-only evaluation; return mean VAE loss components.

    Returns the combined loss (nll + kl * kl_w, averaged per sample) plus
    the mean nll, kl, bow losses and kl weight over the whole dataset.
    """
    batch_gen = batch_generator(in_dataset, batch_size)
    kl_losses = []
    nll_losses = []
    kl_w = []
    bow_losses = []
    for batch in batch_gen:
        loss_dict = in_model.step(*batch, forward_only=True)
        kl_losses += loss_dict['kl_loss'].tolist()
        nll_losses += loss_dict['nll_loss'].tolist()
        kl_w += loss_dict['kl_w'].tolist()
        bow_losses += loss_dict['bow_loss'].tolist()
    loss = np.mean(np.array(kl_losses) * np.array(kl_w) + np.array(nll_losses))
    return {
        'loss': loss,
        'nll_loss': np.mean(nll_losses),
        'kl_loss': np.mean(kl_losses),
        # FIX: previously averaged only the LAST batch's loss_dict['kl_w']
        # (and raised NameError on an empty dataset) even though the full
        # kl_w list was collected above; use the collected values.
        'kl_w': np.mean(kl_w),
        'bow_loss': np.mean(bow_losses)
    }
Пример #17
0
def evaluate(in_session, in_model, in_dataset, batch_size=64):
    """Forward-only evaluation: print 10 random decoded sequences and
    return the mean loss with its perplexity."""
    losses = []
    outputs = []
    for enc_inp, dec_out in batch_generator(in_dataset, batch_size):
        batch_losses, batch_outputs = in_model.step(enc_inp,
                                                    dec_out,
                                                    in_session,
                                                    forward_only=True)
        losses.extend(batch_losses)
        outputs.extend(batch_outputs)
    print('10 random eval sequences:')
    for idx in np.random.choice(range(len(outputs)), size=10):
        seq = list(outputs[idx])
        # cut the sequence at the first end-of-sequence token
        if EOS in seq:
            seq = seq[:seq.index(EOS)]
        print(' '.join(seq))
    mean_loss = np.mean(losses)
    return {'loss': mean_loss, 'perplexity': np.exp(mean_loss)}
Пример #18
0
def train(in_model, in_train, in_dev, in_config, in_model_folder):
    """Train with per-epoch evaluation, sample reconstructions and early
    stopping on the dev loss."""
    best_dev_loss = np.inf
    stale_epochs = 0
    for epoch in range(in_config['num_epoch']):
        print("Epoch [%d/%d]" % (epoch + 1, in_config['num_epoch']))
        for batch in batch_generator(in_train, in_config['batch_size']):
            in_model.step(*batch)
        trn_loss_dict = evaluate(in_model, in_train)
        print(
            "trn results | nll_loss:%.3f | kl_w:%.3f | kl_loss:%.3f | bow_loss:%.3f"
            % (trn_loss_dict['nll_loss'], trn_loss_dict['kl_w'],
               trn_loss_dict['kl_loss'], trn_loss_dict['bow_loss']))
        dev_loss_dict = evaluate(in_model, in_dev)
        print(
            "dev results | nll_loss:%.3f | kl_w:%.3f | kl_loss:%.3f | bow_loss:%.3f"
            % (dev_loss_dict['nll_loss'], dev_loss_dict['kl_w'],
               dev_loss_dict['kl_loss'], dev_loss_dict['bow_loss']))
        # NOTE(review): len(in_dev) counts dataset components, not examples —
        # preserved as-is, but looks suspicious; verify against the caller.
        for idx in np.random.choice(np.arange(len(in_dev)), size=5):
            encoder_input, decoder_input, decoder_output, bow_output = [
                dev_i[idx] for dev_i in in_dev
            ]
            text = ' '.join(in_model.rev_vocab[word] for word in encoder_input)
            in_model.customized_reconstruct(text)
        current_dev_loss = dev_loss_dict['loss']
        if current_dev_loss < best_dev_loss:
            print('New best dev loss! Saving checkpoint')
            best_dev_loss = current_dev_loss
            in_model.save(in_model_folder)
            stale_epochs = 0
        else:
            stale_epochs += 1
        if stale_epochs == in_config['early_stopping_threshold']:
            print('Stopped after {} epochs due to no loss improvement'.format(
                epoch + 1))
            break
    def train(self):
        """Train ``self.net`` with random OOD input substitution and per-epoch
        train/dev/test-noisy evaluation.

        With probability ``random_input_prob`` a turn's X input is replaced by
        a random entry of ``self.random_input``, its label is set to the UNK
        action, and the following turn's prev_action is updated to match.
        """
        print('\n:: training started\n')
        epochs = self.config['epochs']
        best_dev_accuracy = 0.0
        epochs_without_improvement = 0
        random_input_prob = self.config.get('random_input_prob', 0.0)
        unk_action_id = self.action_templates.index(UNK)
        for j in range(epochs):
            losses = []  # NOTE(review): never appended to below -- appears unused
            batch_gen = batch_generator([self.X_train, self.context_features_train, self.action_masks_train, self.prev_action_train, self.y_train], self.batch_size)
            for batch in batch_gen:  # batch_x, batch_context_features, batch_action_masks, batch_y in batch_gen:
                # copy so the substitutions below don't mutate the dataset
                batch_copy = [np.copy(elem) for elem in batch]
                X, context_features, action_masks, prev_action, y = batch_copy
                # number of real (non-zero-labelled) turns -- assumes label 0
                # marks padding; TODO confirm
                num_turns = np.sum(np.vectorize(lambda x: x!= 0)(y))
                for idx in range(num_turns):
                    if np.random.random() < random_input_prob:
                        # replace this turn's input with a random OOD one and
                        # relabel it as the UNK action
                        random_input_idx = np.random.choice(range(self.random_input[0].shape[0]))
                        random_input = [random_input_i[random_input_idx] for random_input_i in self.random_input]
                        X[0][idx] = random_input[0]
                        y[0][idx] = unk_action_id
                        # the next turn's "previous action" must agree with
                        # the substituted label
                        if idx + 1 < num_turns:
                            prev_action[0][idx + 1] = unk_action_id
                batch_loss_dict, lr = self.net.train_step(X, context_features, action_masks, prev_action, y)

            # evaluate every epoch
            train_accuracy, train_loss_dict = evaluate(self.net, (self.X_train, self.context_features_train, self.action_masks_train, self.prev_action_train, self.y_train))
            train_loss_report = ' '.join(['{}: {:.3f}'.format(key, value) for key, value in train_loss_dict.items()])
            dev_accuracy, dev_loss_dict = evaluate(self.net, (self.X_dev, self.context_features_dev, self.action_masks_dev, self.prev_action_dev, self.y_dev))
            dev_loss_report = ' '.join(['{}: {:.3f}'.format(key, value) for key, value in dev_loss_dict.items()])
            print(':: {}@lr={:.5f} || trn accuracy {:.3f} {} || dev accuracy {:.3f} {}'.format(j + 1, lr, train_accuracy, train_loss_report, dev_accuracy, dev_loss_report))

            # NOTE(review): argument list and keys read below assume an
            # evaluate_advanced variant returning *_ood_turns counts --
            # verify which implementation is in scope.
            eval_stats_noisy = evaluate_advanced(self.net,
                                                 (self.X_test, self.context_features_test, self.action_masks_test, self.prev_action_test, self.y_test),
                                                 self.action_templates,
                                                 BABI_CONFIG['backoff_utterance'].lower(),
                                                 post_ood_turns=self.post_ood_turns_noisy,
                                                 runs_number=1)
            print('\n\n')
            print('Noisy dataset: {} turns overall, {} turns after the first OOD'.format(eval_stats_noisy['total_turns'],
                                                                                         eval_stats_noisy['total_turns_after_ood']))
            print('Accuracy:')
            accuracy = eval_stats_noisy['correct_turns'] / eval_stats_noisy['total_turns']
            # each ratio is guarded against a zero denominator
            accuracy_after_ood = eval_stats_noisy['correct_turns_after_ood'] / eval_stats_noisy['total_turns_after_ood'] \
                if eval_stats_noisy['total_turns_after_ood'] != 0 \
                else 0
            accuracy_post_ood = eval_stats_noisy['correct_post_ood_turns'] / eval_stats_noisy['total_post_ood_turns'] \
                if eval_stats_noisy['total_post_ood_turns'] != 0 \
                else 0
            accuracy_ood = eval_stats_noisy['correct_ood_turns'] / eval_stats_noisy['total_ood_turns'] \
                if eval_stats_noisy['total_ood_turns'] != 0 \
                else 0
            print('overall: {:.3f}; after first OOD: {:.3f}, directly post-OOD: {:.3f}; OOD: {:.3f}'.format(accuracy,
                                                                                                            accuracy_after_ood,
                                                                                                            accuracy_post_ood,
                                                                                                            accuracy_ood))

            if best_dev_accuracy < dev_accuracy:
                print('New best dev loss. Saving checkpoint')
                self.net.save(self.model_folder)
                best_dev_accuracy = dev_accuracy
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
            if self.config['early_stopping_threshold'] < epochs_without_improvement:
                print('Finished after {} epochs due to early stopping'.format(j))
                break
def evaluate_advanced(in_model,
                      in_dataset,
                      in_action_templates,
                      post_ood_turns=None,
                      ignore_ood_accuracy=False):
    """Per-turn evaluation with OOD-aware accuracy breakdowns.

    Counts correct/total turns overall, after the first OOD (backoff) turn
    of each dialog, for turns immediately following an OOD turn, and —
    when ``post_ood_turns`` (a collection of global non-padding turn
    indices) is given — for that explicit set of turns.  When
    ``ignore_ood_accuracy`` is True, backoff turns themselves are excluded
    from the overall/after-OOD counts.
    """
    if BABI_CONFIG['backoff_utterance'].lower() in in_action_templates:
        backoff_action = in_action_templates.index(
            BABI_CONFIG['backoff_utterance'].lower())
    else:
        # backoff utterance not among the templates: fall back to UNK
        backoff_action = UNK_ID

    X, action_masks, sequence_masks, y = in_dataset
    losses, predictions = [], []
    batch_gen = batch_generator([X, action_masks, sequence_masks, y],
                                64,
                                verbose=False)
    for batch_x, batch_action_masks, batch_sequence_masks, batch_y in batch_gen:
        batch_predictions, batch_losses = in_model.forward(
            batch_x, batch_action_masks, batch_sequence_masks, batch_y)
        losses += list(batch_losses)
        predictions += batch_predictions.tolist()

    # strip padding turns while keeping per-dialog true/pred alignment
    y_true_dialog, y_pred_dialog = [], []
    for y_true_i, y_pred_i in zip(y, predictions):
        y_true_dialog_i, y_pred_dialog_i = [], []
        for y_true_i_j, y_pred_i_j in zip(y_true_i, y_pred_i):
            if y_true_i_j != PAD_ID:
                y_true_dialog_i.append(y_true_i_j)
                y_pred_dialog_i.append(y_pred_i_j)
        y_true_dialog.append(y_true_dialog_i)
        y_pred_dialog.append(y_pred_dialog_i)

    total_turns = 0
    correct_turns = 0
    total_turns_after_ood = 0
    total_post_ood_turns = 0
    correct_post_ood_turns = 0
    correct_turns_after_ood = 0
    for y_true, y_pred in zip(y_true_dialog, y_pred_dialog):
        ood_occurred = False
        prev_action = None
        for y_i_true, y_i_pred in zip(y_true, y_pred):
            # a backoff turn is excluded only when ignore_ood_accuracy is set
            current_turn_counts = not (y_i_true == backoff_action
                                       and ignore_ood_accuracy)
            total_turns += int(current_turn_counts)
            correct_turns += int(y_i_true == y_i_pred and current_turn_counts)
            # "post-OOD" turn: the first in-domain turn right after a backoff
            if prev_action == backoff_action and y_i_true != backoff_action:
                total_post_ood_turns += 1
                correct_post_ood_turns += int(y_i_true == y_i_pred)
            # "after-OOD" turns: everything following the dialog's first OOD
            if ood_occurred:
                total_turns_after_ood += int(current_turn_counts)
                correct_turns_after_ood += int(y_i_true == y_i_pred
                                               and current_turn_counts)
            if y_i_true == backoff_action:
                ood_occurred = True
            prev_action = y_i_true
    post_ood_correct, post_ood_total = 0, 0
    if post_ood_turns is not None:
        # recompute post-OOD stats from the explicitly supplied turn indices;
        # indices are counted over non-padding turns in flat order
        non_pad_counter = 0
        post_ood_true, post_ood_pred = [], []
        for y_true_i, y_pred_i in zip(
                np.array(y).flatten(),
                np.array(predictions).flatten()):
            if y_true_i == PAD_ID:
                continue
            if non_pad_counter in post_ood_turns:
                post_ood_true.append(y_true_i)
                post_ood_pred.append(y_pred_i)
            non_pad_counter += 1
        post_ood_correct = sum([
            int(true_i == pred_i)
            for true_i, pred_i in zip(post_ood_true, post_ood_pred)
        ])
        post_ood_total = len(post_ood_turns)

    # NOTE: when post_ood_turns is given, the loop-computed
    # total/correct_post_ood_turns above are overridden by these values
    return {
        'avg_loss': np.mean(losses),
        'correct_turns': correct_turns,
        'total_turns': total_turns,
        'correct_turns_after_ood': correct_turns_after_ood,
        'total_turns_after_ood': total_turns_after_ood,
        'total_post_ood_turns': post_ood_total,
        'correct_post_ood_turns': post_ood_correct
    }
Пример #21
0
def evaluate_advanced_single(in_model,
                             in_dataset,
                             in_action_templates,
                             in_fallback_utterance,
                             post_ood_turns=None,
                             ignore_ood_accuracy=False):
    """Single-run per-turn evaluation with OOD (fallback) breakdowns.

    Returns mean loss, correct/total turn counts, OOD turn counts, stats
    for the explicit ``post_ood_turns`` indices (global non-padding turn
    order), the count of turns before the first per-dialog error
    ("continuous"), and the binary F1 for detecting the fallback action.
    When ``ignore_ood_accuracy`` is True, fallback turns are excluded from
    the overall counts.
    """
    if in_fallback_utterance in in_action_templates:
        fallback_action = in_action_templates.index(in_fallback_utterance)
    else:
        # fallback utterance missing: assume it is the last template
        fallback_action = len(in_action_templates) - 1

    losses, predictions = [], []
    batch_gen = batch_generator(in_dataset, 64)
    for batch in batch_gen:
        batch_predictions, batch_loss_dict = in_model.forward(*batch)
        losses += batch_loss_dict['loss'].tolist()
        predictions += batch_predictions.tolist()

    # gold labels are the second-to-last dataset component
    y = in_dataset[-2]
    # strip padding turns while keeping per-dialog true/pred alignment
    y_true_dialog, y_pred_dialog = [], []
    y_true_all, y_pred_all = [], []
    y_true_binary, y_pred_binary = [], []
    for y_true_i, y_pred_i in zip(y, predictions):
        y_true_dialog_i, y_pred_dialog_i = [], []
        for y_true_i_j, y_pred_i_j in zip(y_true_i, y_pred_i):
            if y_true_i_j != PAD_ID:
                y_true_dialog_i.append(y_true_i_j)
                y_pred_dialog_i.append(y_pred_i_j)
        y_true_dialog.append(y_true_dialog_i)
        y_pred_dialog.append(y_pred_dialog_i)
        y_true_all += y_true_dialog_i
        y_pred_all += y_pred_dialog_i

    # binarize: 1 = fallback (OOD) action, 0 = any in-domain action
    y_true_binary = [int(action == fallback_action) for action in y_true_all]
    y_pred_binary = [int(action == fallback_action) for action in y_pred_all]

    ood_f1 = f1_score(y_true_binary, y_pred_binary, average='binary')
    acc = accuracy_score(y_true_all, y_pred_all)

    total_turns = 0
    correct_turns = 0
    correct_continuous_turns = 0
    total_ood_turns = 0
    correct_ood_turns = 0
    for y_true, y_pred in zip(y_true_dialog, y_pred_dialog):
        error_occurred = False
        for y_i_true, y_i_pred in zip(y_true, y_pred):
            # once any turn in the dialog is wrong, the rest no longer count
            # as "continuous"
            error_occurred = error_occurred or y_i_true != y_i_pred
            current_turn_counts = not (y_i_true == fallback_action
                                       and ignore_ood_accuracy)
            total_turns += int(current_turn_counts)
            correct_turns += int(y_i_true == y_i_pred and current_turn_counts)
            correct_continuous_turns += int(not error_occurred)
            if y_i_true == fallback_action:
                total_ood_turns += 1
                correct_ood_turns += int(y_i_true == y_i_pred)
    post_ood_correct, post_ood_total = 0, 0
    if post_ood_turns is not None:
        # score only the explicitly supplied post-OOD turn indices, counted
        # over non-padding turns in flat order
        non_pad_counter = 0
        post_ood_true, post_ood_pred = [], []
        for y_true_i, y_pred_i in zip(
                np.array(y).flatten(),
                np.array(predictions).flatten()):
            if y_true_i == PAD_ID:
                continue
            if non_pad_counter in post_ood_turns:
                post_ood_true.append(y_true_i)
                post_ood_pred.append(y_pred_i)
            non_pad_counter += 1
        post_ood_correct = sum([
            int(true_i == pred_i)
            for true_i, pred_i in zip(post_ood_true, post_ood_pred)
        ])
        post_ood_total = len(post_ood_turns)

    # sanity check: sklearn accuracy must agree with the manual count
    # (only holds when ignore_ood_accuracy is False)
    assert abs(acc - correct_turns / total_turns) < 1e-7

    return {
        'avg_loss': np.mean(losses),
        'correct_turns': correct_turns,
        'total_turns': total_turns,
        'total_ood_turns': total_ood_turns,
        'correct_ood_turns': correct_ood_turns,
        'total_post_ood_turns': post_ood_total,
        'correct_post_ood_turns': post_ood_correct,
        'correct_continuous_turns': correct_continuous_turns,
        'ood_f1': ood_f1
    }