def evaluate(dataset, limit_num_sents: bool):
    """Train a Rasa NLU intent classifier and evaluate out-of-scope detection.

    Converts the training split into Rasa's training-data format, trains a
    model from 'config.yml', tunes the OOS decision threshold on validation
    confidences, and returns the test metrics from Testing.test_threshold.
    """
    train_str = dataset_2_string_rasa(dataset['train'],
                                      limit_num_sents=limit_num_sents,
                                      set_type='train')
    X_val, y_val = get_X_y_rasa(dataset['val'] + dataset['oos_val'],
                                limit_num_sents=limit_num_sents,
                                set_type='val')
    X_test, y_test = get_X_y_rasa(dataset['test'] + dataset['oos_test'],
                                  limit_num_sents=limit_num_sents,
                                  set_type='test')

    # Rasa's loader reads from a path, so round-trip through a temp file.
    with NamedTemporaryFile(suffix='.yml') as f:
        f.write(train_str.encode('utf8'))
        f.seek(0)
        training_data = rasa.shared.nlu.training_data.loading.load_data(f.name)

    config = rasa.nlu.config.load('config.yml')
    model = rasa.nlu.model.Trainer(config).train(training_data)

    # Collect ((predicted_label, confidence), true_label) pairs on the
    # validation split; these drive the threshold search.
    val_predictions_labels = []
    for sentence, true_label in zip(X_val, y_val):
        parsed = model.parse(sentence)
        prediction = (parsed['intent']['name'], parsed['intent']['confidence'])
        val_predictions_labels.append((prediction, true_label))

    threshold = find_best_threshold(val_predictions_labels, 'oos')

    # Evaluate on the held-out test split with the tuned threshold.
    testing = Testing(model, X_test, y_test, 'rasa', 'oos')
    return testing.test_threshold(threshold)
def evaluate(dataset, limit_num_sents: bool):
    """Train a linear SVM intent classifier and evaluate out-of-scope detection.

    Vectorizes the splits via Split (fitting only on the training data),
    tunes the OOS decision threshold on validation confidences, and returns
    the test metrics from Testing.test_threshold.
    """
    # Split dataset; the vectorizer is fit on the first (train) split only.
    split = Split()

    X_train, y_train = split.get_X_y(dataset['train'],
                                     fit=True,
                                     limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 fit=False,
                                 limit_num_sents=limit_num_sents,
                                 set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   fit=False,
                                   limit_num_sents=limit_num_sents,
                                   set_type='test')

    classifier = svm.SVC(C=1, kernel='linear', probability=True)
    classifier.fit(X_train, y_train)

    # Collect ((predicted_label, confidence), true_label) pairs on the
    # validation split; these drive the threshold search.
    val_predictions_labels = []
    for vec, true_label in zip(X_val, y_val):
        probs = classifier.predict_proba(vec)[0]  # intent probabilities
        best = argmax(probs)  # predicted intent index
        val_predictions_labels.append(((best, probs[best]), true_label))

    oos_label = split.intents_dct['oos']
    threshold = find_best_threshold(val_predictions_labels, oos_label)

    # Evaluate on the held-out test split with the tuned threshold.
    testing = Testing(classifier, X_test, y_test, 'svm', oos_label)
    return testing.test_threshold(threshold)
def evaluate(dataset, dim: int, limit_num_sents: bool):
    """Train a supervised fastText intent classifier and evaluate OOS detection.

    dim selects the dimensionality of the pretrained vectors
    (cc.en.<dim>.vec). Tunes the OOS decision threshold on validation
    confidences and returns the test metrics from Testing.test_threshold.
    """
    train_str = dataset_2_string(dataset['train'],
                                 limit_num_sents=limit_num_sents,
                                 set_type='train')
    X_val, y_val = get_X_y_fasttext(dataset['val'] + dataset['oos_val'],
                                    limit_num_sents=limit_num_sents,
                                    set_type='val')
    X_test, y_test = get_X_y_fasttext(dataset['test'] + dataset['oos_test'],
                                      limit_num_sents=limit_num_sents,
                                      set_type='test')

    # fastText trains from a file path, so round-trip through a temp file.
    with NamedTemporaryFile() as f:
        f.write(train_str.encode('utf8'))
        f.seek(0)

        # Train model for in-scope queries.
        model = fasttext.train_supervised(
            input=f.name,
            dim=dim,
            pretrainedVectors=f'{PRETRAINED_VECTORS_PATH}/cc.en.{dim}.vec')

    # Collect ((predicted_label, confidence), true_label) pairs on the
    # validation split; these drive the threshold search.
    val_predictions_labels = []
    for sentence, true_label in zip(X_val, y_val):
        labels, confidences = model.predict(sentence)
        val_predictions_labels.append(((labels[0], confidences[0]), true_label))

    threshold = find_best_threshold(val_predictions_labels, '__label__oos')

    # Evaluate on the held-out test split with the tuned threshold.
    testing = Testing(model, X_test, y_test, 'fasttext', '__label__oos')
    return testing.test_threshold(threshold)
def evaluate(dataset, limit_num_sents: bool):
    """Fine-tune a BERT intent classifier and evaluate out-of-scope detection.

    Tokenizes the splits with bert-base-uncased, fine-tunes
    TFBertForSequenceClassification on the in-scope intents (checkpointing
    the best weights by validation loss), tunes the OOS decision threshold
    on validation confidences, and returns the test metrics produced by
    Testing.test_threshold.

    :param dataset: dict with 'train'/'val'/'test' plus 'oos_val'/'oos_test' splits
    :param limit_num_sents: forwarded to Split_BERT.get_X_y; presumably caps
        sentences per intent — confirm against Split_BERT
    :return: results dict from Testing.test_threshold
    """
    # Split and tokenize dataset
    split = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_train, y_train = split.get_X_y(dataset['train'],
                                     limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 limit_num_sents=limit_num_sents,
                                 set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   limit_num_sents=limit_num_sents,
                                   set_type='test')

    train_ids, train_attention_masks, train_labels = tokenize_BERT(
        X_train, y_train, tokenizer)
    val_ids, val_attention_masks, val_labels = tokenize_BERT(
        X_val, y_val, tokenizer)
    test_ids, test_attention_masks, test_labels = tokenize_BERT(
        X_test, y_test, tokenizer)

    # Minus 1 because the 'oos' label isn't used in training.
    num_labels = len(split.intents_dct.keys()) - 1

    # Train model; the classification head is sized to the in-scope intents.
    model = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=num_labels)
    # BUGFIX: Keras Model.summary() prints the architecture itself and
    # returns None, so the old print('\nBert Model', model.summary())
    # emitted a spurious "None" line.
    print('\nBert Model')
    model.summary()

    log_dir = 'tensorboard_data/tb_bert'
    model_save_path = './models/bert_model.h5'

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,
                                           save_weights_only=True,
                                           monitor='val_loss',
                                           mode='min',
                                           save_best_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    ]

    # The model outputs raw logits, hence from_logits=True.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    # The returned History object was previously bound but never used.
    model.fit([train_ids, train_attention_masks],
              train_labels,
              batch_size=32,
              epochs=5,
              validation_data=([val_ids, val_attention_masks], val_labels),
              callbacks=callbacks)

    val_predictions_labels = []  # used to find threshold

    for sent, true_int_label in zip(X_val, y_val):
        predict_input = tokenizer.encode(sent,
                                         truncation=True,
                                         padding=True,
                                         return_tensors="tf")

        tf_output = model.predict(predict_input)[0]
        # Softmax over logits -> intent prediction probabilities.
        pred_probs = tf.nn.softmax(tf_output, axis=1).numpy()[0]
        pred_label = argmax(pred_probs)  # intent prediction
        similarity = pred_probs[pred_label]

        pred = (pred_label, similarity)
        val_predictions_labels.append((pred, true_int_label))

    threshold = find_best_threshold(val_predictions_labels,
                                    split.intents_dct['oos'])

    # Test with the tuned threshold on the held-out split.
    testing = Testing(model, {
        'test_ids': test_ids,
        'test_attention_masks': test_attention_masks
    }, test_labels, 'bert', split.intents_dct['oos'])
    results_dct = testing.test_threshold(threshold)

    return results_dct