def evaluate(binary_dataset, model_int, X_int_test, y_int_test):
    """Train a binary (in-scope vs. out-of-scope) Rasa model and evaluate it
    together with the pre-trained intent model on the in-scope test set.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param model_int:      already-trained Rasa intent model
    :param X_int_test:     test sentences for the intent model
    :param y_int_test:     gold labels for the test sentences
    :return:               results dict from ``Testing.test_binary()``
    """
    # Serialize the binary training split into Rasa's training-data format.
    binary_train_yaml = dataset_2_string_rasa(binary_dataset['train'],
                                              limit_num_sents=False,
                                              set_type='train')

    # Rasa's loader expects a file path, so round-trip through a temp file.
    with NamedTemporaryFile(suffix='.yml') as tmp:
        tmp.write(binary_train_yaml.encode('utf8'))
        tmp.seek(0)
        training_data = rasa.shared.nlu.training_data.loading.load_data(tmp.name)

    rasa_config = rasa.nlu.config.load('config.yml')
    model_bin = rasa.nlu.model.Trainer(rasa_config).train(training_data)

    # The intent model classifies; the binary model flags out-of-scope ('oos').
    tester = Testing(model_int,
                     X_int_test,
                     y_int_test,
                     'rasa',
                     'oos',
                     bin_model=model_bin)
    return tester.test_binary()
def evaluate(binary_dataset, model_int, X_int_test, y_int_test, dim):
    """Train a binary (in-scope vs. out-of-scope) fastText model and evaluate
    it together with the pre-trained intent model on the in-scope test set.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param model_int:      already-trained fastText intent model
    :param X_int_test:     test sentences for the intent model
    :param y_int_test:     gold labels for the test sentences
    :param dim:            embedding dimension, also selects the pretrained vectors
    :return:               results dict from ``Testing.test_binary()``
    """
    binary_train_txt = dataset_2_string(binary_dataset['train'],
                                        limit_num_sents=False,
                                        set_type='train')

    # fastText trains from a file path, so round-trip through a temp file.
    with NamedTemporaryFile() as tmp:
        tmp.write(binary_train_txt.encode('utf8'))
        tmp.seek(0)
        model_bin = fasttext.train_supervised(
            input=tmp.name,
            dim=dim,
            pretrainedVectors=f'{PRETRAINED_VECTORS_PATH}/cc.en.{dim}.vec')

    # The intent model classifies; the binary model flags '__label__oos'.
    tester = Testing(model_int,
                     X_int_test,
                     y_int_test,
                     'fasttext',
                     '__label__oos',
                     bin_model=model_bin)
    return tester.test_binary()
def evaluate(binary_dataset, mlp_int, X_int_test, y_int_test, split):
    """Train a binary (in-scope vs. out-of-scope) MLP and evaluate it together
    with the pre-trained intent MLP on the in-scope test set.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param mlp_int:        already-trained intent MLPClassifier
    :param X_int_test:     test features for the intent model
    :param y_int_test:     gold labels for the test set
    :param split:          split object providing feature extraction and label mapping
    :return:               results dict from ``Testing.test_binary()``
    """
    X_bin_train, y_bin_train = split.get_X_y(
        binary_dataset['train'], fit=False, limit_num_sents=False, set_type='train')

    binary_clf = MLPClassifier(activation='tanh')
    binary_clf.fit(X_bin_train, y_bin_train)

    # The intent model classifies; the binary model flags the 'oos' label.
    tester = Testing(mlp_int, X_int_test, y_int_test, 'mlp',
                     split.intents_dct['oos'], bin_model=binary_clf)
    return tester.test_binary()
def evaluate(binary_dataset, svc_int, X_int_test, y_int_test, split):
    """Train a binary (in-scope vs. out-of-scope) linear SVM and evaluate it
    together with the pre-trained intent SVM on the in-scope test set.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param svc_int:        already-trained intent SVC
    :param X_int_test:     test features for the intent model
    :param y_int_test:     gold labels for the test set
    :param split:          split object providing feature extraction and label mapping
    :return:               results dict from ``Testing.test_binary()``
    """
    X_bin_train, y_bin_train = split.get_X_y(
        binary_dataset['train'], fit=False, limit_num_sents=False, set_type='train')

    binary_clf = svm.SVC(C=1, kernel='linear')
    binary_clf.fit(X_bin_train, y_bin_train)

    # The intent model classifies; the binary model flags the 'oos' label.
    tester = Testing(svc_int, X_int_test, y_int_test, 'svm',
                     split.intents_dct['oos'], bin_model=binary_clf)
    return tester.test_binary()
# --- 示例 #5 (Example #5) ---
# NOTE(review): stray marker left over from snippet extraction; neutralized
# as a comment so the file remains syntactically valid Python.
def evaluate(binary_dataset, model_int, X_int_test, y_int_test, split_int):
    """Fine-tune a binary (in-scope vs. out-of-scope) BERT classifier and
    evaluate it together with the pre-trained intent BERT model.

    :param binary_dataset: dict with 'train' and 'val' splits for the binary model
    :param model_int:      already fine-tuned BERT intent model
    :param X_int_test:     test sentences for the intent model
    :param y_int_test:     gold labels for the test sentences
    :param split_int:      split used for the intent model (supplies its 'oos' label id)
    :return:               results dict from ``Testing.test_binary()``
    """
    # A fresh split is required because BERT labels must lie in [0, num_labels - 1].
    split_bin = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_bin_train, y_bin_train = split_bin.get_X_y(binary_dataset['train'],
                                                 limit_num_sents=False,
                                                 set_type='train')
    X_bin_val, y_bin_val = split_bin.get_X_y(binary_dataset['val'],
                                             limit_num_sents=False,
                                             set_type='val')

    train_bin_ids, train_bin_attention_masks, train_bin_labels = tokenize_BERT(
        X_bin_train, y_bin_train, tokenizer)
    val_bin_ids, val_bin_attention_masks, val_bin_labels = tokenize_BERT(
        X_bin_val, y_bin_val, tokenizer)

    # Binary classifier: in-scope vs. out-of-scope, hence num_labels=2.
    model_bin = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=2)
    # BUG FIX: Keras' Model.summary() prints the report itself and returns None,
    # so print('\nBert Model', model_bin.summary()) used to print '... None'.
    print('\nBert Model')
    model_bin.summary()

    log_dir = 'tensorboard_data/tb_bert_bin'
    model_save_path = './models/bert_model_bin.h5'

    # Keep only the best checkpoint (by validation loss) and log to TensorBoard.
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,
                                           save_weights_only=True,
                                           monitor='val_loss',
                                           mode='min',
                                           save_best_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    ]

    # from_logits=True because the model head outputs raw logits, not softmax.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model_bin.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    # The History object was never used, so the return value is discarded.
    model_bin.fit(
        [train_bin_ids, train_bin_attention_masks],
        train_bin_labels,
        batch_size=32,
        epochs=5,
        validation_data=([val_bin_ids,
                          val_bin_attention_masks], val_bin_labels),
        callbacks=callbacks)

    # The intent model classifies; the binary model flags out-of-scope queries.
    # Each model has its own label mapping, hence the separate oos label ids.
    testing = Testing(model_int,
                      X_int_test,
                      y_int_test,
                      'bert',
                      split_int.intents_dct['oos'],
                      bin_model=model_bin,
                      bin_oos_label=split_bin.intents_dct['oos'])
    results_dct = testing.test_binary()

    return results_dct