Example #1 (score: 0)
def run_from_config(params, *args):
    """Train a RecursiveNNSiameseEncoder from a hyperparameter config.

    Expects five positional file arguments: train, validation, test,
    new-equivalence test, and the full dataset.  Log-scaled hyperparameters
    in ``params`` are converted to their linear values before training.
    Returns the negated sum of the kNN evaluation scores (lower is better
    for the minimizing optimizer driving this function).
    """
    if len(args) < 5:
        print("No input file or test file given: %s:%s" % (args, len(args)))
        sys.exit(-1)
    train_file, validation_file, test_file, neweq_test_file, all_file = args[:5]

    # Copy so the caller's dict is untouched; pop() replaces the
    # log-scale keys with their expanded counterparts in one step.
    local_params = dict(params)
    local_params["memory_size"] = 2 ** local_params.pop("log_memory_size")
    local_params["hidden_layer_sizes"] = [
        2 ** local_params.pop("log_hidden_layer_size")]
    local_params["ae_representation_size"] = \
        2 ** local_params.pop("log_ae_representation_size")
    local_params["constrain_intro_rate"] = \
        1 - 10 ** -local_params.pop("constrain_intro_log_rate")

    encoder = RecursiveNNSiameseEncoder(train_file, local_params)
    evaluation = SemanticEquivalentDistanceEvaluation('', encoder)
    val_xentropy, _ = encoder.train(train_file, validation_file)
    score = np.sum(evaluation.evaluate_with_test(all_file, test_file))
    score += np.sum(evaluation.evaluate_with_test(all_file, neweq_test_file))
    return -score
def run_from_config(params, *args):
    """Train a SequenceGruSupervisedEncoder from a hyperparameter config.

    Requires train, validation, and test files; an optional fourth
    argument supplies the full dataset for cross-set kNN evaluation.
    Returns the negated sum of the evaluation scores.
    """
    if len(args) < 3:
        print(
            "Usage: <train_file> <validation_file> <test_file> [<all_file>]  %s:%s"
            % (args, len(args)))
        sys.exit(-1)
    train_file, validation_file, test_file = args[:3]

    # Work on a copy; expand the log-scale embedding size in place.
    local_params = dict(params)
    local_params["embedding_size"] = 2 ** local_params.pop("log_embedding_size")
    local_params["representation_size"] = 32  # Keep this fixed

    encoder = SequenceGruSupervisedEncoder(train_file,
                                           local_params,
                                           use_centroid=True)
    evaluation = SemanticEquivalentDistanceEvaluation('', encoder)
    val_xentropy, _ = encoder.train(train_file, validation_file)
    if len(args) > 3:
        raw_results = evaluation.evaluate_with_test(test_file, args[3])
    else:
        raw_results = evaluation.evaluate(test_file)
    return -np.sum(raw_results)
Example #3 (score: 0)
def run_from_config(params, *args):
    """Train a SequenceGruSiameseEncoder from a hyperparameter config.

    Expects train, validation, and test file paths as positional
    arguments.  Returns the negated sum of the kNN evaluation scores.
    """
    if len(args) < 3:
        print("No input file or test file given: %s:%s" % (args, len(args)))
        sys.exit(-1)
    train_file, validation_file, test_file = args[:3]

    # Copy the config and expand the log-scale embedding size.
    local_params = dict(params)
    local_params["embedding_size"] = 2 ** local_params.pop("log_embedding_size")
    local_params["representation_size"] = 32  # Keep this fixed

    encoder = SequenceGruSiameseEncoder(train_file, local_params)
    evaluation = SemanticEquivalentDistanceEvaluation('', encoder)
    _ = encoder.train(train_file, validation_file)
    return -np.sum(evaluation.evaluate(test_file))
def run_from_config(params, *args):
    """Train a RecursiveNNSupervisedEncoder from a hyperparameter config.

    Expects five positional file arguments: train, validation, test,
    new-equivalence test, and the full dataset.  Returns the negated sum
    of both cross-set kNN evaluation scores.
    """
    if len(args) < 5:
        print("No input file or test file given: %s:%s" % (args, len(args)))
        sys.exit(-1)
    train_file, validation_file, test_file, neweq_test_file, all_file = args[:5]

    # Copy the config and expand the log-scale memory size.
    local_params = dict(params)
    local_params["memory_size"] = 2 ** local_params.pop("log_memory_size")

    encoder = RecursiveNNSupervisedEncoder(train_file,
                                           local_params,
                                           combination_type='single')
    evaluation = SemanticEquivalentDistanceEvaluation('', encoder)
    val_xentropy, _ = encoder.train(train_file, validation_file)
    score = np.sum(evaluation.evaluate_with_test(all_file, test_file))
    score += np.sum(evaluation.evaluate_with_test(all_file, neweq_test_file))
    return -score
Example #5 (score: 0)
def evaluate_on_all_dims(encoder_filename: str, full_dataset_filename,
                         test_datsets_fileprefix) -> dict:
    """Return a dict with all results from comparison.

    Loads a pickled encoder and evaluates it on two derived test sets
    (``<prefix>-testset.json.gz`` and ``<prefix>-neweqtestset.json.gz``):
    intra-distance ratios plus per-k kNN statistics (k = 1..15), both
    against the full dataset and within each test set alone.

    :param encoder_filename: path to a serialized encoder loadable by
        ``AbstractEncoder.load``.
    :param full_dataset_filename: dataset used as the retrieval corpus
        for the "all" kNN statistics.
    :param test_datsets_fileprefix: prefix from which both test-set
        filenames are derived.  (NOTE(review): name keeps the original
        "datsets" spelling to stay call-compatible.)
    :raises FileNotFoundError: if either derived test-set file is missing.
        (Was an ``assert`` before, which is silently stripped under -O.)
    """
    encoder = AbstractEncoder.load(encoder_filename)

    num_nns = 15  # number of nearest neighbours reported per query

    testset_filename = test_datsets_fileprefix + '-testset.json.gz'
    if not os.path.exists(testset_filename):
        raise FileNotFoundError(testset_filename)

    neweq_testset_filename = test_datsets_fileprefix + '-neweqtestset.json.gz'
    if not os.path.exists(neweq_testset_filename):
        raise FileNotFoundError(neweq_testset_filename)

    results = {
        'testintradist': get_representation_distance_ratio(
            encoder, testset_filename),
        'neweqintradist': get_representation_distance_ratio(
            encoder, neweq_testset_filename),
    }

    nn_evaluator = SemanticEquivalentDistanceEvaluation(None, encoder)

    test_nn_all_stats = nn_evaluator.evaluate_with_test(
        full_dataset_filename, testset_filename, num_nns=num_nns)
    test_nn_within_stats = nn_evaluator.evaluate_with_test(
        testset_filename, testset_filename, num_nns=num_nns)
    neweq_nn_all_stats = nn_evaluator.evaluate_with_test(
        full_dataset_filename, neweq_testset_filename, num_nns=num_nns)
    neweq_nn_within_stats = nn_evaluator.evaluate_with_test(
        neweq_testset_filename, neweq_testset_filename, num_nns=num_nns)

    for i in range(num_nns):
        k = str(i + 1)  # keys are 1-based: testsetknn1all .. testsetknn15all
        results['testsetknn' + k + 'all'] = test_nn_all_stats[i]
        results['testsetknn' + k + 'within'] = test_nn_within_stats[i]
        results['neweqknn' + k + 'all'] = neweq_nn_all_stats[i]
        results['neweqknn' + k + 'within'] = neweq_nn_within_stats[i]

    return results
Example #6 (score: 0)
                           dropout_rate=0.11,
                           hidden_layer_sizes=[8],
                           constrain_intro_rate=.9999,
                           curriculum_initial_size=6.96,
                           curriculum_step=2.72,
                           accuracy_margin=.5)

    training_set = sys.argv[1]
    trained_file = os.path.basename(training_set)
    # assert trained_file.endswith('-trainset.json.gz')
    validation_set = sys.argv[2]
    all_params = dict(hyperparameters)
    all_params["training_set"] = training_set
    all_params["validation_set"] = validation_set
    encoder = RecursiveNNSupervisedEncoder(training_set, hyperparameters)
    evaluation = SemanticEquivalentDistanceEvaluation('', encoder)

    def store_knn_score(historic_data: dict):
        eval_results = evaluation.evaluate(sys.argv[2])
        print("Full kNN: %s" % eval_results)
        historic_data['kNNeval'].append(eval_results)

    with ExperimentLogger(
            name="TreeRnnSupervisedEncoder",
            parameters=all_params,
            directory=os.path.dirname(__file__)) as experiment_logger:
        validation_score, historic_data = encoder.train(
            training_set,
            validation_set,
            additional_code_to_run=store_knn_score)
        pickled_filename = 'rnnsupervisedencoder-' + trained_file[:-len(
Example #7 (score: 0)
                           minibatch_size=100,
                           embedding_size=128,
                           representation_size=32,
                           log_init_noise=-1,
                           dropout_rate=0.260395478)
    training_file = sys.argv[1]
    validation_file = sys.argv[2]
    test_file = sys.argv[3]
    all_params = dict(hyperparameters)
    all_params["training_set"] = training_file
    all_params["validation_set"] = validation_file
    assert sys.argv[4] == 'True' or sys.argv[4] == 'False'
    semantically_equivalent_noise = sys.argv[4] == 'True'

    ae = SequenceGruSupervisedEncoder(training_file, hyperparameters)
    evaluation = SemanticEquivalentDistanceEvaluation('', ae)

    def calculate_knn_score():
        eval_results = evaluation.evaluate(test_file)
        print("Full kNN: %s" % eval_results)

    with ExperimentLogger(
            name="GRUSupervisedEncoder",
            parameters=all_params,
            directory=os.path.dirname(__file__)) as experiment_logger:
        val_cross_entropy, historic_values = ae.train(
            training_file,
            validation_file,
            semantically_equivalent_noise=semantically_equivalent_noise,
            additional_code_to_run=calculate_knn_score)
        if len(historic_values) > 0: