Example no. 1
def evaluate(model_path=os.path.join('/home/ubuntu/checkpoints',
                                     'xception05-0.236.mod')):

    # load test data
    test_X = np.load(os.path.join('/home/ubuntu/training_data', 'test_X.npy'))
    test_Y = np.load(os.path.join('/home/ubuntu/training_data', 'test_Y.npy'))

    # load model
    model = load_model(model_path)

    # get predictions
    test_predictions = model.predict(test_X, batch_size=16)

    # get measures
    test_measures = measures.get_measures(test_predictions, test_Y, .5)

    # read genres
    genre_file_path = os.path.join('/home/ubuntu/training_data', 'genres.txt')
    with open(genre_file_path, 'r') as handler:
        genres = handler.readlines()
    genres = [genre[:-1] for genre in genres]

    # print measures
    print("Statistics on test data:")
    measures.print_measures(test_measures, genres)
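This snippet is shown without its module-level imports. A hedged sketch of what it presumably relies on (the exact sources are assumptions; measures is a project-local module, and load_model most likely comes from Keras, given the checkpoint path used as the default argument):

import os
import numpy as np
from keras.models import load_model  # assumption: a Keras model loader
import measures                      # project-local helper, not a library

The loaded model is then evaluated exactly as above, with a fixed decision threshold of 0.5 passed to measures.get_measures.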
Example no. 2
def evaluate(
        model_path=os.path.join('model', 'mod'), cutoff_file='cutoffs.npy'):

    # load test data
    test_X = np.load(os.path.join('training_data', 'test_X.npy'))
    test_Y = np.load(os.path.join('training_data', 'test_Y.npy'))

    # load model
    model = load_model(model_path)

    # load cutoffs
    cutoffs = np.load(os.path.join('cutoffs', cutoff_file))

    # get predictions
    test_predictions = model.predict(test_X, batch_size=16)

    # get measures
    test_measures = measures.get_measures(test_predictions, test_Y, cutoffs)

    # read genres
    genre_file_path = os.path.join('training_data', 'genres.txt')
    with open(genre_file_path, 'r') as handler:
        genres = handler.readlines()
    genres = [genre[:-1] for genre in genres]

    # print measures
    print("Statistics on test data:")
    measures.print_measures(test_measures, genres)
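Unlike Example no. 1, this variant loads a per-label cutoff vector instead of a single 0.5 threshold. A minimal sketch of how such cutoffs could be applied to the predicted probabilities (an assumption about what measures.get_measures does internally, shown with plain NumPy):

import numpy as np

predictions = np.array([[0.7, 0.2, 0.9],
                        [0.1, 0.6, 0.3]])
cutoffs = np.array([0.5, 0.4, 0.8])            # one cutoff per label
binary = (predictions >= cutoffs).astype(int)  # broadcasts across rows
print(binary)  # [[1 0 1]
               #  [0 1 0]]
Example no. 3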
def pcc_experiment(dataset,
                   prediction_type,
                   classifier,
                   epsilon=0.0,
                   mc_iterations=0):
    print("------- {0} -------".format(dataset['name']))
    print("PCC: {0}, epsilon: {1}, MC iterations: {2}".format(
        prediction_type, epsilon, mc_iterations))
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(
        dataset)

    pcc = ProbabilisticClassifierChain(classifier)

    start_time = time.clock()
    pcc.fit(x_train, y_train, number_of_labels)
    learning_end_time = time.clock()
    y_predicted = pcc.predict(x_test, prediction_type, epsilon, mc_iterations)
    prediction_end_time = time.clock()

    number_of_visited_classifiers = float(
        pcc.get_number_of_visited_classifiers()) / len(x_test)
    print("Number of used classifiers: {0}".format(
        number_of_visited_classifiers))
    print_measures(y_predicted, y_test, number_of_labels)

    learning_time, prediction_time, total_time = calculate_run_times(
        start_time, learning_end_time, prediction_end_time)
    print("Learning time: {0:.4f}, Prediction time: {1:.4f}".format(
        learning_time, prediction_time))
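calculate_run_times is project-local. A plausible hedged reimplementation, consistent with how the three time.clock() readings are taken above (note that time.clock() was removed in Python 3.8; time.perf_counter() is the modern replacement):

def calculate_run_times(start_time, learning_end_time, prediction_end_time):
    # elapsed time spent fitting, predicting, and in total
    learning_time = learning_end_time - start_time
    prediction_time = prediction_end_time - learning_end_time
    total_time = prediction_end_time - start_time
    return learning_time, prediction_time, total_time
Example no. 4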
def hc_regression_experiment(dataset,
                             depth_of_search,
                             loss_function,
                             regression_h,
                             regression_c,
                             br=None):
    print("------- {0} -------".format(dataset['name']))
    print "HC: regression, depth: {0}, Loss function: {1}".format(
        str(depth_of_search), loss_function)
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(
        dataset)

    hc_search = HCSearchRegression(regression_h, regression_c, loss_function,
                                   depth_of_search, number_of_labels)

    start_time = time.clock()
    hc_search.fit(x_train, y_train)
    learning_end_time = time.clock()
    y_predicted = hc_search.predict(x_test)
    prediction_end_time = time.clock()

    print_measures(y_predicted, y_test, number_of_labels)

    learning_time, prediction_time, total_time = calculate_run_times(
        start_time, learning_end_time, prediction_end_time)
    print("Learning time: {0:.4f}, Prediction time: {1:.4f}".format(
        learning_time, prediction_time))
Example no. 5
def fuzzy(timetable, loglevel=logging.INFO):
    # init logger for this function
    log = logging.getLogger("evaluation.fuzzy")
    log.setLevel(loglevel)

    # check if global variables are initialized
    assert fs[
        'scoring_ctrl'] is not None, "Fuzzy control system not initialized."

    # compute score
    scoring = skfuzzy.control.ControlSystemSimulation(fs['scoring_ctrl'])

    scoring.input['overlaps'] = measures.count_overlaps(timetable)
    scoring.input['slotdiff'] = measures.sum_up_slot_differences(timetable)
    scoring.input['testdiff'] = measures.sum_up_testlength_differences(
        timetable)
    scoring.input['rchanges'] = measures.count_room_changes(timetable)

    scoring.compute()

    if loglevel == logging.DEBUG:
        fs['score'].view(sim=scoring)

    log.debug(measures.print_measures(timetable))
    log.debug('score: %.2f' % (scoring.output['score']))
    return scoring.output['score']
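The function depends on module-level state: fs must already hold a scikit-fuzzy control system and its output variable, and measures supplies the timetable metrics. A reduced, hedged sketch of how fs could be initialized (only two of the four inputs are shown, and the membership functions and rules are illustrative assumptions, not the project's actual ones):

import numpy as np
import skfuzzy
import skfuzzy.control as ctrl

overlaps = ctrl.Antecedent(np.arange(0, 11, 1), 'overlaps')
rchanges = ctrl.Antecedent(np.arange(0, 11, 1), 'rchanges')
score = ctrl.Consequent(np.arange(0, 101, 1), 'score')

overlaps.automf(3)  # auto-generates 'poor', 'average', 'good' terms (low to high)
rchanges.automf(3)
score['low'] = skfuzzy.trimf(score.universe, [0, 0, 50])
score['high'] = skfuzzy.trimf(score.universe, [50, 100, 100])

rules = [
    ctrl.Rule(overlaps['good'] | rchanges['good'], score['low']),   # many conflicts -> low score
    ctrl.Rule(overlaps['poor'] & rchanges['poor'], score['high']),  # few conflicts -> high score
]
fs = {'score': score, 'scoring_ctrl': ctrl.ControlSystem(rules)}
Example no. 6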
def hc_ranking_experiment(dataset, depth_of_search, loss_function,
                          classifier_h, classifier_c, parameter_grid=None,
                          br=None, reduction=1.0):
    print("------- {0} -------".format(dataset['name']))
    if reduction != 1.0:
        print("DATASET REDUCED TO: {0}%".format(reduction*100))
    print "HC: ranking, depth: {0}, Loss function: {1}".format(str(depth_of_search), loss_function)
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(dataset)

    if dataset['name'] in ['bibtex']:
        x_train, _, y_train, _ = train_test_split(x_train, y_train, test_size=0.90, random_state=42)

    h = deepcopy(classifier_h)
    c = deepcopy(classifier_c)

    if parameter_grid is not None:

        best_parameters_h = None
        best_parameters_c = None
        best_loss = 1.0
        x_train_train, x_train_valid, y_train_train, y_train_valid = train_test_split(x_train, y_train, test_size=0.25, random_state=42)

        for parameters_h in ParameterGrid(parameter_grid):
            for parameters_c in ParameterGrid(parameter_grid):
                print("------H params: {0}, C params: {1}------".format(parameters_h, parameters_c))
                h = deepcopy(classifier_h).set_params(**parameters_h)
                c = deepcopy(classifier_c).set_params(**parameters_c)

                y_predicted = hc_learn_and_predict(br, c, depth_of_search, h, loss_function, number_of_labels,
                                                   x_train_train, x_train_valid, y_train_train, y_train_valid,
                                                   reduction)

                calculated_loss = calculate_average_loss(loss_function, y_predicted, y_train_valid, number_of_labels)
                print("Calculated loss: {0}".format(calculated_loss))
                if calculated_loss < best_loss:
                    best_parameters_h = parameters_h
                    best_parameters_c = parameters_c
                    best_loss = calculated_loss

        print {"Final H params: {0}, Final C params: {1}".format(best_parameters_h, best_parameters_c)}
        h = h.set_params(**best_parameters_h)
        c = c.set_params(**best_parameters_c)

    y_predicted = hc_learn_and_predict(br, c, depth_of_search, h, loss_function, number_of_labels,
                                       x_train, x_test, y_train, y_test, reduction)

    print_measures(y_predicted, y_test, number_of_labels)
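The nested grid search above expands parameter_grid with scikit-learn's ParameterGrid, once for the H classifier and once for the C classifier. A minimal illustration (the grid values are placeholders, not the ones used in the experiments):

from sklearn.model_selection import ParameterGrid

grid = {'C': [0.1, 1.0], 'max_iter': [100, 500]}
for params in ParameterGrid(grid):
    print(params)  # e.g. {'C': 0.1, 'max_iter': 100}, then the remaining 3 combinations
Example no. 7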
def br_experiment(dataset, base_classifier):
    print("------- {0} -------".format(dataset['name']))
    print("BR experiment")
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(dataset)
    classifier = OneVsRestClassifier(base_classifier, n_jobs=-1)

    start_time = time.clock()
    classifier.fit(x_train, y_train)
    learning_end_time = time.clock()
    y_predicted = classifier.predict(x_test)
    prediction_end_time = time.clock()

    print_measures(y_predicted, y_test, number_of_labels)
    learning_time, prediction_time, total_time = calculate_run_times(start_time, learning_end_time, prediction_end_time)
    print("Learning time: {0:.4f}, Prediction time: {1:.4f}".format(learning_time, prediction_time))

    return classifier
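A hedged usage sketch for this binary-relevance baseline (prepare_experiment and the expected dataset dict layout are project-local; 'name' is the only key the function itself reads, so everything else is an assumption):

from sklearn.linear_model import LogisticRegression

dataset = {'name': 'emotions'}  # plus whatever keys prepare_experiment consumes
trained_br = br_experiment(dataset, LogisticRegression())
Example no. 8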
def hc_regression_experiment(dataset, depth_of_search, loss_function, regression_h, regression_c, br=None):
    print("------- {0} -------".format(dataset['name']))
    print "HC: regression, depth: {0}, Loss function: {1}".format(str(depth_of_search), loss_function)
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(dataset)

    hc_search = HCSearchRegression(regression_h, regression_c, loss_function, depth_of_search, number_of_labels)

    start_time = time.clock()
    hc_search.fit(x_train, y_train)
    learning_end_time = time.clock()
    y_predicted = hc_search.predict(x_test)
    prediction_end_time = time.clock()

    print_measures(y_predicted, y_test, number_of_labels)

    learning_time, prediction_time, total_time = calculate_run_times(start_time, learning_end_time, prediction_end_time)
    print("Learning time: {0:.4f}, Prediction time: {1:.4f}".format(learning_time, prediction_time))
def pcc_experiment(dataset, prediction_type, classifier, epsilon=0.0, mc_iterations=0):
    print("------- {0} -------".format(dataset['name']))
    print("PCC: {0}, epsilon: {1}, MC iterations: {2}".format(prediction_type, epsilon, mc_iterations))
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(dataset)

    pcc = ProbabilisticClassifierChain(classifier)

    start_time = time.clock()
    pcc.fit(x_train, y_train, number_of_labels)
    learning_end_time = time.clock()
    y_predicted = pcc.predict(x_test, prediction_type, epsilon, mc_iterations)
    prediction_end_time = time.clock()

    number_of_visited_classifiers = float(pcc.get_number_of_visited_classifiers()) / len(x_test)
    print("Number of used classifiers: {0}".format(number_of_visited_classifiers))
    print_measures(y_predicted, y_test, number_of_labels)

    learning_time, prediction_time, total_time = calculate_run_times(start_time, learning_end_time, prediction_end_time)
    print("Learning time: {0:.4f}, Prediction time: {1:.4f}".format(learning_time, prediction_time))
def br_experiment(dataset, base_classifier):
    print("------- {0} -------".format(dataset['name']))
    print("BR experiment")
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(
        dataset)
    classifier = OneVsRestClassifier(base_classifier, n_jobs=-1)

    start_time = time.clock()
    classifier.fit(x_train, y_train)
    learning_end_time = time.clock()
    y_predicted = classifier.predict(x_test)
    prediction_end_time = time.clock()

    print_measures(y_predicted, y_test, number_of_labels)
    learning_time, prediction_time, total_time = calculate_run_times(
        start_time, learning_end_time, prediction_end_time)
    print("Learning time: {0:.4f}, Prediction time: {1:.4f}".format(
        learning_time, prediction_time))

    return classifier
Example no. 11
def fuzzy(timetable, loglevel=logging.INFO):
    # init logger for this function
    log = logging.getLogger("evaluation.fuzzy")
    log.setLevel(loglevel)

    # check if global variables are initialized
    assert fs['scoring_ctrl'] is not None, "Fuzzy control system not initialized."

    # compute score
    scoring = skfuzzy.control.ControlSystemSimulation(fs['scoring_ctrl'])

    scoring.input['overlaps'] = measures.count_overlaps(timetable)
    scoring.input['slotdiff'] = measures.sum_up_slot_differences(timetable)
    scoring.input['testdiff'] = measures.sum_up_testlength_differences(timetable)
    scoring.input['rchanges'] = measures.count_room_changes(timetable)

    scoring.compute()

    if loglevel == logging.DEBUG:
        fs['score'].view(sim=scoring)

    log.debug(measures.print_measures(timetable))
    log.debug('score: %.2f' % (scoring.output['score']))
    return scoring.output['score']
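Example no. 12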
def hc_ranking_experiment(dataset,
                          depth_of_search,
                          loss_function,
                          classifier_h,
                          classifier_c,
                          parameter_grid=None,
                          br=None,
                          reduction=1.0):
    print("------- {0} -------".format(dataset['name']))
    if reduction != 1.0:
        print("DATASET REDUCED TO: {0}%".format(reduction * 100))
    print "HC: ranking, depth: {0}, Loss function: {1}".format(
        str(depth_of_search), loss_function)
    x_train, y_train, x_test, y_test, number_of_labels = prepare_experiment(
        dataset)

    if dataset['name'] in ['bibtex']:
        x_train, _, y_train, _ = train_test_split(x_train,
                                                  y_train,
                                                  test_size=0.90,
                                                  random_state=42)

    h = deepcopy(classifier_h)
    c = deepcopy(classifier_c)

    if parameter_grid is not None:

        best_parameters_h = None
        best_parameters_c = None
        best_loss = 1.0
        x_train_train, x_train_valid, y_train_train, y_train_valid = train_test_split(
            x_train, y_train, test_size=0.25, random_state=42)

        for parameters_h in ParameterGrid(parameter_grid):
            for parameters_c in ParameterGrid(parameter_grid):
                print("------H params: {0}, C params: {1}------".format(
                    parameters_h, parameters_c))
                h = deepcopy(classifier_h).set_params(**parameters_h)
                c = deepcopy(classifier_c).set_params(**parameters_c)

                y_predicted = hc_learn_and_predict(
                    br, c, depth_of_search, h, loss_function, number_of_labels,
                    x_train_train, x_train_valid, y_train_train, y_train_valid,
                    reduction)

                calculated_loss = calculate_average_loss(
                    loss_function, y_predicted, y_train_valid,
                    number_of_labels)
                print("Calculated loss: {0}".format(calculated_loss))
                if calculated_loss < best_loss:
                    best_parameters_h = parameters_h
                    best_parameters_c = parameters_c
                    best_loss = calculated_loss

        print("Final H params: {0}, Final C params: {1}".format(
            best_parameters_h, best_parameters_c))
        h = h.set_params(**best_parameters_h)
        c = c.set_params(**best_parameters_c)

    y_predicted = hc_learn_and_predict(br, c, depth_of_search, h,
                                       loss_function, number_of_labels,
                                       x_train, x_test, y_train, y_test,
                                       reduction)

    print_measures(y_predicted, y_test, number_of_labels)