Example #1
def evaluate(model_path=os.path.join('/home/ubuntu/checkpoints',
                                     'xception05-0.236.mod')):

    # load test data
    test_X = np.load(os.path.join('/home/ubuntu/training_data', 'test_X.npy'))
    test_Y = np.load(os.path.join('/home/ubuntu/training_data', 'test_Y.npy'))

    # load model
    model = load_model(model_path)

    # get predictions
    test_predictions = model.predict(test_X, batch_size=16)

    # get measures
    test_measures = measures.get_measures(test_predictions, test_Y, .5)

    # read genres
    genre_file_path = os.path.join('/home/ubuntu/training_data', 'genres.txt')
    with open(genre_file_path, 'r') as handler:
        genres = handler.readlines()
    genres = [genre[:-1] for genre in genres]

    # print measures
    print("Statistics on test data:")
    measures.print_measures(test_measures, genres)
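The example above relies on a project-specific `measures` module that is not shown here. Purely for orientation, below is a minimal sketch of what `get_measures(predictions, labels, cutoff)` and `print_measures(...)` could compute for multi-label genre classification; the per-genre dictionary layout and metric names are assumptions for illustration, not the project's actual API. Note that the thresholding step broadcasts, so `cutoff` may be a single float (as here) or a per-genre array (as in Example #2 below).

import numpy as np

def get_measures(predictions, labels, cutoff):
    # Sketch only: threshold predicted probabilities at `cutoff`, then compute
    # per-genre precision/recall/F1 (output layout assumed for illustration).
    binary = (predictions >= cutoff).astype(int)
    labels = labels.astype(int)
    per_genre = []
    for col in range(labels.shape[1]):
        p, t = binary[:, col], labels[:, col]
        tp = int(np.sum((p == 1) & (t == 1)))
        fp = int(np.sum((p == 1) & (t == 0)))
        fn = int(np.sum((p == 0) & (t == 1)))
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if precision + recall else 0.0)
        per_genre.append({'precision': precision, 'recall': recall, 'f1': f1})
    return per_genre

def print_measures(measures, genres):
    # Print one line of metrics per genre name.
    for genre, m in zip(genres, measures):
        print("{}: precision={:.3f} recall={:.3f} f1={:.3f}".format(
            genre, m['precision'], m['recall'], m['f1']))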
Example #2
def evaluate(
        model_path=os.path.join('model', 'mod'), cutoff_file='cutoffs.npy'):

    # load test data
    test_X = np.load(os.path.join('training_data', 'test_X.npy'))
    test_Y = np.load(os.path.join('training_data', 'test_Y.npy'))

    # load model
    model = load_model(model_path)

    # load cutoffs
    cutoffs = np.load(os.path.join('cutoffs', cutoff_file))

    # get predictions
    test_predictions = model.predict(test_X, batch_size=16)

    # get measures
    test_measures = measures.get_measures(test_predictions, test_Y, cutoffs)

    # read genres
    genre_file_path = os.path.join('training_data', 'genres.txt')
    with open(genre_file_path, 'r') as handler:
        genres = handler.readlines()
    genres = [genre[:-1] for genre in genres]

    # print measures
    print("Statistics on test data:")
    measures.print_measures(test_measures, genres)
Example #3
def fit_predict(model, train, test):
    """
    fit model on train, predict on test and return accuracy
    """
    model.fit(train)
    predict, true = model.predict(test)
    TP, TN, FP, FN, acc, recall, precision, f1 = get_measures(predict, true)
    return acc
Example #4
def check_knn():
    ds_ = DataSet("dataset.txt")
    knn_ = Knn(5)
    knn_.fit(ds_)
    predict, true = knn_.predict(ds_)

    TP, TN, FP, FN, acc, recall, precision, f1 = get_measures(predict, true)
    print("accuracy:", acc, "\n"
          "recall:", recall, "\n"
          "precision:", precision, "\n"
          "f1:", f1, "\n")
Example #5
def check_naive_bayes():
    ds_ = DataSet("dataset.txt")
    naive_bayes_ = NaiveBayes(ds_.header)
    naive_bayes_.fit(ds_)
    predict, true = naive_bayes_.predict(ds_)

    TP, TN, FP, FN, acc, recall, precision, f1 = get_measures(predict, true)
    print("accuracy:", acc, "\n"
          "recall:", recall, "\n"
          "precision:", precision, "\n"
          "f1:", f1, "\n")
Example #6
def test_scalability(timeout=10):
    results = defaultdict(list)

    nodes = [100, 1000, 10000, 100000, 1000000]
    graphs = [graph_loader(graph_type='CSF', n=n, seed=1) for n in nodes]

    measures = get_measures()

    measures = measures + graph_approx + spectral_approx

    for measure in tqdm(measures):
        run_times = parallelize_evaluation(graphs, measure, timeout)
        results[measure] = run_times

    color = plt.cm.gist_ncar(np.linspace(0, 0.9, len(measures)))
    mpl.rcParams['axes.prop_cycle'] = cycler.cycler('color', color)
    mpl.rcParams['legend.fontsize'] = 5

    fig, axes = plt.subplots(ncols=5, figsize=(5 * 6 - 1, 5))
    for (measure, run_time) in results.items():

        axes[0].plot(nodes,
                     run_time,
                     label=measure,
                     linewidth=1,
                     linestyle=measure_style[measure])
        axes[0].set_xlabel('Number of nodes')
        axes[0].set_ylabel('Time in seconds')

        axes[1].plot(nodes,
                     run_time,
                     label=measure,
                     linewidth=1,
                     linestyle=measure_style[measure])
        axes[1].set_xscale('log')
        axes[1].set_xlabel('Number of nodes (log-scale)')
        axes[1].set_ylabel('Time in seconds')

        axes[2].plot(nodes,
                     run_time,
                     label=measure,
                     linewidth=1,
                     linestyle=measure_style[measure])
        axes[2].set_xscale('log')
        axes[2].set_yscale('log')
        axes[2].set_xlabel('Number of nodes (log-scale)')
        axes[2].set_ylabel('Time in seconds (log-scale)')

    axes[0].legend()
    plt.title('Clustered Scale Free Graph')

    save_dir = os.getcwd() + '/plots/'
    os.makedirs(save_dir, exist_ok=True)
    plt.savefig(save_dir + 'scalability.pdf')
def check_dtl():
    ds_ = DataSet("dataset.txt")
    dtl_ = DecisionTree(ds_.header)
    tree_ = dtl_.fit(ds_)
    predict, true = dtl_.predict(ds_)

    TP, TN, FP, FN, acc, recall, precision, f1 = get_measures(predict, true)
    with open("tree.txt", "wt") as f:
        f.write(str(dtl_))
    print("accuracy:", acc, "\n"
          "recall:", recall, "\n"
          "precision:", precision, "\n"
          "f1:", f1, "\n")
    e = 0
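Example #6 calls `parallelize_evaluation(graphs, measure, timeout)` to collect one run time per graph while guarding against measures that never finish on the largest graphs. That helper is not shown; the sketch below is one hypothetical way to get similar behaviour with the standard library, assuming each measure is a callable that takes a graph. The worker name `_timed_call` and the choice to record timed-out runs as NaN are assumptions, not part of the original code.

import time
from multiprocessing import Process, Queue

def _timed_call(measure, graph, queue):
    # Worker process: time a single measure on a single graph.
    start = time.time()
    measure(graph)
    queue.put(time.time() - start)

def parallelize_evaluation(graphs, measure, timeout):
    # Sketch only: run the measure on each graph in its own process and abort
    # any run that exceeds `timeout` seconds; aborted runs are recorded as NaN
    # so the plotting code above simply skips those points.
    run_times = []
    for graph in graphs:
        queue = Queue()
        proc = Process(target=_timed_call, args=(measure, graph, queue))
        proc.start()
        proc.join(timeout)
        if proc.is_alive():
            proc.terminate()
            proc.join()
        run_times.append(queue.get() if not queue.empty() else float('nan'))
    return run_times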