示例#1
0
文件: main.py 项目: crab-a/lab4
def run_knn(points):
    """
    Train a 5-NN classifier on the given points and cross-validate it.

    Prints the predicted and the true label of the first point, then calls
    CrossValidation.run_cv with 10 as its second argument (presumably the
    number of folds - confirm against CrossValidation) and accuracy_score
    as the scoring function.
    :param points: indexable collection of points; each exposes `label`
    """
    classifier = KNN(5)
    classifier.train(points)

    # Sanity check on a single sample before the full cross-validation.
    predicted_label = classifier.predict(points[0])
    print(f'predicted class: {predicted_label}')
    true_label = points[0].label
    print(f'true class: {true_label}')

    CrossValidation().run_cv(points, 10, classifier, accuracy_score)
示例#2
0
def run_knn(points):
    """
    Fit a KNN(5) model on the data, show one prediction and cross-validate.

    The first point is used as a quick spot check: its predicted class and
    its stored label are printed before run_cv is invoked.
    :param points: set of data (must be indexable and non-empty, since
                   points[0] is read)
    """
    model = KNN(5)
    model.train(points)

    sample = points[0]
    print(f'predicted class: {model.predict(sample)}')
    print(f'true class: {sample.label}')

    validator = CrossValidation()
    validator.run_cv(points, 10, model, accuracy_score)
示例#3
0
def k_fold_cross_validation(points, k):
    """
    Cross-validate a k-NN classifier once for each fold count in 2, 10, 20.

    A new classifier is created and trained for every fold count, and a
    "<n>-fold-cross-validation:" header is printed before each run.
    :param points: set of data
    :param k: value for knn
    """
    print(f"K={k}")
    for n_folds in (2, 10, 20):
        classifier = KNN(k)
        classifier.train(points)
        validator = CrossValidation()
        print(f"{n_folds}-fold-cross-validation:")
        # The trailing False/True are forwarded positionally to run_cv
        # (presumably print_final_score / print_fold_score - confirm).
        validator.run_cv(points, n_folds, classifier, accuracy_score,
                         False, True)
示例#4
0
def question_3(points, k):
    """
    question 3: cross-validate one trained k-NN model with 2, 10 and 20 folds

    The model is trained once and then handed to a fresh CrossValidation
    object for every fold count.
    :param points: list of Point
    :param k: the best classifier for the given data, based on question 2
    """
    classifier = KNN(k)
    classifier.train(points)

    print("Question 3:")
    print(f"K={k}")
    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        validator = CrossValidation()
        validator.run_cv(points, n_folds, classifier, accuracy_score,
                         False, True)
示例#5
0
def question_4(points):
    """
    question 4: compare normalizers for K=5 and K=7

    For each k, every normalizer is fitted on the points, the points are
    transformed, and the score returned by run_cv is printed with two
    decimals, followed by an empty line.
    :param points: list of Point
    """
    normalizers = (
        (DummyNormalizer, "DummyNormalizer"),
        (SumNormalizer, "SumNormalizer"),
        (MinMaxNormalizer, "MinMaxNormalizer"),
        (ZNormalizer, "ZNormalizer"),
    )
    print("Question 4:")
    for k in (5, 7):
        print(f"K={k}")
        classifier = KNN(k)
        classifier.train(points)
        # One validator is shared by all normalizers of the same k.
        validator = CrossValidation()
        for normalizer_cls, normalizer_name in normalizers:
            normalizer = normalizer_cls()
            normalizer.fit(points)
            normalized_points = normalizer.transform(points)
            # 2 folds: the original author's note says 2 is the best n-fold.
            average_score = validator.run_cv(normalized_points, 2, classifier,
                                             accuracy_score, False, True)
            print("Accuracy of", normalizer_name, "is", f"{average_score:.2f}")
            print()
示例#6
0
def knn_n_fold(k, n, points, normal_type):
    """
    Cross-validate an (untrained) KNN(k) on normalized points.

    `normal_type` is used twice: it is called on `points` to produce the
    data handed to run_cv, and the callable itself is also forwarded as
    run_cv's fifth positional argument.
    :param k: k-nn
    :param n: n folds
    :param points: the points to use
    :param normal_type: callable that normalizes those points
    :return: None
    """
    classifier = KNN(k)
    validator = CrossValidation()
    normalized_points = normal_type(points)
    validator.run_cv(normalized_points, n, classifier, accuracy_score,
                     normal_type, print_fold_score=True)
def ques_two(points):
    """
    Search k = 1..30 for the k-NN with the highest cross-validation score.

    Every candidate is trained on all points and evaluated with run_cv,
    passing len(points) as the second argument. The first k reaching the
    highest score wins; 0 is returned if no score exceeds 0.
    :param points: the data points
    :return: the best k found
    """
    top_score = 0
    top_k = 0
    for candidate in range(1, 31):
        classifier = KNN(candidate)
        classifier.train(points)
        validator = CrossValidation()
        score = validator.run_cv(points, len(points), classifier,
                                 accuracy_score, False)
        # Strict comparison keeps the earliest k on ties.
        if score > top_score:
            top_score, top_k = score, candidate
    return top_k
示例#8
0
def question_4(points, normalizers):
    """
    Print the 2-fold cross-validation score of 5-NN and 7-NN per normalizer.

    For each k the model is trained on the raw points; then every
    normalizer is instantiated, fitted on the points and used to transform
    them before the transformed points are passed to run_cv.
    :param points: the data points
    :param normalizers: mapping of display name -> normalizer class; the
                        class is called without arguments and its instances
                        must offer fit(points) and transform(points)
    """
    print("Question 4:")
    # A single CrossValidation object serves both k values, as before.
    cv = CrossValidation()
    for k in (5, 7):
        m = KNN(k)
        m.train(points)
        print(f"K={k}")
        for name, normalizer_cls in normalizers.items():
            normalizer = normalizer_cls()
            normalizer.fit(points)
            new_points = normalizer.transform(points)
            score = cv.run_cv(new_points, 2, m, accuracy_score, False, True)
            print(f"Accuracy of {name} is " + str(score))
            print("")
def ques_four(points):
    """
    Print the 2-fold cross-validation score of 5-NN and 7-NN per normalizer.

    The "DummyNormalizer" entry runs on the untouched points (no normalizer
    object is created for it); the other entries fit a normalizer on the
    points and cross-validate the transformed points. A blank line follows
    every result except the very last one.
    :param points: the data points
    """
    print("Question 4:")
    list_of_k = [5, 7]
    # (label, normalizer class); None means "use the points as they are".
    normalizers = [
        ("DummyNormalizer", None),
        ("SumNormalizer", SumNormalizer),
        ("MinMaxNormalizer", MinMaxNormalizer),
        ("ZNormalizer", ZNormalizer),
    ]
    last_k = list_of_k[-1]
    last_label = normalizers[-1][0]
    for k in list_of_k:
        print("K={}".format(k))
        m = KNN(k)
        m.train(points)
        for label, normalizer_cls in normalizers:
            if normalizer_cls is None:
                new_points = points
            else:
                normalizer = normalizer_cls()
                normalizer.fit(points)
                new_points = normalizer.transform(points)
            cv = CrossValidation()
            a = cv.run_cv(new_points, 2, m, accuracy_score, False, True)
            print("Accuracy of {} is".format(label), a)
            # No trailing blank line after the final result of the final k.
            if not (k == last_k and label == last_label):
                print()
示例#10
0
def question_3(points, k):
    """
    Cross-validate one trained k-NN model with 2, 10 and 20 folds.

    The model and the CrossValidation object are created once and reused
    for all three runs; a header line is printed before each run.
    :param points: the data points
    :param k: number of neighbours for the KNN model
    """
    classifier = KNN(k)
    classifier.train(points)
    validator = CrossValidation()

    print("Question 3:")
    print(f"K={k}")
    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        validator.run_cv(points, n_folds, classifier, accuracy_score,
                         False, True)
示例#11
0
def run_knn_k(points):
    """
    question 2: find the k in 1..30 whose KNN scores best under run_cv

    Each candidate is trained on all points and evaluated with run_cv using
    len(points) as the second argument. Ties keep the smallest k; 0 is
    returned when no score is above 0.0.
    :param points: list of Point
    :return: a number, which is the best classifier for the given data
    """
    winner = 0
    winner_score = 0.0
    for candidate in range(1, 31):
        classifier = KNN(candidate)
        classifier.train(points)
        validator = CrossValidation()
        score = validator.run_cv(points, len(points), classifier,
                                 accuracy_score, False, False)
        if score > winner_score:
            winner, winner_score = candidate, score
    return winner
示例#12
0
def run_knn(points):
    """
    Print the "Question 3" and "Question 4" reports for the given points.

    Question 3: a KNN with k=19 is trained once and cross-validated with 2,
    10 and 20 folds, passing DummyNormalizer.transform as run_cv's fifth
    positional argument.
    Question 4: knn_n_fold is run with 2 folds for k=5 and k=7, once per
    normalization callable (dummy, sum, min-max, z).
    :param points: the data points
    """
    print("Question 3:\nK=19")
    m = KNN(k=19)
    m.train(points)
    cv = CrossValidation()
    z = ZNormalizer()
    z.fit(points)
    d = DummyNormalizer()
    # Named sum_norm so the builtin sum() is not shadowed.
    # NOTE(review): unlike z and min_max, this normalizer is never fitted;
    # only its `l1` callable is used below - confirm that is intended.
    sum_norm = SumNormalizer()
    min_max = MinMaxNormalizer()
    min_max.fit(points)

    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        cv.run_cv(points,
                  n_folds,
                  m,
                  accuracy_score,
                  d.transform,
                  print_final_score=False,
                  print_fold_score=True)

    normalizations = (d.transform, sum_norm.l1, min_max.transform,
                      z.transform)
    for header, k in (("Question 4:\nK=5", 5), ("K=7", 7)):
        print(header)
        for normalize in normalizations:
            knn_n_fold(k, 2, points, normalize)
示例#13
0
文件: main.py 项目: crab-a/lab4
def q3(k, points):
    """
    Print the "Question 3" report: cross-validation with 2, 10 and 20 folds.

    One KNN(k) model is trained on all points and one CrossValidation
    object is reused for the three runs.
    :param k: number of neighbours for the KNN model
    :param points: the data points
    """
    classifier = KNN(k)
    classifier.train(points)
    validator = CrossValidation()

    print("Question 3:")
    print(f'K={k}')
    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        validator.run_cv(points, n_folds, classifier, accuracy_score,
                         False, True)
def ques_three(points):
    """
    Print the "Question 3" report for a KNN with the hard-coded k of 19.

    The model is trained once and cross-validated with 2, 10 and 20 folds
    using a single CrossValidation object.
    :param points: the data points
    """
    print("Question 3:")
    # Hard-coded; the original author left `ques_two(points)` disabled here.
    chosen_k = 19
    print("K={}".format(chosen_k))

    classifier = KNN(chosen_k)
    classifier.train(points)
    validator = CrossValidation()
    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        validator.run_cv(points, n_folds, classifier, accuracy_score,
                         False, True)
示例#15
0
def run_1_to_30_knn(points):
    """
    Runs knn with k=1 to k=30 on a given set of data and reports the best k

    For every k the prediction for the first point is printed next to its
    true label, then run_cv is called with len(points) as its second
    argument. The k with the highest returned score (earliest on ties) is
    printed at the end.
    :param points: set of data
    """
    best_k = 0
    best_accuracy = 0
    n_points = len(points)
    for candidate in range(1, 31):
        classifier = KNN(candidate)
        classifier.train(points)
        print(f"classifier {candidate}:")
        print(f'predicted class: {classifier.predict(points[0])}')
        print(f'true class: {points[0].label}')
        score = CrossValidation().run_cv(points, n_points, classifier,
                                         accuracy_score)
        if score > best_accuracy:
            best_k, best_accuracy = candidate, score
        print()
    print(f"best classifier is: {best_k}, best accuracy is: {best_accuracy}")
示例#16
0
def two_fold_cross_validation(points):
    """
    Runs two fold cross validation for k in (5, 7), once per normalizer

    Each combination gets a fresh KNN trained on the normalized points. The
    score returned by run_cv is printed, followed by an empty line for all
    results except the last one.
    :param points: set of data
    """
    k_values = (5, 7)
    normalizer_classes = (DummyNormalizer, SumNormalizer, MinMaxNormalizer,
                          ZNormalizer)
    # Counts down to zero so the final result is not followed by a blank line.
    remaining = len(k_values) * len(normalizer_classes)
    for k in k_values:
        print(f"K={k}")
        for normalizer_cls in normalizer_classes:
            classifier = KNN(k)
            normalizer = normalizer_cls()
            normalizer.fit(points)
            normalized_points = normalizer.transform(points)
            classifier.train(normalized_points)
            validator = CrossValidation()
            accuracy = validator.run_cv(normalized_points, 2, classifier,
                                        accuracy_score, True, True)
            print(f"Accuracy of {normalizer_cls.__name__} is {accuracy}")
            remaining -= 1
            if remaining != 0:
                print()
示例#17
0
def run(model_id):
    """Run the experiment configured under `model_id` and save its models.

    Reads `configs[model_id]`, builds the training data through a Generator,
    wraps the configured network in a KerasModel, runs CrossValidation.run_cv
    and saves every returned model as 'model_<model_id>_<i>.h5' under
    MODELS_PATH.
    """
    config = configs[model_id]
    logger.info(f'\n\n\ntrain model {model_id}')

    # prepare data
    preprocess_fn = None
    if config['preprocess_fn'] is not None:
        # Look the function up by name and bind its configured keyword args.
        raw_preprocess = getattr(data_generator, config['preprocess_fn'])
        preprocess_fn = partial(raw_preprocess, **config['preprocess'])
    generator = Generator(
        path=PATH_TRAIN,
        IDs=meta_train.index.tolist(),
        labels=meta_train[['target']],
        preprocessing_fn=preprocess_fn,
        shuffle=False,
        batch_size=64,
        **config['generator'],
    )
    X, y = generate_train_data(generator, meta_train)
    logger.info(f'X shape: {X.shape}, y shape: {y.shape}')

    # define model
    build_network = partial(
        getattr(models, config['model_name']),
        input_shape=X.shape[1:],
        **config['model_params'],
    )
    # Build one throw-away network just to log its summary.
    build_network().summary(print_fn=logger.info)
    model = KerasModel(build_network, logger=logger, **config['train'])

    # train and save model
    # The first 100 rows of X are passed as Xtest.
    cross_val = CrossValidation(X=X, y=y, Xtest=X[:100],
                                logger=logger, **config['cv'])
    _, _, _, trained_models = cross_val.run_cv(model)

    for fold_index, trained_model in enumerate(trained_models):
        file_name = f'model_{model_id}_{fold_index}.h5'
        trained_model.save(os.path.join(MODELS_PATH, file_name))
示例#18
0
文件: main.py 项目: yuval-livne/lab4
def implementation(points):
    """
    Run the k search and print the "Question 3" and "Question 4" reports.

    1. Search k = 1..30: each KNN is trained on all points and scored with
       run_cv using len(points) as the second argument; the first k with
       the highest score is kept.
    2. Question 3: cross-validate the best k with 2, 10 and 20 folds.
    3. Question 4: for k in (5, 7) and each normalizer, fit/transform the
       points, train on them and print the 2-fold score with two decimals.
    :param points: the data points
    """
    # Q2
    best_score = 0  # named so the builtin max() is not shadowed
    best_k = 0
    for k in range(1, 31):
        m = KNN(k)
        m.train(points)
        cv = CrossValidation()
        temp_average_score = cv.run_cv(points,
                                       len(points),
                                       m,
                                       accuracy_score,
                                       print_final_score=False)
        if best_score < temp_average_score:
            best_score = temp_average_score
            best_k = k

    # NOTE: `cv` from the final search iteration is deliberately reused for
    # the reports below, exactly as the original code did.
    print("Question 3:")
    print(f'K={best_k}')
    list_n_folds = [2, 10, 20]
    k_q3 = KNN(best_k)
    k_q3.train(points)
    for n in list_n_folds:
        print(f'{n}-fold-cross-validation:')
        cv.run_cv(points,
                  n,
                  k_q3,
                  accuracy_score,
                  print_final_score=False,
                  print_fold_score=True)

    print("Question 4:")
    list_k = [5, 7]
    dummy = DummyNormalizer()
    z_norm = ZNormalizer()
    sum_norm = SumNormalizer()
    min_max_norm = MinMaxNormalizer()
    list_norm = [dummy, sum_norm, min_max_norm, z_norm]
    n_folds_q4 = 2
    for k in list_k:
        k_q4 = KNN(k)
        print(f'K={k}')
        for norm in list_norm:
            norm.fit(points)
            t_points = norm.transform(points)
            k_q4.train(t_points)
            avg_acc = cv.run_cv(t_points,
                                n_folds_q4,
                                k_q4,
                                accuracy_score,
                                print_final_score=False,
                                print_fold_score=True)
            print('Accuracy of {} is {:.2f}'.format(norm.print_name(),
                                                    avg_acc))
            # Blank line after every result except the very last one.
            if not (norm == list_norm[-1] and k == list_k[-1]):
                print()
from sklearn.linear_model import Lasso
import pandas as pd
import numpy as np
from cross_validation import CrossValidation

# Synthetic regression data: x runs over 0..99 and y is x plus random noise
# (np.random.rand draws from [0, 1), scaled here by 10).
d = pd.DataFrame({'x': range(100)})
d['y'] = d['x'] + np.random.rand(100)*10


def mse(x, y):
    """Return the mean of the squared differences between paired x and y.

    Values are paired with zip, so surplus items of the longer input are
    ignored.
    """
    pairs = zip(x, y)
    squared_errors = [(left - right) ** 2 for left, right in pairs]
    return np.mean(squared_errors)

# Cross-validate a default Lasso model on the synthetic frame `d`.
# NOTE(review): presumably ['x'] are the feature columns, 'y' the target and
# mse the scoring metric - confirm against cross_validation.CrossValidation.
cv = CrossValidation(d, ['x'], 'y', mse)
cv.run_cv(Lasso())
示例#20
0
文件: main.py 项目: crab-a/lab4
def q4_print(points, k):
    """
    Train a KNN(k) on the points and return its 2-fold run_cv result.

    :param points: the data points
    :param k: number of neighbours for the KNN model
    :return: whatever CrossValidation.run_cv returns for this run
    """
    classifier = KNN(k)
    classifier.train(points)
    validator = CrossValidation()
    result = validator.run_cv(points, 2, classifier, accuracy_score,
                              False, True)
    return result
示例#21
0
文件: main.py 项目: crab-a/lab4
def q2(k, points):
    """
    Train a KNN(k) on the points and return its run_cv result.

    run_cv receives len(points) as its second argument and False for both
    trailing positional flags.
    :param k: number of neighbours for the KNN model
    :param points: the data points
    :return: whatever CrossValidation.run_cv returns for this run
    """
    classifier = KNN(k)
    classifier.train(points)
    n_points = len(points)
    return CrossValidation().run_cv(points, n_points, classifier,
                                    accuracy_score, False, False)