def q4(argv=None):
    """Cross-validate MyLogisticRegGen and LogisticRegression on Digits.

    The selection (dataset, method name), ``k`` and the ``latex`` flag all
    come from ``wrapper_args``, which is defined elsewhere.  When the
    selected dataset is ``'all'`` every configured (method, dataset) pair is
    evaluated in a fixed order; otherwise only the requested pair is run.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.

    Raises:
        KeyError: If the requested (method, dataset) pair is not configured.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q4', ['Digits'], ['MyLogisticRegGen', 'LogisticRegression'])

    Digits_X, Digits_y = prepare_digits(want_noise=False)

    custom_key = ('MyLogisticRegGen', 'Digits')
    sklearn_key = ('LogisticRegression', 'Digits')
    default_order = [custom_key, sklearn_key]

    # Each entry bundles the estimator with the data it is evaluated on.
    methods = {
        custom_key: (MyLogisticRegGen(verbose=False), Digits_X, Digits_y),
        sklearn_key: (LogisticRegression(), Digits_X, Digits_y),
    }

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for key in order:
        method_label, dataset_label = key
        estimator, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(method_label, dataset_label))
        scores = my_cross_val(estimator, X, y, k)
        report(method_label, dataset_label, scores, latex=latex)
def q3(argv=None):
    """Cross-validate MultiGaussClassify variants and LogisticRegression.

    Three methods (full-covariance MultiGaussClassify, diagonal-covariance
    MultiGaussClassify, LogisticRegression) are paired with three datasets
    (Boston50, Boston75, Digits).  The selection, ``k`` and the ``latex``
    flag come from ``wrapper_args`` (defined elsewhere).  When the selected
    dataset is ``'all'`` all nine pairs run, method-major; otherwise only
    the requested pair runs.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.

    Raises:
        KeyError: If the requested (method, dataset) pair is not configured.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    x_boston, y_boston = load_dataset(load_boston)
    x_digits, y_digits = prepare_digits(True)
    x_boston_50, y_boston_50 = percentileAssignment(50, x_boston, y_boston)
    x_boston_75, y_boston_75 = percentileAssignment(75, x_boston, y_boston)

    def _full_gauss(X, y):
        # Classifier sized from the data it will actually be fitted on.
        return MultiGaussClassify(len(np.unique(y)), X.shape[1])

    def _diagonal_gauss(X, y):
        # Third positional argument selects the diagonal variant, as in the
        # original call sites.
        return MultiGaussClassify(len(np.unique(y)), X.shape[1], True)

    def _logistic(X, y):
        return LogisticRegression()

    datasets = [
        ('Boston50', x_boston_50, y_boston_50),
        ('Boston75', x_boston_75, y_boston_75),
        ('Digits', x_digits, y_digits),
    ]
    factories = [
        ('MultiGaussClassify_WithFullMatrix', _full_gauss),
        ('MultiGaussClassify_WithDiagonal', _diagonal_gauss),
        ('LogisticRegression', _logistic),
    ]

    # Building both structures from one loop keeps the run order and the
    # lookup table in sync and guarantees every classifier is sized from
    # its own dataset (the Boston75 entries used to read x_boston_50).
    default_order = []
    methods = {}
    for method_label, build in factories:
        for dataset_label, X, y in datasets:
            key = (method_label, dataset_label)
            default_order.append(key)
            methods[key] = (build(X, y), X, y)

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        name, dataset = key
        method, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(name, dataset))
        scores = my_cross_val(method, X, y, k)
        # Honour the parsed latex flag instead of a hard-coded True.
        report(name, dataset, scores, latex=latex)
def q3ii(argv=None):
    """Evaluate LinearSVC, SVC and LogisticRegression with my_train_test.

    Three estimators are paired with three datasets (Boston50, Boston75,
    Digits).  The selection, ``k``, ``pi`` and the ``latex`` flag come from
    ``wrapper_args`` (defined elsewhere, called with ``include_pi=True``).
    When the selected dataset is ``'all'`` all nine pairs run, method-major;
    otherwise only the requested pair runs.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.

    Raises:
        KeyError: If the requested (method, dataset) pair is not configured.
    """
    dataset, method_name, k, pi, latex = wrapper_args(
        argv, 'q3ii', ['Boston50', 'Boston75', 'Digits'], include_pi=True)

    # The parsed ``pi`` used to be discarded in favour of a literal 0.75.
    # Use it, keeping 0.75 as the fallback when no value was supplied.
    # NOTE(review): assumes ``pi`` is the 4th argument my_train_test expects
    # (presumably the training fraction) -- confirm against my_train_test.
    train_fraction = 0.75 if pi is None else pi

    x_boston, y_boston = load_dataset(load_boston)
    x_digits, y_digits = load_dataset(load_digits)
    x_boston_50, y_boston_50 = percentileAssignment(50, x_boston, y_boston)
    x_boston_75, y_boston_75 = percentileAssignment(75, x_boston, y_boston)

    def _linear_svc():
        return LinearSVC(max_iter=2000)

    def _svc():
        return SVC(gamma='scale', C=10)

    def _logistic():
        return LogisticRegression(penalty='l2', solver='lbfgs',
                                  multi_class='multinomial', max_iter=5000)

    datasets = [
        ('Boston50', x_boston_50, y_boston_50),
        ('Boston75', x_boston_75, y_boston_75),
        ('Digits', x_digits, y_digits),
    ]
    factories = [
        ('LinearSVC', _linear_svc),
        ('SVC', _svc),
        ('LogisticRegression', _logistic),
    ]

    # One fresh estimator per (method, dataset) pair, method-major order.
    default_order = []
    methods = {}
    for method_label, build in factories:
        for dataset_label, X, y in datasets:
            key = (method_label, dataset_label)
            default_order.append(key)
            methods[key] = (build(), X, y)

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        name, dataset = key
        method, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(name, dataset))
        scores = my_train_test(method, X, y, train_fraction, k)
        report(name, dataset, scores, latex=latex)
def q3i(argv=None):
    """Cross-validate LinearSVC, SVC and LogisticRegression.

    Each of the three estimators (built with default arguments) is paired
    with Boston50, Boston75 and Digits.  The selection, ``k`` and the
    ``latex`` flag come from ``wrapper_args`` (defined elsewhere).  When the
    selected dataset is ``'all'`` all nine pairs run, method-major;
    otherwise only the requested pair runs.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.

    Raises:
        KeyError: If the requested (method, dataset) pair is not configured.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3i', ['Boston50', 'Boston75', 'Digits'])

    Boston50_X, Boston50_y = prepare_boston50()
    Boston75_X, Boston75_y = prepare_boston75()
    Digits_X, Digits_y = prepare_digits()

    data_by_name = (
        ('Boston50', Boston50_X, Boston50_y),
        ('Boston75', Boston75_X, Boston75_y),
        ('Digits', Digits_X, Digits_y),
    )
    estimator_classes = (
        ('LinearSVC', LinearSVC),
        ('SVC', SVC),
        ('LogisticRegression', LogisticRegression),
    )

    default_order = [
        (method_label, dataset_label)
        for method_label, _ in estimator_classes
        for dataset_label, _, _ in data_by_name
    ]

    # A separate default-constructed estimator for every pair.
    methods = {}
    for method_label, estimator_cls in estimator_classes:
        for dataset_label, features, labels in data_by_name:
            methods[(method_label, dataset_label)] = (
                estimator_cls(), features, labels)

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for key in order:
        method_label, dataset_label = key
        estimator, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(method_label, dataset_label))
        scores = my_cross_val(estimator, X, y, k)
        report(method_label, dataset_label, scores, latex=latex)
def q4(argv=None):
    """Cross-validate three classifiers on projected Digits features.

    ``X1`` is ``rand_proj(x_digits, 32)`` and ``X2`` is
    ``quad_proj(x_digits)``; both are evaluated against the original Digits
    labels with LinearSVC, SVC and LogisticRegression.  The selection,
    ``k`` and the ``latex`` flag come from ``wrapper_args`` (defined
    elsewhere).  When the selected dataset is ``'all'`` all six pairs run,
    method-major; otherwise only the requested pair runs.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.  Defaults to ``None``
            to match the other experiment entry points.

    Raises:
        KeyError: If the requested (method, dataset) pair has no configured
            experiment.  Note that ``'X3'`` is offered to ``wrapper_args``
            but no ``X3`` feature set is built here, so selecting it ends
            up in this error.
    """
    dataset, method_name, k, latex = wrapper_args(argv, 'q4',
                                                  ['X1', 'X2', 'X3'])

    x_digits, y_digits = load_dataset(load_digits)
    X1 = rand_proj(x_digits, 32)
    X2 = quad_proj(x_digits)

    default_order = [
        ('LinearSVC', 'X1'),
        ('LinearSVC', 'X2'),
        ('SVC', 'X1'),
        ('SVC', 'X2'),
        ('LogisticRegression', 'X1'),
        ('LogisticRegression', 'X2'),
    ]

    methods = {
        ('LinearSVC', 'X1'): (LinearSVC(max_iter=2000), X1, y_digits),
        ('LinearSVC', 'X2'): (LinearSVC(max_iter=2000), X2, y_digits),
        ('SVC', 'X1'): (SVC(gamma='scale', C=10), X1, y_digits),
        ('SVC', 'X2'): (SVC(gamma='scale', C=10), X2, y_digits),
        ('LogisticRegression', 'X1'): (
            LogisticRegression(penalty='l2', solver='lbfgs',
                               multi_class='multinomial', max_iter=5000),
            X1, y_digits),
        ('LogisticRegression', 'X2'): (
            LogisticRegression(penalty='l2', solver='lbfgs',
                               multi_class='multinomial', max_iter=5000),
            X2, y_digits),
    }

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        name, dataset = key
        if key not in methods:
            # Same exception type as the bare dict lookup, but with a
            # message that says what was asked for and what is available.
            raise KeyError(
                'no experiment configured for method {!r} on dataset {!r}; '
                'available pairs: {}'.format(name, dataset, default_order))
        method, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(name, dataset))
        scores = my_cross_val(method, X, y, k)
        report(name, dataset, scores, latex=latex)
def q3(argv=None):
    """Cross-validate MultiGaussClassify and LogisticRegression.

    Both methods are paired with Boston50, Boston75 and Digits.  The
    selection, ``k`` and the ``latex`` flag come from ``wrapper_args``
    (defined elsewhere).  When the selected dataset is ``'all'`` all six
    pairs run, method-major; otherwise only the requested pair runs.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.

    Raises:
        KeyError: If the requested (method, dataset) pair is not configured.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    Boston50_X, Boston50_y = prepare_boston50()
    Boston75_X, Boston75_y = prepare_boston75()
    # prepare_digits adds gaussian noise to the data so that covariance
    # matrices are not singular; see datasets.prepare_digits for details.
    Digits_X, Digits_y = prepare_digits()

    # (method label, dataset label, estimator, features, labels), listed in
    # the order the experiments run when everything is selected.
    experiments = [
        ('MultiGaussClassify', 'Boston50',
         MultiGaussClassify(), Boston50_X, Boston50_y),
        ('MultiGaussClassify', 'Boston75',
         MultiGaussClassify(), Boston75_X, Boston75_y),
        ('MultiGaussClassify', 'Digits',
         MultiGaussClassify(linear=False), Digits_X, Digits_y),
        ('LogisticRegression', 'Boston50',
         LogisticRegression(), Boston50_X, Boston50_y),
        ('LogisticRegression', 'Boston75',
         LogisticRegression(), Boston75_X, Boston75_y),
        ('LogisticRegression', 'Digits',
         LogisticRegression(), Digits_X, Digits_y),
    ]

    default_order = [(entry[0], entry[1]) for entry in experiments]
    methods = {(entry[0], entry[1]): entry[2:] for entry in experiments}

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for key in order:
        method_label, dataset_label = key
        estimator, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(method_label, dataset_label))
        scores = my_cross_val(estimator, X, y, k)
        report(method_label, dataset_label, scores, latex=latex)
def q3(argv=None):
    """Cross-validate MyFLDA2 and LogisticRegression on Boston50/Boston75.

    Before any evaluation, ``find_best_myflda2`` is called once per dataset
    and its result is used as the MyFLDA2 entry for that dataset
    (presumably a tuned estimator -- see find_best_myflda2).  The selection,
    ``k`` and the ``latex`` flag come from ``wrapper_args`` (defined
    elsewhere).  When the selected dataset is ``'all'`` all four pairs run,
    method-major; otherwise only the requested pair runs.

    Args:
        argv: Forwarded unchanged to ``wrapper_args``.

    Raises:
        KeyError: If the requested (method, dataset) pair is not configured.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75'],
        ['MyFLDA2', 'LogisticRegression'])

    Boston50_X, Boston50_y = prepare_boston50()
    Boston75_X, Boston75_y = prepare_boston75()

    flda_50_key = ('MyFLDA2', 'Boston50')
    flda_75_key = ('MyFLDA2', 'Boston75')
    logreg_50_key = ('LogisticRegression', 'Boston50')
    logreg_75_key = ('LogisticRegression', 'Boston75')
    default_order = [flda_50_key, flda_75_key, logreg_50_key, logreg_75_key]

    # Threshold search runs for both datasets up front, regardless of which
    # pair was selected.
    print('Finding optimal threshold for MyFLDA2 on Boston50 ...')
    myflda_boston50 = find_best_myflda2(MyFLDA2, Boston50_X, Boston50_y, k)
    print('Finding optimal threshold for MyFLDA2 on Boston75 ...')
    myflda_boston75 = find_best_myflda2(MyFLDA2, Boston75_X, Boston75_y, k)
    print('Done.')

    methods = {
        flda_50_key: (myflda_boston50, Boston50_X, Boston50_y),
        flda_75_key: (myflda_boston75, Boston75_X, Boston75_y),
        logreg_50_key: (LogisticRegression(), Boston50_X, Boston50_y),
        logreg_75_key: (LogisticRegression(), Boston75_X, Boston75_y),
    }

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for key in order:
        method_label, dataset_label = key
        estimator, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(method_label, dataset_label))
        scores = my_cross_val(estimator, X, y, k)
        report(method_label, dataset_label, scores, latex=latex)