示例#1
0
def q4(argv=None):
    """Cross-validate MyLogisticRegGen and LogisticRegression on Digits.

    ``argv`` is handed to ``wrapper_args``, which yields the dataset
    selector, the method name, the ``k`` given to ``my_cross_val`` and the
    ``latex`` flag given to ``report``.  A dataset selector of ``'all'``
    runs every (method, dataset) pair; anything else runs only the
    requested pair.
    """
    selected, method_name, k, latex = wrapper_args(
        argv, 'q4',
        ['Digits'],
        ['MyLogisticRegGen', 'LogisticRegression'])

    digits_x, digits_y = prepare_digits(want_noise=False)

    # One independent estimator per (method, dataset) pair.
    experiments = {
        ('MyLogisticRegGen', 'Digits'):
            (MyLogisticRegGen(verbose=False), digits_x, digits_y),
        ('LogisticRegression', 'Digits'):
            (LogisticRegression(), digits_x, digits_y),
    }
    full_run = [
        ('MyLogisticRegGen', 'Digits'),
        ('LogisticRegression', 'Digits'),
    ]

    plan = full_run if selected == 'all' else [(method_name, selected)]

    for pair in plan:
        label, data_name = pair
        estimator, features, targets = experiments[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(label, data_name))
        fold_scores = my_cross_val(estimator, features, targets, k)
        report(label, data_name, fold_scores, latex=latex)
示例#2
0
def q3(argv=None):
    """Cross-validate MultiGaussClassify variants and LogisticRegression.

    The experiments cover Boston50, Boston75 and Digits.  ``argv`` is
    forwarded to ``wrapper_args``; a dataset of ``'all'`` runs every
    (method, dataset) pair in method-major order, otherwise only the
    requested pair is evaluated.  Scores come from ``my_cross_val`` with
    ``k`` and are printed through ``report``.

    Raises:
        KeyError: if the requested (method, dataset) pair is not defined.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    x_boston, y_boston = load_dataset(load_boston)
    x_digits, y_digits = prepare_digits(True)
    x_boston_50, y_boston_50 = percentileAssignment(50, x_boston, y_boston)
    x_boston_75, y_boston_75 = percentileAssignment(75, x_boston, y_boston)

    datasets = [
        ('Boston50', x_boston_50, y_boston_50),
        ('Boston75', x_boston_75, y_boston_75),
        ('Digits', x_digits, y_digits),
    ]

    def new_gauss(X, y, *extra):
        # Size the classifier from the very data it will be evaluated on,
        # so no dataset borrows another dataset's feature count.
        return MultiGaussClassify(len(np.unique(y)), X.shape[1], *extra)

    # Factories rather than instances: every (method, dataset) pair must
    # get its own fresh estimator.
    factories = [
        ('MultiGaussClassify_WithFullMatrix', new_gauss),
        ('MultiGaussClassify_WithDiagonal',
         lambda X, y: new_gauss(X, y, True)),
        ('LogisticRegression', lambda X, y: LogisticRegression()),
    ]

    default_order = [(m, d) for m, _ in factories for d, _, _ in datasets]
    methods = {
        (m, d): (make(X, y), X, y)
        for m, make in factories
        for d, X, y in datasets
    }

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        name, data_name = key
        method, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(name, data_name))
        scores = my_cross_val(method, X, y, k)
        # Forward the parsed flag (same positional slot as before) instead
        # of forcing it on unconditionally.
        report(name, data_name, scores, latex)
示例#3
0
def q3ii(argv=None):
    """Evaluate LinearSVC, SVC and LogisticRegression with my_train_test.

    The experiments cover Boston50, Boston75 and Digits.  ``argv`` is
    forwarded to ``wrapper_args`` (with ``include_pi=True``), which yields
    the dataset selector, the method name, ``k``, ``pi`` and the ``latex``
    flag.  ``pi`` and ``k`` are passed on to ``my_train_test``; when ``pi``
    comes back as ``None`` the value 0.75 is used instead.  A dataset of
    ``'all'`` runs every pair, otherwise only the requested pair.

    Raises:
        KeyError: if the requested (method, dataset) pair is not defined.
    """
    dataset, method_name, k, pi, latex = wrapper_args(
        argv, 'q3ii', ['Boston50', 'Boston75', 'Digits'], include_pi=True)

    # Use the parsed value rather than a hard-coded constant; keep 0.75 as
    # the fallback so a missing value behaves as it always did.
    train_fraction = 0.75 if pi is None else pi

    x_boston, y_boston = load_dataset(load_boston)
    x_digits, y_digits = load_dataset(load_digits)
    x_boston_50, y_boston_50 = percentileAssignment(50, x_boston, y_boston)
    x_boston_75, y_boston_75 = percentileAssignment(75, x_boston, y_boston)

    def new_logreg():
        # Fresh estimator per experiment, identical settings for all three.
        return LogisticRegression(penalty='l2',
                                  solver='lbfgs',
                                  multi_class='multinomial',
                                  max_iter=5000)

    default_order = [('LinearSVC', 'Boston50'), ('LinearSVC', 'Boston75'),
                     ('LinearSVC', 'Digits'), ('SVC', 'Boston50'),
                     ('SVC', 'Boston75'), ('SVC', 'Digits'),
                     ('LogisticRegression', 'Boston50'),
                     ('LogisticRegression', 'Boston75'),
                     ('LogisticRegression', 'Digits')]

    methods = {
        ('LinearSVC', 'Boston50'):
        (LinearSVC(max_iter=2000), x_boston_50, y_boston_50),
        ('LinearSVC', 'Boston75'):
        (LinearSVC(max_iter=2000), x_boston_75, y_boston_75),
        ('LinearSVC', 'Digits'):
        (LinearSVC(max_iter=2000), x_digits, y_digits),
        ('SVC', 'Boston50'):
        (SVC(gamma='scale', C=10), x_boston_50, y_boston_50),
        ('SVC', 'Boston75'):
        (SVC(gamma='scale', C=10), x_boston_75, y_boston_75),
        ('SVC', 'Digits'):
        (SVC(gamma='scale', C=10), x_digits, y_digits),
        ('LogisticRegression', 'Boston50'):
        (new_logreg(), x_boston_50, y_boston_50),
        ('LogisticRegression', 'Boston75'):
        (new_logreg(), x_boston_75, y_boston_75),
        ('LogisticRegression', 'Digits'):
        (new_logreg(), x_digits, y_digits),
    }

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        name, data_name = key
        method, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(name, data_name))
        scores = my_train_test(method, X, y, train_fraction, k)
        report(name, data_name, scores, latex=latex)
示例#4
0
def q3i(argv=None):
    """Cross-validate LinearSVC, SVC and LogisticRegression.

    Each estimator is built with its default arguments and scored with
    ``my_cross_val`` on Boston50, Boston75 and Digits.  ``argv`` goes to
    ``wrapper_args``; a dataset of ``'all'`` runs all nine pairs in
    method-major order, otherwise only the requested pair is run.
    """
    selected, method_name, k, latex = wrapper_args(
        argv, 'q3i', ['Boston50', 'Boston75', 'Digits'])

    b50_x, b50_y = prepare_boston50()
    b75_x, b75_y = prepare_boston75()
    dig_x, dig_y = prepare_digits()

    corpora = [
        ('Boston50', b50_x, b50_y),
        ('Boston75', b75_x, b75_y),
        ('Digits', dig_x, dig_y),
    ]
    estimator_classes = [
        ('LinearSVC', LinearSVC),
        ('SVC', SVC),
        ('LogisticRegression', LogisticRegression),
    ]

    # Method-major traversal: all datasets for one method, then the next.
    every_pair = [(m, d) for m, _ in estimator_classes for d, _, _ in corpora]

    # A separate estimator instance is created for every pair.
    experiments = {
        (m, d): (cls(), features, targets)
        for m, cls in estimator_classes
        for d, features, targets in corpora
    }

    plan = every_pair if selected == 'all' else [(method_name, selected)]

    for pair in plan:
        label, data_name = pair
        estimator, features, targets = experiments[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(label, data_name))
        fold_scores = my_cross_val(estimator, features, targets, k)
        report(label, data_name, fold_scores, latex=latex)
示例#5
0
def q4(argv):
    """Cross-validate three classifiers on two projections of Digits.

    ``X1`` is ``rand_proj(x_digits, 32)`` and ``X2`` is
    ``quad_proj(x_digits)``.  ``argv`` is forwarded to ``wrapper_args``; a
    dataset of ``'all'`` runs every (method, dataset) pair, otherwise only
    the requested pair.  Scores come from ``my_cross_val`` with ``k`` and
    are printed through ``report``.

    Raises:
        KeyError: if the requested (method, dataset) pair is not defined.
    """
    x_digits, y_digits = load_dataset(load_digits)

    # Only X1 and X2 are built below, so only those names are offered.
    # NOTE(review): presumably this list is the set of dataset names
    # wrapper_args accepts -- confirm against wrapper_args.
    dataset, method_name, k, latex = wrapper_args(argv, 'q4', ['X1', 'X2'])

    X1 = rand_proj(x_digits, 32)
    X2 = quad_proj(x_digits)

    def new_logreg():
        # Fresh estimator per experiment, identical settings for both.
        return LogisticRegression(penalty='l2',
                                  solver='lbfgs',
                                  multi_class='multinomial',
                                  max_iter=5000)

    default_order = [
        ('LinearSVC', 'X1'),
        ('LinearSVC', 'X2'),
        ('SVC', 'X1'),
        ('SVC', 'X2'),
        ('LogisticRegression', 'X1'),
        ('LogisticRegression', 'X2'),
    ]

    methods = {
        ('LinearSVC', 'X1'): (LinearSVC(max_iter=2000), X1, y_digits),
        ('LinearSVC', 'X2'): (LinearSVC(max_iter=2000), X2, y_digits),
        ('SVC', 'X1'): (SVC(gamma='scale', C=10), X1, y_digits),
        ('SVC', 'X2'): (SVC(gamma='scale', C=10), X2, y_digits),
        ('LogisticRegression', 'X1'): (new_logreg(), X1, y_digits),
        ('LogisticRegression', 'X2'): (new_logreg(), X2, y_digits),
    }

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        name, data_name = key
        method, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(name, data_name))
        scores = my_cross_val(method, X, y, k)
        report(name, data_name, scores, latex=latex)
示例#6
0
def q3(argv=None):
    """Cross-validate MultiGaussClassify and LogisticRegression.

    The experiments cover Boston50, Boston75 and Digits.  ``argv`` goes to
    ``wrapper_args``; a dataset of ``'all'`` runs all six pairs, otherwise
    only the requested pair is run.  Scores come from ``my_cross_val``
    with ``k`` and are printed through ``report``.
    """
    selected, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    b50_x, b50_y = prepare_boston50()
    b75_x, b75_y = prepare_boston75()
    # Note that prepare_digits adds Gaussian noise to the data to
    # avoid singular covariance matrices.  For details, see
    # datasets.prepare_digits
    dig_x, dig_y = prepare_digits()

    # One independent estimator per pair; the Digits Gaussian classifier
    # is the only one built with linear=False.
    experiments = {
        ('MultiGaussClassify', 'Boston50'):
            (MultiGaussClassify(), b50_x, b50_y),
        ('MultiGaussClassify', 'Boston75'):
            (MultiGaussClassify(), b75_x, b75_y),
        ('MultiGaussClassify', 'Digits'):
            (MultiGaussClassify(linear=False), dig_x, dig_y),
        ('LogisticRegression', 'Boston50'):
            (LogisticRegression(), b50_x, b50_y),
        ('LogisticRegression', 'Boston75'):
            (LogisticRegression(), b75_x, b75_y),
        ('LogisticRegression', 'Digits'):
            (LogisticRegression(), dig_x, dig_y),
    }

    if selected != 'all':
        plan = [(method_name, selected)]
    else:
        plan = [
            ('MultiGaussClassify', 'Boston50'),
            ('MultiGaussClassify', 'Boston75'),
            ('MultiGaussClassify', 'Digits'),
            ('LogisticRegression', 'Boston50'),
            ('LogisticRegression', 'Boston75'),
            ('LogisticRegression', 'Digits'),
        ]

    for pair in plan:
        label, data_name = pair
        estimator, features, targets = experiments[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(label, data_name))
        fold_scores = my_cross_val(estimator, features, targets, k)
        report(label, data_name, fold_scores, latex=latex)
示例#7
0
def q3(argv=None):
    """Cross-validate a tuned MyFLDA2 and LogisticRegression on Boston data.

    ``argv`` goes to ``wrapper_args``.  Before any evaluation,
    ``find_best_myflda2`` is called once for Boston50 and once for
    Boston75, whichever pair was requested, and its results serve as the
    MyFLDA2 estimators.  A dataset of ``'all'`` runs all four pairs,
    otherwise only the requested pair is run.
    """
    selected, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75'],
        ['MyFLDA2', 'LogisticRegression'])

    b50_x, b50_y = prepare_boston50()
    b75_x, b75_y = prepare_boston75()

    # Tune MyFLDA2 on each dataset up front, announcing progress as we go.
    print('Finding optimal threshold for MyFLDA2 on Boston50 ...')
    flda_b50 = find_best_myflda2(MyFLDA2, b50_x, b50_y, k)

    print('Finding optimal threshold for MyFLDA2 on Boston75 ...')
    flda_b75 = find_best_myflda2(MyFLDA2, b75_x, b75_y, k)

    print('Done.')

    experiments = {
        ('MyFLDA2', 'Boston50'): (flda_b50, b50_x, b50_y),
        ('MyFLDA2', 'Boston75'): (flda_b75, b75_x, b75_y),
        ('LogisticRegression', 'Boston50'):
            (LogisticRegression(), b50_x, b50_y),
        ('LogisticRegression', 'Boston75'):
            (LogisticRegression(), b75_x, b75_y),
    }

    if selected == 'all':
        plan = [
            ('MyFLDA2', 'Boston50'),
            ('MyFLDA2', 'Boston75'),
            ('LogisticRegression', 'Boston50'),
            ('LogisticRegression', 'Boston75'),
        ]
    else:
        plan = [(method_name, selected)]

    for pair in plan:
        label, data_name = pair
        estimator, features, targets = experiments[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(label, data_name))
        fold_scores = my_cross_val(estimator, features, targets, k)
        report(label, data_name, fold_scores, latex=latex)