示例#1
0
def q4(argv=None):
    """Cross-validate the logistic-regression variants on the Digits data.

    The command-line style arguments are parsed by ``wrapper_args``; the
    resulting ``k`` is handed to ``my_cross_val`` unchanged and ``latex`` is
    forwarded to ``report``.

    Args:
        argv: Argument list passed straight to ``wrapper_args``
            (``None`` by default).

    Raises:
        KeyError: If the selected ``(method_name, dataset)`` pair is not one
            of the combinations registered below.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q4',
        ['Digits'],
        ['MyLogisticRegGen', 'LogisticRegression'])

    Digits_X, Digits_y = prepare_digits(want_noise=False)

    # Every estimator is built up front, in the order it is reported.
    estimators = [
        ('MyLogisticRegGen', MyLogisticRegGen(verbose=False)),
        ('LogisticRegression', LogisticRegression()),
    ]
    default_order = [(label, 'Digits') for label, _ in estimators]
    methods = {
        (label, 'Digits'): (estimator, Digits_X, Digits_y)
        for label, estimator in estimators
    }

    # 'all' runs every registered pair; otherwise only the requested one.
    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for pair in order:
        method_label, dataset_label = pair
        estimator, features, targets = methods[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(*pair))
        scores = my_cross_val(estimator, features, targets, k)
        report(method_label, dataset_label, scores, latex=latex)
示例#2
0
def q3(argv=None):
    """Cross-validate MultiGaussClassify and LogisticRegression classifiers.

    Three datasets are prepared (Boston50, Boston75, Digits) and paired with
    three methods: MultiGaussClassify built with two arguments, the same
    class built with a third positional ``True``, and LogisticRegression.
    Each selected pair is scored with ``my_cross_val`` and handed to
    ``report``.

    Each MultiGaussClassify instance is built from the class count and
    feature count of the dataset it is paired with. Previously the Boston75
    entries took their feature count from the Boston50 matrix.

    Args:
        argv: Argument list passed straight to ``wrapper_args``
            (``None`` by default).

    Raises:
        KeyError: If the selected ``(method_name, dataset)`` pair is not one
            of the nine combinations registered below.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    x_boston, y_boston = load_dataset(load_boston)
    x_digits, y_digits = prepare_digits(True)
    x_boston_50, y_boston_50 = percentileAssignment(50, x_boston, y_boston)
    x_boston_75, y_boston_75 = percentileAssignment(75, x_boston, y_boston)

    data = [
        ('Boston50', x_boston_50, y_boston_50),
        ('Boston75', x_boston_75, y_boston_75),
        ('Digits', x_digits, y_digits),
    ]

    # Each factory receives the dataset it will be evaluated on, so the
    # class count and feature count always come from that same dataset.
    factories = [
        ('MultiGaussClassify_WithFullMatrix',
         lambda X, y: MultiGaussClassify(len(np.unique(y)), X.shape[1])),
        ('MultiGaussClassify_WithDiagonal',
         lambda X, y: MultiGaussClassify(len(np.unique(y)), X.shape[1],
                                         True)),
        ('LogisticRegression',
         lambda X, y: LogisticRegression()),
    ]

    # Method-major ordering: all datasets for the first method, then the
    # next method, and so on.
    default_order = [(method_label, data_label)
                     for method_label, _ in factories
                     for data_label, _, _ in data]

    methods = {
        (method_label, data_label): (build(X, y), X, y)
        for method_label, build in factories
        for data_label, X, y in data
    }

    if dataset == 'all':
        order = default_order
    else:
        order = [(method_name, dataset)]

    for key in order:
        method_label, data_label = key
        estimator, X, y = methods[key]
        print('==============')
        print('method: {}, dataset: {}'.format(method_label, data_label))
        scores = my_cross_val(estimator, X, y, k)
        # NOTE(review): the parsed `latex` flag is not forwarded; the fourth
        # positional argument of report() stays hard-coded to True. If that
        # parameter is the latex switch, pass `latex` instead -- confirm
        # against report()'s signature.
        report(method_label, data_label, scores, True)
示例#3
0
def q3i(argv=None):
    """Cross-validate LinearSVC, SVC and LogisticRegression on three datasets.

    Arguments are parsed by ``wrapper_args``. Every method/dataset pair gets
    its own freshly constructed estimator. The selected pairs are scored with
    ``my_cross_val`` and passed to ``report`` together with the parsed
    ``latex`` flag.

    Args:
        argv: Argument list passed straight to ``wrapper_args``
            (``None`` by default).

    Raises:
        KeyError: If the selected ``(method_name, dataset)`` pair is not one
            of the nine combinations registered below.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3i', ['Boston50', 'Boston75', 'Digits'])

    Boston50_X, Boston50_y = prepare_boston50()
    Boston75_X, Boston75_y = prepare_boston75()
    Digits_X, Digits_y = prepare_digits()

    data = (
        ('Boston50', Boston50_X, Boston50_y),
        ('Boston75', Boston75_X, Boston75_y),
        ('Digits', Digits_X, Digits_y),
    )
    classifiers = (
        ('LinearSVC', LinearSVC),
        ('SVC', SVC),
        ('LogisticRegression', LogisticRegression),
    )

    # Method-major ordering: each classifier is paired with every dataset
    # before moving on to the next classifier.
    default_order = [(clf_label, data_label)
                     for clf_label, _ in classifiers
                     for data_label, _, _ in data]

    # One new estimator instance per (classifier, dataset) pair.
    methods = {
        (clf_label, data_label): (make(), X, y)
        for clf_label, make in classifiers
        for data_label, X, y in data
    }

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for pair in order:
        clf_label, data_label = pair
        estimator, X, y = methods[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(*pair))
        scores = my_cross_val(estimator, X, y, k)
        report(clf_label, data_label, scores, latex=latex)
示例#4
0
def q3(argv=None):
    """Cross-validate MultiGaussClassify against LogisticRegression.

    Both methods are registered for Boston50, Boston75 and Digits. The
    MultiGaussClassify instance for Digits is built with ``linear=False``;
    the Boston ones use the default constructor. Selected pairs are scored
    with ``my_cross_val`` and passed to ``report`` with the parsed ``latex``
    flag.

    Args:
        argv: Argument list passed straight to ``wrapper_args``
            (``None`` by default).

    Raises:
        KeyError: If the selected ``(method_name, dataset)`` pair is not one
            of the six combinations registered below.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    Boston50_X, Boston50_y = prepare_boston50()
    Boston75_X, Boston75_y = prepare_boston75()
    # prepare_digits adds Gaussian noise to the data to avoid singular
    # covariance matrices; see datasets.prepare_digits for details.
    Digits_X, Digits_y = prepare_digits()

    # (dataset label, X, y, keyword arguments for MultiGaussClassify)
    data = (
        ('Boston50', Boston50_X, Boston50_y, {}),
        ('Boston75', Boston75_X, Boston75_y, {}),
        ('Digits', Digits_X, Digits_y, {'linear': False}),
    )

    methods = {}
    for data_label, X, y, gauss_kwargs in data:
        methods[('MultiGaussClassify', data_label)] = (
            MultiGaussClassify(**gauss_kwargs), X, y)
    for data_label, X, y, _ in data:
        methods[('LogisticRegression', data_label)] = (
            LogisticRegression(), X, y)

    # Insertion order of the dict is the default run order:
    # MultiGaussClassify on all datasets, then LogisticRegression.
    default_order = list(methods)

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for pair in order:
        method_label, data_label = pair
        estimator, X, y = methods[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(*pair))
        scores = my_cross_val(estimator, X, y, k)
        report(method_label, data_label, scores, latex=latex)
示例#5
0
def q3():
    """Score every classifier/dataset combination and print the results.

    Three datasets (Boston50, Boston25, Digits) are each evaluated with
    MultiGaussClassify built from the dataset's ``k`` and ``d`` values,
    MultiGaussClassify built with an extra positional ``True``, and
    LogisticRegression. Scores come from ``my_cross_val`` called with a
    fixed fourth argument of 5 and are printed via ``my_pretty_print``.
    """
    # Each prepare_* helper yields the data plus two values (k, d) that are
    # passed on to the MultiGaussClassify constructor.
    Boston50_X, Boston50_y, Boston50_k, Boston50_d = prepare_boston50()
    Boston25_X, Boston25_y, Boston25_k, Boston25_d = prepare_boston25()
    Digits_X, Digits_y, Digits_k, Digits_d = prepare_digits()

    data = (
        ('Boston50', Boston50_X, Boston50_y, Boston50_k, Boston50_d),
        ('Boston25', Boston25_X, Boston25_y, Boston25_k, Boston25_d),
        ('Digits', Digits_X, Digits_y, Digits_k, Digits_d),
    )

    builders = (
        ('MultiGaussClassify with full covariance matrix',
         lambda n_k, n_d: MultiGaussClassify(n_k, n_d)),
        ('MultiGaussClassify with diagonal covariance matrix',
         lambda n_k, n_d: MultiGaussClassify(n_k, n_d, True)),
        ('LogisticRegression',
         lambda n_k, n_d: LogisticRegression()),
    )

    # Method-major ordering; the dict's insertion order is the run order.
    methods = {
        (label, data_label): (build(n_k, n_d), X, y)
        for label, build in builders
        for data_label, X, y, n_k, n_d in data
    }

    for (label, data_label), (estimator, X, y) in methods.items():
        # Use the project's own cross-validation instead of a built-in one.
        scores = my_cross_val(estimator, X, y, 5)
        my_pretty_print(label, data_label, scores)