Пример #1
0
def wine_net(seed, r, nn_type, options=None):
    """Train and evaluate networks on the wine-quality data set.

    Args:
        seed: Passed to ``np.random.seed`` (once up front and again before
            every training run) and used to name the per-seed directory
            ``attempts/nn-exports/wine/<seed>/``.
        r: Stored under ``'perc_sample'`` in the options handed to
            ``util.prep_data``.
        nn_type: ``'cat'`` selects the categorised CSV with ``'Reviews'`` as
            the target column and ``'quality'`` excluded; any other value
            selects the raw CSV with ``'quality'`` as the target. It is also
            forwarded to ``write_results`` as ``nn_type + '-'``.
        options: Optional settings. ``'class_types'`` (fallback ``['net']``)
            and ``'layers'`` (fallback ``[(5, 6)]``) are looked up through
            ``util.myget``.

    Returns:
        dict: ``'<class_type>_train'`` / ``'<class_type>_test'`` mapped to
        the results returned by ``train_and_test``. When several layer
        configurations are given, only the last one per class type is kept
        because each run overwrites the same keys.
    """
    # Compare by value: `is` on a string literal tests object identity and
    # only works by accident of interning.
    if nn_type == 'cat':
        filename = '../resources/winequality/winequality-combined-cat.csv'
        y_key = 'Reviews'
        split_options = {'exclude-keys': ['quality']}
    else:
        filename = '../resources/winequality/winequality-combined.csv'
        y_key = 'quality'
        split_options = {'exclude-keys': []}
    split_options['perc_sample'] = r

    export_dir = 'nn-exports/wine/'
    seed_dir = 'attempts/' + export_dir + str(seed) + '/'
    np.random.seed(seed)

    options = util.default(options, {})
    class_types = util.myget(options, 'class_types', ['net'])
    layers = util.myget(options, 'layers', [(5, 6)])

    data = util.get_data(filename)
    split_data = util.prep_data(data, y_key, split_options)

    net_results = {}
    for class_type in class_types:
        for layer in layers:
            # Re-seed so every configuration starts from the same RNG state.
            np.random.seed(seed)
            start_time = datetime.datetime.now()
            train_results, test_results = train_and_test(
                seed_dir, split_data, class_type, layer)
            net_results[class_type + '_train'] = train_results
            net_results[class_type + '_test'] = test_results
            total_time = datetime.datetime.now() - start_time
            write_results(seed, class_type, export_dir,
                          train_results['error-perc'],
                          test_results['error-perc'], total_time,
                          len(split_data[0]), layer, nn_type + '-')
    # Hand the collected results back to the caller, matching
    # breast_cancer_net, instead of discarding them.
    return net_results
Пример #2
0
def breast_cancer_net(seed, r, options=None):
    """Train and evaluate networks on the breast-cancer (WDBC) data set.

    Args:
        seed: Passed to ``np.random.seed`` (up front and before each run)
            and used to name ``attempts/nn-exports/breast_cancer/<seed>/``.
        r: Stored under ``'perc_sample'`` in the options handed to
            ``util.prep_data``.
        options: Optional settings. ``'class_types'`` (fallback ``['net']``)
            and ``'layers'`` (fallback ``[(5, 6)]``) are looked up through
            ``util.myget``.

    Returns:
        dict: ``'<class_type>_train'`` / ``'<class_type>_test'`` mapped to
        the results from ``train_and_test``; later layer configurations
        overwrite earlier ones for the same class type.
    """
    export_dir = 'nn-exports/breast_cancer/'
    attempt_dir = ''.join(('attempts/', export_dir, str(seed), '/'))
    np.random.seed(seed)

    settings = util.default(options, {})
    model_kinds = util.myget(settings, 'class_types', ['net'])
    layer_configs = util.myget(settings, 'layers', [(5, 6)])

    raw = util.get_data('../resources/breast-cancer/wdbc.data.csv')
    encoded = util.split_feature(raw, 'diagnosis')
    prep_options = {
        'exclude-keys': ['id', 'diagnosis_B', 'diagnosis'],
        'perc_sample': r,
    }
    split_data = util.prep_data(encoded, 'diagnosis_M', prep_options)

    collected = {}
    for kind in model_kinds:
        for layer_config in layer_configs:
            # Same RNG state at the start of every configuration.
            np.random.seed(seed)
            started = datetime.datetime.now()
            outcome = train_and_test(
                attempt_dir, split_data, kind, layer_config)
            train_results, test_results = outcome
            collected[kind + '_train'] = train_results
            collected[kind + '_test'] = test_results
            elapsed = datetime.datetime.now() - started
            write_results(
                seed, kind, export_dir,
                train_results['error-perc'], test_results['error-perc'],
                elapsed, len(split_data[0]), layer_config)
    return collected
Пример #3
0
def main():
    """Print duplicate-row counts for the combined wine-quality CSV.

    Two tallies are printed: ``.duplicated()`` over every column except
    ``'quality'``, then ``.duplicated()`` over all columns.

    Returns:
        The ``.duplicated()`` result computed without the ``'quality'``
        column.
    """
    # For a small trial run, point this at
    # '../resources/winequality/dummy-data.csv' and exclude 'stuff3' instead.
    csv_path = '../resources/winequality/winequality-combined.csv'
    ignored_column = 'quality'

    frame = util.get_data(csv_path)

    keep_mask = frame.columns != ignored_column
    feature_dupes = frame.loc[:, keep_mask].duplicated()
    print(feature_dupes.value_counts())

    row_dupes = frame.duplicated()
    print(row_dupes.value_counts())

    # NOTE(review): the sorted frame is not used after this point.
    frame = frame.sort_values(by=list(frame.columns))
    return feature_dupes
Пример #4
0
def wine_report_prep(level):
    """Load the wine-quality data and its split configuration for a report.

    Args:
        level: ``'cat'`` selects the categorised CSV with ``'Reviews'`` as
            the target and ``'quality'`` excluded; any other value selects
            the raw CSV with ``'quality'`` as the target and no exclusions.

    Returns:
        tuple: ``(data, y_key, split_options)`` where ``data`` is whatever
        ``util.get_data`` returns for the chosen file.
    """
    # Compare by value: `is` on a string literal tests object identity and
    # only works by accident of interning.
    if level == 'cat':
        filename = '../resources/winequality/winequality-combined-cat.csv'
        y_key = 'Reviews'
        split_options = {'exclude-keys': ['quality']}
    else:
        filename = '../resources/winequality/winequality-combined.csv'
        y_key = 'quality'
        split_options = {'exclude-keys': []}
    data = util.get_data(filename)
    return data, y_key, split_options
Пример #5
0
def main2():
    """Add a three-level ``'Reviews'`` column and write the categorised CSV.

    ``'quality'`` values are bucketed as 1-3 -> ``'1'``, 4-7 -> ``'2'`` and
    8-10 -> ``'3'``. A value that falls in none of these bands contributes
    no label, so the label list can end up shorter than the data
    (NOTE(review): the column assignment would presumably fail in that case;
    confirm the input only holds values covered by the bands).
    """
    # (low, high, label); bounds are inclusive.
    bands = ((1, 3, '1'), (4, 7, '2'), (8, 10, '3'))

    def label_for(score):
        """Return the label of the first band containing score, else None."""
        for low, high, label in bands:
            if score >= low and score <= high:
                return label
        return None

    frame = util.get_data('../resources/winequality/winequality-combined.csv')
    candidates = (label_for(score) for score in frame['quality'])
    frame['Reviews'] = [label for label in candidates if label is not None]
    frame.to_csv('../resources/winequality/winequality-combined-cat.csv')
Пример #6
0
def breast_cancer_prep():
    """Load the WDBC CSV and run ``util.split_feature`` on ``'diagnosis'``.

    Returns:
        The value returned by ``util.split_feature`` for the loaded data.
    """
    source_path = '../resources/breast-cancer/wdbc.data.csv'
    return util.split_feature(util.get_data(source_path), 'diagnosis')