示例#1
0
def red_wine_quality(test_size=0.2):
    """Load the red-wine CSV and pass features/target to ``normalize_split``.

    The file is read as semicolon-delimited text with its first line
    skipped.  Columns 0-10 become the features and column 11 the target.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    table = np.loadtxt(
        "data/regression/wine_quality/winequality-red.csv",
        delimiter=';',
        skiprows=1,
    )
    features = table[:, :11]
    target = table[:, 11]
    return data_util.normalize_split(features, target, test_size)
示例#2
0
def facebook_metric(test_size=0.2):
    """Load the Facebook metrics CSV and pass it to ``normalize_split``.

    The file is read as semicolon-delimited text with its first line
    skipped.  Column 1 is run through the ``post_type`` converter (defined
    outside this function) so that it can be parsed as a number.  Columns
    0-17 become the features and column 18 the target.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    column_converters = {1: post_type}
    table = np.loadtxt(
        "data/regression/facebook_metrics/dataset_Facebook.csv",
        delimiter=';',
        skiprows=1,
        converters=column_converters,
    )
    features = table[:, :18]
    target = table[:, 18]
    return data_util.normalize_split(features, target, test_size)
示例#3
0
def breast_cancer(test_size=0.2):
    """Load the WDBC data file and pass it to ``normalize_split``.

    The file is read as comma-delimited text without skipping any line.
    Column 1 is run through the ``cancer_type_num`` converter (defined
    outside this function) and used as the target; columns 2-31 are the
    features.  Column 0 is not used.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    column_converters = {1: cancer_type_num}
    table = np.loadtxt(
        "data/classification/breast_cancer/wdbc.data",
        delimiter=',',
        skiprows=0,
        converters=column_converters,
    )
    features = table[:, 2:32]
    labels = table[:, 1]
    return data_util.normalize_split(features, labels, test_size)
示例#4
0
def community_crime(test_size=0.2):
    """Load the communities data file and pass it to ``normalize_split``.

    Of the 128 columns in the file, columns 0-4, 101-117 and 121-126 are
    dropped, which leaves 100 columns.  The first 99 of the remaining
    columns are the features and the last one (file column 127) is the
    target.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    excluded = set(range(0, 5)) | set(range(101, 118)) | set(range(121, 127))
    # Sort explicitly: a bare set has no guaranteed iteration order, and the
    # feature/target slicing below relies on the columns arriving in
    # ascending file order.
    used_cols = sorted(set(range(0, 128)) - excluded)
    data = np.loadtxt("data/regression/communities.data",
                      delimiter=',',
                      usecols=used_cols)
    x, y = data[:, :99], data[:, 99]

    return data_util.normalize_split(x, y, test_size)
示例#5
0
def gpu(test_size=0.2):
    """Load the SGEMM GPU CSV and pass it to ``normalize_split``.

    Columns 14-17 (the 4 runs) are collapsed into their row-wise median,
    the base-10 logarithm of that value is stored in a new ``average``
    column, and column 18 is used as the target.  Columns 0-13 are the
    features.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    frame = pd.read_csv('data/regression/gpu/sgemm_product.csv')

    # NOTE: despite the column name "average", the value is the *median* of
    # the 4 runs; the logarithm is taken as suggested in the data set readme.
    run_median = frame.iloc[:, 14:18].median(axis=1)
    frame['average'] = run_median.apply(np.log10)

    features = frame.iloc[:, :14]
    # presumably the CSV has 18 columns, so index 18 is the column added
    # above -- TODO confirm against the data file
    target = frame.iloc[:, 18]

    return data_util.normalize_split(features, target, test_size)
示例#6
0
def yeast(test_size=0.2):
    """Load the yeast data file and pass it to ``normalize_split``.

    The file is parsed as whitespace-delimited text into 10 columns named
    0-9.  All object-typed columns are ordinal-encoded; the second encoded
    column is used as the target (presumably the class label -- verify
    against the data file).  Columns 1-8 are the features.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    # Raw string: a plain '\s+' is an invalid escape sequence and triggers a
    # DeprecationWarning/SyntaxWarning on current Python versions.
    data = pd.read_csv('data/classification/yeast/yeast.data',
                       delimiter=r'\s+',
                       names=range(10))

    # encode target classes as numbers
    text_columns = data.select_dtypes(include=object)
    enc = preprocessing.OrdinalEncoder()
    encoded = enc.fit_transform(text_columns)

    x = data.iloc[:, 1:9]
    y = encoded[:, 1]

    return data_util.normalize_split(x, y, test_size)
示例#7
0
def seismic_bumps(test_size=0.2):
    """Load the seismic-bumps ARFF file and pass it to ``normalize_split``.

    The first 155 lines of the file are skipped and the rest is read as
    comma-delimited text.  Columns 0, 1 and 7 go through the
    ``seismic_level`` converter and column 2 through ``shift`` (both defined
    outside this function).  Columns 0-17 are the features and column 18 the
    target; both are cast to ``int``.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    column_converters = {
        0: seismic_level,
        1: seismic_level,
        2: shift,
        7: seismic_level,
    }
    raw = np.loadtxt(
        "data/classification/seismic-bumps.arff",
        delimiter=',',
        skiprows=155,
        converters=column_converters,
    )
    features = raw[:, :18].astype(int)
    labels = raw[:, 18].astype(int)
    return data_util.normalize_split(features, labels, test_size)
示例#8
0
def _load_molecular_csv(csv_path, npz_path):
    """Parse one competition CSV into ``(X, y)`` and save both to *npz_path*.

    The header line is read only to count the columns.  Column 1 becomes
    ``y``; columns 2 onward become ``X`` with dtype ``uint8``.  Column 0 is
    not used.  The arrays are written with ``np.savez`` under the default
    keys ``arr_0`` (X) and ``arr_1`` (y).
    """
    with open(csv_path) as header_file:
        n_cols = len(header_file.readline().rstrip('\n').split(','))

    X = np.loadtxt(csv_path,
                   delimiter=',',
                   usecols=range(2, n_cols),
                   skiprows=1,
                   dtype=np.uint8)
    y = np.loadtxt(csv_path, delimiter=',', usecols=[1], skiprows=1)
    np.savez(npz_path, X, y)
    return X, y


def molecular(test_size=0.2):
    """Load the ACT4 and ACT2 training sets and split each one.

    Both CSV files are parsed on every call and, as a side effect, written
    to ``act4.npz`` and ``act2.npz`` in the current working directory.  The
    in-memory arrays are used directly instead of being re-read from those
    files: the round trip is lossless, and re-reading left the ``np.load``
    file handles open.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        An 8-tuple: the four values unpacked from ``normalize_split`` for
        ACT4 (``x_train, x_test, y_train, y_test``) followed by the same
        four for ACT2.
    """
    x4, y4 = _load_molecular_csv(
        'data/regression/molecular/ACT4_competition_training.csv',
        'act4.npz')
    x2, y2 = _load_molecular_csv(
        'data/regression/molecular/ACT2_competition_training.csv',
        'act2.npz')

    x4_train, x4_test, y4_train, y4_test = data_util.normalize_split(
        x4, y4, test_size)
    x2_train, x2_test, y2_train, y2_test = data_util.normalize_split(
        x2, y2, test_size)

    return (x4_train, x4_test, y4_train, y4_test,
            x2_train, x2_test, y2_train, y2_test)
示例#9
0
def plates(test_size=0.2):
    """Load the plate-faults file and pass it to ``normalize_split``.

    The file is read as tab-delimited text.  Columns 0-26 are the features.
    The remaining columns are reduced to a single class index per non-zero
    entry: the column position (within that trailing slice) of each
    non-zero value.
    NOTE(review): this presumably relies on every row having exactly one
    non-zero entry there -- confirm against the data file.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y_class, test_size)``.
    """
    table = np.loadtxt('data/classification/Plates/Faults.NNA', delimiter='\t')
    features = table[:, :27]
    indicator_columns = table[:, 27:]
    # np.nonzero yields (row_indices, column_indices); only the column
    # indices are needed as the simple coding of the target data.
    class_index = np.nonzero(indicator_columns)[1]
    return data_util.normalize_split(features, class_index, test_size)
示例#10
0
def QSAR(test_size=0.2):
    """Load the QSAR aquatic toxicity CSV and pass it to ``normalize_split``.

    The file is read as semicolon-delimited text with no line skipped.
    Columns 0-8 are the features and column 9 the target.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    table = np.loadtxt(
        "data/regression/qsar_aquatic_toxicity.csv",
        delimiter=';',
    )
    features = table[:, :9]
    target = table[:, 9]
    return data_util.normalize_split(features, target, test_size)
示例#11
0
def concrete(test_size=0.2):
    """Load the concrete Excel sheet and pass it to ``normalize_split``.

    Columns 0-7 of the sheet are the features and column 8 the target; both
    are handed on as pandas objects.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    sheet = pd.read_excel('data/regression/concrete/Concrete_Data.xls')
    features = sheet.iloc[:, :8]
    target = sheet.iloc[:, 8]
    return data_util.normalize_split(features, target, test_size)
示例#12
0
def credit_card_client(test_size=0.2):
    """Load the credit-card clients CSV and pass it to ``normalize_split``.

    The file is read as comma-delimited text with its first two lines
    skipped.  Columns 1-23 are the features and column 24 the target;
    column 0 is not used.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    table = np.loadtxt(
        "data/classification/default_credit_card_clients.csv",
        delimiter=',',
        skiprows=2,
    )
    features = table[:, 1:24]
    labels = table[:, 24]
    return data_util.normalize_split(features, labels, test_size)
示例#13
0
def statlog_australian(test_size=0.2):
    """Load the Australian Statlog file and pass it to ``normalize_split``.

    The file is read with numpy's default (whitespace) delimiter and no
    line skipped.  Columns 0-13 are the features and column 14 the target.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    table = np.loadtxt("data/classification/australian.dat", skiprows=0)
    features = table[:, :14]
    labels = table[:, 14]
    return data_util.normalize_split(features, labels, test_size)
示例#14
0
def statlog_german(test_size=0.2):
    """Load the numeric German Statlog file and pass it to ``normalize_split``.

    The file is read with numpy's default (whitespace) delimiter and no
    line skipped.  Columns 0-23 are the features and column 24 the target;
    both are cast to ``int``.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    table = np.loadtxt("data/classification/german.data-numeric", skiprows=0)
    features = table[:, :24].astype(int)
    labels = table[:, 24].astype(int)
    return data_util.normalize_split(features, labels, test_size)
示例#15
0
def diabetic_retinopathy(test_size=0.2):
    """Load the Messidor features ARFF file and pass it to ``normalize_split``.

    The first 24 lines of the file are skipped and the rest is read as
    comma-delimited text.  Columns 0-17 are the features and column 18 the
    target.

    Args:
        test_size: Forwarded unchanged to ``data_util.normalize_split``.

    Returns:
        The result of ``data_util.normalize_split(x, y, test_size)``.
    """
    table = np.loadtxt(
        "data/classification/messidor_features.arff",
        delimiter=',',
        skiprows=24,
    )
    features = table[:, :18]
    labels = table[:, 18]
    return data_util.normalize_split(features, labels, test_size)