示例#1
0
def load_sinus():
    """
    Build the synthetic "sinus" data set.

    Both splits are produced by DataDistributions.generateSinusCluster using
    the same location, scale, amplitude and frequency values; the call for the
    test split is given three times the training size.

    :return: Tuple of 4 values: x, x_test, y, y_test
    """
    n_train = 1000
    # Shared positional arguments: location, scale, amplitude, freq.
    cluster_params = (0.0, 0.5, 0.3, 3.5)

    x, y = DataDistributions.generateSinusCluster(n_train, *cluster_params)
    x_test, y_test = DataDistributions.generateSinusCluster(n_train * 3, *cluster_params)
    return x, x_test, y, y_test
示例#2
0
def load_data(dataType):
    """
    Load a named data set and return its training and test split.

    "sinus", "cod-rna", "covtype" and "banana" are delegated to their
    dedicated loaders; "cluster", "clusterx" and "clustery" are generated via
    DataDistributions.generateNonLinearClusters; "mnist" is fetched with
    fetch_mldata and split 70/30. Every other name (including "shuttle") is
    handed to load_libsvm_file.

    :param dataType: String with the name of the data set.
    :return: Tuple of 4 values: x, x_test, y, y_test
    """
    if dataType == "sinus":
        split = load_sinus()
    elif dataType == "cod-rna":
        split = load_codrna()
    elif dataType == "covtype":
        split = load_covtype()
    elif dataType == "banana":
        split = load_banana()
    elif dataType == "cluster":
        split = DataDistributions.generateNonLinearClusters(10000)
    elif dataType == "clusterx":
        split = DataDistributions.generateNonLinearClusters(10000, x_noise=True)
    elif dataType == "clustery":
        split = DataDistributions.generateNonLinearClusters(10000, y_noise=True)
    elif dataType == "mnist":
        mnist = fetch_mldata('MNIST original')
        features = mnist.data
        # Binarise the labels: anything greater than 0 becomes 1, the rest -1.
        labels = np.where(mnist.target > 0, 1, -1)
        split = cv.train_test_split(features, labels, train_size=0.7)
    else:
        # No dedicated branch for this name (the "skin" loader is disabled,
        # so "skin" ends up here as well): defer to the libsvm file loader.
        split = load_libsvm_file(dataType)

    x, x_test, y, y_test = split

    if dataType == "shuttle":
        # Collapse the labels to two classes: label 1 stays 1, all others -1.
        y = np.where(y != 1, -1, 1)
        y_test = np.where(y_test != 1, -1, 1)

    return x, x_test, y, y_test