def load_sinus():
    '''
    Build the synthetic "sinus" data set.

    Calls DataDistributions.generateSinusCluster twice with the same
    distribution parameters: once with the base size for the training
    part and once with three times that size for the test part.

    :return: Returns 4 values: x, x_test, y, y_test
    '''
    n_train = 1000
    # Shared positional arguments, in order: location, scale, amplitude, freq.
    cluster_args = (0.0, 0.5, 0.3, 3.5)

    x, y = DataDistributions.generateSinusCluster(n_train, *cluster_args)
    x_test, y_test = DataDistributions.generateSinusCluster(n_train * 3, *cluster_args)
    return x, x_test, y, y_test
def load_data(dataType):
    '''
    Load a named data set and hand back its training and test parts.

    Names with a dedicated branch ("sinus", "cod-rna", "covtype", "banana",
    "cluster", "clusterx", "clustery", "mnist", "shuttle") are handled
    explicitly; every other name is passed on to load_libsvm_file.

    :param dataType: String for the name of the data.
    :return: Returns 4 values as a tuple: x, x_test, y, y_test
    '''
    # Each branch produces one 4-element result; it is unpacked once below.
    if dataType == "sinus":
        split = load_sinus()
    elif dataType == "cod-rna":
        split = load_codrna()
    elif dataType == "covtype":
        split = load_covtype()
    elif dataType == "banana":
        split = load_banana()
    # NOTE: there is no dedicated "skin" branch (it was disabled), so "skin"
    # ends up in the generic load_libsvm_file fallback at the bottom.
    elif dataType == "cluster":
        split = DataDistributions.generateNonLinearClusters(10000)
    elif dataType == "clusterx":
        split = DataDistributions.generateNonLinearClusters(10000, x_noise=True)
    elif dataType == "clustery":
        split = DataDistributions.generateNonLinearClusters(10000, y_noise=True)
    elif dataType == "mnist":
        mnist = fetch_mldata('MNIST original')
        features = mnist.data
        # Collapse the labels to two classes: values > 0 become 1, the rest -1.
        labels = np.where(mnist.target > 0, 1, -1)
        split = cv.train_test_split(features, labels, train_size=0.7)
    elif dataType == "shuttle":
        train_x, test_x, train_y, test_y = load_libsvm_file(dataType)
        # Relabel as 1 vs. everything else (-1) for both parts.
        train_y = np.where(train_y != 1, -1, 1)
        test_y = np.where(test_y != 1, -1, 1)
        split = (train_x, test_x, train_y, test_y)
    else:
        split = load_libsvm_file(dataType)

    x, x_test, y, y_test = split
    return x, x_test, y, y_test