def reset(self, params, rep):
    """Load the dataset named in ``params`` and select fold ``rep``.

    Reads ``params['dataset']`` to decide what to load:

    * names starting with ``'toy'`` have the form ``toy-<data_dims>-<cone_dims>``
      and are generated with ``make_data``; ``self.dimensions`` becomes
      ``[cone_dims]``;
    * names starting with ``'wn'`` are read from an svmlight ``.mat`` file
      under a hard-coded relative directory; ``self.dimensions`` is taken
      from ``params['dimensions']``;
    * any other name is passed to ``fetch_mldata``; ``self.dimensions`` is
      taken from ``params['dimensions']``.

    A shuffled ``KFold`` with ``params['repetitions']`` folds is then built
    over the dataset and fold number ``rep`` is used to fill
    ``self.X_train``, ``self.X_test``, ``self.y_train`` and ``self.y_test``.

    Args:
        params: Mapping with at least ``'dataset'`` and ``'repetitions'``
            (plus ``'dimensions'`` for non-toy datasets).
        rep: Index of the fold to use.

    Raises:
        ValueError: If a toy name does not carry exactly two integer fields.
        IndexError: If ``rep`` does not address an existing fold.
    """
    name = params['dataset']
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(params)

    # NOTE(review): hash() of a str is salted per process on Python 3 unless
    # PYTHONHASHSEED is fixed, so this seed is only stable across runs on
    # Python 2 or with a pinned hash seed. Left unchanged so existing
    # experiment seeds are not altered.
    random.seed(abs(hash(str(params))))

    if name.startswith('toy'):
        # Everything after the 'toy' prefix: "<data_dims>-<cone_dims>".
        data_dims, cone_dims = [int(field) for field in name.split('-')[1:]]
        self.dimensions = [cone_dims]
        self.dataset = make_data(data_dims, cone_dims)
    elif name.startswith('wn'):
        self.dimensions = params['dimensions']
        self.dataset = SvmlightDataset(
            load_svmlight_file(
                '../../../Documents/conewordnetdata/data-nouns-deps-mi/'
                + name + '.mat'))
        print(self.dataset.target.shape)
        print(self.dataset.data.shape)
    else:
        self.dimensions = params['dimensions']
        self.dataset = fetch_mldata(name)

    # Ensure that the data is always shuffled the same way: seed the fold
    # shuffling from a digest of the data itself (first 7 hex digits of SHA-1).
    seed = int(hashlib.sha1(self.dataset.data).hexdigest()[:7], 16)

    self.cv = KFold(k=params['repetitions'],
                    n=self.dataset.target.shape[0],
                    shuffle=True,
                    random_state=seed)

    train, test = list(self.cv)[rep]
    # Same "<n_train> <n_test>" output as the former two-item print statement.
    print('%d %d' % (len(train), len(test)))

    self.X_train = self.dataset.data[train]
    self.X_test = self.dataset.data[test]
    self.y_train = self.dataset.target[train]
    self.y_test = self.dataset.target[test]
def generator(data_dims, cone_dims, num_instances=1000):
    """Return the result of ``make_data`` for ``num_instances`` items.

    ``epsilon`` is not a parameter of this function: it is a free variable
    looked up in the enclosing/module scope when the function is called and
    forwarded to ``make_data`` unchanged.
    """
    generated = make_data(
        data_dims,
        cone_dims,
        size=num_instances,
        epsilon=epsilon,
    )
    return generated
def generateNoisyTestData(self, data_dims, cone_dims, num_instances=1000,
                          noise=0.1):
    """Return the result of ``make_data`` called with a ``noise`` setting.

    Args:
        data_dims: Forwarded to ``make_data`` as its first argument.
        cone_dims: Forwarded to ``make_data`` as its second argument.
        num_instances: Forwarded to ``make_data`` as ``size``.
        noise: Forwarded to ``make_data`` as ``noise``. Defaults to ``0.1``,
            the value that was previously hard-coded, so existing callers
            get the same data as before.

    Returns:
        Whatever ``make_data`` returns for these arguments.
    """
    return make_data(data_dims, cone_dims, size=num_instances, noise=noise)