def get_dataset_dictionaries(self, lengths=None):
    """Generate independent train/test splits for several object-set sizes.

    Parameters
    ----------
    lengths : iterable of int, optional
        Object-set sizes (``n_objects``) to generate datasets for.
        Defaults to ``(5, 6)``.

    Returns
    -------
    tuple of dict
        ``(x_train, y_train, x_test, y_test)`` — each a dict keyed by the
        object-set size ``n_obj``.
    """
    # BUG FIX: the original signature used a mutable default ``lengths=[5, 6]``,
    # which is shared across all calls; replaced with a None sentinel
    # (backward-compatible — default behavior is unchanged).
    if lengths is None:
        lengths = (5, 6)
    x_train = dict()
    y_train = dict()
    x_test = dict()
    y_test = dict()
    for n_obj in lengths:
        self.kwargs["n_objects"] = n_obj
        # Fresh seed per size so each generated dataset is independent.
        seed = self.random_state.randint(2 ** 32, dtype="uint32")
        total_instances = self.n_test_instances + self.n_train_instances
        self.kwargs["n_instances"] = total_instances
        X, Y = self.dataset_function(**self.kwargs, seed=seed)
        x_1, x_2, y_1, y_2 = train_test_split(
            X, Y, random_state=self.random_state, test_size=self.n_test_instances
        )
        if self.standardize:
            x_1, x_2 = standardize_features(x_1, x_2)
        x_train[n_obj], x_test[n_obj], y_train[n_obj], y_test[n_obj] = (
            x_1,
            x_2,
            y_1,
            y_2,
        )
    logger.info("Done")
    return x_train, y_train, x_test, y_test
def get_single_train_test_split(self):
    """Generate the dataset once and return a single train/test split.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)``; features are standardized
        when ``self.standardize`` is set.
    """
    # dataset_function populates self.X / self.Y as a side effect.
    self.dataset_function()
    split = train_test_split(
        self.X,
        self.Y,
        random_state=self.random_state,
        test_size=self.n_test_instances,
    )
    x_train, x_test, y_train, y_test = split
    if self.standardize:
        x_train, x_test = standardize_features(x_train, x_test)
    self.logger.info('Done')
    return x_train, y_train, x_test, y_test
def splitter(self, iter):
    """Yield standardized ``(x_train, y_train, x_test, y_test)`` tuples.

    Parameters
    ----------
    iter : iterable of (train_idx, test_idx)
        Index pairs (e.g. from a cross-validation splitter) used to slice
        ``self.X`` / ``self.Y``.
    """
    # NOTE(review): the parameter name shadows the builtin ``iter``;
    # kept unchanged for caller compatibility.
    for train_idx, test_idx in iter:
        x_train = self.X[train_idx]
        y_train = self.Y[train_idx]
        x_test = self.X[test_idx]
        y_test = self.Y[test_idx]
        # Features are always standardized here (no self.standardize guard).
        x_train, x_test = standardize_features(x_train, x_test)
        yield x_train, y_train, x_test, y_test
def splitter(self, iter):
    """Yield one freshly generated train/test split per element of *iter*.

    Each iteration draws a new seed, regenerates the dataset and splits it,
    so successive folds come from independent samples.
    """
    # NOTE(review): ``iter`` shadows the builtin; name kept for compatibility.
    n_total = self.n_test_instances + self.n_train_instances
    for _ in iter:
        fold_seed = self.random_state.randint(2 ** 32, dtype="uint32")
        self.kwargs["n_instances"] = n_total
        X, Y = self.dataset_function(**self.kwargs, seed=fold_seed)
        x_train, x_test, y_train, y_test = train_test_split(
            X, Y, random_state=self.random_state, test_size=self.n_test_instances
        )
        if self.standardize:
            x_train, x_test = standardize_features(x_train, x_test)
        yield x_train, y_train, x_test, y_test
def get_dataset_dictionaries(self, lengths=None):
    """Build train/test splits for each object-set size in *lengths*.

    Parameters
    ----------
    lengths : iterable of int, optional
        Object-set sizes to generate datasets for. Defaults to ``(5, 6)``.

    Returns
    -------
    tuple of dict
        ``(x_train, y_train, x_test, y_test)``, each keyed by object-set size.
    """
    # BUG FIX: the original signature used a mutable default ``lengths=[5, 6]``,
    # shared across calls; replaced with a None sentinel (backward-compatible).
    if lengths is None:
        lengths = (5, 6)
    x_train = dict()
    y_train = dict()
    x_test = dict()
    y_test = dict()
    for n_obj in lengths:
        self.n_objects = n_obj
        # dataset_function regenerates self.X / self.Y for the current n_objects.
        self.dataset_function()
        x_1, x_2, y_1, y_2 = train_test_split(
            self.X,
            self.Y,
            random_state=self.random_state,
            test_size=self.n_test_instances,
        )
        if self.standardize:
            x_1, x_2 = standardize_features(x_1, x_2)
        x_train[n_obj], x_test[n_obj], y_train[n_obj], y_test[n_obj] = x_1, x_2, y_1, y_2
    self.logger.info('Done')
    return x_train, y_train, x_test, y_test
def get_single_train_test_split(self):
    """Generate one seeded dataset and split it into train and test parts.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)``; features are standardized
        when ``self.standardize`` is set.
    """
    draw = self.random_state.randint(2 ** 32, dtype="uint32")
    self.kwargs["n_instances"] = self.n_test_instances + self.n_train_instances
    self.X, self.Y = self.dataset_function(**self.kwargs, seed=draw)
    self.__check_dataset_validity__()
    x_train, x_test, y_train, y_test = train_test_split(
        self.X,
        self.Y,
        random_state=self.random_state,
        test_size=self.n_test_instances,
    )
    if self.standardize:
        x_train, x_test = standardize_features(x_train, x_test)
    logger.info("Done")
    return x_train, y_train, x_test, y_test
def get_single_train_test_split(self):
    """Split each per-size dataset, sub-sample training data, standardize.

    Every array in ``self.X_dict`` is split once (20% train / 80% test via
    ``test_size=0.80``); the per-size train/test dictionaries are stored on
    the instance, then a flat training set is drawn by sub-sampling.

    Returns
    -------
    tuple
        ``(self.X, self.Y, self.X_test, self.Y_test)``.
    """
    cv_iter = ShuffleSplit(n_splits=1, random_state=self.random_state, test_size=0.80)
    splits = dict()
    for n_obj, arr in self.X_dict.items():
        if arr.shape[0] == 1:
            # A single instance cannot be split; reuse it for train and test.
            splits[n_obj] = ([0], [0])
        else:
            splits[n_obj] = list(cv_iter.split(arr))[0]
    self.X_train = dict()
    self.Y_train = dict()
    self.X_test = dict()
    self.Y_test = dict()
    for n_obj, (train_idx, test_idx) in splits.items():
        # Copies so later mutation of the splits cannot alias the originals.
        self.X_train[n_obj] = np.copy(self.X_dict[n_obj][train_idx])
        self.X_test[n_obj] = np.copy(self.X_dict[n_obj][test_idx])
        self.Y_train[n_obj] = np.copy(self.Y_dict[n_obj][train_idx])
        self.Y_test[n_obj] = np.copy(self.Y_dict[n_obj][test_idx])
    self.X, self.Y = self.sub_sampling_from_dictionary()
    self.__check_dataset_validity__()
    self.X, self.X_test = standardize_features(self.X, self.X_test)
    return self.X, self.Y, self.X_test, self.Y_test
def get_single_train_test_split(self):
    """Sub-sample a training set, validate it, and standardize features.

    Returns
    -------
    tuple
        ``(self.X, self.Y, self.X_test, self.Y_test)`` with ``self.X`` and
        ``self.X_test`` standardized together.
    """
    sampled = self.sub_sampling_from_dictionary(train_test="train")
    self.X, self.Y = sampled
    self.__check_dataset_validity__()
    self.X, self.X_test = standardize_features(self.X, self.X_test)
    return self.X, self.Y, self.X_test, self.Y_test
def get_dataset_dictionaries(self):
    """Standardize features and return the stored train/test dictionaries.

    Returns
    -------
    tuple
        ``(self.X_train, self.Y_train, self.X_test, self.Y_test)``.
    """
    # BUG FIX: the original line was
    #     self.X_test, self.X_test = standardize_features(self.X, self.X_test)
    # which assigned BOTH return values to ``self.X_test`` — the standardized
    # training features were computed and then immediately discarded. Assign
    # the first result to ``self.X``, matching the sibling implementations.
    # NOTE(review): if the returned ``self.X_train`` is also expected to be
    # standardized, it may need to be derived from ``self.X`` — confirm
    # against the callers.
    self.X, self.X_test = standardize_features(self.X, self.X_test)
    return self.X_train, self.Y_train, self.X_test, self.Y_test
def get_single_train_test_split(self):
    """Draw sub-sampled train and test splits and standardize their features.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_test, Y_test)``.
    """
    x_tr, y_tr = self.sub_sampling_from_dictionary(train_test="train")
    x_te, y_te = self.sub_sampling_from_dictionary(train_test="test")
    x_tr, x_te = standardize_features(x_tr, x_te)
    return x_tr, y_tr, x_te, y_te