示例#1
0
    def get_dataset_dictionaries(self, lengths=(5, 6)):
        """Generate one train/test split per object-set size.

        Fix: the original used a mutable list default (``lengths=[5, 6]``),
        which is shared across all calls; a tuple default is immutable and
        backward-compatible since ``lengths`` is only iterated.

        Parameters
        ----------
        lengths : iterable of int, optional
            Object-set sizes (``n_objects``) to generate datasets for.

        Returns
        -------
        tuple of dict
            ``(x_train, y_train, x_test, y_test)``, each keyed by the
            ``n_objects`` value of the generated dataset.
        """
        x_train = dict()
        y_train = dict()
        x_test = dict()
        y_test = dict()
        for n_obj in lengths:
            self.kwargs["n_objects"] = n_obj

            # Fresh 32-bit seed per length so each generated dataset differs.
            seed = self.random_state.randint(2 ** 32, dtype="uint32")
            total_instances = self.n_test_instances + self.n_train_instances
            self.kwargs["n_instances"] = total_instances
            X, Y = self.dataset_function(**self.kwargs, seed=seed)
            x_1, x_2, y_1, y_2 = train_test_split(
                X, Y, random_state=self.random_state, test_size=self.n_test_instances
            )
            if self.standardize:
                x_1, x_2 = standardize_features(x_1, x_2)
            x_train[n_obj], x_test[n_obj], y_train[n_obj], y_test[n_obj] = (
                x_1,
                x_2,
                y_1,
                y_2,
            )
        logger.info("Done")
        return x_train, y_train, x_test, y_test
 def get_single_train_test_split(self):
     """Generate the dataset once and return a single train/test split."""
     self.dataset_function()
     # Split self.X / self.Y, reserving n_test_instances rows for testing.
     parts = train_test_split(
         self.X,
         self.Y,
         random_state=self.random_state,
         test_size=self.n_test_instances,
     )
     x_tr, x_te, y_tr, y_te = parts
     if self.standardize:
         x_tr, x_te = standardize_features(x_tr, x_te)
     self.logger.info('Done')
     return x_tr, y_tr, x_te, y_te
 def splitter(self, iter):
     """Yield one standardized train/test split per (train, test) index pair.

     ``iter`` yields ``(train_idx, test_idx)`` pairs (e.g. from a CV
     splitter); features are standardized unconditionally here — note
     there is no ``self.standardize`` guard in this variant.
     """
     for train_idx, test_idx in iter:
         x_tr = self.X[train_idx]
         y_tr = self.Y[train_idx]
         x_te = self.X[test_idx]
         y_te = self.Y[test_idx]
         x_tr, x_te = standardize_features(x_tr, x_te)
         yield x_tr, y_tr, x_te, y_te
示例#4
0
 def splitter(self, iter):
     """Yield a freshly generated train/test split for each step of ``iter``."""
     for _step in iter:
         # New seed every round -> a new synthetic dataset per split.
         seed = self.random_state.randint(2 ** 32, dtype="uint32")
         self.kwargs["n_instances"] = (
             self.n_test_instances + self.n_train_instances
         )
         X, Y = self.dataset_function(**self.kwargs, seed=seed)
         x_tr, x_te, y_tr, y_te = train_test_split(
             X, Y, random_state=self.random_state, test_size=self.n_test_instances
         )
         if self.standardize:
             x_tr, x_te = standardize_features(x_tr, x_te)
         yield x_tr, y_tr, x_te, y_te
 def get_dataset_dictionaries(self, lengths=(5, 6)):
     """Build per-length train/test split dictionaries keyed by n_objects.

     Fix: the original used a mutable list default (``lengths=[5, 6]``),
     shared across calls; a tuple default is immutable and
     backward-compatible since ``lengths`` is only iterated.

     Parameters
     ----------
     lengths : iterable of int, optional
         Object-set sizes to generate datasets for.

     Returns
     -------
     tuple of dict
         ``(x_train, y_train, x_test, y_test)`` keyed by n_objects.
     """
     x_train = dict()
     y_train = dict()
     x_test = dict()
     y_test = dict()
     for n_obj in lengths:
         self.n_objects = n_obj
         self.dataset_function()
         x_1, x_2, y_1, y_2 = train_test_split(self.X, self.Y, random_state=self.random_state,
                                               test_size=self.n_test_instances)
         if self.standardize:
             x_1, x_2 = standardize_features(x_1, x_2)
         x_train[n_obj], x_test[n_obj], y_train[n_obj], y_test[n_obj] = x_1, x_2, y_1, y_2
     self.logger.info('Done')
     return x_train, y_train, x_test, y_test
示例#6
0
    def get_single_train_test_split(self):
        """Generate one synthetic dataset, validate it, and split it once."""
        # Fresh 32-bit seed so repeated calls generate new datasets.
        seed = self.random_state.randint(2 ** 32, dtype="uint32")
        self.kwargs["n_instances"] = (
            self.n_test_instances + self.n_train_instances
        )
        self.X, self.Y = self.dataset_function(**self.kwargs, seed=seed)
        self.__check_dataset_validity__()
        parts = train_test_split(
            self.X,
            self.Y,
            random_state=self.random_state,
            test_size=self.n_test_instances,
        )
        x_tr, x_te, y_tr, y_te = parts
        if self.standardize:
            x_tr, x_te = standardize_features(x_tr, x_te)
        logger.info("Done")

        return x_tr, y_tr, x_te, y_te
 def get_single_train_test_split(self):
     """Split each per-size array into train/test, sub-sample, standardize.

     NOTE(review): ``test_size=0.80`` places 80% of instances in the test
     partition — confirm this proportion is intentional.
     """
     cv_iter = ShuffleSplit(n_splits=1,
                            random_state=self.random_state,
                            test_size=0.80)
     splits = dict()
     for n_obj, arr in self.X_dict.items():
         if arr.shape[0] == 1:
             # A single instance cannot be split; reuse it on both sides.
             splits[n_obj] = ([0], [0])
         else:
             splits[n_obj] = next(iter(cv_iter.split(arr)))
     self.X_train = dict()
     self.Y_train = dict()
     self.X_test = dict()
     self.Y_test = dict()
     for n_obj, (train_idx, test_idx) in splits.items():
         self.X_train[n_obj] = np.copy(self.X_dict[n_obj][train_idx])
         self.X_test[n_obj] = np.copy(self.X_dict[n_obj][test_idx])
         self.Y_train[n_obj] = np.copy(self.Y_dict[n_obj][train_idx])
         self.Y_test[n_obj] = np.copy(self.Y_dict[n_obj][test_idx])
     self.X, self.Y = self.sub_sampling_from_dictionary()
     self.__check_dataset_validity__()
     # Standardization is applied unconditionally in this variant.
     self.X, self.X_test = standardize_features(self.X, self.X_test)
     return self.X, self.Y, self.X_test, self.Y_test
示例#8
0
 def get_single_train_test_split(self):
     """Draw a training sub-sample, validate it, standardize, return splits."""
     sampled_x, sampled_y = self.sub_sampling_from_dictionary(train_test="train")
     self.X, self.Y = sampled_x, sampled_y
     self.__check_dataset_validity__()
     scaled_train, scaled_test = standardize_features(self.X, self.X_test)
     self.X, self.X_test = scaled_train, scaled_test
     return self.X, self.Y, self.X_test, self.Y_test
示例#9
0
 def get_dataset_dictionaries(self):
     """Standardize features in place and return the train/test splits.

     Fix: the original unpacked into ``self.X_test, self.X_test``, a
     duplicate target that discarded the standardized first array and
     overwrote ``self.X_test`` twice. The first target should be ``self.X``,
     matching the arguments passed to ``standardize_features`` and the
     sibling methods in this file.
     """
     self.X, self.X_test = standardize_features(self.X, self.X_test)
     return self.X_train, self.Y_train, self.X_test, self.Y_test
 def get_single_train_test_split(self):
     """Sub-sample train and test partitions and return them standardized."""
     train_part = self.sub_sampling_from_dictionary(train_test="train")
     test_part = self.sub_sampling_from_dictionary(train_test="test")
     X_train, Y_train = train_part
     X_test, Y_test = test_part
     X_train, X_test = standardize_features(X_train, X_test)
     return X_train, Y_train, X_test, Y_test