def sklearn_comp(array):
    """Check CNormalizerMinMax against sklearn's MinMaxScaler on `array`.

    Both scalers are fitted on the same data; results are compared on the
    fitted data itself and again on doubled (out-of-range) inputs.
    NOTE(review): relies on `self` from the enclosing scope — presumably a
    nested function inside a test method.
    """
    self.logger.info("Original array is:\n{:}".format(array))

    # sklearn's scaler only accepts float ndarrays
    sk_input = array.astype(float).tondarray()
    scaler = MinMaxScaler().fit(sk_input)

    # Fit the project normalizer on the same data
    normalizer = CNormalizerMinMax().fit(array)

    # In-range case: transform the data the scalers were fitted on
    target = CArray(scaler.transform(sk_input))
    result = normalizer.transform(array)

    self.logger.info("Correct result is:\n{:}".format(target))
    self.logger.info("Our result is:\n{:}".format(result))

    self.assert_array_almost_equal(target, result)

    self.logger.info("Testing out of range normalization")

    # Out-of-range case: doubled inputs exceed the fitted feature range
    target = CArray(scaler.transform(sk_input * 2))
    result = normalizer.transform(array * 2)

    self.logger.info("Correct result is:\n{:}".format(target))
    self.logger.info("Our result is:\n{:}".format(result))

    self.assert_array_almost_equal(target, result)
def _dataset_creation(self):
    """Create a 2-class, 2-feature blob dataset, split it, and rescale to [-1, 1].

    Populates self.tr / self.ts (normalized train and test sets),
    dataset size attributes, the box bounds (self.lb, self.ub) and
    the plotting grid limits.
    """
    # Dataset geometry and split sizes
    self.n_features = 2  # Number of dataset features
    self.seed = 42
    self.n_tr = 50
    self.n_ts = 100
    self.n_classes = 2

    loader = CDLRandomBlobs(
        n_samples=self.n_tr + self.n_ts,
        n_features=self.n_features,
        centers=[(-1, -1), (+1, +1)],
        center_box=(-2, 2),
        cluster_std=0.8,
        random_state=self.seed)

    self.logger.info(
        "Loading `random_blobs` with seed: {:}".format(self.seed))

    dataset = loader.load()

    # One shuffled split into train/test index sets
    # NOTE(review): splitter uses random_state=3, not self.seed — kept as-is
    splitter = CDataSplitterShuffle(
        num_folds=1, train_size=self.n_tr, random_state=3)
    splitter.compute_indices(dataset)

    self.tr = dataset[splitter.tr_idx[0], :]
    self.ts = dataset[splitter.ts_idx[0], :]

    # Fit the scaler on training data only, then apply to both sets
    normalizer = CNormalizerMinMax(feature_range=(-1, 1))
    self.tr.X = normalizer.fit_transform(self.tr.X)
    self.ts.X = normalizer.transform(self.ts.X)

    # Box bounds and (slightly padded) grid limits for plotting
    self.lb = -1
    self.ub = 1
    pad = 0.1
    self.grid_limits = [(self.lb - pad, self.ub + pad),
                        (self.lb - pad, self.ub + pad)]