from sklearn.preprocessing import MinMaxScaler


def fit_min_max_scale(arr, arr1=None):
    scaler = MinMaxScaler()
    res = scaler.fit_transform(arr)
    if arr1 is None:
        print(scaler.get_params())
        return res
    # Reuse the scaler fitted on `arr` so both arrays share the same scaling.
    return res, scaler.transform(arr1)
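# Usage sketch for fit_min_max_scale above; the arrays are made up for
# illustration and are not from the source.
import numpy as np

train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
test = np.array([[1.5, 15.0]])

train_scaled = fit_min_max_scale(train)                      # prints the scaler's params
train_scaled, test_scaled = fit_min_max_scale(train, test)   # test scaled with the train fit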
from sklearn.preprocessing import MinMaxScaler


def minmax():
    """
    Method to load a [0, 1] MinMaxScaler

    RETURN: scaler
    """
    scaler = MinMaxScaler(feature_range=(0, 1), copy=True)
    utils.display_get_params('MinMaxScaler Description', scaler.get_params())
    return scaler
from sklearn.preprocessing import MinMaxScaler


def main():
    # Get x and y pairs.
    x_train, y_train, x_test, y_test = get_x_y()

    # Scale data.
    logger.log('\tScaling data with params:')
    scaler = MinMaxScaler()
    logger.log('\t{}'.format(scaler.get_params()))
    x_train = scaler.fit_transform(x_train)

    run_embedding_test(x_train, y_train, x_train)

    # Close the logger.
    logger.close()
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


class Normalize(PreprocessingStrategy):  # PreprocessingStrategy is defined elsewhere in the project
    def __init__(self, **kwargs):
        super().__init__()
        self._method = MinMaxScaler(**kwargs)
        self.hyperparams = self._method.get_params()

    def preprocess(self, data):
        """ Return the transformed data """
        return pd.DataFrame(self._method.fit_transform(data),
                            columns=data.columns,
                            index=data.index)

    def jsonify(self):
        out = super().jsonify()
        out.update(**{'hyperparams': self.hyperparams})
        return out
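# Usage sketch for Normalize above. It assumes the project's
# PreprocessingStrategy base class takes no constructor arguments; the
# DataFrame is made up for illustration.
import pandas as pd

df = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0]})

norm = Normalize(feature_range=(0, 1))  # kwargs are forwarded to MinMaxScaler
scaled = norm.preprocess(df)            # same columns/index, values scaled to [0, 1]
print(norm.hyperparams)                 # the MinMaxScaler get_params() dict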
from typing import Optional

from sklearn.preprocessing import MinMaxScaler


class NormalizationImplementation(EncodedInvariantImplementation):  # base class defined elsewhere
    """ Class for application of the MinMax normalization operation on data,
    using only non-encoded features (those that were not converted from
    categorical features via OneHot encoding)

    :param params: optional, dictionary with the arguments
    """

    def __init__(self, **params: Optional[dict]):
        super().__init__()
        if not params:
            # Default parameters
            self.operation = MinMaxScaler()
        else:
            self.operation = MinMaxScaler(**params)
        self.params = params

    def get_params(self):
        return self.operation.get_params()
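# Usage sketch for NormalizationImplementation above, assuming the project's
# EncodedInvariantImplementation base class can be constructed without
# arguments. Keyword params are forwarded to MinMaxScaler.
norm_op = NormalizationImplementation(feature_range=(0, 1))
print(norm_op.get_params())  # reports the underlying MinMaxScaler settings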
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MinMaxScaler, StandardScaler


class DataScaler(BaseEstimator, TransformerMixin):
    def __init__(self, scaler='standard', feature_range=(0, 1)):
        if scaler == 'standard':
            self.scaler = StandardScaler()
        else:
            self.scaler = MinMaxScaler(feature_range=feature_range)

    def fit(self, X, y=None):
        self.scaler.fit(X)
        return self

    def transform(self, X, y=None):
        # Preserve column names and index, which scikit-learn drops.
        return pd.DataFrame(self.scaler.transform(X),
                            columns=X.columns,
                            index=X.index)

    def parameters(self):
        return self.scaler.get_params()
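# Usage sketch for DataScaler above with a toy DataFrame (not from the
# source). Note that transform() expects a DataFrame, since it reads
# X.columns and X.index.
import pandas as pd

df = pd.DataFrame({'height': [150.0, 160.0, 170.0],
                   'weight': [50.0, 60.0, 70.0]})

ds = DataScaler(scaler='minmax')      # any value other than 'standard' selects MinMaxScaler
scaled_df = ds.fit(df).transform(df)
print(ds.parameters())                # delegates to the wrapped scaler's get_params()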
from typing import Tuple

import numpy as np
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.preprocessing import MinMaxScaler


def preprocess(x_train: np.ndarray, y_train: np.ndarray,
               x_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Preprocesses data.

    :param x_train: the training data.
    :param y_train: the training labels.
    :param x_test: the test data.
    :return: Preprocessed x_train and x_test.
    """
    logger.log('Preprocessing...')

    # Scale data.
    logger.log('\tScaling data with params:')
    scaler = MinMaxScaler()
    logger.log('\t{}'.format(scaler.get_params()))
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Apply LLE.
    logger.log('\tApplying LLE with params:')
    embedding = LocallyLinearEmbedding(n_neighbors=100, n_jobs=-1, random_state=0)
    embedding_params = embedding.get_params()
    logger.log('\t' + str(embedding_params))
    x_train = embedding.fit_transform(x_train)
    x_test = embedding.transform(x_test)

    # Plot the graph embedding result.
    if PLOTTING_MODE != 'none':
        plotter.subfolder = 'graphs/LLE'
        plotter.filename = 'embedding'
        plotter.xlabel = 'first feature'
        plotter.ylabel = 'second feature'
        plotter.title = 'LLE'
        plotter.scatter(x_train, y_train, class_labels=helpers.datasets.get_gene_name)

    return x_train, x_test
import joblib
from sklearn.preprocessing import MinMaxScaler


def normalize3DInput(data, filename="scaler.data"):
    print("\n\n{} {} {}\n\n".format(10 * "_ ", "SEPARATING TEST & TRAIN", 10 * "_ "))

    n_batches = data.shape[0]
    batch_size = data.shape[1]
    n_features = data.shape[2]

    # Flatten the 3D array to 2D, since scikit-learn scalers expect 2D input.
    tmp_data = data.reshape((n_batches * batch_size, n_features))
    print("Converting 3D to 2D for easy processing. Batch Sample: \n\n {} \n\n "
          "Original Array Shape: {}. Temporary array with shape: {}\n"
          .format(data[0], data.shape, tmp_data.shape))  # e.g. (4112, 265, 12)

    min_max_scaler = MinMaxScaler()  # RobustScaler is another option
    data_norm = min_max_scaler.fit_transform(tmp_data)

    scaler_max = min_max_scaler.data_max_
    scaler_min = min_max_scaler.data_min_
    scaler_scale = min_max_scaler.scale_
    scaler_data_range = min_max_scaler.data_range_
    scaler_params = min_max_scaler.get_params(deep=True)

    # Restore the original 3D shape.
    data_norm = data_norm.reshape((n_batches, batch_size, n_features))
    print("""
    SCALER INFORMATION
    MAX: {}
    MIN: {}
    SCALE: {}
    RANGE: {}
    PARAMS: {}

    Data Normalized and reshaped to a 3D array. Current Shape: {}
    Saving scaler to file: {}
    """.format(scaler_max, scaler_min, scaler_scale, scaler_data_range,
               scaler_params, data_norm.shape, filename))  # e.g. (4112, 265, 12)

    joblib.dump(min_max_scaler, filename)
    return data_norm
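# Usage sketch for normalize3DInput above with a small random 3D array; the
# shapes are illustrative (the original comments mention (4112, 265, 12)).
import joblib
import numpy as np

data = np.random.rand(4, 5, 3)         # (batches, batch_size, features)
data_norm = normalize3DInput(data, filename="scaler.data")
scaler = joblib.load("scaler.data")    # the fitted scaler round-trips via joblib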
from os import listdir
from os.path import isfile, join

import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm


class MetaFeaturesCollector:
    def __init__(self, features_size: int, instances_size: int):
        self.features = features_size
        self.instances = instances_size
        # The meta-feature extractors are defined elsewhere in the project.
        self.meta_features = [
            StatisticalMeta(features_size, instances_size),
            InformationMeta(features_size, instances_size),
            DecisionTreeMeta(features_size, instances_size)
        ]
        self.min_max = MinMaxScaler()
        self.length = None

    def getLength(self):
        if self.length is None:
            self.length = sum(meta.getLength() for meta in self.meta_features)
        return self.length

    def train(self, path: str):
        only_files = [f for f in listdir(path) if isfile(join(path, f))]
        results = []
        for name in tqdm(only_files):
            stacked = np.load(f'{path}{name}')
            results.append(self.getNumpy(stacked))
        self.min_max.fit(np.array(results))
        return self.min_max.get_params()

    def get(self, stacked: np.ndarray) -> torch.Tensor:
        metas = self.getNumpy(stacked).reshape(1, -1)
        metas = self.min_max.transform(metas)
        return torch.from_numpy(metas.T).float()

    def getShort(self, stacked: np.ndarray) -> torch.Tensor:
        # Identical to get(); kept as a separate entry point.
        return self.get(stacked)

    def getNumpy(self, stacked: np.ndarray) -> np.ndarray:
        zero_in, one_in = stacked[0], stacked[1]
        meta_features = self.meta_features[0].getMeta(zero_in, one_in)
        for meta in self.meta_features[1:]:
            meta_features = np.concatenate(
                (meta_features, meta.getMeta(zero_in, one_in)))
        return meta_features
from sklearn.preprocessing import MinMaxScaler


def rescale(data):
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(data)
    # get_params() takes no data argument; it only reports the scaler's settings.
    print(scaler.get_params())
    return scaled
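# Usage sketch for rescale above with a toy array (not from the source).
import numpy as np

scaled = rescale(np.array([[0.0, 100.0], [5.0, 200.0], [10.0, 300.0]]))
# Each column is now mapped to [0, 1] independently.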
def test_get_params(self):
    sk_scaler = SkMinMaxScaler()
    rasl_scaler = RaslMinMaxScaler()
    sk_params = sk_scaler.get_params()
    rasl_params = rasl_scaler.get_params()
    # Every scikit-learn MinMaxScaler param should appear in the Rasl version.
    self.assertDictContainsSubset(sk_params, rasl_params)