示例#1
0
def fit_min_max_scale(arr, arr1=None):
    """Fit a MinMaxScaler on `arr`, transform it, and optionally transform `arr1`.

    :param arr: array-like of shape (n_samples, n_features) used to fit the scaler.
    :param arr1: optional array-like to transform with the scaler fitted on `arr`.
    :return: transformed `arr`, or the tuple (transformed arr, transformed arr1)
        when `arr1` is given. When `arr1` is None the scaler's params are printed.
    """
    scaler = MinMaxScaler()
    res = scaler.fit_transform(arr)
    # BUG FIX: `arr1 == None` breaks on numpy arrays (elementwise comparison);
    # identity check is the correct test. Also fixed the `tranform` typo,
    # which raised AttributeError whenever a second array was supplied.
    if arr1 is None:
        print(scaler.get_params())
        return res
    return res, scaler.transform(arr1)
示例#2
0
文件: scaler.py 项目: HoChiak/whypy
def minmax():
    """
    Build and return a MinMaxScaler configured for the [0, 1] feature range.

    RETURN:
    scaler
    """
    mm_scaler = MinMaxScaler(feature_range=(0, 1), copy=True)
    # Surface the scaler configuration through the project's display helper.
    utils.display_get_params('MinMaxScaler Description', mm_scaler.get_params())
    return mm_scaler
示例#3
0
def main():
    """Load the data split, min-max scale it, and run the embedding test."""
    # Get x and y pairs.
    x_train, y_train, x_test, y_test = get_x_y()

    # Scale data: fit on the training set only, then apply the same
    # transform to the test set so both share one scaling.
    logger.log('\tScaling data with params:')
    scaler = MinMaxScaler()
    logger.log('\t{}'.format(scaler.get_params()))
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # BUG FIX(review): the original passed x_train as the third argument and
    # left the loaded x_test/y_test entirely unused — pass the scaled test
    # set instead. Confirm against run_embedding_test's signature.
    run_embedding_test(x_train, y_train, x_test)

    # Close the logger.
    logger.close()
示例#4
0
class Normalize(PreprocessingStrategy):
    """Min-max normalization strategy backed by a MinMaxScaler."""

    def __init__(self, **kwargs):
        super().__init__()
        # Keyword arguments are forwarded verbatim to the scaler.
        self._method = MinMaxScaler(**kwargs)
        self.hyperparams = self._method.get_params()

    def preprocess(self, data):
        """ Return the transformed data """
        scaled = self._method.fit_transform(data)
        # Rebuild the DataFrame so column names and index survive scaling.
        return pd.DataFrame(scaled, columns=data.columns, index=data.index)

    def jsonify(self):
        """Serialize the strategy, including the scaler's hyperparameters."""
        out = super().jsonify()
        out['hyperparams'] = self.hyperparams
        return out
示例#5
0
class NormalizationImplementation(EncodedInvariantImplementation):
    """ Class for application of MinMax normalization operation on data,
    where only not encoded features (were not converted from categorical using
    OneHot encoding) are used

    :param params: optional, dictionary with the arguments
    """
    def __init__(self, **params):
        # FIX: dropped the `**params: Optional[dict]` annotation — on **kwargs
        # it (incorrectly) declares every individual keyword VALUE to be an
        # Optional[dict], which is not what scaler arguments are.
        super().__init__()
        # MinMaxScaler(**{}) is identical to MinMaxScaler(), so one call
        # covers both the default and the parameterized case.
        self.operation = MinMaxScaler(**params)
        self.params = params

    def get_params(self):
        """Return the parameters of the wrapped MinMaxScaler."""
        return self.operation.get_params()
示例#6
0
class DataScaler(BaseEstimator, TransformerMixin):
    """Thin wrapper that scales data while preserving DataFrame structure.

    Chooses StandardScaler when `scaler == 'standard'`, otherwise a
    MinMaxScaler with the given `feature_range`.
    """

    def __init__(self, scaler='standard', feature_range=(0, 1)):
        self.scaler = (StandardScaler() if scaler == 'standard'
                       else MinMaxScaler(feature_range=feature_range))

    def fit(self, X, y=None):
        """Fit the underlying scaler on X; `y` is ignored."""
        self.scaler.fit(X)
        return self

    def transform(self, X, y=None):
        """Transform X, rebuilding a DataFrame with X's columns and index."""
        scaled = self.scaler.transform(X)
        return pd.DataFrame(scaled, columns=X.columns, index=X.index)

    def parameters(self):
        """Return the underlying scaler's parameters."""
        return self.scaler.get_params()
示例#7
0
def preprocess(x_train: np.ndarray, y_train: np.ndarray,
               x_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Preprocesses data.

    :param x_train: the training data.
    :param y_train: the training labels.
    :param x_test: the test data.
    :return: Preprocessed x_train and x_test.
    """
    # FIX: corrected the "Prepocess" typo in the docstring and in the
    # user-visible log message below.
    logger.log('Preprocessing...')

    # Scale data: fit on the training set only, apply to both sets.
    logger.log('\tScaling data with params:')
    scaler = MinMaxScaler()
    logger.log('\t{}'.format(scaler.get_params()))
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Apply LLE (Locally Linear Embedding) for dimensionality reduction.
    logger.log('\tApplying LLE with params:')
    embedding = LocallyLinearEmbedding(n_neighbors=100,
                                       n_jobs=-1,
                                       random_state=0)
    embedding_params = embedding.get_params()
    logger.log('\t' + str(embedding_params))
    x_train = embedding.fit_transform(x_train)
    x_test = embedding.transform(x_test)

    # Plot the graph embedding result (skipped when plotting is disabled).
    if PLOTTING_MODE != 'none':
        plotter.subfolder = 'graphs/LLE'
        plotter.filename = 'embedding'
        plotter.xlabel = 'first feature'
        plotter.ylabel = 'second feature'
        plotter.title = 'LLE'
        plotter.scatter(x_train,
                        y_train,
                        class_labels=helpers.datasets.get_gene_name)

    return x_train, x_test
def normalize3DInput(data, filename="scaler.data"):
    """Min-max normalize a 3D array (batches, batch_size, features).

    The array is flattened to 2D for scaling, normalized, reshaped back to
    3D, and the fitted scaler is persisted to `filename` with joblib.

    :param data: 3D numpy array of shape (n_batches, batch_size, n_features).
    :param filename: path the fitted scaler is dumped to.
    :return: the normalized data, reshaped to the original 3D shape.
    """
    print("\n\n{} {} {}\n\n".format(10 * "_ ", "SEPARATING TEST & TRAIN",
                                    10 * "_ "))
    n_batches = data.shape[0]
    batch_size = data.shape[1]
    n_features = data.shape[2]
    # Scalers operate on 2D input, so collapse the batch dimensions.
    tmp_data = data.reshape((n_batches * batch_size, n_features))
    print(
        "Converting 3D to 2D for easy processing. Batch Sample: \n\n {} \n\n Original Array Shape: {}. Temporary array with shape: {}\n"
        .format(data[0], data.shape, tmp_data.shape))  #(4112, 265, 12)

    min_max_scaler = MinMaxScaler()
    data_norm = min_max_scaler.fit_transform(
        tmp_data)  # ROBUST SCALER ANOTHER OPTION
    scaler_max = min_max_scaler.data_max_
    scaler_min = min_max_scaler.data_min_
    scaler_scale = min_max_scaler.scale_
    scaler_data_range = min_max_scaler.data_range_
    scaler_params = min_max_scaler.get_params(deep=True)
    data_norm = data_norm.reshape((n_batches, batch_size, n_features))

    # BUG FIX: the original passed 8 arguments for 7 '{}' placeholders
    # (a stray `data_norm[0]` before `filename`), so the "Saving scaler to
    # file" slot printed an entire data batch and the filename never appeared.
    print("""
    SCALER INFORMATION
    MAX:    {}
    MIN:    {}
    SCALE:  {}
    RANGE:  {}
    PARAMS: {}
    Data Normalized and reshaped to a 3D array. 
    Current Shape: {} 
    Saving scaler to file: {}
    """.format(scaler_max, scaler_min, scaler_scale, scaler_data_range,
               scaler_params, data_norm.shape, filename))  #(4112, 265, 12)
    joblib.dump(min_max_scaler, filename)
    return data_norm
class MetaFeaturesCollector:
    """Collects statistical/information/decision-tree meta-features for a
    dataset pair and normalizes them with a MinMaxScaler fitted via train().
    """

    def __init__(self, features_size: int, instances_size: int):
        self.features = features_size
        self.instances = instances_size
        self.meta_features = [
            StatisticalMeta(features_size, instances_size),
            InformationMeta(features_size, instances_size),
            DecisionTreeMeta(features_size, instances_size)
        ]
        self.min_max = MinMaxScaler()
        # Total meta-feature length, computed lazily by getLength().
        self.length = None

    def getLength(self):
        """Return the total meta-feature vector length (cached after first call)."""
        if self.length is None:
            self.length = sum(meta.getLength() for meta in self.meta_features)
        return self.length

    def train(self, path: str):
        """Fit the MinMaxScaler on the meta-features of every file in `path`.

        :return: the fitted scaler's parameters.
        """
        only_files = [f for f in listdir(path) if isfile(join(path, f))]
        results = []
        for name in tqdm(only_files):
            stacked = np.load(f'{path}{name}')
            results.append(self.getNumpy(stacked))
        results = np.array(results)
        self.min_max.fit(results)
        return self.min_max.get_params()

    def _collect(self, stacked: np.ndarray) -> np.ndarray:
        """Concatenate all meta-feature vectors for one (zero_in, one_in) pair."""
        zero_in, one_in = stacked[0], stacked[1]
        meta_features = self.meta_features[0].getMeta(zero_in, one_in)
        for meta in self.meta_features[1:]:
            meta_features = np.concatenate(
                (meta_features, meta.getMeta(zero_in, one_in)))
        return meta_features

    def get(self, stacked: np.ndarray) -> torch.Tensor:
        """Return the normalized meta-features as a column torch tensor."""
        metas = self._collect(stacked).reshape(1, -1)
        metas = self.min_max.transform(metas).T
        return torch.from_numpy(metas).float()

    def getShort(self, stacked: np.ndarray) -> torch.Tensor:
        """Alias of get(); kept for backward compatibility.

        FIX: the original body was a byte-for-byte duplicate of get() —
        deduplicated by delegating.
        """
        return self.get(stacked)

    def getNumpy(self, stacked: np.ndarray) -> np.ndarray:
        """Return the raw (un-normalized) concatenated meta-feature vector."""
        return self._collect(stacked)
示例#10
0
def rescale(data):
    """Fit a MinMaxScaler on `data` and return the rescaled values.

    BUG FIX(review): the original called `scaler.get_params(data)` —
    get_params takes no data argument (its only parameter is `deep`) —
    stored the result in an unused local, and returned None, making the
    function a no-op. Rescaling and returning appears to be the intent;
    confirm against callers.

    :param data: array-like of shape (n_samples, n_features).
    :return: `data` scaled feature-wise into the scaler's default [0, 1] range.
    """
    scaler = MinMaxScaler()
    return scaler.fit_transform(data)
示例#11
0
 def test_get_params(self):
     """Verify sklearn's MinMaxScaler params are a subset of the RASL scaler's."""
     sk_scaler = SkMinMaxScaler()
     rasl_scaler = RaslMinMaxScaler()
     sk_params = sk_scaler.get_params()
     rasl_params = rasl_scaler.get_params()
     # FIX: assertDictContainsSubset is deprecated since Python 3.2 and
     # removed in 3.12. Merging the subset into the superset and comparing
     # for equality expresses the same containment check.
     self.assertEqual(rasl_params, {**rasl_params, **sk_params})