Exemplos de PowerTransformer em Python, exemplos de sklearn.preprocessing.PowerTransformer em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: bc_machine_learning.py Projeto: northcheng/quant

def get_scaler(scale_method='StandardScaler'):
    """
    Create an unfitted scikit-learn preprocessing object from its class name.

    :param scale_method: one of 'StandardScaler', 'MinMaxScaler',
        'MaxAbsScaler', 'RobustScaler', 'QuantileTransformer', 'Normalizer'
        or 'PowerTransformer'
    :returns: a new instance of the requested class built with its default
        arguments, or None (after printing a message) for any other name
    :raises: none
    """
    supported_names = (
        'StandardScaler',
        'MinMaxScaler',
        'MaxAbsScaler',
        'RobustScaler',
        'QuantileTransformer',
        'Normalizer',
        'PowerTransformer',
    )

    for class_name in supported_names:
        if scale_method == class_name:
            # Every supported name is also the name of the class to build.
            return getattr(preprocessing, class_name)()

    print(scale_method, ' not found')
    return None

Exemplo n.º 2

0

Exibir arquivo

Arquivo: Classification_Utils.py Projeto: throneofshadow/ReachMaster

def norm_and_zscore_ML_array(ML_array,
                             robust=False,
                             decomp=False,
                             gauss=False):
    """
    Scale a 3-D trial array column-wise and optionally reduce it with PCA.

    The array is flattened to 2-D (one row per trial, one column per
    feature/frame pair) before scaling. Call this separately on the training
    and the testing split; every call fits a fresh scaler on its input.

    input
    ML_array : array of shape (Cut Trials, Features, Frames)
    robust : boolean flag, scale with scikit-learn's RobustScaler
    decomp : boolean flag, post-processing step that replaces the scaled data
        with its first 20 whitened PCA components
    gauss : boolean flag, scale with a Box-Cox PowerTransformer
        (standardize=False); only consulted when `robust` is False.
        Box-Cox only accepts strictly positive data.

    With neither `robust` nor `gauss` set, MinMaxScaler is used.

    returns
    2-D array of shape (Cut Trials, Features * Frames), or
    (Cut Trials, 20) when `decomp` is set.
    """
    if robust:
        # RobustScaler is the estimator; preprocessing.robust_scale is a plain
        # function that needs the data and has no fit_transform method.
        pt = preprocessing.RobustScaler()
    elif gauss:
        pt = preprocessing.PowerTransformer(method='box-cox',
                                            standardize=False)
    else:
        pt = preprocessing.MinMaxScaler()

    # Collapse (features, frames) into one axis so each column is scaled
    # independently across trials.
    n_trials, n_features, n_frames = ML_array.shape[:3]
    r_ML_array = pt.fit_transform(
        ML_array.reshape(n_trials, n_features * n_frames))

    if decomp:  # used to decomp linear correlations, if they exist.
        # The keyword is n_components, and fit_transform (not fit) is what
        # returns the projected data; fit returns the PCA estimator itself.
        pca = decomposition.PCA(n_components=20, whiten=True)
        r_ML_array = pca.fit_transform(r_ML_array)

    return r_ML_array

Exemplo n.º 3

0

Exibir arquivo

Arquivo: computeKmerProfile_blocksize.py Projeto: dwwaite/bin_detangling

def NormaliseColumnValues(df, normFactor):
    """
    Normalise every column of `df` except 'Contig'.

    :param df: DataFrame with a 'Contig' label column; all other columns are
        treated as the numeric data to normalise
    :param normFactor: 'none' (return `df` untouched), 'unit'
        (preprocessing.scale per column, written back into `df` in place) or
        'yeojohnson' (standardised Yeo-Johnson transform, returned as a new
        DataFrame with 'Contig' as first column). Any other value returns
        `df` unchanged.
    :returns: the normalised DataFrame
    :raises ValueError: if `normFactor` is not 'none' and `df` has no
        'Contig' column
    """
    # If there is no normalisation needed, just return the DataFrame unmodified
    if normFactor == 'none':
        return df

    # Only consider numeric data
    colsToTransform = list(df.columns)
    colsToTransform.remove('Contig')

    if normFactor == 'unit':
        # Straight out of the preprocessing.scale documentation.
        df[colsToTransform] = preprocessing.scale(df[colsToTransform], axis=0)
        return df

    if normFactor == 'yeojohnson':
        # Taken from https://scikit-learn.org/stable/modules/preprocessing.html
        pt = preprocessing.PowerTransformer(method='yeo-johnson',
                                            standardize=True)
        # Select by name rather than iloc[:, 1:], so the transformed columns
        # always match colsToTransform wherever 'Contig' sits in the frame.
        normArray = pt.fit_transform(df[colsToTransform])

        # Recast the data as a DataFrame. Reusing df's index keeps the
        # index-aligned insert below from filling 'Contig' with NaN when df
        # does not carry a default 0..n-1 index.
        normDf = pd.DataFrame(normArray,
                              columns=colsToTransform,
                              index=df.index)
        normDf.insert(loc=0, column='Contig', value=df['Contig'])
        return normDf

    # Otherwise, none must have been chosen, return the frame unchanged.
    return df

Exemplo n.º 4

0

Exibir arquivo

Arquivo: logistic_regression_classifier.py Projeto: george-fafard/FootballML

def scale_features(X, Y, name='Standard'):
    """Fit a scaler on the features and return the scaled features.

    Parameters
    ----------
    X : list
        Features to fit the scaler on and to transform
    Y : list
        Target labels, handed to the scaler's ``fit_transform`` as ``y``
    name : str
        Which scaler to use: 'MinMax', 'Robust', 'Quantile', 'Power' or
        'Standard'. Any other value prints a notice and uses the standard
        scaler.

    Returns
    -------
    Result of ``fit_transform(X, Y)`` on the selected scaler
    """
    # Short scale name -> class name inside the `scalers` module
    known_scales = (
        ('MinMax', 'MinMaxScaler'),
        ('Robust', 'RobustScaler'),
        ('Quantile', 'QuantileTransformer'),
        ('Power', 'PowerTransformer'),
        ('Standard', 'StandardScaler'),
    )

    class_name = None
    for scale_name, candidate in known_scales:
        if name == scale_name:
            class_name = candidate
            break

    if class_name is None:
        print('Invalid scale name, defaulting to Standard')
        class_name = 'StandardScaler'

    chosen = getattr(scalers, class_name)()
    return chosen.fit_transform(X, Y)

Exemplo n.º 5

0

Exibir arquivo

def statistic_transform(df, **params):
    """
    Feature transformation: Box-Cox transform the configured outlier columns.

    For every column listed in ``DefaultConfig.outlier_columns`` missing
    values are replaced by the column minimum and exact zeros by 1e-5, then a
    Box-Cox ``PowerTransformer`` (standardize=False) is fitted on those
    columns and its output written back into `df`.

    :param df: DataFrame containing the outlier columns; modified in place
    :param params: accepted but not used by this function
    :return: the same DataFrame with the outlier columns transformed
    """
    from sklearn import preprocessing

    outlier_columns = DefaultConfig.outlier_columns
    for column in outlier_columns:
        # NaN never compares equal to anything (itself included), so the
        # former `x == np.nan` test could not detect a missing value and the
        # fill never happened; fillna performs the intended replacement.
        df[column] = df[column].fillna(df[column].min())
        # Box-Cox needs strictly positive input, so nudge exact zeros.
        df[column] = df[column].apply(lambda x: 1e-5 if x == 0 else x)

    pt = preprocessing.PowerTransformer(method='box-cox', standardize=False)

    # Assign the raw array so values are written row by row. Wrapping it in a
    # new DataFrame (fresh 0..n-1 index) made pandas align on the index and
    # fill NaN whenever df had a different index.
    df[outlier_columns] = pt.fit_transform(df[outlier_columns])

    return df

Exemplo n.º 6

0

Exibir arquivo

 def normal_distribution_transformer(df):
     '''
     Non-linear transformation: fit a Yeo-Johnson PowerTransformer on `df`
     and return the transformed values as a new DataFrame that reuses the
     column labels of `df`.
     '''
     yeo_johnson = preprocessing.PowerTransformer(method='yeo-johnson')
     transformed_values = yeo_johnson.fit_transform(df)
     return pd.DataFrame(transformed_values, columns=df.columns)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_columntransformer_backend.py Projeto: vimcoper/shapash

    def test_inv_transform_ct_22(self):
        """
        inverse_transform on a ColumnTransformer that applies a scikit-learn
        PowerTransformer to the numeric columns and passes the rest through
        """
        fit_frame = pd.DataFrame({'num1': [0, 1],
                                  'num2': [0, 2],
                                  'other': ['A', 'B']})
        fit_target = pd.DataFrame(data=[0, 1], columns=['y'])

        power_step = ('power', skp.PowerTransformer(), ['num1', 'num2'])
        enc = ColumnTransformer(transformers=[power_step],
                                remainder='passthrough')
        enc.fit(fit_frame, fit_target)

        # Rows to encode and then decode again
        new_rows = pd.DataFrame({'num1': [0, 1, 1],
                                 'num2': [0, 2, 3],
                                 'other': ['A', 'B', 'C']})
        encoded = pd.DataFrame(enc.transform(new_rows))
        encoded.columns = ['col1', 'col2', 'other']

        # Values expected back from the inverse transform
        expected = pd.DataFrame({'power_num1': [0.0, 1.0, 1.0],
                                 'power_num2': [0.0, 1.9999999997665876, 3.000000000169985],
                                 'other': ['A', 'B', 'C']})

        pd.testing.assert_frame_equal(inverse_transform(encoded, enc), expected)

Exemplo n.º 8

0

Exibir arquivo

def PowerTransformer(train_df, test_df, HP):
    """
    Power-transform the feature columns of a train/test pair.

    The last column of each frame is treated as the label and every other
    column as a feature. The transformer is fitted on the training features
    only and then applied to both sets.

    :param train_df: training DataFrame, label in the last column
    :param test_df: testing DataFrame, label in the last column
    :param HP: hyper-parameter mapping; HP['PowerTransformer'] supplies
        'method', 'standardize' and 'copy'
    :return: (train, test) DataFrames holding the transformed features under
        their original column names plus the label values in a column named
        'label'
    """
    settings = HP['PowerTransformer']
    transformer = preprocessing.PowerTransformer(
        method=settings['method'],
        standardize=settings['standardize'],
        copy=settings['copy'])

    def _rebuild(features, transformed, labels):
        # Wrap the transformed values, restore the feature names, add labels.
        frame = pd.DataFrame(transformed)
        frame.columns = list(features.columns)
        return frame.assign(label=labels.values)

    train_features, train_labels = train_df.iloc[:, :-1], train_df.iloc[:, -1:]
    test_features, test_labels = test_df.iloc[:, :-1], test_df.iloc[:, -1:]

    # The transformer only ever sees copies of the feature frames.
    train_values = transformer.fit_transform(train_features.copy())
    test_values = transformer.transform(test_features.copy())  # TODO check here

    train_result = _rebuild(train_features, train_values, train_labels)
    test_result = _rebuild(test_features, test_values, test_labels)
    return train_result, test_result

Exemplo n.º 9

0

Exibir arquivo

Arquivo: clusters.py Projeto: marioamz/cancer-machine-learning

def standardizing(df, methods):
    '''
    Standardize a dataframe with the requested method and return the result
    as a new dataframe built from the scaled values.

    The methods are:
         - z: z-scores (StandardScaler)
         - mm: min-max (MinMaxScaler)
         - robust: RobustScaler
         - anything else (e.g. gauss): Yeo-Johnson PowerTransformer with
           standardization
    '''
    if methods == 'z':
        chosen = preprocessing.StandardScaler()
    elif methods == 'mm':
        chosen = preprocessing.MinMaxScaler()
    elif methods == 'robust':
        chosen = preprocessing.RobustScaler()
    else:
        chosen = preprocessing.PowerTransformer(method='yeo-johnson',
                                                standardize=True)

    # Fit on df and transform the same data in one step.
    return pd.DataFrame(chosen.fit_transform(df))

Exemplo n.º 10

0

Exibir arquivo

def calculateCrimeRates(crime_weather_agg, cr_type=False):
    '''
    Compute crime rates per 100,000 people and, for the all-crimes case,
    transform and classify them.

    Rows without a 'TotalPop' value are dropped and the index is renumbered.
    'crime_rate' is always added. Only when cr_type is False are the columns
    'crs' (Box-Cox transformed rate, not standardized) and 'crime_rate_cat'
    (output of classifyCrimeRates given the mean and std of 'crs') added too.
    '''
    # Keep only rows that have Census population data.
    rates = crime_weather_agg.dropna(axis='index',
                                     how='any',
                                     subset=['TotalPop'])
    rates = rates.reset_index(drop=True)

    # Crimes per 100,000 people.
    crimes_per_person = rates['crime_counts'] / rates['TotalPop']
    rates['crime_rate'] = crimes_per_person * 100000

    if not (cr_type == False):
        return rates

    # Box-Cox transform of the crime rate.
    box_cox = preprocessing.PowerTransformer(method='box-cox',
                                             standardize=False)
    rates['crs'] = box_cox.fit_transform(rates[['crime_rate']])

    # Summary statistics that drive the classification.
    summary = rates[['crs']].describe().transpose()

    # Classify every transformed rate against the mean and std.
    rates['crime_rate_cat'] = rates[['crs']].apply(
        classifyCrimeRates, args=(summary[['mean', 'std']], ), axis=1)

    return rates

Exemplo n.º 11

0

Exibir arquivo

 def _box_cox_transform(self, columns):
     """
     Box-Cox transform the given columns of ``self.output_df`` in place.

     The transformer is registered in ``self.scalers`` under 'box-cox'
     before it is fitted on the selected columns.
     """
     box_cox = preprocessing.PowerTransformer(method='box-cox')
     self.scalers['box-cox'] = box_cox
     selected = self.output_df[columns]
     self.output_df[columns] = box_cox.fit_transform(selected)

Exemplo n.º 12

0

Exibir arquivo

 def _yeo_johnson_transform(self, column):
     """
     Yeo-Johnson transform part of ``self.output_df`` in place.

     A new Yeo-Johnson ``PowerTransformer`` is registered in
     ``self.scalers`` under 'yeo-johnson' and then fitted on
     ``self.output_df[column]``, whose values are replaced by the
     transformed output.

     NOTE(review): PowerTransformer expects 2-D input, so `column` presumably
     has to be a list of column names - confirm against the callers.
     """
     transformer = preprocessing.PowerTransformer(method='yeo-johnson')
     # Was stored under 'robust' (copy-paste slip), which mislabels this
     # transformer and would replace any scaler kept under that key.
     self.scalers['yeo-johnson'] = transformer
     self.output_df[column] = transformer.fit_transform(
         self.output_df[column])

Exemplo n.º 13

0

Exibir arquivo

 def __init__(self, column, dataframe, settings):
     """
     Run the APreprocessor initialiser, build the PowerTransformer from
     ``self.settings`` and hand the dataframe to ``self.pickle_process``.

     Settings read (with fallback): 'method' ('yeo-johnson'),
     'standardize' (True) and 'copy' (True).
     """
     APreprocessor.__init__(self, column, dataframe, settings)
     options = self.settings
     power_kwargs = {
         'method': options.get('method', 'yeo-johnson'),
         'standardize': options.get('standardize', True),
         'copy': options.get('copy', True),
     }
     self.scaler = preprocessing.PowerTransformer(**power_kwargs)
     self.pickle_process(dataframe)

Exemplo n.º 14

0

Exibir arquivo

 def _power_transform(self):
     """
     Power-transform every feature named in ``self.num_feats``.

     Each feature gets its own default PowerTransformer, fitted on that
     column of ``self.df``. The transformed values are written to the same
     column of ``self.output_df`` and the fitted transformer is kept in
     ``self.power_transform_encoder`` under the feature name.

     Returns ``(self.output_df, self.power_transform_encoder)``.
     """
     for feature in self.num_feats:
         # The transformer needs a 2-D, single-column array.
         column_values = self.df[feature].values.reshape(-1, 1)
         fitted = preprocessing.PowerTransformer().fit(column_values)
         self.output_df.loc[:, feature] = fitted.transform(column_values)
         self.power_transform_encoder[feature] = fitted
     return self.output_df, self.power_transform_encoder

Exemplo n.º 15

0

Exibir arquivo

Arquivo: sktest.py Projeto: liunicholas/Scikit-Learn-Exploration

def scaleData(houseData):
    """
    Return the data in three variants: untouched, standardized with
    ``preprocessing.scale``, and passed through a default PowerTransformer.

    :param houseData: data to scale
    :return: (original data, scaled data, power-transformed data)
    """
    # preprocessing.scale with default arguments
    standardized = preprocessing.scale(houseData)

    # default PowerTransformer, fitted on the same data it transforms
    power_transformed = preprocessing.PowerTransformer().fit_transform(houseData)

    return houseData, standardized, power_transformed

Exemplo n.º 16

0

Exibir arquivo

Arquivo: feature_scaling.py Projeto: serkhanekarim/marketing-analysis

    def scaler(self, method='yeo-johnson'):
        '''
        Fit the requested scaler on self.dataframe and replace it with the
        transformed data.

        Parameters
        ----------
        method : string, optional
            Scaling method. One of 'standard', 'minmax', 'maxabs', 'robust',
            'quantile' (normal output distribution), 'l1', 'l2', 'max'
            (Normalizer with that norm), 'box-cox' or 'yeo-johnson'
            (PowerTransformer). The default is 'yeo-johnson'.

        Returns
        -------
        dataframe : transformed data
            Output of the scaler's transform, also stored in self.dataframe.
        scaler: object
            Scaler fitted on the data.

        Raises
        ------
        ValueError
            If method is not one of the supported names.

        '''
        # Factories are lazy so only the requested scaler is ever built.
        factories = {
            'standard': lambda: preprocessing.StandardScaler(),
            'minmax': lambda: preprocessing.MinMaxScaler(),
            'maxabs': lambda: preprocessing.MaxAbsScaler(),
            'robust': lambda: preprocessing.RobustScaler(),
            'quantile': lambda: preprocessing.QuantileTransformer(
                output_distribution='normal'),
            # preprocessing.normalize is a function that takes the data as
            # its first argument; the fit/transform object is Normalizer.
            'l1': lambda: preprocessing.Normalizer(norm='l1'),
            'l2': lambda: preprocessing.Normalizer(norm='l2'),
            'max': lambda: preprocessing.Normalizer(norm='max'),
            'box-cox': lambda: preprocessing.PowerTransformer(method='box-cox'),
            'yeo-johnson': lambda: preprocessing.PowerTransformer(
                method='yeo-johnson'),
        }

        if method not in factories:
            raise ValueError(
                'Unknown scaling method %r; expected one of %s'
                % (method, ', '.join(sorted(factories))))

        # The previous version also consulted self._check_sign_feature() and,
        # for a 'positive' or 'negative' result, replaced whatever had been
        # selected with PowerTransformer(method). That fails for every
        # non-power method name, so the choice now depends on `method` only.
        scaler = factories[method]()

        scaler.fit(self.dataframe)
        self.dataframe = scaler.transform(self.dataframe)

        return self.dataframe, scaler

Exemplo n.º 17

0

Exibir arquivo

Arquivo: split_scale.py Projeto: CameronGTaylor/ds-methodologies-exercises

def gaussian_scaler(train, test):
    """
    Fit a Yeo-Johnson PowerTransformer on `train` and apply it to both sets.

    :param train: DataFrame the transformer is fitted on
    :param test: DataFrame transformed with the train-fitted transformer
    :return: (fitted transformer, scaled train, scaled test); the scaled
        frames keep the columns and index of their source frame
    """
    scaler = skl.PowerTransformer(method='yeo-johnson')
    scaler.fit(train)

    def _scaled_copy(frame):
        # Transform and restore the labels of the source frame.
        return pd.DataFrame(scaler.transform(frame),
                            columns=frame.columns,
                            index=frame.index)

    train_scaled = _scaled_copy(train)
    test_scaled = _scaled_copy(test)

    return scaler, train_scaled, test_scaled

Exemplo n.º 18

0

Exibir arquivo

Arquivo: utils.py Projeto: arlaine4/Multilayer-Perceptron

def scale_input_data(data):
    """
    Power-transform a sequence of values and rescale it to [-1, 1].

    The values are treated as one single feature column: a default
    PowerTransformer is fitted and applied first, then a MinMaxScaler with
    feature_range=(-1, 1) is fitted and applied to the power-transformed
    values.

    NOTE: the previous version computed the power transform but then rebound
    the variable and fitted the MinMaxScaler on the raw input, so the power
    transform never reached the output. Returned values therefore differ
    from that version.

    :param data: sequence of numbers
    :return: list of Python floats, one per input value
    """
    column = np.array(data).reshape(-1, 1)
    gaussianised = preprocessing.PowerTransformer().fit_transform(column)
    rescaled = preprocessing.MinMaxScaler(
        feature_range=(-1, 1)).fit_transform(gaussianised)
    # Flatten first: float() on a 1-element array is deprecated in NumPy.
    return [float(value) for value in rescaled.ravel()]

Exemplo n.º 19

0

Exibir arquivo

Arquivo: evaluateNoveltyDetectionClassifier.py Projeto: TUMFTM/Damper-Defect-Detection-using-Machine-Learning

 def allScalers():
     """
     Return the mapping of scaler name to scaler instance.

     Only the default PowerTransformer is enabled. The alternatives that were
     tried and left switched off are: no scaler, QuantileTransformer (uniform
     and normal output), RobustScaler, MaxAbsScaler, MinMaxScaler, Normalizer
     and StandardScaler.
     """
     enabled = {}
     enabled["PowerTransformer"] = preprocessing.PowerTransformer()
     return enabled

Exemplo n.º 20

0

Exibir arquivo

Arquivo: Classification.py Projeto: MenghsuanLiu/Python

def DataNormalize(data, method="StandardScaler"):
    """
    Fit the named scaler on `data` and return the transformed data.

    :param data: data to fit the scaler on and to transform
    :param method: scaler name, matched case-insensitively:
        'StandardScaler', 'MinMaxScaler', 'MaxAbsScaler', 'RobustScaler' or
        'PowerTransformer' (Yeo-Johnson with standardization)
    :return: output of the fitted scaler's transform on `data`
    :raises ValueError: if `method` is not one of the names above
    """
    key = method.lower()

    if key == "standardscaler":
        scaler = preprocessing.StandardScaler()
    elif key == "minmaxscaler":
        scaler = preprocessing.MinMaxScaler()
    elif key == "maxabsscaler":
        scaler = preprocessing.MaxAbsScaler()
    elif key == "robustscaler":
        scaler = preprocessing.RobustScaler()
    elif key == "powertransformer":
        scaler = preprocessing.PowerTransformer(method="yeo-johnson",
                                                standardize=True)
    else:
        # Previously this fell through to `scaler.transform` with `scaler`
        # never assigned, which surfaced as an UnboundLocalError.
        raise ValueError("Unknown normalization method: %r" % (method,))

    return scaler.fit(data).transform(data)

Exemplo n.º 21

0

Exibir arquivo

 def get_scaler(self, scalername=None):
     """
     Return a new default-constructed scaler for the given name.

     When `scalername` is None the name stored in ``self.scaler`` is used.
     'NoScaler' and any unrecognised name yield None.
     """
     chosen = self.scaler if scalername is None else scalername
     scaler_classes = {
         'MinMaxScaler': preprocessing.MinMaxScaler,
         'MaxAbsScaler': preprocessing.MaxAbsScaler,
         'StandardScaler': preprocessing.StandardScaler,
         'RobustScaler': preprocessing.RobustScaler,
         'Normalizer': preprocessing.Normalizer,
         'QuantileTransformer': preprocessing.QuantileTransformer,
         'PowerTransformer': preprocessing.PowerTransformer,
     }
     scaler_class = scaler_classes.get(chosen)
     if scaler_class is None:
         return None
     return scaler_class()

Exemplo n.º 22

0

Exibir arquivo

Arquivo: feature_scale.py Projeto: jim-schwoebel/allie

def feature_scale(feature_scaler, X_train, y_train):
    """
    Build the unfitted scikit-learn transformer for a scaler name.

    More information about these scalers can be found at
    https://scikit-learn.org/stable/modules/preprocessing.html

    :param feature_scaler: one of 'binarizer', 'one_hot_encoder', 'maxabs',
        'minmax', 'normalize', 'poly', 'power_transformer',
        'quantile_transformer_normal', 'robust' or 'standard_scaler'
    :param X_train: accepted for interface compatibility; not used here
    :param y_train: accepted for interface compatibility; not used here
    :return: a new, unfitted transformer instance
    :raises ValueError: if `feature_scaler` is not a supported name
    """
    # Lazy factories: only the requested transformer is constructed.
    factories = {
        'binarizer': lambda: preprocessing.Binarizer(),
        # Original note: this one is meant for the y values. Categories not
        # seen during fit are ignored instead of raising.
        'one_hot_encoder': lambda: preprocessing.OneHotEncoder(
            handle_unknown='ignore'),
        'maxabs': lambda: preprocessing.MaxAbsScaler(),
        'minmax': lambda: preprocessing.MinMaxScaler(),
        # Normalizer with its default norm (L2 normalization)
        'normalize': lambda: preprocessing.Normalizer(),
        'poly': lambda: PolynomialFeatures(2),
        'power_transformer': lambda: preprocessing.PowerTransformer(
            method='yeo-johnson'),
        'quantile_transformer_normal': lambda: preprocessing.QuantileTransformer(
            output_distribution='normal'),
        'robust': lambda: preprocessing.RobustScaler(quantile_range=(25, 75)),
        'standard_scaler': lambda: preprocessing.StandardScaler(),
    }

    if feature_scaler not in factories:
        # Previously an unknown name reached `return model` with `model`
        # unassigned and failed with an UnboundLocalError.
        raise ValueError(
            'Unknown feature scaler %r; expected one of %s'
            % (feature_scaler, ', '.join(sorted(factories))))

    return factories[feature_scaler]()

Exemplo n.º 23

0

Exibir arquivo

    def impute_method_setup(
            self,
            random_state=DEFAULT_IMPUTER_RANDOM_STATE,
            add_indicator=DEFAULT_IMPUTER_ADD_INDICATOR,
            initial_strategy=DEFAULT_IMPUTER_INITIAL_STRATEGY,
            max_iter=DEFAULT_IMPUTER_MAX_ITER,
            estimator=DEFAULT_IMPUTER_ESTIMATOR,
            output_distribution=DEFAULT_TRANSFORMER_OUTPUT_DISTRIBUTION,
            transformer_method=DEFAULT_TRANSFORMER_METHOD,
            transformer_standardize=DEFAULT_TRANSFORMER_STANDARDIZE):
        """ Create the IterativeImputer, QuantileTransformer and PowerTransformer objects
            used when missing data is to be imputed.

            The arguments are forwarded unchanged to the sklearn constructors named below;
            see the sklearn documentation for their meaning.

            IterativeImputer:    https://scikit-learn.org/stable/modules/generated/sklearn.impute.IterativeImputer.html
            QuantileTransformer: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html
            PowerTransformer:    https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PowerTransformer.html

            Args:
                random_state:            (IterativeImputer & QuantileTransformer) seed for the pseudo random number generator
                add_indicator:           (IterativeImputer) passed as `add_indicator`
                initial_strategy:        (IterativeImputer) passed as `initial_strategy`
                max_iter:                (IterativeImputer) passed as `max_iter`
                estimator:               (IterativeImputer) passed as `estimator`
                output_distribution:     (QuantileTransformer) passed as `output_distribution`
                transformer_method:      (PowerTransformer) passed as `method`; defaults to DEFAULT_TRANSFORMER_METHOD
                transformer_standardize: (PowerTransformer) passed as `standardize`; defaults to DEFAULT_TRANSFORMER_STANDARDIZE

            The imputer additionally receives `verbose=self.verbose`.

            Returns: None. Sets self.imputer, self.transformer_quantile and self.transformer_power.
        """

        # imputer used to fill in the missing values
        imputer_options = {
            'random_state': random_state,
            'add_indicator': add_indicator,
            'initial_strategy': initial_strategy,
            'max_iter': max_iter,
            'verbose': self.verbose,
            'estimator': estimator,
        }
        self.imputer = IterativeImputer(**imputer_options)

        # quantile transformer, sharing the imputer's random seed
        self.transformer_quantile = preprocessing.QuantileTransformer(
            output_distribution=output_distribution,
            random_state=random_state)

        # power transformer
        self.transformer_power = preprocessing.PowerTransformer(
            method=transformer_method,
            standardize=transformer_standardize)

Exemplo n.º 24

0

Exibir arquivo

Arquivo: data_handling.py Projeto: wei-tian/Damper-Defect-Detection-Using-CNN

def train_scaler(data,
                 preprocessing_type,
                 n_channels,
                 seq_length,
                 toscale='channels'):
    """fit a scaler of the requested type on the dataset
    :param data the dataset as array
    :type data ndarray
    :param preprocessing_type kind of scaler to fit: 'standard' (RobustScaler),
    'gaussian' (PowerTransformer), 'zero2one' (MinMaxScaler),
    'neg_one2one' or 'fourier_samples' (MinMaxScaler with range (-1, 1))
    :type preprocessing_type str
    :param n_channels the number of channels
    :type n_channels int
    :param seq_length the length of a sequence of one single sensor
    :type seq_length int
    :param toscale with 'channels' the data is first passed through
    prep.concatenate_samples(data, n_channels, seq_length) and transposed;
    with any other value (e.g. 'samples') the data is used as given
    :type toscale str
    :returns the fitted scaler, or None for an unknown preprocessing_type
    """
    if toscale == 'channels':
        data = prep.concatenate_samples(data, n_channels,
                                        seq_length).transpose()

    if preprocessing_type == 'standard':
        # centers on the median and scales by the interquartile range
        return skpreprocessing.RobustScaler().fit(data)

    if preprocessing_type == 'gaussian':
        # power transform with default settings
        return skpreprocessing.PowerTransformer().fit(data)

    if preprocessing_type == 'zero2one':
        # scale to range [0, 1]
        return skpreprocessing.MinMaxScaler().fit(data)

    if preprocessing_type in ('neg_one2one', 'fourier_samples'):
        # scale to range [-1, 1]
        return skpreprocessing.MinMaxScaler(feature_range=(-1, 1)).fit(data)

    return None

Exemplo n.º 25

0

Exibir arquivo

Arquivo: util.py Projeto: wjunneng/2019-CCF-Passenger-Car-Segment-Sales-Forecast

def feature_transform(df, **params):
    """
    Feature distribution transformation: Box-Cox transform the columns listed
    in ``DefaultConfig.outlier_columns``.

    :param df: DataFrame containing the outlier columns; modified in place
    :param params: accepted but not used by this function
    :return: the same DataFrame with the outlier columns replaced by their
        Box-Cox transformed values (standardize=False)
    """
    from sklearn import preprocessing

    outlier_columns = DefaultConfig.outlier_columns
    pt = preprocessing.PowerTransformer(method='box-cox', standardize=False)

    # Assign the raw array so values are written row by row. Wrapping it in a
    # new DataFrame (fresh 0..n-1 index) made pandas align on the index and
    # fill NaN whenever df had a different index.
    df[outlier_columns] = pt.fit_transform(df[outlier_columns])

    return df

Exemplo n.º 26

0

Exibir arquivo

    def add_power_transform_scaling(self, data: object, gaussian_like_scale_columns: List[str] = None,
                                    gaussian_like_method: str = "yeo-johnson", standardize=True):
        """
        Queue a feature-wise power transform (makes data more Gaussian-like) as a normalization step.

        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PowerTransformer.html

        The unfitted transformer is appended, paired with its target columns, to
        ``self._normalization_steps``; nothing is fitted here.

        :param data: Input data; not used by this method.
        :param gaussian_like_scale_columns: Columns the scaling is meant for.
        :param gaussian_like_method: yeo-johnson or box-cox
        :param standardize: Set to True to apply zero-mean, unit-variance normalization to the output.
        :return: None
        """
        step = (
            preprocessing.PowerTransformer(method=gaussian_like_method, standardize=standardize),
            gaussian_like_scale_columns,
        )
        self._normalization_steps.append(step)

Exemplo n.º 27

0

Exibir arquivo

def map_to_Gaussian(df, methodType):
    '''
    Map a non-Gaussian distribution towards a Gaussian one with a power
    transform (no standardization of the output).

    Input:
    df: DataFrame
    methodType: 'yeo-johnson' or 'box-cox'

    Output:
    The result of PowerTransformer.fit_transform on df, returned as is
    (this function does not rebuild a DataFrame from it)
    '''
    power = preprocessing.PowerTransformer(method=methodType,
                                           standardize=False)
    return power.fit_transform(df)

Exemplo n.º 28

0

Exibir arquivo

Arquivo: utils.py Projeto: viviwang1008/ETH-ML

def scaleFeatures(data, opt='standard', **kwargs):
    """
    Scale every column of `data` with the selected scikit-learn scaler.

    :param data: DataFrame of features
    :param opt: 'standard', 'robust', 'minmax', 'norm', 'gaussian'
        (Yeo-Johnson PowerTransformer) or 'quantile' (QuantileTransformer
        with normal output)
    :param kwargs: forwarded to the scaler constructor for 'standard',
        'robust', 'minmax' and 'norm'; ignored for 'gaussian' and 'quantile'
    :return: new DataFrame with the scaled values and the column names of
        `data` (the index is not carried over)
    :raises ValueError: if `opt` is not one of the supported names
    """
    supported = ('standard', 'robust', 'minmax', 'norm', 'gaussian',
                 'quantile')
    if opt not in supported:
        # Previously an unknown option reached `scl.fit_transform` with `scl`
        # unassigned and failed with an UnboundLocalError.
        raise ValueError('Unknown scaling option %r; expected one of %s'
                         % (opt, ', '.join(supported)))

    from sklearn import preprocessing
    if opt == 'standard':
        scl = preprocessing.StandardScaler(**kwargs)
    elif opt == 'robust':
        scl = preprocessing.RobustScaler(**kwargs)
    elif opt == 'minmax':
        scl = preprocessing.MinMaxScaler(**kwargs)
    elif opt == 'norm':
        scl = preprocessing.Normalizer(**kwargs)
    elif opt == 'gaussian':
        # NOTE: the original author reported that this option did not work
        # for them; the cause was never identified.
        scl = preprocessing.PowerTransformer(method='yeo-johnson')
    else:  # 'quantile'
        scl = preprocessing.QuantileTransformer(output_distribution='normal')

    out = pd.DataFrame(scl.fit_transform(data), columns=data.columns)
    print("Features scaled using", opt, "scaling method!")
    return out

Exemplo n.º 29

0

Exibir arquivo

Arquivo: cleaning.py Projeto: jbrown1618/mortgage-approvals

def normalize_numeric_columns(training_data, test_data):
    """
    Normalize the numeric columns of the training and test sets in place.

    Every column except the target and id columns is placed in one of three
    groups, judged on the training data only:
      - normaltest p-value above .05: StandardScaler
      - otherwise, all values strictly positive: Box-Cox PowerTransformer
        with standardization
      - everything else: MinMaxScaler to the range (0, 1)
    Each transformer is fitted on the training columns and then applied to
    the same columns of both sets.

    :return: (training_data, test_data), the same objects that were passed in
    """
    normalish, positive, remaining = [], [], []

    # Everything should be numeric at this point, so all columns are checked.
    for col in training_data.columns:
        if col == target_column or col == id_column:
            continue

        _, p_value = stats.normaltest(training_data[col])
        if p_value > .05:
            group = normalish
        elif (training_data[col] > 0).all():
            group = positive
        else:
            group = remaining
        group.append(col)

    # Column group -> factory of the transformer used for that group.
    plans = (
        (normalish, lambda: preprocessing.StandardScaler()),
        (positive, lambda: preprocessing.PowerTransformer(method='box-cox',
                                                          standardize=True)),
        (remaining, lambda: preprocessing.MinMaxScaler(feature_range=(0, 1))),
    )

    for columns, make_transformer in plans:
        if len(columns) == 0:
            continue
        fitted = make_transformer().fit(training_data[columns])
        training_data[columns] = fitted.transform(training_data[columns])
        test_data[columns] = fitted.transform(test_data[columns])

    return training_data, test_data

Exemplo n.º 30

0

Exibir arquivo

def yeo_johnson(df):
    """
    Wrapper for sklearn's preprocessing.PowerTransformer (Yeo-Johnson Option)
    which can handle negative values


    Parameters
    ----------
    df : DataFrame


    Returns
    -------
    DataFrame
        Yeo-Johnson transformed (not standardized). It is built from the
        transformed values alone, so the column labels and index of `df`
        are not carried over.

    Raises
    ------
    TypeError
        If `df` is not a pandas DataFrame.
    """
    # An explicit check instead of `assert`, which is stripped when Python
    # runs with -O and would then let invalid input through.
    if not isinstance(df, pd.DataFrame):
        raise TypeError('df must be a pandas DataFrame, got %s'
                        % type(df).__name__)
    pt = preprocessing.PowerTransformer(method='yeo-johnson',
                                        standardize=False)
    return pd.DataFrame(pt.fit_transform(df))