from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import utils as ut

# Project configuration, read once when this module is imported.
config = ut.read_config_file('config.yaml')

# Every step takes its variable list from the same config section, so resolve
# that section (and the two lists used below) once instead of repeating the
# full subscript chain for each step.
_feature_groups = config[2]['Feature_Groups']
_categorical_vars = _feature_groups.get('categorical_vars')
_numerical_to_impute = _feature_groups.get('numerical_to_impute')

# Steps are applied in list order; the final step is the estimator.
# LogisticRegression must be imported at the top of the file
# (`from sklearn.linear_model import LogisticRegression`).
titanic_pipe = Pipeline([
    ('categorical_imputer',
     pp.CategoricalImputer(variables=_categorical_vars)),
    ('missing_indicator',
     pp.MissingIndicator(variables=_numerical_to_impute)),
    ('numerical_imputer',
     pp.NumericalImputer(variables=_numerical_to_impute)),
    # NOTE(review): index 1 of the categorical list is presumably the cabin
    # column (per the step name) -- confirm against config.yaml, since this
    # silently depends on the ordering of that list.
    ('cabin_variable',
     pp.ExtractFirstLetter(variables=_categorical_vars[1])),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(tol=0.05,
                                    variables=_categorical_vars)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=_categorical_vars)),
    ('scaler', StandardScaler()),
    ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
# Example #2
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config


# Titanic preprocessing pipeline: each step is a transformer from the
# `preprocessors` module, configured with a variable list from `config`.
# Pipeline applies the steps in the order they are listed.
titanic_pipe = Pipeline(
    [
        ('missing_indicator',
        	pp.MissingIndicator(variables=config.NUMERICAL_VARS)),

        ('categorical_imputer',
            pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
         
        # NOTE(review): the step name is spelled 'numerical_inputer' (sic).
        # It is a runtime key (e.g. for named_steps lookups), so it is left
        # unchanged here.
        ('numerical_inputer',
            pp.NumericalImputer(variables=config.NUMERICAL_VARS)),

        ('extract_firstletter',
        	pp.ExtractFirstLetter(variables=config.CABIN)),

        ('rare_label_encoder',
        	pp.RareLabelCategoricalEncoder(
        		tol=0.05,
        		variables=config.CATEGORICAL_VARS)),

        ('categorical_encoder',
            pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        # NOTE(review): the step list is cut off at this point -- no closing
        # `])` is visible, and the StandardScaler / LogisticRegression imports
        # are not used in the visible part. Restore the missing tail of the
        # pipeline before running this module.
# Example #3
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic model pipeline: custom transformers from `preprocessors`, each
# configured with a variable list from `config`, followed by feature scaling
# and a logistic-regression classifier. Steps run in the order listed.
titanic_pipe = Pipeline(steps=[
    (
        'extract_cabin_letter',
        pp.ExtractFirstLetter(variables=config.CABIN),
    ),
    (
        'missing_indicator',
        pp.MissingIndicator(variables=config.INDICATE_MISSING_VARS),
    ),
    (
        'impute_categorical',
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS),
    ),
    (
        'impute_numerical',
        pp.NumericalImputer(variables=config.NUMERICAL_VARS),
    ),
    (
        'encode_rare',
        pp.RareLabelCategoricalEncoder(
            tol=0.05,
            variables=config.CATEGORICAL_VARS,
        ),
    ),
    (
        'encode_categorical',
        pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
    ('scaler', StandardScaler()),
    # Final estimator; random_state is fixed to 0.
    ('model', LogisticRegression(random_state=0, C=0.0005)),
])
# Example #4
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# End-to-end Titanic pipeline. The preprocessing transformers come from the
# `preprocessors` module and receive their variable lists from `config`; the
# last two steps scale the features and fit the classifier.
titanic_pipe = Pipeline(
    steps=[
        (
            'categorical_imputer',
            pp.CategoricalImputer(config.CATEGORICAL_VARS),
        ),
        (
            'missing_indicator',
            pp.MissingIndicator(config.NUMERICAL_VARS),
        ),
        (
            'numerical_imputer',
            pp.NumericalImputer(config.NUMERICAL_VARS),
        ),
        (
            'cabin_extractor',
            pp.ExtractFirstLetter(config.CABIN),
        ),
        (
            'rare_labels',
            pp.RareLabelCategoricalEncoder(
                variables=config.CATEGORICAL_VARS,
                tol=0.05,
            ),
        ),
        (
            'categorical_encoder',
            pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
        ),
        ('scaler', StandardScaler()),
        # Final estimator; random_state is fixed to 0.
        ('model', LogisticRegression(random_state=0, C=0.0005)),
    ],
)
# Example #5
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
from config import *


# Titanic pipeline assembled from the `preprocessors` transformers. The
# variable lists (CATEGORICAL_VARS, NUMERICAL_VARS, CABIN_VAR) are the names
# pulled in by the star import from `config`. Steps run in the order listed.
titanic_pipe = Pipeline(steps=[
    (
        'categorical_imputer',
        pp.CategoricalImputer(variables=CATEGORICAL_VARS),
    ),
    (
        'missing_indicator',
        pp.MissingIndicator(variables=NUMERICAL_VARS),
    ),
    (
        'numerical_imputer',
        pp.NumericalImputer(variables=NUMERICAL_VARS),
    ),
    (
        'extract_first_letter',
        pp.ExtractFirstLetter(variables=CABIN_VAR),
    ),
    (
        'rare_label_encoding',
        pp.RareLabelCategoricalEncoder(
            variables=CATEGORICAL_VARS,
            tol=0.05,
        ),
    ),
    (
        'categorical_encoding',
        pp.CategoricalEncoder(variables=CATEGORICAL_VARS),
    ),
    ('scaler', StandardScaler()),
    # Final estimator; random_state is fixed to 0.
    ('model', LogisticRegression(random_state=0, C=0.0005)),
])
# Example #6
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic model pipeline: transformers from `preprocessors`, configured with
# variable lists from `config`, then scaling and logistic regression.
# Steps run in the order listed.
titanic_pipe = Pipeline([
    ('first_letter_extractor', pp.ExtractFirstLetter(variables=config.CABIN)),
    # The indicator step now receives the same variables that the
    # 'numerical_inputer' step fills in below, and it runs before that step.
    # Previously it was given CATEGORICAL_VARS, so the numerical variables got
    # no indicator columns.
    # NOTE(review): this changes the feature set fed to the model -- retrain
    # and re-validate any persisted pipeline after applying.
    ('missing_indicator',
     pp.MissingIndicator(variables=config.NUMERICAL_VARS)),
    ('categorical_imputer',
     pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
    # Step name spelling ('numerical_inputer') is a runtime key and is kept
    # as-is for compatibility with existing named_steps lookups.
    ('numerical_inputer',
     pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(tol=0.05,
                                    variables=config.CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('scaler', StandardScaler()),
    # Final estimator; random_state is fixed to 0.
    ('Linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
# Example #7
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic pipeline: `preprocessors` transformers configured from `config`,
# followed by a scaler and a logistic-regression estimator. Steps are applied
# in the order listed.
titanic_pipe = Pipeline(steps=[
    (
        'first_letter_extractor',
        pp.ExtractFirstLetter(variables=config.CABIN),
    ),
    (
        'missing_indicator',
        pp.MissingIndicator(variables=config.NUMERICAL_VARS_TO_IMPUTE),
    ),
    (
        'numerical_imputer',
        pp.NumericalImputer(variables=config.NUMERICAL_VARS),
    ),
    (
        'categorical_imputer',
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS),
    ),
    (
        'rare_label_encoder',
        pp.RareLabelCategoricalEncoder(
            variables=config.CATEGORICAL_VARS,
            tol=0.05,
        ),
    ),
    (
        'categorical_encoding',
        pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
    ('scaler', StandardScaler()),
    # Final estimator; random_state is fixed to 0.
    ('Logistic_model', LogisticRegression(random_state=0, C=0.0005)),
])