Пример #1
0
import logging

from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

from regression_model.processing import preprocessors as pp
from regression_model.processing import features
from regression_model.config import config

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)

# Preprocessing + regression pipeline.  Steps are applied in the order they
# are listed; every custom transformer receives its column selection from
# ``config``.  The final estimator is ``sklearn.linear_model.Lasso``, which
# must be imported at the top of the module (it previously was not, so
# importing this module raised ``NameError``).
price_pipe = Pipeline(
    steps=[
        (
            'categorical_imputer',
            pp.CategoricalImputer(variables=config.CATEGORICAL_VARS_WITH_NA),
        ),
        (
            'numerical_inputer',
            pp.NumericalImputer(variables=config.NUMERICAL_VARS_WITH_NA),
        ),
        (
            'temporal_variable',
            # NOTE(review): the reference variable is read from
            # config.DROP_FEATURES, the same setting handed to the
            # 'drop_features' step below -- confirm it names the intended
            # reference column.
            pp.TemporalVariableEstimator(
                variables=config.TEMPORAL_VARS,
                reference_variable=config.DROP_FEATURES,
            ),
        ),
        (
            'rare_label_encoder',
            pp.RareLabelCategoricalEncoder(
                tol=0.01,
                variables=config.CATEGORICAL_VARS,
            ),
        ),
        (
            'categorical_encoder',
            pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
        ),
        (
            'log_transformer',
            features.LogTransformer(variables=config.NUMERICALS_LOG_VARS),
        ),
        (
            'drop_features',
            pp.DropUnecessaryFeatures(variables_to_drop=config.DROP_FEATURES),
        ),
        ('scaler', MinMaxScaler()),
        # The step is called 'forest' although it wraps a Lasso model; the
        # name is kept because it is the key callers use to reach this step
        # (e.g. ``price_pipe.named_steps['forest']``).
        ('forest', Lasso(random_state=0)),
    ]
)
Пример #2
0
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 12 13:02:04 2020

@author: rkbra
"""

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

from sklearn.ensemble import RandomForestRegressor


from regression_model.processing import preprocessors as pp
from regression_model.processing import features
from regression_model.config import config


# Feature-engineering and modelling pipeline.  The steps are applied in the
# order listed and end with a 100-tree random forest regressor.
pipeline = Pipeline(steps=[
    # Transformers taken from the ``features`` module.
    ('duration_transformer', features.DurationTransformer()),
    ('journey_data_transformer', features.JourneyDateTransformer()),
    ('departure_time_transformer', features.DepartureTimeTransformer()),
    ('arival_time_transformer', features.ArrivalTimeTransformer()),
    # Transformers taken from the ``preprocessors`` module.
    ('total_stop_transformer', pp.TotalStopTransformer()),
    (
        'rare_label_encoder',
        pp.RareLabelCategoricalEncoder(
            tol=0.0015,
            variables=config.FEATURE_WITH_RARE_LABELS,
        ),
    ),
    (
        'encoder',
        pp.Encoder(variables=config.CATEGORICAL_FEATURES),
    ),
    # Scaling followed by the final estimator.
    ('scaler', MinMaxScaler()),
    ('model', RandomForestRegressor(n_estimators=100)),
])
Пример #3
0
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

from regression_model.processing import preprocessors as pp
from regression_model.config import config

# Categorical preprocessing followed by a random forest regressor.  The
# custom transformers take their column lists from ``config``.
marathon_pipeline = Pipeline(
    steps=[
        ("categorical_imputer",
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS_WITH_NA)),
        ("rare_label_encoder",
         pp.RareLabelCategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ("ordinal_encoder",
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        # Final estimator: fixed seed, depth-limited forest of 100 trees.
        ("regressor",
         RandomForestRegressor(
             max_depth=4,
             n_estimators=100,
             random_state=0,
         )),
    ]
)
from regression_model.processing import features
from regression_model.processing import preprocessors as pp
from regression_model.config import config

import logging

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)

# Preprocessing + regression pipeline.  Steps are applied in the order they
# are listed; every custom transformer is given a ``variables_path`` taken
# from ``config`` rather than an in-memory column list.  The final estimator
# is ``sklearn.linear_model.Lasso``, which must be imported at the top of the
# module (it previously was not, so importing this module raised
# ``NameError``).
price_pipe = Pipeline(
    steps=[
        (
            "categorical_imputer",
            pp.CategoricalImputer(
                variables_path=config.CATEGORICAL_VARS_FILE),
        ),
        (
            "numerical_inputer",
            pp.NumericalImputer(variables_path=config.NUMERICAL_VARS_FILE),
        ),
        (
            "rare_label_encoder",
            # Uses the same variables file as the categorical imputer above.
            pp.RareLabelCategoricalEncoder(
                tol=0.01,
                variables_path=config.CATEGORICAL_VARS_FILE,
            ),
        ),
        (
            "log_transformer",
            features.LogTransformer(
                variables_path=config.NUMERICAL_LOG_VARS_FILE),
        ),
        ("scaler", MinMaxScaler()),
        # Step names are public keys (``price_pipe.named_steps[...]``), so
        # the capitalised "Linear_model" is kept exactly as is.
        ("Linear_model", Lasso(alpha=0.005, random_state=0)),
    ]
)