Example #1
    def test_count_encoder(self):

        from category_encoders import CountEncoder
        import pandas as pd

        encoder = CountEncoder(cols="data")

        data = pd.DataFrame([1, 2, 3, 1, 4, 5, 3, 1], columns=["data"])

        # Each value is replaced by its frequency: 1 occurs three times,
        # 3 occurs twice, and 2, 4, 5 occur once each.
        encoded = encoder.fit_transform(data)
        self.assertTrue((encoded.data == [3, 1, 2, 3, 1, 1, 2, 3]).all())
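For reference, the expected values are just per-category frequencies; a minimal pandas-only sketch (no category_encoders) reproduces them:

import pandas as pd

# Map each value to the number of times it occurs in the column.
data = pd.Series([1, 2, 3, 1, 4, 5, 3, 1])
print(data.map(data.value_counts()).tolist())  # [3, 1, 2, 3, 1, 1, 2, 3]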
Example #2
File: pmlp.py  Project: kqf/lish-moa
def build_preprocessor():
    ce = make_pipeline(
        PandasSelector(["cp_type", "cp_time", "cp_dose"]),
        CountEncoder(
            cols=["cp_type", "cp_time", "cp_dose"],
            return_df=False,
            min_group_size=1,  # Makes it possible to clone
        ),
        StandardScaler(),
        TypeConversion(),
    )

    c_quantiles = make_pipeline(
        PandasSelector(startswith="c-"),
        QuantileTransformer(n_quantiles=100, output_distribution="normal"),
    )

    g_quantiles = make_pipeline(
        PandasSelector(startswith="g-"),
        QuantileTransformer(n_quantiles=100, output_distribution="normal"),
    )

    pca_features = make_pipeline(
        make_union(
            c_quantiles,
            g_quantiles,
        ),
        StandardScaler(),
    )
    return make_union(ce, pca_features)
Example #3
File: kmlp.py  Project: kqf/lish-moa
def build_preprocessor_power():
    ce = make_pipeline(
        PandasSelector(["cp_type", "cp_time", "cp_dose"]),
        CountEncoder(
            cols=["cp_type", "cp_time", "cp_dose"],
            return_df=False,
            min_group_size=1,  # Makes it possible to clone
            normalize=True,
        ),
    )

    c_features = make_pipeline(PandasSelector(startswith="c-"))

    g_features = make_pipeline(
        PandasSelector(startswith="g-"),
        StandardScaler(),
    )

    all_features = make_union(
        make_pipeline(
            make_union(
                ce,
                c_features,
            ),
            FixNaTransformer(),
            PowerTransformer(),
            StandardScaler(),
        ),
        g_features,
    )

    return make_pipeline(all_features, ShapeReporter())
Example #4
class RobustCountEncoder:
    """Count-encode nominal columns and map categories unseen at fit time to 0."""

    def __init__(self, cols=None):
        self.nominal_cols = cols

    def fit(self, X, y=None):
        self.encoder = CountEncoder(cols=self.nominal_cols).fit(X, y)
        return self

    def transform(self, X):
        X = self.encoder.transform(X)
        X = X.fillna(0)  # unseen categories come back as NaN; treat them as count 0
        return X
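A hypothetical usage sketch (the frames and column name here are invented) showing why the fillna(0) matters: with CountEncoder's default unknown handling, a category never seen during fit comes back as NaN:

import pandas as pd

train = pd.DataFrame({"city": ["a", "a", "b"]})
test = pd.DataFrame({"city": ["a", "c"]})  # "c" was never seen during fit

enc = RobustCountEncoder(cols=["city"]).fit(train)
print(enc.transform(test))  # "a" -> 2, "c" -> 0 rather than NaN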
Example #5
File: mlp.py  Project: kqf/lish-moa
def build_preprocessor():
    ce = make_pipeline(
        CountEncoder(
            cols=(0, 2),
            return_df=False,
            min_group_size=1,  # Makes it possible to clone
        ),
        StandardScaler(),
        TypeConversion(),
    )

    return ce
Example #6
def categ_encoder(df, df_y, cols, encoders=('target', 'count')):
    """Encode categorical columns; return the encoded frames and fitted encoders."""
    assert len(encoders) > 0, 'encoders is empty'
    df = df[cols]
    fitted_encoders = []
    fitted_df = []
    get_encoder = {
        'target': lambda: TargetEncoder(cols=cols).fit(df, df_y),
        'count': lambda: CountEncoder(cols=cols, handle_unknown=0,
                                      normalize=True).fit(df),
        # 'onehot': lambda: OneHotEncoder(cols=cols).fit(df)
    }
    for en_name in encoders:
        encoder = get_encoder[en_name]()
        x = encoder.transform(df)
        # Suffix each column with the encoder name, e.g. "brand_count".
        x = x.rename({i: i + "_" + en_name for i in x.columns}, axis=1)
        fitted_encoders.append(encoder)
        fitted_df.append(x)
    return fitted_df, fitted_encoders, encoders
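A hypothetical call, assuming TargetEncoder and CountEncoder are imported from category_encoders and the column names are strings:

import pandas as pd

df = pd.DataFrame({"brand": ["a", "b", "a", "c"]})
df_y = pd.Series([1, 0, 1, 0])

frames, fitted, used = categ_encoder(df, df_y, cols=["brand"])
features = pd.concat(frames, axis=1)  # columns: brand_target, brand_count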
Example #7
def build_model():
    # Former xgboost parameters
    params = {
        "colsample_bytree": 0.6522,
        "gamma": 3.6975,
        "learning_rate": 0.0503,
        "max_delta_step": 2.0706,
        "max_depth": 10,
        "min_child_weight": 31.5800,
        "n_estimators": 166,
        "subsample": 0.8639
    }

    if has_gpu():
        params["tree_method"] = "gpu_hist"

    model = make_pipeline(
        CountEncoder(cols=[0, 2], return_df=False),
        MultiOutputClassifier(xgb.XGBClassifier(**params)),
    )

    return model
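A hypothetical way to exercise the returned pipeline, assuming X is an array whose columns 0 and 2 hold cp_type and cp_dose (as in the later examples) and Y is a binary indicator matrix with one column per target:

model = build_model()
model.fit(X, Y)
probs = model.predict_proba(X)  # a list with one (n_samples, 2) array per target column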
Example #8
numeric_columns = [
    col for col in data.select_dtypes(include=np.number).columns
    if col != 'SalePrice'
]
object_columns_with_low_cardinality = [
    col for col in data.select_dtypes(include=['object']).columns
    if data[col].nunique() < 10
]
object_columns_with_high_cardinality = [
    col for col in data.select_dtypes(include=['object']).columns
    if data[col].nunique() >= 10
]
si = SimpleImputer(strategy='mean')
object_imputer = SimpleImputer(strategy='constant', fill_value='missing')
ohe = OneHotEncoder(handle_unknown='ignore', sparse=False)
ce = CountEncoder(min_group_size=0.01)
pf = PolynomialFeatures()
numeric_transformer = Pipeline(steps=[('imputer', si), ('pf', pf)])
object_transformer_with_low_cardinality = Pipeline(
    steps=[('imputer', object_imputer), ('ohe', ohe)])
object_transformer_with_high_cardinality = Pipeline(
    steps=[('imputer', object_imputer), ('ce', ce)])
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_columns),
    ('object_low_cardinality', object_transformer_with_low_cardinality,
     object_columns_with_low_cardinality),
    ('object_high_cardinality', object_transformer_with_high_cardinality,
     object_columns_with_high_cardinality)
],
                                 remainder='passthrough')
model = XGBRegressor(n_estimators=1000, learning_rate=.05)
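A minimal sketch of wiring these pieces together, assuming `data` is the house-prices frame the column lists above were derived from:

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

X = data.drop(columns=['SalePrice'])
y = data['SalePrice']
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
pipeline.fit(X_train, y_train)
print(pipeline.score(X_val, y_val))  # R^2 on the held-out split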
Example #9
def get_xgboost_old(train, targets, test, NFOLDS=7):
    # drop id col
    X = train.iloc[:, 1:].to_numpy()
    X_test = test.iloc[:, 1:].to_numpy()
    y = targets.iloc[:, 1:].to_numpy()

    classifier = MultiOutputClassifier(XGBClassifier(tree_method='gpu_hist'))
    clf = Pipeline([('encode', CountEncoder(cols=[0, 2])),
                    ('classify', classifier)])

    params = {
        'classify__estimator__colsample_bytree': 0.6522,
        'classify__estimator__gamma': 3.6975,
        'classify__estimator__learning_rate': 0.0503,
        'classify__estimator__max_delta_step': 2.0706,
        'classify__estimator__max_depth': 10,
        'classify__estimator__min_child_weight': 31.5800,
        'classify__estimator__n_estimators': 166,
        'classify__estimator__subsample': 0.8639
    }
    _ = clf.set_params(**params)

    oof_preds = np.zeros(y.shape)
    test_preds = np.zeros((test.shape[0], y.shape[1]))
    oof_losses = []
    kf = KFold(n_splits=NFOLDS)
    for fn, (trn_idx, val_idx) in enumerate(kf.split(X, y)):
        print('Starting fold: ', fn)
        X_train, X_val = X[trn_idx], X[val_idx]
        y_train, y_val = y[trn_idx], y[val_idx]

        # drop where cp_type==ctl_vehicle (baseline)
        ctl_mask = X_train[:, 0] == 'ctl_vehicle'
        X_train = X_train[~ctl_mask, :]
        y_train = y_train[~ctl_mask]

        clf.fit(X_train, y_train)
        val_preds = clf.predict_proba(X_val)  # list of preds per class
        val_preds = np.array(val_preds)[:, :, 1].T  # take the positive class
        oof_preds[val_idx] = val_preds

        loss = log_loss(np.ravel(y_val), np.ravel(val_preds))
        oof_losses.append(loss)
        preds = clf.predict_proba(X_test)
        preds = np.array(preds)[:, :, 1].T  # take the positive class
        test_preds += preds / NFOLDS

    print(oof_losses)
    print('Mean OOF loss across folds', np.mean(oof_losses))
    print('STD OOF loss across folds', np.std(oof_losses))

    # set control train preds to 0
    control_mask = train['cp_type'] == 'ctl_vehicle'
    oof_preds[control_mask] = 0

    print('OOF log loss: ', log_loss(np.ravel(y), np.ravel(oof_preds)))

    # set control test preds to 0
    control_mask = test['cp_type'] == 'ctl_vehicle'

    test_preds[control_mask] = 0
    return test_preds
Example #10
def process(train_data, test_data, standardization, logarithmic, count_encoding):
    # columns with NaN-values in train and test set
    train_nan = {col : train_data[col].isna().sum() for col in train_data.columns if train_data[col].isna().sum() > 0}
    # test_nan = {col : test_data[col].isna().sum() for col in test_data.columns if train_data[col].isna().sum() > 0}

    # drop_cols - columns with more than 25 % of data missing
    drop_cols = [col for col in train_nan.keys() if train_nan[col] >= 0.25*len(train_data)]

    train_data = train_data.drop(columns=drop_cols, inplace=False, axis=1)
    test_data = test_data.drop(columns=drop_cols, inplace=False, axis=1)

    y = train_data.pop('SalePrice')
    X = train_data.copy()

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.2, random_state=0)
    X_test = test_data.copy()

    total_cols = [col for col in X_train.keys()]
    categorical_cols = [col for col in X_train.keys() if X_train[col].dtype == 'object']
    numerical_cols = list(set(total_cols) - set(categorical_cols))

    # identify categorical columns with a high number of categories; they cause problems for one-hot encoding
    bad_cat_cols = {col : X_train[col].nunique() for col in categorical_cols if X_train[col].nunique() >= 20} 
    good_cat_cols = list(set(categorical_cols)-set(bad_cat_cols)) # categories used for OneHotEncoding

    categorical_cols = good_cat_cols

    # Handle NaN's
    num_imputer = SimpleImputer(strategy='median')
    X_train_num = pd.DataFrame(num_imputer.fit_transform(X_train[numerical_cols]))
    X_val_num = pd.DataFrame(num_imputer.transform(X_val[numerical_cols]))
    X_test_num = pd.DataFrame(num_imputer.transform(X_test[numerical_cols]))
    
    X_train_num.columns = X_train[numerical_cols].columns
    X_val_num.columns = X_val[numerical_cols].columns
    X_test_num.columns = X_test[numerical_cols].columns

    cat_imputer = SimpleImputer(strategy='most_frequent')
    X_train_cat = pd.DataFrame(cat_imputer.fit_transform(X_train[categorical_cols]))
    X_val_cat = pd.DataFrame(cat_imputer.transform(X_val[categorical_cols]))
    X_test_cat = pd.DataFrame(cat_imputer.transform(X_test[categorical_cols]))
    
    X_train_cat.columns = X_train[categorical_cols].columns
    X_val_cat.columns = X_val[categorical_cols].columns
    X_test_cat.columns = X_test[categorical_cols].columns

    X_train = pd.concat(objs=[X_train_num, X_train_cat], axis=1)
    X_val = pd.concat(objs=[X_val_num, X_val_cat], axis=1)
    X_test = pd.concat(objs=[X_test_num, X_test_cat], axis=1)

    # Feature Creation 
    X_train['Total_bathrooms'] = (X_train['BsmtFullBath']+ 0.5*X_train['BsmtHalfBath']+ X_train['FullBath'] + 0.5*X_train['HalfBath'])
    X_val['Total_bathrooms'] = (X_val['BsmtFullBath']+ 0.5*X_val['BsmtHalfBath']+ X_val['FullBath'] + 0.5*X_val['HalfBath'])
    X_test['Total_bathrooms'] = (X_test['BsmtFullBath']+ 0.5*X_test['BsmtHalfBath']+ X_test['FullBath'] + 0.5*X_test['HalfBath'])

    X_train['hasPool'] = X_train['PoolArea'].apply(lambda x: 1 if x>0 else 0)
    X_val['hasPool'] = X_val['PoolArea'].apply(lambda x: 1 if x>0 else 0)
    X_test['hasPool'] = X_test['PoolArea'].apply(lambda x: 1 if x>0 else 0)

    X_train['has2ndFloor'] = X_train['2ndFlrSF'].apply(lambda x: 1 if x>0 else 0)
    X_val['has2ndFloor'] = X_val['2ndFlrSF'].apply(lambda x: 1 if x>0 else 0)
    X_test['has2ndFloor'] = X_test['2ndFlrSF'].apply(lambda x: 1 if x>0 else 0)

    X_train['hasGarage'] = X_train['GarageArea'].apply(lambda x: 1 if x>0 else 0)
    X_val['hasGarage'] = X_val['GarageArea'].apply(lambda x: 1 if x>0 else 0)
    X_test['hasGarage'] = X_test['GarageArea'].apply(lambda x: 1 if x>0 else 0)

    X_train['hasFireplace'] = X_train['Fireplaces'].apply(lambda x: 1 if x>0 else 0)
    X_val['hasFireplace'] = X_val['Fireplaces'].apply(lambda x: 1 if x>0 else 0)
    X_test['hasFireplace'] = X_test['Fireplaces'].apply(lambda x: 1 if x>0 else 0)

    X_train['hasBasement'] = X_train['TotalBsmtSF'].apply(lambda x: 1 if x>0 else 0)
    X_val['hasBasement'] = X_val['TotalBsmtSF'].apply(lambda x: 1 if x>0 else 0)
    X_test['hasBasement'] = X_test['TotalBsmtSF'].apply(lambda x: 1 if x>0 else 0)

    X_train['Total_sqr_footage'] = (X_train['BsmtFinSF1'] + X_train['BsmtFinSF2'] + X_train['1stFlrSF'] + X_train['2ndFlrSF'])
    X_val['Total_sqr_footage'] = (X_val['BsmtFinSF1'] + X_val['BsmtFinSF2'] + X_val['1stFlrSF'] + X_val['2ndFlrSF'])
    X_test['Total_sqr_footage'] = (X_test['BsmtFinSF1'] + X_test['BsmtFinSF2'] + X_test['1stFlrSF'] + X_test['2ndFlrSF'])

    total_cols = [col for col in X_train.keys()]
    categorical_cols = [col for col in X_train.keys() if X_train[col].dtype == 'object']
    numerical_cols = list(set(total_cols) - set(categorical_cols))

    # identify categorical columns with a high number of categories; they cause problems for one-hot encoding
    bad_cat_cols = {col : X_train[col].nunique() for col in categorical_cols if X_train[col].nunique() >= 20} 
    good_cat_cols = list(set(categorical_cols)-set(bad_cat_cols)) # categories used for OneHotEncoding

    categorical_cols = good_cat_cols

    # One Hot Encoding or Count Encoding
    if count_encoding:
        count_encoder = CountEncoder(cols=categorical_cols)
        count_encoder.fit(X_train[categorical_cols])
        
        enc_cols_train = pd.DataFrame(count_encoder.transform(X_train[categorical_cols]))
        enc_cols_val = pd.DataFrame(count_encoder.transform(X_val[categorical_cols]))
        enc_cols_test = pd.DataFrame(count_encoder.transform(X_test[categorical_cols]))

    else:
        encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
        enc_cols_train = pd.DataFrame(encoder.fit_transform(X_train[categorical_cols]))
        enc_cols_val = pd.DataFrame(encoder.transform(X_val[categorical_cols]))
        enc_cols_test = pd.DataFrame(encoder.transform(X_test[categorical_cols]))
        
    enc_cols_train.index = X_train.index
    enc_cols_val.index = X_val.index
    enc_cols_test.index = X_test.index

    num_cols_train = X_train.drop(columns=categorical_cols, axis=1, inplace=False)
    num_cols_val = X_val.drop(columns=categorical_cols, axis=1, inplace=False)
    num_cols_test = X_test.drop(columns=categorical_cols, axis=1, inplace=False)

    if logarithmic:
        for col in numerical_cols:
            num_cols_train[col] = np.log(num_cols_train[col]+1)
            num_cols_val[col] = np.log(num_cols_val[col]+1)
            num_cols_test[col] = np.log(num_cols_test[col]+1)
    
    if standardization:
        for col in numerical_cols:
            # standardize all three splits with the training-set statistics
            # so the features stay on a comparable scale
            mean, std = num_cols_train[col].mean(), num_cols_train[col].std()
            num_cols_train[col] = (num_cols_train[col]-mean)/std
            num_cols_val[col] = (num_cols_val[col]-mean)/std
            num_cols_test[col] = (num_cols_test[col]-mean)/std

    X_train = pd.concat(objs=[num_cols_train, enc_cols_train], axis=1)
    X_val = pd.concat(objs=[num_cols_val, enc_cols_val], axis=1)
    X_test = pd.concat(objs=[num_cols_test, enc_cols_test], axis=1)

    return X_train, y_train, X_val, y_val, X_test
Example #11
# Map cp_time {24, 48, 72} -> {0, 1, 2} and cp_dose {'D1', 'D2'} -> {0, 1}.
time_map = {24: 0, 48: 1, 72: 2}
dose_map = {'D1': 0, 'D2': 1}
for i in range(X_test.shape[0]):
    X_test[i, 1] = time_map.get(X_test[i, 1], X_test[i, 1])
    X_test[i, 2] = dose_map.get(X_test[i, 2], X_test[i, 2])

classifier = MultiOutputClassifier(XGBClassifier(tree_method='gpu_hist'))

clf = Pipeline([('encode', CountEncoder(cols=[0, 2])),
                ('classify', classifier)])

params = {
    'classify__estimator__colsample_bytree': 0.6522,
    'classify__estimator__gamma': 3.6975,
    'classify__estimator__learning_rate': 0.0503,
    'classify__estimator__max_delta_step': 2.0706,
    'classify__estimator__max_depth': 10,
    'classify__estimator__min_child_weight': 31.5800,
    'classify__estimator__n_estimators': 166,
    'classify__estimator__subsample': 0.8639
}

_ = clf.set_params(**params)
Example #12
class Encoder:
    encode_methods = {
        'OrdinalEncoder': OrdinalEncoder,
        'OneHotEncoder': OneHotEncoder,
        'CountEncoder': CountEncoder,
        'TargetEncoder': TargetEncoder,
    }

    # spark_encode_methods = {
    #     'mean_encoder':,
    #     'target_encoder':,
    #     'label_encoder':,
    #     'onehot_encoder'
    # }
    # target_encoder and mean_encoder must not be fit on the train and
    # validation sets concatenated together; label_encoder and onehot_encoder can be.

    def __init__(self,
                 sparksess=None,
                 logdir='/encoder',
                 handle_unknown='-99999',
                 save_encoder=False):
        self.spark = sparksess
        self.logdir = logdir
        self._save_encoder = save_encoder  # keep the flag from shadowing the save_encoder() method

        self.ordinal_encoder_features = []
        self.onehot_encoder_features = []
        self.count_encoder_features = []
        self.target_encoder_features = []
        self.ordinal_encoder = OrdinalEncoder(
            cols=self.ordinal_encoder_features,
            return_df=True,
            handle_unknown=handle_unknown)
        self.onehot_encoder = OneHotEncoder(cols=self.onehot_encoder_features,
                                            return_df=True,
                                            handle_unknown=handle_unknown)
        self.count_encoder = CountEncoder(cols=self.count_encoder_features,
                                          return_df=True,
                                          handle_unknown=handle_unknown)
        self.target_encoder = TargetEncoder(cols=self.target_encoder_features,
                                            return_df=True,
                                            handle_unknown=handle_unknown)

    def fit(self,
            x_train,
            x_val=None,
            y_train=None,
            y_val=None,
            method_mapper=None):
        """
        Parameters
        ----------

        x_train: pd.DataFrame

        x_val: pd.DataFrame

        y_train: pd.DataFrame

        y_val: pd.DataFrame

        method_mapper: dict
            a mapping of feature to EncodeMethod
            example mapping: 
            {
                'feature1': 'OrdinalEncoder',
                'feature2': 'OneHotEncoder',
                'feature3': 'CountEncoder',
                'feature4': 'TargetEncoder',
            }
        """
        for feat in method_mapper:
            if method_mapper[feat] == 'OrdinalEncoder':
                self.ordinal_encoder_features.append(feat)
            elif method_mapper[feat] == 'OneHotEncoder':
                self.onehot_encoder_features.append(feat)
            elif method_mapper[feat] == 'CountEncoder':
                self.count_encoder_features.append(feat)
            elif method_mapper[feat] == 'TargetEncoder':
                self.target_encoder_features.append(feat)
            else:
                raise ValueError(
                    'Encoding method must be one of [OrdinalEncoder, OneHotEncoder, '
                    'CountEncoder, TargetEncoder], got %s' % method_mapper[feat])

        if self.spark is None:
            if len(self.ordinal_encoder_features) != 0 or len(
                    self.onehot_encoder_features) != 0:
                # OrdinalEncoder/OneHotEncoder may be fit on train and
                # validation data concatenated together (see note above).
                x_whole = x_train if x_val is None else pd.concat(
                    [x_train, x_val])
                y_whole = None
                if y_train is not None and y_val is not None:
                    y_whole = pd.concat([y_train, y_val])

                n_train = len(x_train)
                x_whole = self.ordinal_encoder.fit_transform(x_whole, y_whole)
                x_whole = self.onehot_encoder.fit_transform(x_whole, y_whole)
                x_train = x_whole[:n_train]
                x_val = x_whole[n_train:]

            x_train = self.count_encoder.fit_transform(x_train, y_train)
            x_train = self.target_encoder.fit_transform(x_train, y_train)
            if x_val is not None:
                x_val = self.count_encoder.transform(x_val, y_val)
                x_val = self.target_encoder.transform(x_val, y_val)

            if self._save_encoder:
                self.save_encoder()
        return x_train, y_train, x_val, y_val

    def transform(self, x, y=None):
        # Apply only the encoders that were actually assigned features in
        # fit(); calling transform on a never-fitted encoder raises an error.
        if self.ordinal_encoder_features:
            x = self.ordinal_encoder.transform(x, y)
        if self.onehot_encoder_features:
            x = self.onehot_encoder.transform(x, y)
        if self.count_encoder_features:
            x = self.count_encoder.transform(x, y)
        if self.target_encoder_features:
            x = self.target_encoder.transform(x, y)
        return x, y

    def fit_transform(self,
                      x_train,
                      x_val=None,
                      y_train=None,
                      y_val=None,
                      method_mapper=None):
        """
        Parameters
        ----------

        x_train: pd.DataFrame

        x_val: pd.DataFrame

        y_train: pd.DataFrame

        y_val: pd.DataFrame
        
        method_mapper: dict
            a mapping of feature to EncodeMethod
            example mapping: 
            {
                'feature1': 'OrdinalEncoder',
                'feature2': 'OneHotEncoder',
                'feature3': 'CountEncoder',
                'feature4': 'TargetEncoder',
            }
        """
        self.fit(x_train, x_val, y_train, y_val, method_mapper)
        x_train, y_train = self.transform(x_train, y_train)
        if x_val is not None:
            x_val, y_val = self.transform(x_val, y_val)
        return x_train, y_train, x_val, y_val

    def save_encoder(self):
        now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))
        os.makedirs(os.path.join(self.logdir, now))

        with open(os.path.join(self.logdir, now, 'OrdinalEncoder.pkl'),
                  'wb') as f:
            pickle.dump(self.ordinal_encoder, f)
        with open(os.path.join(self.logdir, now, 'OneHotEncoder.pkl'),
                  'wb') as f:
            pickle.dump(self.onehot_encoder, f)
        with open(os.path.join(self.logdir, now, 'CountEncoder.pkl'),
                  'wb') as f:
            pickle.dump(self.count_encoder, f)
        with open(os.path.join(self.logdir, now, 'TargetEncoder.pkl'),
                  'wb') as f:
            pickle.dump(self.target_encoder, f)

        with open(
                os.path.join(self.logdir, now, 'OrdinalEncoderFeatures.json'),
                'w') as f:
            json.dump(self.ordinal_encoder_features, f)
        with open(os.path.join(self.logdir, now, 'OneHotEncoderFeatures.json'),
                  'w') as f:
            json.dump(self.onehot_encoder_features, f)
        with open(os.path.join(self.logdir, now, 'CountEncoderFeatures.json'),
                  'w') as f:
            json.dump(self.count_encoder_features, f)
        with open(os.path.join(self.logdir, now, 'TargetEncoderFeatures.json'),
                  'w') as f:
            json.dump(self.target_encoder_features, f)

    def load_encoder(self, logdir=None):
        logdir = logdir or self.logdir

        with open(os.path.join(logdir, 'OrdinalEncoder.pkl'), 'rb') as f:
            self.ordinal_encoder = pickle.load(f)
        with open(os.path.join(logdir, 'OneHotEncoder.pkl'), 'rb') as f:
            self.onehot_encoder = pickle.load(f)
        with open(os.path.join(logdir, 'CountEncoder.pkl'), 'rb') as f:
            self.count_encoder = pickle.load(f)
        with open(os.path.join(logdir, 'TargetEncoder.pkl'), 'rb') as f:
            self.target_encoder = pickle.load(f)

        with open(os.path.join(logdir, 'OrdinalEncoderFeatures.json'),
                  'r') as f:
            self.ordinal_encoder_features = json.load(f)
        with open(os.path.join(logdir, 'OneHotEncoderFeatures.json'),
                  'r') as f:
            self.onehot_encoder_features = json.load(f)
        with open(os.path.join(logdir, 'CountEncoderFeatures.json'),
                  'r') as f:
            self.count_encoder_features = json.load(f)
        with open(os.path.join(logdir, 'TargetEncoderFeatures.json'),
                  'r') as f:
            self.target_encoder_features = json.load(f)
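A hypothetical end-to-end use of the wrapper above (the frames and column names are invented for illustration; only the count and target encoders are mapped here, so the other two stay unused):

import pandas as pd

x_train = pd.DataFrame({'city': ['a', 'b', 'a'], 'grade': ['x', 'y', 'x']})
x_val = pd.DataFrame({'city': ['b', 'a'], 'grade': ['y', 'x']})
y_train, y_val = pd.Series([1, 0, 1]), pd.Series([0, 1])

enc = Encoder(logdir='./encoder_logs')
mapper = {'city': 'CountEncoder', 'grade': 'TargetEncoder'}
x_train, y_train, x_val, y_val = enc.fit_transform(
    x_train, x_val, y_train, y_val, method_mapper=mapper)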