Exemplo n.º 1
0
class SimpleImputer(Imputer):
    r"""Implementation of simple imputer.

    Wraps an ``SI`` instance (see the documentation link below) and picks the
    imputation strategy from the dtype of the column it is fitted on.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html

    See Also:
        * :class:`niaaml.preprocessing.imputation.Imputer`
    """
    Name = 'Simple Imputer'

    def __init__(self, **kwargs):
        r"""Initialize imputer.

        Arguments:
            **kwargs: Accepted for interface compatibility; not used here.
        """
        # The wrapped imputer treats NaN as the missing-value marker.
        self.__simple_imputer = SI(missing_values=np.nan)

    def fit(self, feature):
        r"""Fit imputer.

        Columns whose dtype is neither ``int64`` nor ``float64`` are imputed
        with a constant equal to the column's most frequent value. ``int64``
        and ``float64`` columns are fitted with whatever strategy the wrapped
        imputer is currently configured with.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column from DataFrame of features.
        """
        column_dtype = feature.dtypes.iloc[0]
        # The previous condition (`!= int64 or != float64`) was always true,
        # which sent numeric columns through the mode/constant path as well.
        is_numeric = (column_dtype == np.dtype('int64')
                      or column_dtype == np.dtype('float64'))
        if not is_numeric:
            # First row of mode() holds the most frequent value of the column.
            replacement_val = feature.mode().iloc[0, 0]
            self.__simple_imputer.set_params(**{
                'fill_value': replacement_val,
                'strategy': 'constant'
            })
        self.__simple_imputer.fit(feature)

    def transform(self, feature):
        r"""Transform feature's values.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column from DataFrame of features.

        Returns:
            The transformed column exactly as returned by the wrapped
            imputer's ``transform``.
            NOTE(review): scikit-learn imputers return a NumPy array by
            default rather than a DataFrame; confirm what callers expect.
        """
        return self.__simple_imputer.transform(feature)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return Imputer.to_string(self).format(name=self.Name)
Exemplo n.º 2
0
    def predictor_from_file(config_path, model_path):
        """Rebuild a ``BanditPredictor`` from a JSON config and a saved model.

        Arguments:
            config_path: Path to the JSON config. The keys read from it are
                ``model_type``, ``model_spec``, ``transforms``, ``imputers``,
                ``feature_config``, ``float_feature_order``,
                ``id_feature_order``, ``id_feature_str_to_int_map``,
                ``reward_type`` and ``dense_features_to_use``.
            model_path: Path to the saved model. Loaded as a torch state dict
                when ``model_type`` is ``"neural_bandit"``, otherwise
                unpickled.

        Returns:
            BanditPredictor: Predictor built from the restored model,
            transforms and imputers.

        Raises:
            Exception: If a transform spec names an unsupported transform.
        """
        with open(config_path, "rb") as config_file:
            config_dict = json.load(config_file)

        model_type = config_dict["model_type"]
        if model_type != "neural_bandit":
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; only load model files from trusted sources.
            with open(model_path, "rb") as model_file:
                model = pickle.load(model_file)
        else:
            # Build the pytorch model, load its weights, switch to `eval` mode.
            model = embed_dnn.EmbedDnn(**config_dict["model_spec"])
            model.load_state_dict(torch.load(model_path))
            model.eval()

        # Restore transforms by assigning their saved attributes directly.
        transforms = {}
        for feature_name, transform_spec in config_dict["transforms"].items():
            if transform_spec is None:
                # id lists don't have transforms
                transforms[feature_name] = None
                continue

            if transform_spec["name"] == "StandardScaler":
                scaler = preprocessing.StandardScaler()
                scaler.mean_ = np.array(transform_spec["mean"])
                scaler.scale_ = np.array(transform_spec["scale"])
                scaler.var_ = np.array(transform_spec["var"])
                transforms[feature_name] = scaler
                continue

            if transform_spec["name"] == "OneHotEncoder":
                encoder = preprocessing.OneHotEncoder()
                encoder.sparse = transform_spec["sparse"]
                encoder.categories_ = np.array(transform_spec["categories"])
                transforms[feature_name] = encoder
                continue

            raise Exception(
                f"Don't know how to load transform_spec of type {transform_spec['name']}"
            )

        # Restore imputers from their saved parameters and statistics.
        imputers = {}
        for feature_name, imputer_spec in config_dict["imputers"].items():
            # categoricals & id lists don't have imputers
            restored = None
            if imputer_spec is not None:
                restored = SimpleImputer()
                restored.set_params(**imputer_spec["parameters"])
                restored.statistics_ = np.array(imputer_spec["statistics"])
            imputers[feature_name] = restored

        return BanditPredictor(
            feature_config=config_dict["feature_config"],
            float_feature_order=config_dict["float_feature_order"],
            id_feature_order=config_dict["id_feature_order"],
            id_feature_str_to_int_map=config_dict["id_feature_str_to_int_map"],
            transforms=transforms,
            imputers=imputers,
            model=model,
            model_type=model_type,
            reward_type=config_dict["reward_type"],
            model_spec=config_dict["model_spec"],
            dense_features_to_use=config_dict["dense_features_to_use"],
        )
Exemplo n.º 3
0
    def get_imputer(self):
        """Build the imputer selected by the ``imputer`` entry of ``self.kwargs``.

        The imputer name falls back to ``self.IMPUTER`` when the kwarg is
        absent. Optional ``imputer_params`` from ``self.kwargs`` are applied
        with ``set_params``. The chosen name is logged through
        ``self.mlflow_log_param`` and the imputer's class name is printed.

        Returns:
            The configured ``SimpleImputer`` or ``KNNImputer`` instance.

        Raises:
            ValueError: If the requested imputer name is not supported.
        """
        imputer = self.kwargs.get("imputer", self.IMPUTER)
        if imputer == "SimpleImputer":
            imputer_use = SimpleImputer()
        elif imputer == "KNNImputer":
            imputer_use = KNNImputer()
        else:
            # Fail early with a clear message; previously an unknown name
            # left `imputer_use` unbound and crashed further down.
            raise ValueError(
                f"Unsupported imputer {imputer!r}; "
                "expected 'SimpleImputer' or 'KNNImputer'"
            )

        imputer_params = self.kwargs.get("imputer_params", {})
        self.mlflow_log_param("imputer", imputer)
        imputer_use.set_params(**imputer_params)
        print(colored(imputer_use.__class__.__name__, "blue"))

        return imputer_use
Exemplo n.º 4
0
    def predictor_from_file(config_path, net_path):
        """Rebuild a ``BanditPredictor`` from a JSON config and saved net weights.

        Arguments:
            config_path: Path to the JSON config. The keys read from it are
                ``net_spec``, ``transforms``, ``imputers``,
                ``experiment_specific_params``, ``float_feature_order`` and
                ``id_feature_order``.
            net_path: Path to the saved state dict for the ``EmbedDnn`` net.

        Returns:
            BanditPredictor: Predictor built from the restored net,
            transforms and imputers.

        Raises:
            Exception: If a transform spec names an unsupported transform.
        """

        def _restore_transform(transform_spec):
            # Rebuild one transform by assigning its saved attributes.
            if transform_spec is None:
                # id lists don't have transforms
                return None
            if transform_spec["name"] == "StandardScaler":
                scaler = preprocessing.StandardScaler()
                scaler.mean_ = np.array(transform_spec["mean"])
                scaler.scale_ = np.array(transform_spec["scale"])
                scaler.var_ = np.array(transform_spec["var"])
                return scaler
            if transform_spec["name"] == "OneHotEncoder":
                encoder = preprocessing.OneHotEncoder()
                encoder.sparse = transform_spec["sparse"]
                encoder.categories_ = np.array(transform_spec["categories"])
                return encoder
            raise Exception(
                f"Don't know how to load transform_spec of type {transform_spec['name']}"
            )

        def _restore_imputer(imputer_spec):
            # Rebuild one imputer from its saved parameters and statistics.
            if imputer_spec is None:
                # id lists don't have imputers
                return None
            restored = SimpleImputer()
            restored.set_params(**imputer_spec["parameters"])
            restored.statistics_ = np.array(imputer_spec["statistics"])
            return restored

        with open(config_path, "rb") as config_file:
            config_dict = json.load(config_file)

        # Build the pytorch model, load its weights, switch to `eval` mode.
        net = embed_dnn.EmbedDnn(**config_dict["net_spec"])
        net.load_state_dict(torch.load(net_path))
        net.eval()

        transforms = {
            feature_name: _restore_transform(spec)
            for feature_name, spec in config_dict["transforms"].items()
        }
        imputers = {
            feature_name: _restore_imputer(spec)
            for feature_name, spec in config_dict["imputers"].items()
        }

        return BanditPredictor(
            experiment_specific_params=config_dict["experiment_specific_params"],
            float_feature_order=config_dict["float_feature_order"],
            id_feature_order=config_dict["id_feature_order"],
            transforms=transforms,
            imputers=imputers,
            net=net,
            net_spec=config_dict["net_spec"],
        )
class PandasSimpleImputer(TransformerMixin):
    """Wrapper around ``SimpleImputer`` whose ``transform`` keeps the input's container type.

    The wrapped imputer is stored in ``self.imputer``. Imputed values are
    written into a copy of the input, so a ``pandas.DataFrame`` input comes
    back as a DataFrame with the same index and columns.
    """

    def __init__(self, *args, **kwargs):
        """Create the wrapped imputer, forwarding all arguments to ``SimpleImputer``."""
        self.imputer = SimpleImputer(*args, **kwargs)

    def fit(self, X, y=None):
        """Fit the wrapped imputer on ``X``; ``y`` is ignored. Returns ``self``."""
        self.imputer.fit(X)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing values imputed; ``y`` is ignored.

        The caller's ``X`` is left unmodified. Previously the imputed values
        were written into ``X`` itself.
        """
        imputed = X.copy()
        # Slice assignment keeps the index and column labels of the copy.
        imputed[:] = self.imputer.transform(X)
        return imputed

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the imputed copy of ``X``."""
        return self.fit(X).transform(X)

    def set_params(self, **params):
        """Forward ``params`` to the wrapped imputer. Returns ``self``."""
        self.imputer.set_params(**params)
        return self