def __get_shrinkage_factor(self, X_group):
    """
    Build the normalized shrinkage weights of every fully specified group.

    Only the entries of ``self.groups_`` whose key has as many parts as
    ``X_group`` has columns are considered. For each of them, the number of
    rows behind every subgroup yielded by ``expanding_list(group, tuple)``
    is collected, handed to ``self.shrinkage_function_`` (together with
    ``self.shrinkage_kwargs``) and the result is rescaled to sum to one.

    :param X_group: frame holding the grouping columns; it must support
        ``.columns.to_list()`` and ``.groupby(...).size()`` (presumably a
        pandas DataFrame -- confirm against the caller).
    :return: dict mapping each most granular group to the output of the
        shrinkage function divided by its own sum.
    """
    colnames = X_group.columns.to_list()
    # Number of rows for every observed combination of group values
    group_sizes = X_group.groupby(colnames).size()

    # Keep only the groups that are split on every grouping column
    full_depth_groups = []
    for candidate in self.groups_:
        if len(as_list(candidate)) == len(colnames):
            full_depth_groups.append(candidate)

    # Row counts per hierarchy level, for each of those groups
    counts_per_level = {}
    for full_group in full_depth_groups:
        level_sizes = []
        for prefix in expanding_list(full_group, tuple):
            level_sizes.append(group_sizes[tuple(prefix)].sum())
        counts_per_level[full_group] = level_sizes

    # Raw shrinkage factors derived from the per-level counts
    raw_factors = {}
    for full_group, level_sizes in counts_per_level.items():
        raw_factors[full_group] = self.shrinkage_function_(
            level_sizes, **self.shrinkage_kwargs
        )

    # Rescale so that the factors of each group sum to one
    normalized = {}
    for full_group, raw in raw_factors.items():
        normalized[full_group] = raw / raw.sum()

    return normalized
def fit(self, X, y=None):
    """
    Fit the wrapped estimator per group and record the groups that were seen.

    Without shrinkage, the grouped estimators are fitted once on the full
    list of group columns; if ``use_global_model`` is set, a clone of the
    estimator is additionally fitted on the value columns of all of X and
    stored as ``fallback_``. With shrinkage, grouped estimators are fitted
    for every entry of ``group_colnames_hierarchical_`` and the shrinkage
    factors are computed afterwards.

    :param X: training data holding both the group columns and the value
        columns (it is first passed through ``__prepare_input_data``).
    :param y: training targets, forwarded to the estimators; may be None.
    :return: Returns an instance of self.
    """
    X, y = self.__prepare_input_data(X, y)

    if self.shrinkage is not None:
        self.__set_shrinkage_function()

    # Resolve the names of the grouping columns and of the value columns
    group_names = list(map(str, as_list(self.groups)))
    self.group_colnames_ = group_names
    if self.value_columns is None:
        # No explicit value columns: take every column that is not a group
        self.value_colnames_ = [
            column for column in X.columns if column not in group_names
        ]
    else:
        self.value_colnames_ = list(map(str, as_list(self.value_columns)))

    self.__validate(X, y)

    # List of all hierarchical subsets of the group columns
    self.group_colnames_hierarchical_ = expanding_list(
        self.group_colnames_, list
    )

    # A global fallback model is only fitted when shrinkage is disabled
    self.fallback_ = None
    if self.shrinkage is None and self.use_global_model:
        value_frame = X[self.value_colnames_]
        self.fallback_ = clone(self.estimator).fit(value_frame, y)

    if self.shrinkage is None:
        # Single fit on the complete set of group columns
        self.estimators_ = self.__fit_grouped_estimator(
            X, y, self.value_colnames_, self.group_colnames_
        )
    else:
        # One fit per hierarchy level, merged into a single mapping
        self.estimators_ = {}
        for hierarchy_level in self.group_colnames_hierarchical_:
            self.estimators_.update(
                self.__fit_grouped_estimator(
                    X, y, self.value_colnames_, hierarchy_level
                )
            )

    self.groups_ = as_list(self.estimators_.keys())

    if self.shrinkage is not None:
        self.shrinkage_factors_ = self.__get_shrinkage_factor(X)

    return self
def fit(self, X, y=None):
    """
    Fit the wrapped estimator per group and record the groups that were seen.

    X is split into a group part and a value part by
    ``_split_groups_and_values``. Without shrinkage, the grouped estimators
    are fitted directly; if ``use_global_model`` is set, a clone of the
    estimator is additionally fitted on the value part and stored as
    ``fallback_``. With shrinkage, fitting is delegated to
    ``__fit_shrinkage_groups`` and the shrinkage factors are computed
    afterwards.

    :param X: training data holding both the group columns and the value
        columns.
    :param y: training targets; when given they are validated with
        ``check_array(..., ensure_2d=False)``. May be None.
    :return: Returns an instance of self.
    """
    X_group, X_value = _split_groups_and_values(
        X,
        self.groups,
        min_value_cols=0,
        check_X=self.check_X,
        **self._check_kwargs
    )
    X_group = self.__add_shrinkage_column(X_group)

    # Targets are optional; validate them only when they were supplied
    y = y if y is None else check_array(y, ensure_2d=False)

    if self.shrinkage is not None:
        self.__set_shrinkage_function()

    # List of all hierarchical subsets of the group columns
    self.group_colnames_hierarchical_ = expanding_list(X_group.columns, list)

    # A global fallback model is only fitted when shrinkage is disabled
    self.fallback_ = None
    if self.shrinkage is None and self.use_global_model:
        self.fallback_ = clone(self.estimator).fit(X_value, y)

    # Pick the fitting routine that matches the shrinkage setting
    fit_groups = (
        self.__fit_grouped_estimator
        if self.shrinkage is None
        else self.__fit_shrinkage_groups
    )
    self.estimators_ = fit_groups(X_group, X_value, y)

    self.groups_ = as_list(self.estimators_.keys())

    if self.shrinkage is not None:
        self.shrinkage_factors_ = self.__get_shrinkage_factor(X_group)

    return self