def set_params(self, **params): """Set parameters for the wrapper and the wrapped estimator. This method is required for compatibility with GridSearchCV. :param params: A dictionary of parameters for the wrapper and wrapped estimator. If a key doesn't match the name of a wrapper parameter, it is assumed to be for the wrapped estimator. TODO: it would be better to do what sklearn's pipeline does and provide some namespacing in case the wrapper and wrapped class share a parameter name :return: self """ if not params: return self valid_params = self.get_params(deep=True) model_params = self.model_params wrapper_params = {} for key, value in params.iteritems(): if key in valid_params: wrapper_params[key] = value else: model_params[key] = value wrapper_params['model_params'] = model_params BaseEstimator.set_params(self, **wrapper_params) return self
def _check_all_monkeypatched():
    """Double-checks that instances of sklearn estimators have acquired the
    proper "what" method. Raises an assertion error if that is not the case.
    """
    # Make sure we have added what to sklearn stuff
    whatamize_sklearn(check=False)

    # Trick to force python to populate part of the BaseEstimator hierarchy
    from sklearn.ensemble import RandomForestClassifier
    assert BaseEstimator.__subclasscheck__(RandomForestClassifier)
    from sklearn.cluster import KMeans
    assert BaseEstimator.__subclasscheck__(KMeans)
    from sklearn.feature_extraction import DictVectorizer
    assert BaseEstimator.__subclasscheck__(DictVectorizer)
    from sklearn.decomposition import KernelPCA
    assert BaseEstimator.__subclasscheck__(KernelPCA)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for cls in all_subclasses(BaseEstimator):
            if not inspect.isabstract(cls):
                try:
                    obj = cls()
                    assert hasattr(obj, 'what'), cls.__name__
                    assert isinstance(obj.what(), What), cls.__name__
                except TypeError:
                    pass
    return True
def __init__(self, n_estimators=20,
             max_depth=5, min_samples_split=10, min_samples_leaf=10,
             random_state=0,
             em_itrs=5,
             regularization=0.05,
             passive_dyn_func=None,
             passive_dyn_ctrl=None,
             passive_dyn_noise=None,
             verbose=False):
    '''
    n_estimators        - number of ensembled models
    ...                 - a batch of parameters used for RandomTreesEmbedding, see the relevant documents
    em_itrs             - maximum number of EM iterations to take
    regularization      - small positive scalar to prevent singularity of matrix inversion
    passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
    passive_dyn_ctrl    - function to return the control matrix, which might depend on the state...
    passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                          note this implies a dynamical system with constant input gain. It is extendable to have
                          state-dependent input gain; then we need a covariance for each data point
    verbose             - output training information
    '''
    BaseEstimator.__init__(self)

    self.n_estimators = n_estimators
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.random_state = random_state
    self.em_itrs = em_itrs
    self.reg = regularization
    self.passive_dyn_func = passive_dyn_func
    self.passive_dyn_ctrl = passive_dyn_ctrl
    self.passive_dyn_noise = passive_dyn_noise
    self.verbose = verbose
def set_params(self, **kwargs):
    """Update the parameters of the feature extractor."""
    # We don't want non-functional arguments polluting kwargs
    params = kwargs.copy()
    for k in ['function', 'target']:
        params.pop(k, None)
    self.kwargs.update(params)
    BaseEstimator.set_params(self, **kwargs)
def test_vector_alignment(self):
    # Mock out a generic scikit-learn classifier
    mocked_model = BaseEstimator()
    mocked_model.fit = MagicMock()
    mocked_model.predict = MagicMock(return_value=[True])

    # Create a simple data frame extending to January 15
    date_sequence = pd.date_range('1/1/2011', periods=15, freq='D')
    time_series = pd.DataFrame({
        # This column will be accessed by name to generate the targets vector.
        'Violent Crime Committed?': [True, True] + [False]*13,
        # An actual time series used for nonsequential prediction will contain
        # more than one column. However, we just need to verify that it grabs
        # the correct slices of each column, so one stand-in column will
        # suffice.
        'Other Data': [0]*10 + [1]*5
    }, index=date_sequence)

    # Construct a NonsequentialPredictor with the mock
    predictor = NonsequentialPredictor(time_series, model=mocked_model)
    # The date to predict comes before the end of the time series,
    # so all rows from the 13th on should be discarded
    date_to_predict = datetime.date(2011, 1, 13)
    # The mock always predicts True, so predict() should return True
    self.assertTrue(predictor.predict(date_to_predict))
    # And both fit and predict should have been called
    self.assertTrue(mocked_model.fit.called)
    self.assertTrue(mocked_model.predict.called)

    # When feeding training data to the sklearn model, predict() needs to
    # align each day of the time series with whether a violent crime was
    # committed the NEXT day. Thus, the first element of the
    # 'Violent Crime Committed?' column should have been removed before being
    # used as the model's targets vector, because it has no previous day to
    # partner with.
    expected_targets = [True] + [False]*11
    # Similarly, any other column (in this case, 'Other Data') should only go
    # up to the day before the day we're trying to predict
    expected_features = [[0]]*10 + [[1]]*2
    # Get the two arguments passed to mocked_model
    fit_args = mocked_model.fit.call_args
    observed_features = fit_args[0][0]
    observed_targets = fit_args[0][1]
    # Equality tests with numpy arrays are wonky, so convert them to Python lists
    self.assertEqual(observed_targets.tolist(), expected_targets)
    self.assertEqual(observed_features.tolist(), expected_features)

    # Confirm the correct argument was passed to predict
    observed_day_to_predict = mocked_model.predict.call_args[0][0]
    self.assertEqual(observed_day_to_predict.tolist(), [[1]])
def __init__(self, embedding, analyzer='word', m=10, verbose=0, use_idf=True,
             **ev_params):
    """Expand a query by the nearest known tokens to its centroid"""
    self.embedding = embedding
    self.m = m
    self.vect = EmbeddedVectorizer(embedding, analyzer=analyzer,
                                   use_idf=use_idf, **ev_params)
    BaseEstimator.__init__(self)
def __init__(self, embedding, analyzer, m=10):
    """Initializes Embedding-Based Query Expansion

    :embedding: TODO
    :analyzer: TODO
    :m: TODO
    """
    BaseEstimator.__init__(self)
    self._embedding = embedding
    self._m = m
    self._cv = CountVectorizer(analyzer=analyzer)
def train_fchl(rep_computer: FCHLRepresentation, model: BaseEstimator,
               mols: List[str], y: List[float],
               n_jobs: int = 1,
               y_lower: List[float] = None) -> BaseEstimator:
    """Retrain an FCHL-based model

    Args:
        rep_computer: Tool used to compute the FCHL-compatible representations
            for each molecule
        model: Model to be retrained
        mols: List of molecules (XYZ format) in the training set
        y: List of property values to predict
        n_jobs: Number of threads to use for generating representations
        y_lower: Lower-fidelity estimate of the property. Used for delta
            learning models
    Returns:
        Retrained model
    """
    # Convert the input molecules into FCHL-ready inputs
    rep_computer.n_jobs = n_jobs
    reps = rep_computer.transform(mols)

    # For delta learning, train on the residual between fidelities
    if y_lower is not None:
        y = np.subtract(y, y_lower)

    # Retrain the model
    return model.fit(reps, y)
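# Hedged illustration of the delta-learning target used in train_fchl above:
# when y_lower is given, the model is fit on the residual between the target
# property and the cheaper low-fidelity estimate (np.subtract broadcasts over
# the lists). The values here are made up for demonstration.
import numpy as np

y_high = [1.00, 2.00]                # high-fidelity targets
y_low = [0.90, 1.80]                 # low-fidelity estimates
delta = np.subtract(y_high, y_low)   # model learns array([0.1, 0.2])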
def fit(self, X, original_y):
    # Seed the ensemble with a trivial estimator that always predicts zero
    base_est = BaseEstimator()
    base_est.predict = lambda X: np.zeros(X.shape[0], dtype=float)
    self.estimators_ = [base_est]

    for i in range(self.n_estimators):
        # Fit each new estimator to the gradient of the loss at the
        # current ensemble prediction
        grad = self.loss_grad(original_y, self._predict(X))
        estimator = deepcopy(self.base_regressor)
        estimator.fit(X, grad)
        self.estimators_.append(estimator)

    self.out_ = self._outliers(grad)
    self.feature_importances_ = self._calc_feature_imps()
    return self
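# A minimal, self-contained sketch of the boosting loop in fit() above,
# assuming squared-error loss (whose negative gradient is the residual
# y - F(x)) and a unit learning rate. The class attributes (base_regressor,
# n_estimators, loss_grad) are replaced by local stand-ins.
import numpy as np
from copy import deepcopy
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X_demo = rng.rand(100, 3)
y_demo = X_demo @ np.array([1.0, -2.0, 0.5])

base_regressor = DecisionTreeRegressor(max_depth=3)  # stand-in for self.base_regressor
estimators = []
pred = np.zeros(len(y_demo))
for _ in range(10):                  # stand-in for self.n_estimators
    grad = y_demo - pred             # negative MSE gradient (residuals)
    tree = deepcopy(base_regressor)
    tree.fit(X_demo, grad)
    estimators.append(tree)
    pred += tree.predict(X_demo)     # update the ensemble prediction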
def generate(model: base.BaseEstimator, sentences: List[List[str]]) -> None:
    """Tag the sentences with the given model.

    Parameters
    ----------
    model : BaseEstimator
        Fitted tagging model used to predict a tag for each token.
    sentences : list
        List of lists of strings representing the sentences to tag.
    """
    print(f"Tagging {len(sentences)} sentences.")

    # Since the models were trained on the lemmatized version of the words,
    # we also lemmatize them when tagging unlabeled sentences.
    lemmatizer = stem.WordNetLemmatizer()
    for sentence in sentences:
        # Convert to the lemmatized versions
        lemmatized = [lemmatizer.lemmatize(w.lower()) for w in sentence]
        # Convert to conllu.TokenList because models expect that.
        # Since they are essentially dicts, we build them that way.
        tags = model.predict([[{"lemma": w} for w in lemmatized]])
        print("Word\tTag")
        for w, t in zip(sentence, tags[0]):
            print(f"{w}\t{t}")
        print()
def evaluate_fchl(rep_computer: FCHLRepresentation, model: BaseEstimator,
                  mols: List[str], n_jobs: int = 1,
                  y_lower: List[float] = None) -> np.ndarray:
    """Run an FCHL-based model

    Args:
        rep_computer: Tool used to compute the FCHL-compatible representations
            for each molecule
        model: Model to be evaluated
        mols: List of molecules (XYZ format) to evaluate
        n_jobs: Number of threads to use for generating representations
        y_lower: Lower-fidelity estimate of the property. Used for delta
            learning models
    Returns:
        Results from the inference
    """
    # Convert the input molecules into FCHL-ready inputs
    rep_computer.n_jobs = n_jobs
    reps = rep_computer.transform(mols)

    # Run the model; for delta learning, add the low-fidelity estimate back in
    # (kept as an ndarray so the return matches the annotated type)
    y_pred = model.predict(reps)
    if y_lower is not None:
        y_pred = np.add(y_pred, y_lower)
    return y_pred
def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    if self.compat:
        return BaseEstimator.get_params(self, deep)
    else:
        if self.estimator is not None:
            params = self.estimator.get_params(deep)
        else:
            # TODO: check if this is necessary
            params = dict()
        for p in self._get_param_names():
            params[p] = getattr(self, p, None)
        return params
def summarize_feature_comparisons(
    base_clf: BaseEstimator, comparison_clfs: Dict[str, BaseEstimator],
    X_test, y_test
):
    from mlxtend.evaluate import mcnemar, cochrans_q, mcnemar_table

    summary_dict = collections.OrderedDict()
    mcnemar_tbs = dict()

    # create the list of predicted values
    base_y_predict = base_clf.predict(X_test)
    y_predictions = [base_y_predict]
    for name, clf in comparison_clfs.items():
        y_predict = clf.predict(X_test)

        # form the McNemar contingency table against the base classifier
        # (mcnemar_table returns a NumPy array, stored as-is)
        tb = mcnemar_table(y_test, base_y_predict, y_predict)
        mcnemar_tbs[f"base vs {name}"] = tb

        # store predictions per classifier
        y_predictions.append(y_predict)

    # first run Cochran's Q test over all classifiers
    qstat, pval = cochrans_q(y_test, *y_predictions)
    summary_dict["cochrans_q"] = qstat
    summary_dict["cochrans_q_pval"] = pval

    # run McNemar's test against all the predictions
    for name, table in mcnemar_tbs.items():
        chi2stat, pval = mcnemar(table, exact=True)
        summary_dict[f"mcnemar_{name}_chi2stat"] = chi2stat
        summary_dict[f"mcnemar_{name}_pval"] = pval

    return summary_dict
def has_cpu_params(estimator: BaseEstimator) -> bool:
    """Returns True if estimator has any CPU-related params."""
    return any(
        any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
        for param in estimator.get_params(deep=True))
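# Hedged usage sketch for has_cpu_params above. SKLEARN_CPU_PARAM_NAMES is
# defined elsewhere in the source; the tuple below is an assumed stand-in.
SKLEARN_CPU_PARAM_NAMES = ('n_jobs',)

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge
assert has_cpu_params(RandomForestClassifier())   # exposes n_jobs
assert not has_cpu_params(Ridge())                # no CPU-related params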
def out_of_fold(
        self, estimator: BaseEstimator,
        train_x, train_y, valid_x, valid_y):
    # For LightGBM and CatBoost, pass the following parameters at fit time
    fit_params = {}
    if type(estimator).__name__ in ('LGBMClassifier', 'CatBoostClassifier',):
        if 'eval_set' not in fit_params:
            fit_params['eval_set'] = [(valid_x, valid_y)]
        if 'early_stopping_rounds' not in fit_params:
            fit_params['early_stopping_rounds'] = 100

    estimator.fit(train_x, train_y, **fit_params)
    oof = self.make_pred(estimator, valid_x)
    return oof
def test_is_pairwise():
    """Test ``_is_pairwise``."""
    # Simple checks for _is_pairwise
    pca = KernelPCA(kernel='precomputed')
    with pytest.warns(None) as record:
        assert _is_pairwise(pca)
    assert not record

    # Pairwise attribute that is not consistent with the pairwise tag
    class IncorrectTagPCA(KernelPCA):
        """Class with incorrect _pairwise attribute."""
        _pairwise = False

    pca = IncorrectTagPCA(kernel='precomputed')
    msg = "_pairwise attribute is inconsistent with tags."
    with pytest.warns(FutureWarning, match=msg):
        assert not _is_pairwise(pca)

    # The _pairwise attribute is present and set to True while the pairwise
    # tag is not present
    class TruePairwise(BaseEstimator):
        """Class without pairwise tag."""
        _pairwise = True

    true_pairwise = TruePairwise()
    with pytest.warns(FutureWarning, match=msg):
        assert _is_pairwise(true_pairwise)

    # Pairwise attribute is not defined, thus the tag is used
    est = BaseEstimator()
    with pytest.warns(None) as record:
        assert not _is_pairwise(est)
    assert not record
def standard_report(
    estimator: BaseEstimator,
    X_test: Union[pd.DataFrame, np.ndarray],
    y_test: Union[pd.Series, np.ndarray],
    zero_division: str = "warn",
) -> None:
    """Display standard report of diagnostic metrics and plots for classification.

    Parameters
    ----------
    estimator : BaseEstimator
        Fitted classification estimator for evaluation.
    X_test : DataFrame or ndarray of shape (n_samples, n_features)
        Predictor test set.
    y_test : Series or ndarray of shape (n_samples,)
        Target test set.
    zero_division : str, optional
        Value to return for division by zero: 0, 1, or 'warn'.
    """
    table = classification_report(y_test,
                                  estimator.predict(X_test),
                                  zero_division=zero_division,
                                  heatmap=True)
    classification_plots(estimator, X_test, y_test)
    display(table)
def test_is_pairwise():
    # simple checks for _is_pairwise
    pca = KernelPCA(kernel='precomputed')
    with pytest.warns(None) as record:
        assert _is_pairwise(pca)
    assert not record

    # pairwise attribute that is not consistent with the pairwise tag
    class IncorrectTagPCA(KernelPCA):
        _pairwise = False

    pca = IncorrectTagPCA(kernel='precomputed')
    msg = "_pairwise was deprecated in 0.24 and will be removed in 1.1"
    with pytest.warns(FutureWarning, match=msg):
        assert not _is_pairwise(pca)

    # the _pairwise attribute is present and set to True while the pairwise
    # tag is not present
    class TruePairwise(BaseEstimator):
        _pairwise = True

    true_pairwise = TruePairwise()
    with pytest.warns(FutureWarning, match=msg):
        assert _is_pairwise(true_pairwise)

    # pairwise attribute is not defined, thus the tag is used
    est = BaseEstimator()
    with pytest.warns(None) as record:
        assert not _is_pairwise(est)
    assert not record
def get_params(self, deep=True, **kwargs):
    params = BaseEstimator.get_params(self, deep=deep, **kwargs)

    # Callback parameters are not returned by .get_params and need
    # special treatment.
    params_cb = self._get_params_callbacks(deep=deep)
    params.update(params_cb)

    return params
def run_inference(
        self, batch: Sequence[numpy.ndarray], model: BaseEstimator,
        **kwargs) -> Iterable[PredictionResult]:
    # vectorize data for better performance
    vectorized_batch = numpy.stack(batch, axis=0)
    predictions = model.predict(vectorized_batch)
    return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
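# What the numpy.stack call above buys: a batch of per-example 1-D arrays
# becomes a single 2-D array, so model.predict runs once over the whole batch
# instead of once per example. A minimal demonstration:
import numpy
batch = [numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0])]
vectorized_batch = numpy.stack(batch, axis=0)   # shape (2, 2)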
def _predict_regression(
    self,
    X: np.ndarray,
    model: BaseEstimator,
    task_type: int,
    Y_train: Optional[np.ndarray] = None
) -> np.ndarray:

    def send_warnings_to_log(
        message: Union[Warning, str],
        category: Type[Warning],
        filename: str,
        lineno: int,
        file: Optional[TextIO] = None,
        line: Optional[str] = None,
    ) -> None:
        self.logger.debug('%s:%s: %s:%s' %
                          (filename, lineno, str(category), message))
        return

    with warnings.catch_warnings():
        warnings.showwarning = send_warnings_to_log
        Y_pred = model.predict(X)

    if len(Y_pred.shape) == 1:
        Y_pred = Y_pred.reshape((-1, 1))

    return Y_pred
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['dimensions'] = self.dimensions
    params['noise'] = self.noise
    params['epsilon'] = self.epsilon
    logging.debug("Getting params: %s", str(params))
    return params
def classifier_margin(classifier: BaseEstimator, X: modALinput,
                      **predict_proba_kwargs) -> np.ndarray:
    """
    Classification margin uncertainty of the classifier for the provided
    samples. This uncertainty measure takes the first and second most likely
    predictions and takes the difference of their probabilities, which is the
    margin.

    Args:
        classifier: The classifier for which the prediction margin is to be
            measured.
        X: The samples for which the prediction margin of classification is
            to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the
            :meth:`predict_proba` of the classifier.

    Returns:
        Margin uncertainty, which is the difference of the probabilities of
        the first and second most likely predictions.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(
            X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    if classwise_uncertainty.shape[1] == 1:
        return np.zeros(shape=(classwise_uncertainty.shape[0], ))

    part = np.partition(-classwise_uncertainty, 1, axis=1)
    margin = -part[:, 0] + part[:, 1]

    return margin
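# Standalone walk-through of the partition trick in classifier_margin above:
# partitioning the negated probabilities puts the two largest probabilities
# (negated) in the first two columns, and their difference is the margin.
import numpy as np
proba = np.array([[0.1, 0.7, 0.2],
                  [0.4, 0.35, 0.25]])
part = np.partition(-proba, 1, axis=1)
margin = -part[:, 0] + part[:, 1]   # -> array([0.5 , 0.05])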
def _predict_proba(
    self,
    X: np.ndarray,
    model: BaseEstimator,
    task_type: int,
    Y_train: Optional[np.ndarray] = None,
) -> np.ndarray:

    def send_warnings_to_log(
        message: Union[Warning, str],
        category: Type[Warning],
        filename: str,
        lineno: int,
        file: Optional[TextIO] = None,
        line: Optional[str] = None,
    ) -> None:
        self.logger.debug('%s:%s: %s:%s' %
                          (filename, lineno, str(category), message))
        return

    with warnings.catch_warnings():
        warnings.showwarning = send_warnings_to_log
        Y_pred = model.predict_proba(X, batch_size=1000)

    if Y_train is None:
        raise ValueError("Y_train is required for classification problems")
    Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
    return Y_pred
def max_std_sampling(regressor: BaseEstimator, X: modALinput,
                     n_instances: int = 1, random_tie_break=False,
                     **predict_kwargs) -> Tuple[np.ndarray, modALinput]:
    """
    Regressor standard deviation sampling strategy.

    Args:
        regressor: The regressor for which the labels are to be queried.
        X: The pool of samples to query from.
        n_instances: Number of samples to be queried.
        random_tie_break: If True, shuffles utility scores to randomize the
            order. This can be used to break the tie when the highest utility
            score is not unique.
        **predict_kwargs: Keyword arguments to be passed to :meth:`predict`
            of the CommitteeRegressor.

    Returns:
        The indices of the instances from X chosen to be labelled;
        the instances from X chosen to be labelled.
    """
    _, std = regressor.predict(X, return_std=True, **predict_kwargs)
    std = std.reshape(X.shape[0], )

    if not random_tie_break:
        query_idx = multi_argmax(std, n_instances=n_instances)
    else:
        query_idx = shuffled_argmax(std, n_instances=n_instances)

    return query_idx, X[query_idx]
def decision_boundary(self, x: np.ndarray, y: np.ndarray, model: BaseEstimator):
    x0 = x[:, 0]
    x1 = x[:, 1]
    x_min, x_max = x0.min() - 1, x0.max() + 1
    y_min, y_max = x1.min() - 1, x1.max() + 1

    # Evaluate the model on a grid covering the data
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    z = z.astype(str)  # np.str was removed from NumPy; use the builtin str
    y = [str(label) for label in y]

    fig = px.scatter(x=x0, y=x1, color=y)
    contour = go.Contour(z=z,
                         x=np.arange(x_min, x_max, 0.1),
                         y=np.arange(y_min, y_max, 0.1),
                         line_width=0,
                         colorscale=[[0, '#ff9900'], [1, '#6666ff']],
                         opacity=0.4,
                         showscale=False)
    fig.add_trace(contour)
    fig.update_layout(title='Decision boundary', legend_title='Label')
    pyo.iplot(fig)
def evaluate_model(self, model: BaseEstimator, xtest: np.ndarray,
                   ytest: np.ndarray) -> ModelStats:
    """Get the accuracy, recall, and precision of this model"""
    ypreds = model.predict(xtest)
    # sklearn metrics expect (y_true, y_pred) in that order
    return ModelStats(accuracy=accuracy_score(ytest, ypreds),
                      precision=precision_score(ytest, ypreds),
                      recall=recall_score(ytest, ypreds))
def finalize_model(
    model: BaseEstimator,
    X_train: CSVData,
    Y_train: CSVData,
    X_test: CSVData,
    test_ids: CSVData,
    output: str,
    smote_fn: SamplerFnType = None,
    outlier_detection: Any = None,
    header: Tuple[str, str] = ("id", "y"),
    label_indexing: int = 0,
    export_int: bool = False,
) -> None:
    """Train the model on the complete data and generate the submission file.

    Parameters
    ----------
    model: The model
    X_train: The training data
    Y_train: The training labels
    X_test: The test data
    test_ids: The IDs for the test data
    output: The path where to dump the output
    smote_fn: The function that takes labels and returns SMOTE
    outlier_detection: Optional outlier detector used to filter the training data
    header: The header for the submission CSV
    label_indexing: What to start indexing the label from
    export_int: Whether to export the CSV as integers
    """
    print("Training model...")
    if outlier_detection is not None:
        # Keep only the inliers (fit_predict returns 1 for inliers)
        outliers = outlier_detection.fit_predict(X_train)
        X_train = X_train[outliers == 1]
        Y_train = Y_train[outliers == 1]
    if smote_fn:
        smote = smote_fn(Y_train)
        X_train, Y_train = smote.fit_resample(X_train, Y_train)
    model.fit(X_train, Y_train)
    print("Model trained")
    Y_pred = model.predict(X_test) + label_indexing
    submission: Any = np.stack([test_ids, Y_pred], 1)  # Add IDs
    create_submission_file(output, submission, header=header,
                           export_int=export_int)
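# Hedged sketch of the outlier-filtering step in finalize_model above:
# scikit-learn outlier detectors return 1 for inliers and -1 for outliers
# from fit_predict, so boolean masking keeps only the inliers.
import numpy as np
from sklearn.ensemble import IsolationForest

X_demo = np.random.RandomState(0).rand(50, 2)
mask = IsolationForest(random_state=0).fit_predict(X_demo) == 1
X_inliers = X_demo[mask]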
def instantiate_and_fit(
    index: pd.DataFrame,
    fold: pd.DataFrame,
    X: np.ndarray,
    y: pd.DataFrame,
    estimator: BaseEstimator,
    n_splits: int = 5,
    param_grid: Optional[Dict[str, Any]] = None,
) -> BaseEstimator:
    assert fold.shape[0] == index.shape[0]
    assert fold.shape[0] == X.shape[0]
    assert fold.shape[0] == y.shape[0]

    fold_vals = fold.ravel()
    train_inds = fold_vals == "train"
    val_inds = fold_vals == "val"
    if val_inds.sum():
        raise NotImplementedError(
            "Explicit validation indices not yet supported.")

    y = y.values.ravel()

    # Replace NaN/inf features with zero before fitting
    nan_row, nan_col = np.nonzero(np.isnan(X) | np.isinf(X))
    if len(nan_row):
        logger.warning(
            f"Setting {len(nan_row)} NaN elements to zero before fitting {estimator}."
        )
        X[nan_row, nan_col] = 0

    logger.info(f"Fitting {estimator} on data (shape: {X.shape})")
    if param_grid is not None:
        group_k_fold = GroupKFold(n_splits=n_splits).split(
            X[train_inds], y[train_inds], index.trial.values[train_inds])
        grid_search = GridSearchCV(estimator=estimator,
                                   param_grid=param_grid,
                                   verbose=10,
                                   cv=list(group_k_fold))
        grid_search.fit(X[train_inds], y[train_inds])
        return grid_search.best_estimator_

    estimator.fit(X[train_inds], y[train_inds])
    return estimator
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                        degree_range, lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    # Notes:
    # - You can implement it yourself or use the built-in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters it has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    DEGREE_PARAM = "bostonfeaturestransformer__degree"
    LAMBDA_PARAM = "linearregressor__reg_lambda"

    results = {}
    for degree in degree_range:
        for reg_lambda in lambda_range:
            params = model.get_params()
            params[DEGREE_PARAM] = degree
            params[LAMBDA_PARAM] = reg_lambda
            model.set_params(**params)
            scores = sklearn.model_selection.cross_val_score(
                model, X, y, scoring="neg_mean_squared_error", cv=k_folds)
            score = np.mean(scores)
            results[score] = params

    best_params = max(results.items(), key=lambda x: x[0])[1]
    # ========================

    return best_params
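# A hedged equivalent of the manual grid search above using the sklearn
# utilities the notes recommend; the pipeline parameter names are taken from
# the constants in cv_best_hyperparams, everything else is illustrative and
# depends on the caller's `model`, `X`, `y`, and `k_folds`.
from sklearn.model_selection import GridSearchCV

param_grid = {
    "bostonfeaturestransformer__degree": [1, 2, 3],
    "linearregressor__reg_lambda": [0.1, 1.0, 10.0],
}
# search = GridSearchCV(model, param_grid, cv=k_folds,
#                       scoring="neg_mean_squared_error").fit(X, y)
# best_params = search.best_params_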
def __init__(self, model, periods=1, freq='30min'):
    """Lags a dataset.

    Lags all features. Missing data is dropped for fitting and replaced
    with the mean for predicting.

    :periods: Number of timesteps to lag by
    """
    assert isinstance(model, BaseEstimator), "`model` isn't a scikit-learn model"
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.periods = periods
    self.freq = freq
    self.model = model
def _validate_onnx_data(self, X):
    if X.dtype not in (numpy.float32, numpy.float64):
        raise ValueError(
            "Input X must have dtype float32 or float64.")
    X = BaseEstimator._validate_data(
        self, X, reset=False,
        dtype=[numpy.float64, numpy.float32],
        order='C')
    return X
def __init__(self, blending_regressor: BaseEstimator, model_name: str,
             params: dict):
    super().__init__(model_name, params)
    self.blend_model = BlendingRegressor(
        blending_regressor.set_params(**params))
    self.MODELS_SERIALIZING_BASEPATH = self.path.join(
        self.MODELS_SERIALIZING_BASEPATH, MACHINE_LEARNING_TECHNIQUE_NAME)
    self.SERIALIZE_FILENAME_PREFIX = SERIALIZE_FILENAME_PREFIX
def __init__(self, rf_estimator=None, lasso_estimator=None):
    """
    @param  rf_estimator     random forest estimator,
                             :epkg:`sklearn:ensemble:RandomForestRegressor` by default
    @param  lasso_estimator  Lasso estimator,
                             :epkg:`sklearn:linear_model:Lasso` by default
    """
    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    if rf_estimator is None:
        rf_estimator = RandomForestRegressor()
    if lasso_estimator is None:
        lasso_estimator = Lasso()
    self.rf_estimator = rf_estimator
    self.lasso_estimator = lasso_estimator
def __init__(self, model: BaseEstimator, multi_output: bool = False):
    name = type(model).__name__
    super().__init__(version=name)
    if multi_output:
        model = MultiOutputRegressor(model, n_jobs=-1)
    self.model = model
    self.params = model.get_params()
def test_unsupported():
    vec = CountVectorizer()
    clf = BaseEstimator()
    res = explain_prediction(clf, 'hello, world', vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
def __init__(self, columns=None, remove=None, skip_errors=False, single=False):
    """
    @param      columns         specify a columns selection
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new category appears (no 1)
    @param      single          use a single column per category, do not
                                multiply them for each value

    The logging function displays a message when a new dense and big matrix
    is created when it should be sparse. A sparse matrix should be
    allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.columns = columns if isinstance(
        columns, list) or columns is None else [columns]
    self.skip_errors = skip_errors
    self.remove = remove
    self.single = single
def __init__(self, wv, m=10, analyzer=str.split, eqe=1, verbose=0, a=1, c=0,
             n_jobs=1):
    """
    Initializes the embedding-based query language model query expansion
    technique
    """
    BaseEstimator.__init__(self)
    self._wv = wv
    self._analyzer = analyzer
    if eqe not in [1, 2]:
        raise ValueError("eqe must be 1 or 2")
    self._eqe = eqe
    self.verbose = verbose
    self._a = a
    self._c = c
    self.m = m
    self.n_jobs = n_jobs
    self.vocabulary = None
def __init__(self, retrieval_model, matching=None, query_expansion=None,
             name='RM', labels=None):
    """TODO: to be defined.

    :retrieval_model: A retrieval model satisfying fit and query.
    :matching: A matching operation satisfying fit and predict.
    :query_expansion: A query expansion operation satisfying fit and transform.
    :labels: Pre-defined mapping of indices to identifiers;
        will be inferred during fit if not given.
    """
    BaseEstimator.__init__(self)
    self._retrieval_model = retrieval_model
    self._matching = matching
    self._query_expansion = query_expansion
    self.name = name
    self.labels_ = np.asarray(labels) if labels is not None else None
def __init__(self, columns=None, remove=None, skip_errors=False, single=False,
             fLOG=None):
    """
    constructor

    @param      columns         specify a columns selection
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new category appears (no 1)
    @param      single          use a single column per category, do not
                                multiply them for each value
    @param      fLOG            logging function

    The logging function displays a message when a new dense and big matrix
    is created when it should be sparse. A sparse matrix should be
    allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self._p_columns = columns if isinstance(
        columns, list) or columns is None else [columns]
    self._p_skip_errors = skip_errors
    self._p_remove = remove
    self._p_single = single
    self.fLOG = fLOG
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['max_dimensions'] = self.max_dimensions
    params['beta'] = self.beta
    params['C'] = self.C
    return params
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['beta'] = self.beta
    return params
def get_params(self, deep=True):
    return BaseEstimator.get_params(self, deep=deep)
def set_params(self, **params):
    # BaseEstimator.set_params returns self; propagate it so the sklearn
    # set_params contract (returning the estimator) holds.
    return BaseEstimator.set_params(self, **params)
def __init__(self, cost_func, n_class=2):
    BaseEstimator.__init__(self)
    self.n_class = n_class
    self.cost_func = cost_func