def set_params(self, **params): """Set parameters for the wrapper and the wrapped estimator. This method is required for compatibility with GridSearchCV. :param params: A dictionary of parameters for the wrapper and wrapped estimator. If a key doesn't match the name of a wrapper parameter, it is assumed to be for the wrapped estimator. TODO: it would be better to do what sklearn's pipeline does and provide some namespacing in case the wrapper and wrapped class share a parameter name :return: self """ if not params: return self valid_params = self.get_params(deep=True) model_params = self.model_params wrapper_params = {} for key, value in params.iteritems(): if key in valid_params: wrapper_params[key] = value else: model_params[key] = value wrapper_params['model_params'] = model_params BaseEstimator.set_params(self, **wrapper_params) return self
def _check_all_monkeypatched():
    """Double-checks that instances of sklearn estimators have acquired the
    proper "what" method. Raises an assertion error if that is not the case.
    """
    # Make sure we have added what to sklearn stuff
    whatamize_sklearn(check=False)

    # Trick to force python to populate part of the BaseEstimator hierarchy
    from sklearn.ensemble import RandomForestClassifier
    assert BaseEstimator.__subclasscheck__(RandomForestClassifier)
    from sklearn.cluster import KMeans
    assert BaseEstimator.__subclasscheck__(KMeans)
    from sklearn.feature_extraction import DictVectorizer
    assert BaseEstimator.__subclasscheck__(DictVectorizer)
    from sklearn.decomposition import KernelPCA
    assert BaseEstimator.__subclasscheck__(KernelPCA)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for cls in all_subclasses(BaseEstimator):
            if not inspect.isabstract(cls):
                try:
                    obj = cls()
                    assert hasattr(obj, 'what'), cls.__name__
                    assert isinstance(obj.what(), What), cls.__name__
                except TypeError:
                    pass
    return True
def __init__(self, n_estimators=20,
             max_depth=5, min_samples_split=10, min_samples_leaf=10,
             random_state=0,
             em_itrs=5,
             regularization=0.05,
             passive_dyn_func=None,
             passive_dyn_ctrl=None,
             passive_dyn_noise=None,
             verbose=False):
    '''
    n_estimators        - number of ensembled models
    ...                 - a batch of parameters used for RandomTreesEmbedding, see the relevant documents
    em_itrs             - maximum number of EM iterations to take
    regularization      - small positive scalar to prevent singularity of matrix inversion
    passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
    passive_dyn_ctrl    - function to return the control matrix, which might depend on the state...
    passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                          note this implies a dynamical system with constant input gain. It is extendable to have
                          state-dependent input gain; then we need a covariance for each data point
    verbose             - output training information
    '''
    BaseEstimator.__init__(self)

    self.n_estimators = n_estimators
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.random_state = random_state
    self.em_itrs = em_itrs
    self.reg = regularization
    self.passive_dyn_func = passive_dyn_func
    self.passive_dyn_ctrl = passive_dyn_ctrl
    self.passive_dyn_noise = passive_dyn_noise
    self.verbose = verbose
def set_params(self, **kwargs):
    """Update the parameters of the feature extractor."""
    # We don't want non-functional arguments polluting kwargs
    params = kwargs.copy()
    for k in ['function', 'target']:
        params.pop(k, None)
    self.kwargs.update(params)
    BaseEstimator.set_params(self, **kwargs)
def test_vector_alignment(self):
    # Mock out a generic scikit-learn classifier
    mocked_model = BaseEstimator()
    mocked_model.fit = MagicMock()
    mocked_model.predict = MagicMock(return_value=[True])

    # Create a simple data frame extending to January 15
    date_sequence = pd.date_range('1/1/2011', periods=15, freq='D')
    time_series = pd.DataFrame({
        # This column will be accessed by name to generate the targets vector.
        'Violent Crime Committed?': [True, True] + [False]*13,
        # An actual time series used for nonsequential prediction will contain
        # more than one column. However, we just need to verify that it grabs
        # the correct slices of each column, so one stand-in column will
        # suffice.
        'Other Data': [0]*10 + [1]*5
    }, index=date_sequence)

    # Construct a NonsequentialPredictor with the mock
    predictor = NonsequentialPredictor(time_series, model=mocked_model)
    # The date to predict comes before the end of the time series,
    # so all rows from the 13th on should be discarded
    date_to_predict = datetime.date(2011, 1, 13)
    # The mock always predicts True, so predict() should return True
    self.assertTrue(predictor.predict(date_to_predict))
    # And both fit and predict should have been called
    self.assertTrue(mocked_model.fit.called)
    self.assertTrue(mocked_model.predict.called)

    # When feeding training data to the sklearn model, predict() needs to
    # align each day of the time series with whether a violent crime was
    # committed the NEXT day. Thus, the first element of the
    # 'Violent Crime Committed?' column should have been removed before being
    # used as the model's targets vector, because it has no previous day to
    # partner with.
    expected_targets = [True] + [False]*11
    # Similarly, any other column (in this case, 'Other Data') should only go
    # up to the day before the day we're trying to predict
    expected_features = [[0]]*10 + [[1]]*2
    # Get the two arguments passed to mocked_model
    fit_args = mocked_model.fit.call_args
    observed_features = fit_args[0][0]
    observed_targets = fit_args[0][1]
    # Equality tests with numpy arrays are wonky, so convert them to Python lists
    self.assertEqual(observed_targets.tolist(), expected_targets)
    self.assertEqual(observed_features.tolist(), expected_features)

    # Confirm the correct argument was passed to predict
    observed_day_to_predict = mocked_model.predict.call_args[0][0]
    self.assertEqual(observed_day_to_predict.tolist(), [[1]])
def __init__(self, embedding, analyzer='word', m=10, verbose=0, use_idf=True,
             **ev_params):
    """Expand a query by the nearest known tokens to its centroid"""
    self.embedding = embedding
    self.m = m
    self.vect = EmbeddedVectorizer(embedding, analyzer=analyzer,
                                   use_idf=use_idf, **ev_params)
    BaseEstimator.__init__(self)
def __init__(self, embedding, analyzer, m=10):
    """Initializes Embedding-Based Query Expansion

    :embedding: TODO
    :analyzer: TODO
    :m: TODO
    """
    BaseEstimator.__init__(self)
    self._embedding = embedding
    self._m = m
    self._cv = CountVectorizer(analyzer=analyzer)
def train_fchl(rep_computer: FCHLRepresentation, model: BaseEstimator,
               mols: List[str], y: List[float],
               n_jobs: int = 1,
               y_lower: List[float] = None) -> BaseEstimator:
    """Retrain an FCHL-based model

    Args:
        rep_computer: Tool used to compute the FCHL-compatible representations
            for each molecule
        model: Model to be retrained
        mols: List of molecules (XYZ format) in the training set
        y: List of property values to predict
        n_jobs: Number of threads to use for generating representations
        y_lower: Lower-fidelity estimate of the property. Used for delta
            learning models
    Returns:
        Retrained model
    """
    # Convert the input molecules into FCHL-ready inputs
    rep_computer.n_jobs = n_jobs
    reps = rep_computer.transform(mols)

    # For delta learning, train on the residual between fidelities
    if y_lower is not None:
        y = np.subtract(y, y_lower)

    # Retrain the model
    return model.fit(reps, y)
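# Hedged illustration of the delta-learning target used in train_fchl above:
# when y_lower is given, the model is fit on the residual between the target
# property and the cheaper low-fidelity estimate (np.subtract broadcasts over
# the lists). The values here are made up for demonstration.
import numpy as np

y_high = [1.00, 2.00]                # high-fidelity targets
y_low = [0.90, 1.80]                 # low-fidelity estimates
delta = np.subtract(y_high, y_low)   # model learns array([0.1, 0.2])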
def fit(self, X, original_y):
    # Seed the ensemble with a trivial estimator that always predicts zero
    base_est = BaseEstimator()
    base_est.predict = lambda X: np.zeros(X.shape[0], dtype=float)
    self.estimators_ = [base_est]

    for i in range(self.n_estimators):
        # Fit each new estimator to the gradient of the loss at the
        # current ensemble prediction
        grad = self.loss_grad(original_y, self._predict(X))
        estimator = deepcopy(self.base_regressor)
        estimator.fit(X, grad)
        self.estimators_.append(estimator)

    self.out_ = self._outliers(grad)
    self.feature_importances_ = self._calc_feature_imps()
    return self
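# A minimal, self-contained sketch of the boosting loop in fit() above,
# assuming squared-error loss (whose negative gradient is the residual
# y - F(x)) and a unit learning rate. The class attributes (base_regressor,
# n_estimators, loss_grad) are replaced by local stand-ins.
import numpy as np
from copy import deepcopy
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X_demo = rng.rand(100, 3)
y_demo = X_demo @ np.array([1.0, -2.0, 0.5])

base_regressor = DecisionTreeRegressor(max_depth=3)  # stand-in for self.base_regressor
estimators = []
pred = np.zeros(len(y_demo))
for _ in range(10):                  # stand-in for self.n_estimators
    grad = y_demo - pred             # negative MSE gradient (residuals)
    tree = deepcopy(base_regressor)
    tree.fit(X_demo, grad)
    estimators.append(tree)
    pred += tree.predict(X_demo)     # update the ensemble prediction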
def generate(model: base.BaseEstimator, sentences: List[List[str]]) -> None:
    """Tag the sentences with the given model.

    Parameters
    ----------
    model : BaseEstimator
        Fitted tagging model used to predict a tag for each token.
    sentences : list
        List of lists of strings representing the sentences to tag.
    """
    print(f"Tagging {len(sentences)} sentences.")

    # Since the models were trained on the lemmatized version of the words,
    # we also lemmatize them when tagging unlabeled sentences.
    lemmatizer = stem.WordNetLemmatizer()
    for sentence in sentences:
        # Convert to the lemmatized versions
        lemmatized = [lemmatizer.lemmatize(w.lower()) for w in sentence]
        # Convert to conllu.TokenList because models expect that.
        # Since they are essentially dicts, we build them that way.
        tags = model.predict([[{"lemma": w} for w in lemmatized]])
        print("Word\tTag")
        for w, t in zip(sentence, tags[0]):
            print(f"{w}\t{t}")
        print()
def evaluate_fchl(rep_computer: FCHLRepresentation, model: BaseEstimator,
                  mols: List[str], n_jobs: int = 1,
                  y_lower: List[float] = None) -> np.ndarray:
    """Run an FCHL-based model

    Args:
        rep_computer: Tool used to compute the FCHL-compatible representations
            for each molecule
        model: Model to be evaluated
        mols: List of molecules (XYZ format) to evaluate
        n_jobs: Number of threads to use for generating representations
        y_lower: Lower-fidelity estimate of the property. Used for delta
            learning models
    Returns:
        Results from the inference
    """
    # Convert the input molecules into FCHL-ready inputs
    rep_computer.n_jobs = n_jobs
    reps = rep_computer.transform(mols)

    # Run the model; for delta learning, add the low-fidelity estimate back in
    # (kept as an ndarray so the return matches the annotated type)
    y_pred = model.predict(reps)
    if y_lower is not None:
        y_pred = np.add(y_pred, y_lower)
    return y_pred
def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    if self.compat:
        return BaseEstimator.get_params(self, deep)
    else:
        if self.estimator is not None:
            params = self.estimator.get_params(deep)
        else:
            # TODO: check if this is necessary
            params = dict()
        for p in self._get_param_names():
            params[p] = getattr(self, p, None)
        return params
def summarize_feature_comparisons(
    base_clf: BaseEstimator, comparison_clfs: Dict[str, BaseEstimator],
    X_test, y_test
):
    from mlxtend.evaluate import mcnemar, cochrans_q, mcnemar_table

    summary_dict = collections.OrderedDict()
    mcnemar_tbs = dict()

    # create the list of predicted values
    base_y_predict = base_clf.predict(X_test)
    y_predictions = [base_y_predict]
    for name, clf in comparison_clfs.items():
        y_predict = clf.predict(X_test)

        # form the McNemar contingency table against the base classifier
        # (mcnemar_table returns a NumPy array, stored as-is)
        tb = mcnemar_table(y_test, base_y_predict, y_predict)
        mcnemar_tbs[f"base vs {name}"] = tb

        # store predictions per classifier
        y_predictions.append(y_predict)

    # first run Cochran's Q test over all classifiers
    qstat, pval = cochrans_q(y_test, *y_predictions)
    summary_dict["cochrans_q"] = qstat
    summary_dict["cochrans_q_pval"] = pval

    # run McNemar's test against all the predictions
    for name, table in mcnemar_tbs.items():
        chi2stat, pval = mcnemar(table, exact=True)
        summary_dict[f"mcnemar_{name}_chi2stat"] = chi2stat
        summary_dict[f"mcnemar_{name}_pval"] = pval

    return summary_dict
def has_cpu_params(estimator: BaseEstimator) -> bool:
    """Returns True if estimator has any CPU-related params."""
    return any(
        any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
        for param in estimator.get_params(deep=True))
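# Hedged usage sketch for has_cpu_params above. SKLEARN_CPU_PARAM_NAMES is
# defined elsewhere in the source; the tuple below is an assumed stand-in.
SKLEARN_CPU_PARAM_NAMES = ('n_jobs',)

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge
assert has_cpu_params(RandomForestClassifier())   # exposes n_jobs
assert not has_cpu_params(Ridge())                # no CPU-related params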
def out_of_fold(
        self, estimator: BaseEstimator,
        train_x, train_y, valid_x, valid_y):
    # For LightGBM and CatBoost, pass the following parameters at fit time
    fit_params = {}
    if type(estimator).__name__ in ('LGBMClassifier', 'CatBoostClassifier',):
        if 'eval_set' not in fit_params:
            fit_params['eval_set'] = [(valid_x, valid_y)]
        if 'early_stopping_rounds' not in fit_params:
            fit_params['early_stopping_rounds'] = 100

    estimator.fit(train_x, train_y, **fit_params)
    oof = self.make_pred(estimator, valid_x)
    return oof
def test_is_pairwise():
    """Test ``_is_pairwise``."""
    # Simple checks for _is_pairwise
    pca = KernelPCA(kernel='precomputed')
    with pytest.warns(None) as record:
        assert _is_pairwise(pca)
    assert not record

    # Pairwise attribute that is not consistent with the pairwise tag
    class IncorrectTagPCA(KernelPCA):
        """Class with incorrect _pairwise attribute."""
        _pairwise = False

    pca = IncorrectTagPCA(kernel='precomputed')
    msg = "_pairwise attribute is inconsistent with tags."
    with pytest.warns(FutureWarning, match=msg):
        assert not _is_pairwise(pca)

    # The _pairwise attribute is present and set to True while the pairwise
    # tag is not present
    class TruePairwise(BaseEstimator):
        """Class without pairwise tag."""
        _pairwise = True

    true_pairwise = TruePairwise()
    with pytest.warns(FutureWarning, match=msg):
        assert _is_pairwise(true_pairwise)

    # Pairwise attribute is not defined, thus the tag is used
    est = BaseEstimator()
    with pytest.warns(None) as record:
        assert not _is_pairwise(est)
    assert not record
def standard_report(
    estimator: BaseEstimator,
    X_test: Union[pd.DataFrame, np.ndarray],
    y_test: Union[pd.Series, np.ndarray],
    zero_division: str = "warn",
) -> None:
    """Display standard report of diagnostic metrics and plots for classification.

    Parameters
    ----------
    estimator : BaseEstimator
        Fitted classification estimator for evaluation.
    X_test : DataFrame or ndarray of shape (n_samples, n_features)
        Predictor test set.
    y_test : Series or ndarray of shape (n_samples,)
        Target test set.
    zero_division : str, optional
        Value to return for division by zero: 0, 1, or 'warn'.
    """
    table = classification_report(y_test,
                                  estimator.predict(X_test),
                                  zero_division=zero_division,
                                  heatmap=True)
    classification_plots(estimator, X_test, y_test)
    display(table)
def test_is_pairwise():
    # simple checks for _is_pairwise
    pca = KernelPCA(kernel='precomputed')
    with pytest.warns(None) as record:
        assert _is_pairwise(pca)
    assert not record

    # pairwise attribute that is not consistent with the pairwise tag
    class IncorrectTagPCA(KernelPCA):
        _pairwise = False

    pca = IncorrectTagPCA(kernel='precomputed')
    msg = "_pairwise was deprecated in 0.24 and will be removed in 1.1"
    with pytest.warns(FutureWarning, match=msg):
        assert not _is_pairwise(pca)

    # the _pairwise attribute is present and set to True while the pairwise
    # tag is not present
    class TruePairwise(BaseEstimator):
        _pairwise = True

    true_pairwise = TruePairwise()
    with pytest.warns(FutureWarning, match=msg):
        assert _is_pairwise(true_pairwise)

    # pairwise attribute is not defined, thus the tag is used
    est = BaseEstimator()
    with pytest.warns(None) as record:
        assert not _is_pairwise(est)
    assert not record
def get_params(self, deep=True, **kwargs):
    params = BaseEstimator.get_params(self, deep=deep, **kwargs)

    # Callback parameters are not returned by .get_params and need
    # special treatment.
    params_cb = self._get_params_callbacks(deep=deep)
    params.update(params_cb)

    return params
def run_inference(
        self, batch: Sequence[numpy.ndarray], model: BaseEstimator,
        **kwargs) -> Iterable[PredictionResult]:
    # vectorize data for better performance
    vectorized_batch = numpy.stack(batch, axis=0)
    predictions = model.predict(vectorized_batch)
    return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
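# What the numpy.stack call above buys: a batch of per-example 1-D arrays
# becomes a single 2-D array, so model.predict runs once over the whole batch
# instead of once per example. A minimal demonstration:
import numpy
batch = [numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0])]
vectorized_batch = numpy.stack(batch, axis=0)   # shape (2, 2)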
def _predict_regression(
    self,
    X: np.ndarray,
    model: BaseEstimator,
    task_type: int,
    Y_train: Optional[np.ndarray] = None
) -> np.ndarray:

    def send_warnings_to_log(
        message: Union[Warning, str],
        category: Type[Warning],
        filename: str,
        lineno: int,
        file: Optional[TextIO] = None,
        line: Optional[str] = None,
    ) -> None:
        self.logger.debug('%s:%s: %s:%s' %
                          (filename, lineno, str(category), message))
        return

    with warnings.catch_warnings():
        warnings.showwarning = send_warnings_to_log
        Y_pred = model.predict(X)

    if len(Y_pred.shape) == 1:
        Y_pred = Y_pred.reshape((-1, 1))

    return Y_pred
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['dimensions'] = self.dimensions
    params['noise'] = self.noise
    params['epsilon'] = self.epsilon
    logging.debug("Getting params: %s", str(params))
    return params
def classifier_margin(classifier: BaseEstimator, X: modALinput,
                      **predict_proba_kwargs) -> np.ndarray:
    """
    Classification margin uncertainty of the classifier for the provided
    samples. This uncertainty measure takes the first and second most likely
    predictions and takes the difference of their probabilities, which is the
    margin.

    Args:
        classifier: The classifier for which the prediction margin is to be
            measured.
        X: The samples for which the prediction margin of classification is
            to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the
            :meth:`predict_proba` of the classifier.

    Returns:
        Margin uncertainty, which is the difference of the probabilities of
        the first and second most likely predictions.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(
            X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    if classwise_uncertainty.shape[1] == 1:
        return np.zeros(shape=(classwise_uncertainty.shape[0], ))

    part = np.partition(-classwise_uncertainty, 1, axis=1)
    margin = -part[:, 0] + part[:, 1]

    return margin
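# Standalone walk-through of the partition trick in classifier_margin above:
# partitioning the negated probabilities puts the two largest probabilities
# (negated) in the first two columns, and their difference is the margin.
import numpy as np
proba = np.array([[0.1, 0.7, 0.2],
                  [0.4, 0.35, 0.25]])
part = np.partition(-proba, 1, axis=1)
margin = -part[:, 0] + part[:, 1]   # -> array([0.5 , 0.05])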
def _predict_proba(
    self,
    X: np.ndarray,
    model: BaseEstimator,
    task_type: int,
    Y_train: Optional[np.ndarray] = None,
) -> np.ndarray:

    def send_warnings_to_log(
        message: Union[Warning, str],
        category: Type[Warning],
        filename: str,
        lineno: int,
        file: Optional[TextIO] = None,
        line: Optional[str] = None,
    ) -> None:
        self.logger.debug('%s:%s: %s:%s' %
                          (filename, lineno, str(category), message))
        return

    with warnings.catch_warnings():
        warnings.showwarning = send_warnings_to_log
        Y_pred = model.predict_proba(X, batch_size=1000)

    if Y_train is None:
        raise ValueError("Y_train is required for classification problems")
    Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
    return Y_pred
def max_std_sampling(regressor: BaseEstimator, X: modALinput,
                     n_instances: int = 1, random_tie_break=False,
                     **predict_kwargs) -> Tuple[np.ndarray, modALinput]:
    """
    Regressor standard deviation sampling strategy.

    Args:
        regressor: The regressor for which the labels are to be queried.
        X: The pool of samples to query from.
        n_instances: Number of samples to be queried.
        random_tie_break: If True, shuffles utility scores to randomize the
            order. This can be used to break the tie when the highest utility
            score is not unique.
        **predict_kwargs: Keyword arguments to be passed to :meth:`predict`
            of the CommitteeRegressor.

    Returns:
        The indices of the instances from X chosen to be labelled;
        the instances from X chosen to be labelled.
    """
    _, std = regressor.predict(X, return_std=True, **predict_kwargs)
    std = std.reshape(X.shape[0], )

    if not random_tie_break:
        query_idx = multi_argmax(std, n_instances=n_instances)
    else:
        query_idx = shuffled_argmax(std, n_instances=n_instances)

    return query_idx, X[query_idx]
def decision_boundary(self, x: np.ndarray, y: np.ndarray, model: BaseEstimator):
    x0 = x[:, 0]
    x1 = x[:, 1]
    x_min, x_max = x0.min() - 1, x0.max() + 1
    y_min, y_max = x1.min() - 1, x1.max() + 1

    # Evaluate the model on a grid covering the data
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    z = z.astype(str)  # np.str was removed from NumPy; use the builtin str
    y = [str(label) for label in y]

    fig = px.scatter(x=x0, y=x1, color=y)
    contour = go.Contour(z=z,
                         x=np.arange(x_min, x_max, 0.1),
                         y=np.arange(y_min, y_max, 0.1),
                         line_width=0,
                         colorscale=[[0, '#ff9900'], [1, '#6666ff']],
                         opacity=0.4,
                         showscale=False)
    fig.add_trace(contour)
    fig.update_layout(title='Decision boundary', legend_title='Label')
    pyo.iplot(fig)
def evaluate_model(self, model: BaseEstimator, xtest: np.ndarray,
                   ytest: np.ndarray) -> ModelStats:
    """Get the accuracy, recall, and precision of this model"""
    ypreds = model.predict(xtest)
    # sklearn metrics expect (y_true, y_pred) in that order
    return ModelStats(accuracy=accuracy_score(ytest, ypreds),
                      precision=precision_score(ytest, ypreds),
                      recall=recall_score(ytest, ypreds))
def finalize_model(
    model: BaseEstimator,
    X_train: CSVData,
    Y_train: CSVData,
    X_test: CSVData,
    test_ids: CSVData,
    output: str,
    smote_fn: SamplerFnType = None,
    outlier_detection: Any = None,
    header: Tuple[str, str] = ("id", "y"),
    label_indexing: int = 0,
    export_int: bool = False,
) -> None:
    """Train the model on the complete data and generate the submission file.

    Parameters
    ----------
    model: The model
    X_train: The training data
    Y_train: The training labels
    X_test: The test data
    test_ids: The IDs for the test data
    output: The path where to dump the output
    smote_fn: The function that takes labels and returns SMOTE
    outlier_detection: Optional outlier detector used to filter the training data
    header: The header for the submission CSV
    label_indexing: What to start indexing the label from
    export_int: Whether to export the CSV as integers
    """
    print("Training model...")
    if outlier_detection is not None:
        # Keep only the inliers (fit_predict returns 1 for inliers)
        outliers = outlier_detection.fit_predict(X_train)
        X_train = X_train[outliers == 1]
        Y_train = Y_train[outliers == 1]
    if smote_fn:
        smote = smote_fn(Y_train)
        X_train, Y_train = smote.fit_resample(X_train, Y_train)
    model.fit(X_train, Y_train)
    print("Model trained")
    Y_pred = model.predict(X_test) + label_indexing
    submission: Any = np.stack([test_ids, Y_pred], 1)  # Add IDs
    create_submission_file(output, submission, header=header,
                           export_int=export_int)
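# Hedged sketch of the outlier-filtering step in finalize_model above:
# scikit-learn outlier detectors return 1 for inliers and -1 for outliers
# from fit_predict, so boolean masking keeps only the inliers.
import numpy as np
from sklearn.ensemble import IsolationForest

X_demo = np.random.RandomState(0).rand(50, 2)
mask = IsolationForest(random_state=0).fit_predict(X_demo) == 1
X_inliers = X_demo[mask]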
def instantiate_and_fit(
    index: pd.DataFrame,
    fold: pd.DataFrame,
    X: np.ndarray,
    y: pd.DataFrame,
    estimator: BaseEstimator,
    n_splits: int = 5,
    param_grid: Optional[Dict[str, Any]] = None,
) -> BaseEstimator:
    assert fold.shape[0] == index.shape[0]
    assert fold.shape[0] == X.shape[0]
    assert fold.shape[0] == y.shape[0]

    fold_vals = fold.ravel()
    train_inds = fold_vals == "train"
    val_inds = fold_vals == "val"
    if val_inds.sum():
        raise NotImplementedError(
            "Explicit validation indices not yet supported.")

    y = y.values.ravel()

    # Replace NaN/inf features with zero before fitting
    nan_row, nan_col = np.nonzero(np.isnan(X) | np.isinf(X))
    if len(nan_row):
        logger.warning(
            f"Setting {len(nan_row)} NaN elements to zero before fitting {estimator}."
        )
        X[nan_row, nan_col] = 0

    logger.info(f"Fitting {estimator} on data (shape: {X.shape})")
    if param_grid is not None:
        group_k_fold = GroupKFold(n_splits=n_splits).split(
            X[train_inds], y[train_inds], index.trial.values[train_inds])
        grid_search = GridSearchCV(estimator=estimator,
                                   param_grid=param_grid,
                                   verbose=10,
                                   cv=list(group_k_fold))
        grid_search.fit(X[train_inds], y[train_inds])
        return grid_search.best_estimator_

    estimator.fit(X[train_inds], y[train_inds])
    return estimator
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                        degree_range, lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    # Notes:
    # - You can implement it yourself or use the built-in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters it has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    DEGREE_PARAM = "bostonfeaturestransformer__degree"
    LAMBDA_PARAM = "linearregressor__reg_lambda"

    results = {}
    for degree in degree_range:
        for reg_lambda in lambda_range:
            params = model.get_params()
            params[DEGREE_PARAM] = degree
            params[LAMBDA_PARAM] = reg_lambda
            model.set_params(**params)
            scores = sklearn.model_selection.cross_val_score(
                model, X, y, scoring="neg_mean_squared_error", cv=k_folds)
            score = np.mean(scores)
            results[score] = params

    best_params = max(results.items(), key=lambda x: x[0])[1]
    # ========================

    return best_params
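# A hedged equivalent of the manual grid search above using the sklearn
# utilities the notes recommend; the pipeline parameter names are taken from
# the constants in cv_best_hyperparams, everything else is illustrative and
# depends on the caller's `model`, `X`, `y`, and `k_folds`.
from sklearn.model_selection import GridSearchCV

param_grid = {
    "bostonfeaturestransformer__degree": [1, 2, 3],
    "linearregressor__reg_lambda": [0.1, 1.0, 10.0],
}
# search = GridSearchCV(model, param_grid, cv=k_folds,
#                       scoring="neg_mean_squared_error").fit(X, y)
# best_params = search.best_params_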
def __init__(self, model, periods=1, freq='30min'):
    """Lags a dataset.

    Lags all features. Missing data is dropped for fitting and replaced
    with the mean for predicting.

    :periods: Number of timesteps to lag by
    """
    assert isinstance(model, BaseEstimator), "`model` isn't a scikit-learn model"
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.periods = periods
    self.freq = freq
    self.model = model
def _validate_onnx_data(self, X):
    if X.dtype not in (numpy.float32, numpy.float64):
        raise ValueError(
            "Input X must have dtype float32 or float64.")
    X = BaseEstimator._validate_data(
        self, X, reset=False,
        dtype=[numpy.float64, numpy.float32],
        order='C')
    return X
def __init__(self, blending_regressor: BaseEstimator, model_name: str,
             params: dict):
    super().__init__(model_name, params)
    self.blend_model = BlendingRegressor(
        blending_regressor.set_params(**params))
    self.MODELS_SERIALIZING_BASEPATH = self.path.join(
        self.MODELS_SERIALIZING_BASEPATH, MACHINE_LEARNING_TECHNIQUE_NAME)
    self.SERIALIZE_FILENAME_PREFIX = SERIALIZE_FILENAME_PREFIX
def __init__(self, rf_estimator=None, lasso_estimator=None):
    """
    @param  rf_estimator     random forest estimator,
                             :epkg:`sklearn:ensemble:RandomForestRegressor` by default
    @param  lasso_estimator  Lasso estimator,
                             :epkg:`sklearn:linear_model:Lasso` by default
    """
    BaseEstimator.__init__(self)
    RegressorMixin.__init__(self)
    if rf_estimator is None:
        rf_estimator = RandomForestRegressor()
    if lasso_estimator is None:
        lasso_estimator = Lasso()
    self.rf_estimator = rf_estimator
    self.lasso_estimator = lasso_estimator
def __init__(self, model: BaseEstimator, multi_output: bool = False):
    name = type(model).__name__
    super().__init__(version=name)
    if multi_output:
        model = MultiOutputRegressor(model, n_jobs=-1)
    self.model = model
    self.params = model.get_params()
def test_unsupported():
    vec = CountVectorizer()
    clf = BaseEstimator()
    res = explain_prediction(clf, 'hello, world', vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
def __init__(self, columns=None, remove=None, skip_errors=False, single=False):
    """
    @param      columns         specify a columns selection
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new category appears (no 1)
    @param      single          use a single column per category, do not
                                multiply them for each value

    The logging function displays a message when a new dense and big matrix
    is created when it should be sparse. A sparse matrix should be
    allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.columns = columns if isinstance(
        columns, list) or columns is None else [columns]
    self.skip_errors = skip_errors
    self.remove = remove
    self.single = single
def __init__(self, wv, m=10, analyzer=str.split, eqe=1, verbose=0, a=1, c=0,
             n_jobs=1):
    """
    Initializes the embedding-based query language model query expansion
    technique
    """
    BaseEstimator.__init__(self)
    self._wv = wv
    self._analyzer = analyzer
    if eqe not in [1, 2]:
        raise ValueError("eqe must be 1 or 2")
    self._eqe = eqe
    self.verbose = verbose
    self._a = a
    self._c = c
    self.m = m
    self.n_jobs = n_jobs
    self.vocabulary = None
def __init__(self, retrieval_model, matching=None, query_expansion=None,
             name='RM', labels=None):
    """TODO: to be defined.

    :retrieval_model: A retrieval model satisfying fit and query.
    :matching: A matching operation satisfying fit and predict.
    :query_expansion: A query expansion operation satisfying fit and transform.
    :labels: Pre-defined mapping of indices to identifiers;
        will be inferred during fit if not given.
    """
    BaseEstimator.__init__(self)
    self._retrieval_model = retrieval_model
    self._matching = matching
    self._query_expansion = query_expansion
    self.name = name
    self.labels_ = np.asarray(labels) if labels is not None else None
def __init__(self, columns=None, remove=None, skip_errors=False, single=False,
             fLOG=None):
    """
    constructor

    @param      columns         specify a columns selection
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new category appears (no 1)
    @param      single          use a single column per category, do not
                                multiply them for each value
    @param      fLOG            logging function

    The logging function displays a message when a new dense and big matrix
    is created when it should be sparse. A sparse matrix should be
    allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self._p_columns = columns if isinstance(
        columns, list) or columns is None else [columns]
    self._p_skip_errors = skip_errors
    self._p_remove = remove
    self._p_single = single
    self.fLOG = fLOG
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['max_dimensions'] = self.max_dimensions
    params['beta'] = self.beta
    params['C'] = self.C
    return params
def get_params(self, deep=True):
    params = BaseEstimator.get_params(self, deep)
    params['beta'] = self.beta
    return params
def get_params(self, deep=True):
    return BaseEstimator.get_params(self, deep=deep)
def set_params(self, **params):
    # BaseEstimator.set_params returns self; propagate it so the sklearn
    # set_params contract (returning the estimator) holds.
    return BaseEstimator.set_params(self, **params)
def __init__(self, cost_func, n_class=2):
    BaseEstimator.__init__(self)
    self.n_class = n_class
    self.cost_func = cost_func