Example #1
def __init__(self, node, **kwargs):
    super(CVBestSearchRefitParallel, self).__init__(wrapped_node=None)
    # Default score key, e.g. 'y/test/score_recall_mean'
    default_score = "y" + conf.SEP + \
                    conf.TEST + conf.SEP + \
                    conf.SCORE_RECALL_MEAN
    score = kwargs.pop("score", default_score)
    arg_max = kwargs.pop("arg_max", True)
    # Local import of CV (likely deferred to avoid a circular import).
    from epac.workflow.splitters import CV
    cv_node = CV(node=node,
                 reducer=ClassificationReport(keep=False),
                 **kwargs)
    self.add_child(cv_node)
    self.score = score
    self.arg_max = arg_max
    self.refited = None
    self.best_params = None
    self.reducer = CVBestSearchRefitPReducer(self)
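
For reference, the default score assembled above is a path-style key. A minimal sketch of that concatenation, assuming the epac conf constants are SEP="/", TEST="test", and SCORE_RECALL_MEAN="score_recall_mean" (values inferred from the 'y/test/score_recall_mean' comment, not checked against epac's source):

    # Hypothetical constant values inferred from the comment above.
    SEP = "/"
    TEST = "test"
    SCORE_RECALL_MEAN = "score_recall_mean"

    default_score = "y" + SEP + TEST + SEP + SCORE_RECALL_MEAN
    print(default_score)  # -> y/test/score_recall_mean
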
Example #2
class CVBestSearchRefitParallel(Wrapper):
    """Cross-validation + grid-search then refit with optimals parameters.

    Average results over first axis, then find the arguments that maximize or
    minimise a "score" over other axis.

    Parameters
    ----------

    See CV parameters, plus other parameters:

    score: string
        the score name to be optimized (default "mean_score_te").

    arg_max: boolean
        True/False take parameters that maximize/minimize the score. Default
        is True.

    Example
    -------

    >>> from sklearn import datasets
    >>> from sklearn.svm import SVC
    >>> from epac import Methods
    >>> from epac.workflow.splitters import CVBestSearchRefitParallel
    >>> X, y = datasets.make_classification(n_samples=12,
    ...                                     n_features=10,
    ...                                     n_informative=2,
    ...                                     random_state=1)
    >>> n_folds_nested = 2
    >>> C_values = [.1, 0.5, 1, 2, 5]
    >>> kernels = ["linear", "rbf"]
    >>> methods = Methods(*[SVC(C=C, kernel=kernel)
    ...     for C in C_values for kernel in kernels])
    >>> wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
    >>> wf.run(X=X, y=y)
    [[{'y/test/pred': array([0, 0, 1, 0, 0, 0]), 'y/train/pred': array([0, 0, 0, 0, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}, {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}], [{'y/test/pred': array([0, 1, 1, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 1, 1, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 1, 1, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 1, 1, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 1, 0, 1, 0]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}, {'y/test/pred': array([0, 1, 1, 0, 1, 0]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}]]
    >>> wf.reduce()
    ResultSet(
    [{'key': CVBestSearchRefitParallel, 'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': [1 0 0 1 0 0 1 0 1 1 0 1], 'y/pred': [1 0 0 1 0 0 1 0 1 1 0 1]}])

    """
    def __init__(self, node, **kwargs):
        super(CVBestSearchRefitParallel, self).__init__(wrapped_node=None)
        # Default score key, e.g. 'y/test/score_recall_mean'
        default_score = "y" + conf.SEP + \
                        conf.TEST + conf.SEP + \
                        conf.SCORE_RECALL_MEAN
        score = kwargs.pop("score", default_score)
        arg_max = kwargs.pop("arg_max", True)
        # Local import of CV (likely deferred to avoid a circular import).
        from epac.workflow.splitters import CV
        cv_node = CV(node=node,
                     reducer=ClassificationReport(keep=False),
                     **kwargs)
        self.add_child(cv_node)
        self.score = score
        self.arg_max = arg_max
        self.refited = None
        self.best_params = None
        self.reducer = CVBestSearchRefitPReducer(self)

    def get_signature(self):
        return self.__class__.__name__

    def transform(self, **Xy):
        Xy_train, Xy_test = train_test_split(Xy)
        # Save the full input so reduce() can reload it for the final refit.
        result = Result(key=self.get_signature(), **Xy)
        if not self.store:
            self.store = StoreMem()
        self.save_results(ResultSet(result))
        if Xy_train is Xy_test:
            # No train/test split in the input: pass the data on unchanged.
            return Xy
        else:
            return Xy_train
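
The identity check above implies that epac's train_test_split returns the very same dict object for both halves when the input carries no train/test split to apply. A minimal sketch of that idiom with a hypothetical stand-in splitter (not epac's actual function):

    def split_or_same(Xy):
        # Stand-in: hand back the identical object twice when there is
        # nothing to split, mirroring the `is` check in transform().
        if "train" not in Xy and "test" not in Xy:
            return Xy, Xy
        return Xy["train"], Xy["test"]

    Xy = {"X": [[0.0, 1.0]], "y": [0]}
    tr, te = split_or_same(Xy)
    print(tr is te)  # -> True, so transform() would return Xy unchanged
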

    def _results2dict(self, **cpXy):
        # Rebuild a plain dict from the stored result, dropping the
        # bookkeeping "key" field.
        res_dict = {}
        for key in cpXy[self.get_signature()]:
            if key != "key":
                res_dict[key] = cpXy[self.get_signature()][key]
        return res_dict
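
_results2dict copies every field of the stored result except the "key" entry. The same filtering, sketched on a hypothetical stored dict shaped like the ones save_results() produces:

    cpXy = {"CVBestSearchRefitParallel": {"key": "CVBestSearchRefitParallel",
                                          "X": [[0.1, 0.2]], "y": [1]}}
    res_dict = {k: v for k, v in cpXy["CVBestSearchRefitParallel"].items()
                if k != "key"}
    print(res_dict)  # -> {'X': [[0.1, 0.2]], 'y': [1]}
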

    def reduce(self, store_results=True):
        children_results = [
            child.reduce(store_results=False) for child in self.children
        ]
        results = ResultSet(*children_results)
        if self.reducer:
            # Pick the best node and its parameters from the CV results,
            # then refit it on the full data saved by transform().
            to_refit, best_params = self.reducer.reduce(results)
            Xy = self.load_results()
            Xy = self._results2dict(**Xy)
            self.refited = to_refit
            self.best_params = best_params
            out = self.refited.top_down(**Xy)
            out[conf.BEST_PARAMS] = best_params
            result = Result(key=self.get_signature(), **out)
            return ResultSet(result)
        return results
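
After reduce(), the refitted best node and its parameters remain available on the wrapper itself. A short usage sketch continuing the docstring's doctest (attribute names taken from the code above):

    # Continuing the docstring example after wf.run(X=X, y=y):
    res = wf.reduce()
    print(wf.best_params)  # parameters chosen by the reducer
    print(wf.refited)      # the refitted best node ('refited' is the
                           # attribute name used by the library)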