Example #1
def _benchmark_from_data(
    experiment: Experiment,
    *,
    estimator: BaseEstimator,
    X_train: DataType,
    y_train: TargetType,
    X_test: DataType,
    y_test: TargetType,
    save_train: bool = False,
) -> None:
    with _add_timing(experiment, "fit_time"):
        estimator.fit(X_train, y_train)

    _append_info(experiment, "fitted_estimator", estimator)

    with _add_timing(experiment, "score_time"):
        test_score = estimator.score(X_test, y_test)

    _append_info(experiment, "test_score", test_score)

    if save_train:
        train_score = estimator.score(X_train, y_train)
        _append_info(experiment, "train_score", train_score)

    # Save (and time) any per-sample outputs the estimator exposes.
    for output in ("transform", "predict"):
        method = getattr(estimator, output, None)
        if method is not None:
            with _add_timing(experiment, f"{output}_time"):
                _append_info(experiment, output, method(X_test))
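
The helpers above (Experiment, _add_timing, _append_info) are private to the project this snippet comes from. As a rough illustration of the pattern only, and purely an assumption about their shape, a timing context manager that accumulates named results in a dict could look like this:

import time
from contextlib import contextmanager


class Experiment:
    """Hypothetical stand-in: accumulates named results in a dict."""

    def __init__(self):
        self.info = {}


def _append_info(experiment, key, value):
    # Keep a list per key so repeated runs append rather than overwrite.
    experiment.info.setdefault(key, []).append(value)


@contextmanager
def _add_timing(experiment, key):
    # Record the wall-clock duration of the enclosed block under `key`.
    start = time.perf_counter()
    yield
    _append_info(experiment, key, time.perf_counter() - start)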
Example #2
File: validator.py Project: altescy/colt
    def _validate(self, model: BaseEstimator, X: np.ndarray, y: np.ndarray):
        X_train, X_test, y_train, y_test = train_test_split(X, y, **self._options)

        train_score = np.array(model.score(X_train, y_train))
        test_score = np.array(model.score(X_test, y_test))

        return {"train": train_score, "test": test_score}
Example #3
def eval_metrics(
    model: BaseEstimator, *data: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    X_train, X_test, y_train, y_test = data

    acc_train = model.score(X_train, y_train)
    decision_func_train = model.decision_function(X_train)
    auc_train = roc_auc_score(y_train, decision_func_train)
    # ROC curve points, computed here and below for optional plotting
    # (they are not returned).
    fpr_train, tpr_train, _ = roc_curve(y_train, decision_func_train)
    print(f"Training set: AUC={auc_train}, acc={acc_train}")

    acc_test = model.score(X_test, y_test)
    decision_func_test = model.decision_function(X_test)
    auc_test = roc_auc_score(y_test, decision_func_test)
    fpr_test, tpr_test, _ = roc_curve(y_test, decision_func_test)
    print(f"Testing set:  AUC={auc_test}, acc={acc_test}")

    return decision_func_train, decision_func_test
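
eval_metrics assumes the model exposes decision_function, so it works with margin classifiers such as LogisticRegression or LinearSVC but not, say, RandomForestClassifier. A minimal binary-classification usage sketch (the dataset and estimator choices are illustrative):

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = LogisticRegression(max_iter=5000).fit(X_train, y_train)
# Positional order must match the unpacking inside eval_metrics.
scores_train, scores_test = eval_metrics(model, X_train, X_test, y_train, y_test)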
Example #4
def evaluate(clf: BaseEstimator, X: np.ndarray, y: np.ndarray) -> float:
    """Compute the accuracy of te classifier.

    Args:
        clf (BaseEstimator): a classifier
        X (np.ndarray): a numpy ndarray
        y (np.ndarray): a numpy ndarray

    Returns:
        float: te accuracy of te classifier
    """
    return clf.score(X, y)
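
Usage is a single call once the classifier is fitted; a short sketch, with the dataset and estimator chosen only for illustration:

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = SVC().fit(X_train, y_train)      # any fitted scikit-learn classifier
print(evaluate(clf, X_test, y_test))   # mean accuracy on the test set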
Example #5
File: RNSB.py Project: dccuchile/wefe
    def _train_classifier(
            self,
            attribute_embeddings_dict: List[Dict[str, np.ndarray]],
            estimator: Type[BaseEstimator] = LogisticRegression,
            estimator_params: Dict[str, Any] = {
                'solver': 'liblinear',
                'max_iter': 10000,
            },
            random_state: Union[int, None] = None,
            print_model_evaluation: bool = False) -> Tuple[BaseEstimator, float]:
        """Train the sentiment classifier from the provided attribute embeddings.

        Parameters
        ----------
        attribute_embeddings_dict : List[Dict[str, np.ndarray]]
            A list with one dict per attribute, mapping each attribute word
            to its embedding.

        estimator : Type[BaseEstimator], optional
            A scikit-learn classifier class that implements the predict_proba
            method, by default LogisticRegression.

        estimator_params : dict, optional
            Parameters that will be passed to the classifier, by default
            { 'solver': 'liblinear', 'max_iter': 10000, }.

        random_state : Union[int, None], optional
            Seed that makes the execution of the query reproducible,
            by default None.

        print_model_evaluation : bool, optional
            Indicates whether the classifier evaluation is printed after the
            training process is completed, by default False.

        Returns
        -------
        Tuple[BaseEstimator, float]
            The trained classifier and the accuracy obtained by the model.
        """
        attribute_0_embeddings = np.array(list(attribute_embeddings_dict[0].values()))
        attribute_1_embeddings = np.array(list(attribute_embeddings_dict[1].values()))

        # generate the labels (1, -1) for each embedding
        positive_attribute_labels = np.ones(attribute_0_embeddings.shape[0])
        negative_attribute_labels = -np.ones(attribute_1_embeddings.shape[0])

        attributes_embeddings = np.concatenate(
            (attribute_0_embeddings, attribute_1_embeddings))
        # Keep the label order aligned with the embeddings:
        # attribute 0 -> 1, attribute 1 -> -1.
        attributes_labels = np.concatenate(
            (positive_attribute_labels, negative_attribute_labels))

        split = train_test_split(attributes_embeddings,
                                 attributes_labels,
                                 shuffle=True,
                                 random_state=random_state,
                                 test_size=0.33,
                                 stratify=attributes_labels)
        X_embeddings_train, X_embeddings_test, y_train, y_test = split

        num_train_negative_examples = np.count_nonzero((y_train == -1))
        num_train_positive_examples = np.count_nonzero((y_train == 1))

        # Check the number of train and test examples.
        if num_train_positive_examples < 1:
            raise Exception(
                'After dividing the dataset using stratified train_test_split, '
                'the attribute 0 has 0 training examples.')

        if num_train_negative_examples < 1:
            raise Exception(
                'After dividing the dataset using stratified train_test_split, '
                'the attribute 1 has 0 training examples.')

        # When random_state is not None, set it on the classifier params
        # (copy first, so the mutable default dict is never modified in place).
        if random_state is not None:
            estimator_params = {**estimator_params, 'random_state': random_state}

        estimator = estimator(**estimator_params)
        estimator.fit(X_embeddings_train, y_train)

        # evaluate
        y_pred = estimator.predict(X_embeddings_test)
        score = estimator.score(X_embeddings_test, y_test)

        if print_model_evaluation:
            print("Classification Report:\n{}".format(
                classification_report(y_test, y_pred, labels=estimator.classes_)))

        return estimator, score
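
For reference, the expected input is a list of two word-to-vector dicts. A tiny hand-made sketch of a direct call follows; the words and random vectors are made up, and _train_classifier is a private helper that RNSB normally invokes internally (through its public query-running API), so calling it directly is only for illustration:

import numpy as np
from wefe.metrics import RNSB

rng = np.random.default_rng(0)
attribute_embeddings_dict = [
    # attribute 0: "positive" words, attribute 1: "negative" words.
    {word: rng.normal(size=50) for word in ("good", "great", "nice", "happy")},
    {word: rng.normal(size=50) for word in ("bad", "awful", "nasty", "sad")},
]

classifier, accuracy = RNSB()._train_classifier(
    attribute_embeddings_dict,
    random_state=42,
    print_model_evaluation=True,
)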