示例#1
0
def test_X_y_from_arff():
    """Load the OpenML 23380 ARFF fixture, split on "TR", and check both parts."""
    # Dataset reference: https://www.openml.org/d/23380
    features, target = X_y_from_arff(
        "tests/data/openml_d_23380.arff", split_column="TR"
    )

    # Feature frame: container type, dimensions, total missing cells, dtype tally.
    assert isinstance(features, pd.DataFrame)
    assert features.shape == (2796, 34)
    assert features.isnull().sum().sum() == 68100
    numeric_dtypes = [dt for dt in features.dtypes if dt in NUMERIC_TYPES]
    assert len(numeric_dtypes) == 32
    category_dtypes = [dt for dt in features.dtypes if dt.name == "category"]
    assert len(category_dtypes) == 2

    # Target series: container type, length, no missing values, category count.
    assert isinstance(target, pd.Series)
    assert target.shape == (2796,)
    assert target.isnull().sum() == 0
    assert len(target.dtype.categories) == 6
示例#2
0
    def predict_proba_arff(
        self,
        arff_file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
    ):
        """ Predict the class probabilities for input in the ARFF file.

        Parameters
        ----------
        arff_file_path: str
            An ARFF file with the same columns as the one used in fit.
            Target column must be present in file, but its values are ignored.
        target_column: str, optional (default=None)
            Specifies which column the model should predict.
            If left None, the last column is taken to be the target.
        encoding: str, optional
            Encoding of the ARFF file.

        Returns
        -------
        numpy.ndarray
            Numpy array with class probabilities.
            The array is of shape (N, K) where N is len(X),
            and K is the number of class labels found in `y` of `fit`.
        """
        # Pass the optional arguments by keyword so the call does not depend
        # on their positional order in `X_y_from_arff`.
        x, _ = X_y_from_arff(
            arff_file_path, split_column=target_column, encoding=encoding
        )
        x = self._prepare_for_prediction(x)
        return self._predict_proba(x)
示例#3
0
文件: gama.py 项目: vumichien/gama
    def score_arff(
        self,
        arff_file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
    ) -> float:
        """ Score the model on the data in an ARFF file with the `self.scoring` metric.

        Parameters
        ----------
        arff_file_path: str
            Path to the ARFF file holding the data to score on.
        target_column: str, optional (default=None)
            Name of the column the model should predict.
            When None, the last column is used as the target.
        encoding: str, optional
            Encoding of the ARFF file.

        Returns
        -------
        float
            Score achieved on the supplied data according to the `scoring` metric.
        """
        features, target = X_y_from_arff(
            arff_file_path, split_column=target_column, encoding=encoding
        )
        result = self.score(features, target)
        return result
示例#4
0
文件: gama.py 项目: vumichien/gama
    def fit_arff(
        self,
        arff_file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
        *args,
        **kwargs,
    ) -> None:
        """ Load training data from an ARFF file and fit a model on it.

        Parameters
        ----------
        arff_file_path: str
            Path to the ARFF file that contains the training data.
        target_column: str, optional (default=None)
            Name of the column the model should predict.
            When None, the last column is used as the target.
        encoding: str, optional
            Encoding of the ARFF file.
        *args, **kwargs
            Forwarded unchanged to `fit`.

        """
        features, target = X_y_from_arff(
            arff_file_path, split_column=target_column, encoding=encoding
        )
        self.fit(features, target, *args, **kwargs)
示例#5
0
文件: gama.py 项目: vumichien/gama
    def predict_arff(
        self,
        arff_file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
    ) -> np.ndarray:
        """ Produce predictions for the rows stored in an ARFF file.

        Parameters
        ----------
        arff_file_path: str
            An ARFF file with the same columns as the one used in fit.
            The target column must be present, but its values are ignored.
        target_column: str, optional (default=None)
            Name of the column the model should predict.
            When None, the last column is used as the target.
        encoding: str, optional
            Encoding of the ARFF file.

        Returns
        -------
        numpy.ndarray
            Array with a prediction for each row in the ARFF file.
        """
        # Only the feature part is needed; the target values are discarded.
        features, _ = X_y_from_arff(
            arff_file_path, split_column=target_column, encoding=encoding
        )
        prepared = self._prepare_for_prediction(features)
        return self._predict(prepared)