def fit(self, data):
        """
        Fits a transformer using the SFrame `data`. The `fit` phase does not
        train a deep learning model; it only checks that the trained model
        is compatible with the data provided. If the `auto` model is chosen,
        the fit phase chooses the right model for extracting features.

        Parameters
        ----------
        data : SFrame
            The data used to fit the transformer.

        Returns
        -------
        self
            A fitted object.

        See Also
        --------
        transform, fit_transform

        Examples
        --------

        # Import graphlab.
        >>> import graphlab as gl

        # Import data from MNIST
        >>> data = gl.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train6k')

        # Create a DeepFeatureExtractorObject
        >>> extractor = gl.feature_engineering.DeepFeatureExtractor(
        ...     features='image')

        # Fit the encoder for a given dataset.
        >>> extractor = extractor.fit(data)

        # Return the model used for the deep feature extraction.
        >>> extractor['model']
        """
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.fit')

        # Check that the column is in the SFrame.
        _raise_error_if_not_of_type(data, [_SFrame])
        _raise_error_if_column_exists(data, self._state["features"])

        # Choose an output column name that does not collide with an
        # existing column in `data`.
        count = 1
        old_output_column_name = self._state["output_column_name"]
        output_column_name = old_output_column_name
        while output_column_name in data.column_names():
            output_column_name = "%s.%s" % (old_output_column_name, count)
            count += 1
        self._state["output_column_name"] = output_column_name

        if data[self._state["features"]].dtype() != _Image:
            raise ToolkitError(
               "Feature `%s` must be of type Image." % self._state["features"])

        return self
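
# A minimal standalone sketch of the column-name de-duplication performed in
# `fit` above: append ".1", ".2", ... to the desired output name until it no
# longer collides with an existing column. `deduplicate_column_name` is a
# hypothetical helper, for illustration only.
def deduplicate_column_name(name, existing_columns):
    candidate = name
    count = 1
    while candidate in existing_columns:
        candidate = "%s.%s" % (name, count)
        count += 1
    return candidate

# 'deep_features' and 'deep_features.1' are taken, so '.2' is appended.
assert deduplicate_column_name(
    "deep_features",
    ["image", "deep_features", "deep_features.1"]) == "deep_features.2"
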
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric. Possible values are:

            - 'auto'      : Compute all metrics.
            - 'rmse'      : Root mean squared error.
            - 'max_error' : Maximum error.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

            - 'auto': By default the model treats missing values as is.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict

        Examples
        --------
        >>> results = model.evaluate(test_data, 'rmse')

        """
        _mt._get_metric_tracker().track('toolkit.regression.random_forest_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric, ['auto', 'rmse', 'max_error'])

        results = {}
        if metric in ['rmse', 'auto']:
            results = super(RandomForestRegression, self).evaluate(dataset, metric=metric,
                                                                   missing_value_action=missing_value_action)

        if metric in ['max_error', 'auto']:
            predictions = self.predict(dataset, missing_value_action=missing_value_action)
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset',
                                          target + ' (target column)')
            results['max_error'] = _graphlab.evaluation.max_error(predictions, dataset[target])
        return results
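
# For reference, a pure-Python sketch of what the `max_error` metric above
# measures: the largest absolute difference between a prediction and its
# true target. Illustration only, not graphlab.evaluation.max_error.
def max_error_sketch(predictions, targets):
    return max(abs(p - t) for p, t in zip(predictions, targets))

assert max_error_sketch([1.0, 2.5, 4.0], [1.0, 2.0, 5.0]) == 1.0
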
    def evaluate(self, dataset, metric='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric. Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy.
            - 'confusion_matrix' : An SFrame with counts of possible
              prediction/true label combinations.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict, classify

        Examples
        --------
        >>> results = model.evaluate(test_data)
        >>> results = model.evaluate(test_data, metric='accuracy')
        >>> results = model.evaluate(test_data, metric='confusion_matrix')

        Notes
        -----
        When evaluating for classification metrics (e.g. auc,
        confusion_matrix), the classification threshold is set to 0.5. For more
        flexible classification accuracy, please use functions in the
        :py:mod:`~graphlab.toolkits.evaluation` module.
        """
        _mt._get_metric_tracker().track('toolkit.classifier.boosted_trees_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                      ['auto', 'accuracy', 'confusion_matrix'])

        results = {}
        if metric in ['auto', 'accuracy']:
            results = super(_Classifier, self).evaluate(dataset, metric=metric)

        if metric in ['confusion_matrix', 'auto']:
            predictions = self.predict(dataset, output_type='class')
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset',
                                          target + ' (target column)')
            results['confusion_matrix'] = _graphlab.evaluation.confusion_matrix(
                dataset[target], predictions)
        return results
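
# A minimal sketch of what a confusion matrix holds: counts of
# (true label, predicted label) pairs. Illustration only; the toolkit's
# confusion_matrix returns the same kind of counts as an SFrame.
from collections import Counter

def confusion_counts(targets, predictions):
    return Counter(zip(targets, predictions))

counts = confusion_counts(['cat', 'dog', 'dog'], ['cat', 'dog', 'cat'])
assert counts[('dog', 'cat')] == 1  # one dog misclassified as cat
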
    def evaluate(self, dataset, metric='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric. Possible values are:

            - 'auto'      : Compute all metrics.
            - 'rmse'      : Root mean squared error.
            - 'max_error' : Maximum error.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict

        Examples
        --------
        >>> results = model.evaluate(test_data, 'rmse')

        Notes
        -----
        When evaluating classification metrics (e.g. auc,
        confusion_matrix), the classification threshold is set to 0.5.
        """
        _mt._get_metric_tracker().track('toolkit.regression.boosted_trees_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                      ['auto', 'rmse', 'max_error'])

        results = {}
        if metric in ['rmse', 'auto']:
            results = super(BoostedTreesRegression, self).evaluate(dataset, metric=metric)

        if metric in ['max_error', 'auto']:
            predictions = self.predict(dataset)
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset',
                                          target + ' (target column)')
            results['max_error'] = _graphlab.evaluation.max_error(
                predictions, dataset[target])
        return results
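
# A pure-Python sketch of root mean squared error (RMSE), the default
# regression metric above: the square root of the mean squared residual.
# Illustration only; the toolkit computes this internally.
import math

def rmse_sketch(predictions, targets):
    residuals = [(p - t) ** 2 for p, t in zip(predictions, targets)]
    return math.sqrt(sum(residuals) / len(residuals))

assert abs(rmse_sketch([1.0, 3.0], [1.0, 1.0]) - math.sqrt(2.0)) < 1e-12
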
    def fit(self, data):
        """
        Fits a transformer using the SFrame `data`. The `fit` phase does not
        train a deep learning model; it only checks that the trained model
        is compatible with the data provided. If the `auto` model is chosen,
        the fit phase chooses the right model for extracting features.

        Parameters
        ----------
        data : SFrame
            The data used to fit the transformer.

        Returns
        -------
        self
            A fitted object.

        See Also
        --------
        transform, fit_transform

        Examples
        --------

        # Import graphlab.
        >>> import graphlab as gl

        # Import data from MNIST
        >>> data = gl.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')

        # Create a DeepFeatureExtractorObject
        >>> extractor = gl.feature_engineering.DeepFeatureExtractor(features = 'image')

        # Fit the encoder for a given dataset.
        >>> extractor = extractor.fit(data)

        # Return the model used for the deep feature extraction.
        >>> extractor['model']
        """
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.fit')

        # Check that the column is in the SFrame.
        _raise_error_if_not_of_type(data, [_SFrame])

        for feature in self._state["features"]:
            _raise_error_if_column_exists(data, feature)
            if data[feature].dtype() != _Image:
                raise ToolkitError("Feature `%s` must be of type Image." %
                                   feature)

        return self
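
# A minimal standalone sketch of the per-column validation done by the
# multi-feature `fit` above: every requested feature must exist and hold
# images. `column_types` is a hypothetical {name: type} mapping standing in
# for a real SFrame, and `Image` is a stand-in for graphlab.Image.
class Image(object):
    pass

def validate_image_features(column_types, features):
    for feature in features:
        if feature not in column_types:
            raise KeyError("Column `%s` not found." % feature)
        if column_types[feature] is not Image:
            raise TypeError("Feature `%s` must be of type Image." % feature)

# Passes for the image column; would raise TypeError for 'label'.
validate_image_features({'image': Image, 'label': str}, ['image'])
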
def create(data, row_label=None, features=None, feature_model='auto',
           method='lsh', verbose=True):
    """
    Create a similarity search model, which can be used to quickly retrieve
    items similar to a query observation. In the case of images, this model
    automatically performs the appropriate feature engineering steps. NOTE:
    If you are using a CPU for the creation step with feature_model='auto',
    creation time may take a while. This is because extracting features for
    images on a CPU is expensive. With a GPU, one can expect large speedups.

    .. warning::

        The similarity search toolkit is currently in beta, and feedback is
        welcome! Please send comments to [email protected].

    Parameters
    ----------
    data : SFrame
        The SFrame that represents the training data for the model, including
        at least one column of images.

    row_label : str, optional
        Name of the SFrame column with row ids. If 'row_label' is not
        specified, row numbers are used to identify reference dataset rows when
        the model is queried.

    features : str, optional
        The name of an image column in the input 'data' SFrame.

    feature_model : 'auto' | A model of type NeuralNetClassifier, optional
        A trained model for extracting features from raw data objects. By
        default ('auto'), we choose an appropriate model from our set of
        pre-trained models. See
        :class:`~graphlab.toolkits.feature_engineering.DeepFeatureExtractor` for
        more information.

    method : {'lsh', 'brute_force'}, optional
        The method used for nearest neighbor search. The 'lsh' option uses
        locality-sensitive hashing to find approximate results more quickly.

    verbose : bool, optional
        If True, print verbose output during model creation.

    Returns
    -------
    out : SimilaritySearchModel

    See Also
    --------
    SimilaritySearchModel
    graphlab.toolkits.nearest_neighbors
    graphlab.toolkits.feature_engineering

    Notes
    -----
    The similarity search toolkit currently uses cosine distance to evaluate the
    similarity between each query and candidate results.

    Examples
    --------
    First, split data into reference and query.

    >>> import graphlab as gl

    >>> data = gl.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train6k')
    >>> reference, query = data.random_split(0.8)

    Build neuralnet feature extractor for images:

    >>> nn_model = gl.neuralnet_classifier.create(reference, target='label')

    Construct SimilaritySearchModel:

    >>> model = gl.similarity_search.create(reference, features='image',
    ...                                     feature_model=nn_model)

    Find the most similar items in the reference set for each item in the query
    set:

    >>> model.search(query)
    """

    _mt._get_metric_tracker().track(__name__ + '.create')

    _raise_error_if_not_of_type(data, [_SFrame])
    _raise_error_if_not_of_type(features, [str])
    _raise_error_if_column_exists(data, features)

    if data[features].dtype() != _Image:
        raise _ToolkitError("Feature `%s` must be of type Image." % features)

    return SimilaritySearchModel(data, row_label=row_label, feature=features,
            feature_model=feature_model, method=method, verbose=verbose)
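
# A hedged usage sketch for the `method` parameter documented above: 'lsh'
# returns approximate neighbors quickly, while 'brute_force' computes exact
# cosine distances at higher cost. Assumes graphlab is installed; the data
# URL and the 'image' column come from the docstring example.
import graphlab as gl

reference = gl.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')
approx_model = gl.similarity_search.create(reference, features='image',
                                           method='lsh')
exact_model = gl.similarity_search.create(reference, features='image',
                                          method='brute_force')
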
    def search(self, data, row_label=None, k=5):
        """
        Search for the nearest neighbors from the reference set for each element
        of the query set. The query SFrame must include columns with the same
        names as the row_label and feature columns used to create the
        SimilaritySearchModel.

        Parameters
        ----------
        data : SFrame
            Query data. Must contain columns with the same names and types as
            the features used to train the model. Additional columns are
            allowed, but ignored.

        row_label : string, optional
            Name of the query SFrame column with row ids. If 'row_label' is not
            specified, row numbers are used to identify query dataset rows in
            the output SFrame.

        k : int, optional
            Number of nearest neighbors to return from the reference set for
            each query observation. The default is 5 neighbors.

        Returns
        -------
        out : SFrame
            An SFrame containing the nearest neighbors for each query row.

        Examples
        --------
        First, split data into reference and query:

        >>> import graphlab as gl
        >>> data = gl.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train6k')
        >>> reference, query = data.random_split(0.8)

        Build a neural net feature extractor for images:

        >>> nn_model = gl.neuralnet_classifier.create(reference, target='label')

        Construct the SimilaritySearchModel:

        >>> model = gl.similarity_search.create(reference, features='image',
        ...                                     feature_model=nn_model)

        Find the most similar items in the reference set for each query:

        >>> model.search(query)
        """

        _raise_error_if_not_of_type(row_label, [str, _NoneType])
        feature = self._state['features']
        _raise_error_if_column_exists(data, feature)

        if data[feature].dtype() != self._feature_type:
            raise ValueError('Feature columns must have the same data type in '
                             'both the reference and query sets.')

        if row_label is not None:
            _raise_error_if_column_exists(data, row_label)

        if data[feature].dtype() == _Image:
            transformed_data = self._extractor.transform(data)
        else:
            transformed_data = data
            transformed_data[self._state['output_column_name']] = transformed_data[feature]

        return self._neighbors_model.query(transformed_data, label=row_label, k=k)
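
# A hedged end-to-end sketch of `search`: build the model, query it, and
# keep each query row's single closest reference item. The 'rank' column
# (rank 1 = nearest neighbor) is an assumption based on the GraphLab
# nearest neighbors toolkit's query output.
import graphlab as gl

data = gl.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')
reference, query = data.random_split(0.8)
model = gl.similarity_search.create(reference, features='image')
results = model.search(query, k=5)
closest = results[results['rank'] == 1]  # one row per query item
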
    def evaluate(self, dataset, metric='auto', max_neighbors=10, radius=None):
        """
        Evaluate model accuracy by predicting target classes for a new
        dataset and comparing them to actual target values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : string, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto': Returns all available metrics.
            - 'accuracy': Classification accuracy.
            - 'confusion_matrix': An SFrame with counts of possible
              prediction/true label combinations.
            - 'roc_curve': An SFrame containing information needed for an ROC
              curve.

        max_neighbors : int, optional
            Maximum number of neighbors to consider for each point.

        radius : float, optional
            Maximum distance from each point to a neighbor in the reference
            dataset.

        Returns
        -------
        out : dict
            Evaluation results. The dictionary keys are *accuracy*,
            *confusion_matrix*, and *roc_curve*.

        See also
        --------
        create, predict, predict_topk, classify

        Notes
        -----
        - Because the model randomly breaks ties between predicted classes, the
          results of repeated calls to the `evaluate` method may differ.

        Examples
        --------
        >>> sf_train = graphlab.SFrame({'species': ['cat', 'dog', 'fossa', 'dog'],
        ...                             'height': [9, 25, 20, 23],
        ...                             'weight': [13, 28, 33, 22]})
        >>> m = graphlab.nearest_neighbor_classifier.create(sf_train, target='species')
        >>> ans = m.evaluate(sf_train, max_neighbors=2,
        ...                  metric='confusion_matrix')
        >>> print ans['confusion_matrix']
        +--------------+-----------------+-------+
        | target_label | predicted_label | count |
        +--------------+-----------------+-------+
        |     cat      |       dog       |   1   |
        |     dog      |       dog       |   2   |
        |    fossa     |       dog       |   1   |
        +--------------+-----------------+-------+
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.nearest_neighbor_classifier.evaluate')

        ## Validate the metric name
        _raise_error_evaluation_metric_is_valid(metric,
                    ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])

        ## Make sure the input dataset has a target column with an appropriate
        #  type.
        target = self.get('target')
        _raise_error_if_column_exists(dataset, target, 'dataset', target)

        if dataset[target].dtype() not in (str, int):
            raise TypeError("The target column of the evaluation dataset must "
                            "contain integers or strings.")

        ## Compute predictions with the input dataset.
        ystar = self.predict(dataset, output_type='class',
                             max_neighbors=max_neighbors, radius=radius)
        ystar_prob = self.predict(dataset, output_type='probability',
                                  max_neighbors=max_neighbors, radius=radius)

        ## Compile accuracy metrics
        results = {}

        if metric in ['accuracy', 'auto']:
            results['accuracy'] = _gl.evaluation.accuracy(targets=dataset[target],
                                                          predictions=ystar)

        if metric in ['confusion_matrix', 'auto']:
            results['confusion_matrix'] = \
                _gl.evaluation.confusion_matrix(targets=dataset[target],
                                                predictions=ystar)

        if metric in ['roc_curve', 'auto']:
            results['roc_curve'] = \
                _gl.evaluation.roc_curve(targets=dataset[target],
                                         predictions=ystar_prob)

        return results
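
# A pure-Python sketch of the accuracy metric compiled above: the fraction
# of predictions that match their targets. Illustration only.
def accuracy_sketch(targets, predictions):
    matches = sum(1 for t, p in zip(targets, predictions) if t == p)
    return float(matches) / len(targets)

assert accuracy_sketch(['cat', 'dog', 'dog'], ['cat', 'dog', 'cat']) == 2.0 / 3.0
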
def create(data, row_label=None, features=None, feature_model='auto',
           method='lsh', verbose=True):
    """
    Create a similarity search model, which can be used to quickly retrieve
    items similar to a query observation. In the case of images, this model
    automatically performs the appropriate feature engineering steps. NOTE:
    If you are using a CPU for the creation step with feature_model='auto',
    creation time may take a while. This is because extracting features for
    images on a CPU is expensive. With a GPU, one can expect large speedups.

    Parameters
    ----------
    data : SFrame
        The SFrame that represents the training data for the model, including
        at least one column of images.

    row_label : str, optional
        Name of the SFrame column with row ids. If 'row_label' is not
        specified, row numbers are used to identify reference dataset rows when
        the model is queried.

    features : str, optional
        The name of an image column in the input 'data' SFrame.

    feature_model : 'auto' | A model of type NeuralNetClassifier, optional
        A trained model for extracting features from raw data objects. By
        default ('auto'), we choose an appropriate model from our set of
        pre-trained models. See
        :class:`~graphlab.toolkits.feature_engineering.DeepFeatureExtractor` for
        more information.

    method : {'lsh', 'brute_force'}, optional
        The method used for nearest neighbor search. The 'lsh' option uses
        locality-sensitive hashing to find approximate results more quickly.

    verbose : bool, optional
        If True, print verbose output during model creation.

    Returns
    -------
    out : SimilaritySearchModel

    See Also
    --------
    SimilaritySearchModel
    graphlab.toolkits.nearest_neighbors
    graphlab.toolkits.feature_engineering

    Notes
    -----
    The similarity search toolkit currently uses cosine distance to evaluate the
    similarity between each query and candidate results.

    Examples
    --------
    First, split data into reference and query.

    >>> import graphlab as gl

    >>> data = gl.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')
    >>> reference, query = data.random_split(0.8)

    Build neuralnet feature extractor for images:

    >>> nn_model = gl.neuralnet_classifier.create(reference, target='label')

    Construct SimilaritySearchModel:

    >>> model = gl.similarity_search.create(reference, features='image',
    ...                                     feature_model=nn_model)

    Find the most similar items in the reference set for each item in the query
    set:

    >>> model.search(query)
    """

    _mt._get_metric_tracker().track(__name__ + '.create')

    _raise_error_if_not_of_type(data, [_SFrame])
    _raise_error_if_not_of_type(features, [str])
    _raise_error_if_column_exists(data, features)

    if data[features].dtype() != _Image:
        raise _ToolkitError("Feature `%s` must be of type Image." % features)

    return SimilaritySearchModel(data, row_label=row_label, feature=features,
            feature_model=feature_model, method=method, verbose=verbose)
    def search(self, data, row_label=None, k=5):
        """
        Search for the nearest neighbors from the reference set for each element
        of the query set. The query SFrame must include columns with the same
        names as the row_label and feature columns used to create the
        SimilaritySearchModel.

        Parameters
        ----------
        data : SFrame
            Query data. Must contain columns with the same names and types as
            the features used to train the model. Additional columns are
            allowed, but ignored.

        row_label : string, optional
            Name of the query SFrame column with row ids. If 'row_label' is not
            specified, row numbers are used to identify query dataset rows in
            the output SFrame.

        k : int, optional
            Number of nearest neighbors to return from the reference set for
            each query observation. The default is 5 neighbors.

        Returns
        -------
        out : SFrame
            An SFrame containing the nearest neighbors for each query row.

        Examples
        --------
        First, split data into reference and query:

        >>> import graphlab as gl
        >>> data = gl.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')
        >>> reference, query = data.random_split(0.8)

        Build a neural net feature extractor for images:

        >>> nn_model = gl.neuralnet_classifier.create(reference, target='label')

        Construct the SimilaritySearchModel:

        >>> model = gl.similarity_search.create(reference, features='image',
        ...                                     feature_model=nn_model)

        Find the most similar items in the reference set for each query:

        >>> model.search(query)
        """

        _raise_error_if_not_of_type(row_label, [str, type(None)])
        feature = self._state['features']
        _raise_error_if_column_exists(data, feature)

        if data[feature].dtype() != self._feature_type:
            raise ValueError('Feature columns must have the same data type in '
                             'both the reference and query sets.')

        if row_label is not None:
            _raise_error_if_column_exists(data, row_label)

        if data[feature].dtype() == _Image:
            transformed_data = self._extractor.transform(data)
        else:
            transformed_data = data
            transformed_data[self._state['output_column_name']] = transformed_data[feature]

        return self._neighbors_model.query(transformed_data, label=row_label, k=k)
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

             - 'auto'             : Returns all available metrics.
             - 'accuracy'         : Classification accuracy.
             - 'confusion_matrix' : An SFrame with counts of possible
               prediction/true label combinations.
             - 'roc_curve'        : An SFrame containing information needed
               for an ROC curve.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

             - 'auto': By default the model treats missing values as is.
             - 'impute': Proceed with evaluation by filling in the missing
               values with the mean of the training data. Missing
               values are also imputed if an entire column of data is
               missing during evaluation.
             - 'error': Do not proceed with evaluation and terminate with
               an error message.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict, classify

        Examples
        --------
        >>> results = model.evaluate(test_data)
        >>> results = model.evaluate(test_data, metric='accuracy')
        >>> results = model.evaluate(test_data, metric='confusion_matrix')

        Notes
        -----
        When evaluating for classification metrics (e.g. auc,
        confusion_matrix), the classification threshold is set to 0.5. For more
        flexible classification accuracy, please use functions in the
        :py:mod:`~graphlab.toolkits.evaluation` module.
        """
        _mt._get_metric_tracker().track('toolkit.classifier.boosted_trees_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                                ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])

        results = {}
        if metric in ['auto', 'accuracy', 'roc_curve']:
            results = super(_Classifier, self).evaluate(dataset, metric=metric,
                                                        missing_value_action=missing_value_action)

        if metric in ['confusion_matrix', 'auto']:
            predictions = self.predict(dataset, output_type='class', missing_value_action=missing_value_action)
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset', target)
            results['confusion_matrix'] = _graphlab.evaluation.confusion_matrix(dataset[target], predictions)

        return results
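
# A minimal sketch of the 0.5 classification threshold mentioned in the
# Notes above: a predicted probability maps to the positive class when it
# reaches the threshold. Illustration only; the >= convention at exactly
# 0.5 is an assumption.
def classify_at_threshold(probabilities, threshold=0.5):
    return [1 if p >= threshold else 0 for p in probabilities]

assert classify_at_threshold([0.2, 0.5, 0.9]) == [0, 1, 1]
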
    def evaluate(self, dataset, metric='auto', max_neighbors=10, radius=None):
        """
        Evaluate the model's predictive accuracy. This is done by predicting the
        target class for instances in a new dataset and comparing to known
        target values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto': Returns all available metrics.

            - 'accuracy': Classification accuracy.

            - 'confusion_matrix': An SFrame with counts of possible
              prediction/true label combinations.

            - 'roc_curve': An SFrame containing information needed for an ROC
              curve (binary classification only).

        max_neighbors : int, optional
            Maximum number of neighbors to consider for each point.

        radius : float, optional
            Maximum distance from each point to a neighbor in the reference
            dataset.

        Returns
        -------
        out : dict
            Evaluation results. The dictionary keys are *accuracy*,
            *confusion_matrix*, and *roc_curve* (if applicable).

        See also
        --------
        create, predict, predict_topk, classify

        Notes
        -----
        - Because the model randomly breaks ties between predicted classes, the
          results of repeated calls to the `evaluate` method may differ.

        Examples
        --------
        >>> sf_train = graphlab.SFrame({'species': ['cat', 'dog', 'fossa', 'dog'],
        ...                             'height': [9, 25, 20, 23],
        ...                             'weight': [13, 28, 33, 22]})
        >>> m = graphlab.nearest_neighbor_classifier.create(sf_train, target='species')
        >>> ans = m.evaluate(sf_train, max_neighbors=2,
        ...                  metric='confusion_matrix')
        >>> print ans['confusion_matrix']
        +--------------+-----------------+-------+
        | target_label | predicted_label | count |
        +--------------+-----------------+-------+
        |     cat      |       dog       |   1   |
        |     dog      |       dog       |   2   |
        |    fossa     |       dog       |   1   |
        +--------------+-----------------+-------+
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.nearest_neighbor_classifier.evaluate')

        ## Validate the metric name
        _raise_error_evaluation_metric_is_valid(
            metric, ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])

        ## Make sure the input dataset has a target column with an appropriate
        #  type.
        target = self.get('target')
        _raise_error_if_column_exists(dataset, target, 'dataset', target)

        if dataset[target].dtype() not in (str, int):
            raise TypeError("The target column of the evaluation dataset must "
                            "contain integers or strings.")

        if self._state["num_classes"] != 2:
            if (metric == 'roc_curve') or (metric == ['roc_curve']):
                err_msg = "Currently, ROC curve is not supported for "
                err_msg += "multi-class classification in this model."
                raise _ToolkitError(err_msg)
            else:
                warn_msg = "WARNING: Ignoring `roc_curve`. "
                warn_msg += "Not supported for multi-class classification."
                print(warn_msg)

        ## Compute predictions with the input dataset.
        ystar = self.predict(dataset,
                             output_type='class',
                             max_neighbors=max_neighbors,
                             radius=radius)
        ystar_prob = self.predict(dataset,
                                  output_type='probability',
                                  max_neighbors=max_neighbors,
                                  radius=radius)

        ## Compile accuracy metrics
        results = {}

        if metric in ['accuracy', 'auto']:
            results['accuracy'] = _gl.evaluation.accuracy(
                targets=dataset[target], predictions=ystar)

        if metric in ['confusion_matrix', 'auto']:
            results['confusion_matrix'] = \
                _gl.evaluation.confusion_matrix(targets=dataset[target],
                                                predictions=ystar)

        if self._state["num_classes"] == 2:
            if metric in ['roc_curve', 'auto']:
                results['roc_curve'] = \
                      _gl.evaluation.roc_curve(targets=dataset[target],
                                               predictions=ystar_prob)
        return results
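
# A pure-Python sketch of one point on the curve that `roc_curve` above
# summarizes: at a given probability threshold, the true positive rate is
# TP / (TP + FN) and the false positive rate is FP / (FP + TN).
# Illustration only, with binary targets encoded as 0/1.
def roc_point(targets, probabilities, threshold):
    predictions = [1 if p >= threshold else 0 for p in probabilities]
    tp = sum(1 for t, y in zip(targets, predictions) if t == 1 and y == 1)
    fn = sum(1 for t, y in zip(targets, predictions) if t == 1 and y == 0)
    fp = sum(1 for t, y in zip(targets, predictions) if t == 0 and y == 1)
    tn = sum(1 for t, y in zip(targets, predictions) if t == 0 and y == 0)
    return (float(fp) / (fp + tn), float(tp) / (tp + fn))  # (FPR, TPR)

assert roc_point([0, 0, 1, 1], [0.1, 0.6, 0.4, 0.8], 0.5) == (0.5, 0.5)
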