def _balltree(*, train, test, x_predict=None, metrics, X, leaf_size=40, metric='minkowski', **kwargs):
    """
    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.BallTree.html#sklearn.neighbors.BallTree
    """

    model = BallTree(X, leaf_size=leaf_size, metric=metric, **kwargs)
    model.fit(train[0], train[1])
    model_name = 'Ball Tree'
    y_hat = model.predict(test[0])

    if metrics == 'accuracy':
        accuracy = accuracy_score(test[1], y_hat)

    if metrics == 'f1':
        accuracy = f1_score(test[1], y_hat)

    if metrics == 'jaccard':
        accuracy = jaccard_score(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
Пример #2
0
    def kde_logarithmic_knn(self, X: np.ndarray, **kwargs) -> np.ndarray:
        """
        Estimate a non-negative log-density score for every row of ``X``.

        A neighbour structure is built on ``X`` and then queried with ``X``
        itself. For each row the value
        ``log(k) - log(n) - d * log(log_regularizer + r_k)`` is computed, where
        ``n = X.shape[0]``, ``d = X.shape[1]`` and ``r_k`` is the last (k-th)
        distance returned by the query for that row. The result is finally
        shifted upwards so that no entry is negative.

        Parameters
        ----------
        X
            2-D array; rows are the samples to index and to score.
        kwargs
            knn_algorithm : str, default ``'BallTree'``
                Either ``'BallTree'`` or ``'NearestNeighbors'``.
            log_regularizer : float, default ``0.0``
                Added to the k-th distance before the logarithm is taken.
            leaf_size : int, default ``6``
                Leaf size handed to the neighbour structure.
            knn_n_neighbors : int, default ``6``
                Number of neighbours ``k`` used for the estimate.

        Returns
        -------
        np.ndarray
            One score per row of ``X``; the minimum is never below zero.

        Raises
        ------
        ValueError
            If ``knn_algorithm`` is not one of the allowed names.
        """
        # -- constants --
        _allowed_algorithms = {'BallTree', 'NearestNeighbors'}

        # -- defaults --
        _algorithm = kwargs.get('knn_algorithm', 'BallTree')

        # Explicit raise instead of `assert`: assertions vanish under `python -O`.
        if _algorithm not in _allowed_algorithms:
            raise ValueError(f'The algorithm name you provided: {_algorithm} is not among '
                             f'allowed knn algorithms: {_allowed_algorithms}')

        _log_regularizer = kwargs.get('log_regularizer', 0.0)
        _leaf_size = kwargs.get('leaf_size', 6)
        _n_neighbors = kwargs.get('knn_n_neighbors', 6)

        # -- code --
        if _algorithm == 'BallTree':
            # create the local graph; its bound `query` already has the (X, k) shape we need
            _query = BallTree(X, leaf_size=_leaf_size).query
        else:
            # Leave one core free, but never ask for fewer than one worker:
            # `os.cpu_count()` may be None, and on a single core `- 1` gives 0.
            _n_jobs = max(1, (os.cpu_count() or 1) - 1)

            # create and fit the local graph
            _graph = NearestNeighbors(n_neighbors=_n_neighbors, leaf_size=_leaf_size,
                                      algorithm='ball_tree', p=2, n_jobs=_n_jobs)
            _graph.fit(X)

            # local adapter with the same call shape as `BallTree.query`,
            # instead of patching an attribute onto the estimator instance
            def _query(points, k):
                return _graph.kneighbors(X=points, n_neighbors=k)

        # One batched query for all rows; the last column holds the k-th distance.
        _kth_distance = _query(X, k=_n_neighbors)[0][:, -1]

        _densities = np.log(_n_neighbors) - np.log(X.shape[0]) - \
            X.shape[1] * np.log(_log_regularizer + _kth_distance)

        # the log-density estimate may be negative, so shift it up by |min| when needed
        return _densities + np.abs(np.min((0.0, np.min(_densities))))
Пример #3
0
    def predict(self, X):
        """
        Label each sample in ``X`` by its nearest entry of ``self.cluster_centers_``.

        Parameters
        ----------
        X
            Samples to label; passed as-is to ``BallTree.query``.

        Returns
        -------
        list
            ``self.labels_[i]`` for every sample, where ``i`` is the index of
            the nearest centre reported by the tree.
        """
        # scikit-learn's BallTree indexes its data in the constructor and has
        # no separate `fit` step, so the centres are passed in directly.
        ball_tree = BallTree(self.cluster_centers_)

        # `query` defaults to a single neighbour, so every row of `indexes`
        # holds exactly one centre index -- hence the one-element unpacking.
        _, indexes = ball_tree.query(X)
        return [self.labels_[idx] for (idx,) in indexes]