Example #1
# Imports used by the excerpt below; the scikit-learn paths are internal
# modules and may move between releases.
import numpy as np
import six
from abc import ABCMeta, abstractmethod
from sklearn import metrics
from sklearn.base import BaseEstimator
from sklearn.utils import check_array
from sklearn.tree._tree import Tree, DepthFirstTreeBuilder, _build_pruned_tree_ccp

DTYPE = np.float32  # dtype scikit-learn's tree code expects for X


class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator)):
    """Base class for decision trees.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """
    @abstractmethod
    def __init__(self,
                 criterion,
                 splitter,
                 max_depth,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_fraction_leaf,
                 max_features,
                 max_leaf_nodes,
                 random_state,
                 min_impurity_decrease,
                 min_impurity_split,
                 class_weight=None,
                 presort=False):
        self.criterion = criterion
        self.splitter = splitter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.random_state = random_state
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.min_impurity_split = min_impurity_split
        self.class_weight = class_weight
        self.presort = presort

    def fit(self,
            X,
            y,
            sample_weight=None,
            check_input=True,
            X_idx_sorted=None):
        n_samples, self.n_features_ = X.shape

        y = np.atleast_1d(y)
        if y.ndim == 1:
            # the tree builder expects y with shape (n_samples, n_outputs)
            y = np.reshape(y, (-1, 1))
        self.n_outputs_ = y.shape[1]

        expanded_class_weight = None

        # is_classification = is_classifier(self)
        # if is_classification:
        #     y = np.copy(y)
        #     y_encoded = np.zeros(y.shape, dtype=np.intp)
        #     classes_k, y_encoded = np.unique(y, return_inverse=True)
        #     y = y_encoded
        # else:
        self.classes_ = [None]

        self.n_classes_ = [1]

        # Build tree

        criterion = self.criterion
        # In this simplified excerpt, `splitter` must already be a Splitter
        # instance; the name-to-class lookup is shown after the class.
        splitter = self.splitter

        # min_weight_leaf is derived from min_weight_fraction_leaf; with no
        # sample weights every sample carries a weight of 1.
        min_weight_leaf = self.min_weight_fraction_leaf * n_samples

        # The Cython Tree expects n_classes as a 1-D intp array.
        self.tree_ = Tree(self.n_features_,
                          np.asarray(self.n_classes_, dtype=np.intp),
                          self.n_outputs_)

        builder = DepthFirstTreeBuilder(splitter, self.min_samples_split,
                                        self.min_samples_leaf,
                                        min_weight_leaf, self.max_depth,
                                        self.min_impurity_decrease,
                                        self.min_impurity_split)

        builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)

        return self

    def predict(self, X, check_input=True):
        # Tree.predict returns the value stored in each leaf; for a single
        # output this is an (n_samples, n_values) array, so column 0 holds
        # the regression prediction.
        proba = self.tree_.predict(X)
        return proba[:, 0]

    def apply(self, X, check_input=True):
        # Index of the leaf that each sample falls into.
        return self.tree_.apply(X)

    @property
    def feature_importances_(self):
        """Return the feature importances.

        The importance of a feature is computed as the (normalized) total
        reduction of the criterion brought by that feature. It is also
        known as the Gini importance.

        Returns
        -------
        feature_importances_ : array, shape = [n_features]
        """
        return self.tree_.compute_feature_importances()
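
As the docstring says, BaseDecisionTree is only the shared machinery; in practice you fit one of its derived classes. A minimal sketch of the same surface (fit, predict, feature_importances_) through the public DecisionTreeRegressor, on synthetic data:

import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(200, 3)
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)   # target driven mostly by feature 0

reg = DecisionTreeRegressor(max_depth=4, random_state=0)  # derived class, not BaseDecisionTree
reg.fit(X, y)
print(reg.predict(X[:5]))          # leaf values for the first five samples
print(reg.feature_importances_)    # feature 0 should dominate

The module-level walkthrough below repeats the same pipeline against the Cython internals directly: construct a splitter, build the tree, prune it, then predict.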
SPLITTERS = DENSE_SPLITTERS
splitter = SPLITTERS[splitter](criterion, max_features, min_samples_leaf,
                               min_weight_leaf, random_state)
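
DENSE_SPLITTERS is just a name-to-class lookup over the Cython splitters, and the criterion handed to it must already be a Criterion object. Roughly (module paths are scikit-learn internals and may shift between releases; n_outputs and n_samples mirror the names used in the walkthrough):

from sklearn.tree import _criterion, _splitter

DENSE_SPLITTERS = {"best": _splitter.BestSplitter,
                   "random": _splitter.RandomSplitter}
criterion = _criterion.MSE(n_outputs, n_samples)   # e.g. squared error for regression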

tree_ = Tree(n_features_, n_classes_, n_outputs)

builder = DepthFirstTreeBuilder(splitter, min_samples_split, min_samples_leaf,
                                min_weight_leaf, max_depth,
                                min_impurity_decrease, min_impurity_split)

builder.build(tree_, X_train, y_train)
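
One detail that is easy to miss: y_train must be two-dimensional, shape (n_samples, n_outputs), before it reaches the builder (the public fit does this reshape for you), and X_train should be float32. A small preparation sketch, with names as in the walkthrough:

y_train = np.asarray(y_train, dtype=np.float64).reshape(-1, 1)   # single-output target
X_train = np.asarray(X_train, dtype=DTYPE)                       # DTYPE = np.float32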
# with a single output, unwrap the per-output lists
classes_ = classes_[0]
n_classes_ = n_classes_[0]

# Prune tree

n_classes_ = np.atleast_1d(n_classes_)
pruned_tree = Tree(n_features_, n_classes_, n_outputs)
_build_pruned_tree_ccp(pruned_tree, tree_, ccp_alpha)
tree_ = pruned_tree
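
_build_pruned_tree_ccp copies into pruned_tree only the nodes that survive minimal cost-complexity pruning at the given ccp_alpha. Through the public API the same idea, plus a way to choose alpha, looks roughly like this (X_train/y_train as in the walkthrough):

from sklearn.tree import DecisionTreeClassifier

path = DecisionTreeClassifier(random_state=0).cost_complexity_pruning_path(X_train, y_train)
print(path.ccp_alphas)   # candidate alphas, from no pruning up to a root-only tree

pruned = DecisionTreeClassifier(random_state=0,
                                ccp_alpha=path.ccp_alphas[-2]).fit(X_train, y_train)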

# =============================================================================
# Decision Tree Algorithm - predicting
# =============================================================================

X_test = check_array(X_test, dtype=DTYPE, accept_sparse="csr")
proba = tree_.predict(X_test)
n_samples = X_test.shape[0]
predictions = classes_.take(np.argmax(proba, axis=1), axis=0)
metrics.accuracy_score(y_test, predictions)
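
The argmax-plus-take at the end is exactly what a fitted classifier's predict does on top of the raw leaf values; through the public API the same accuracy check collapses to (data names as above):

from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
print(metrics.accuracy_score(y_test, clf.predict(X_test)))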