import numpy as np
import six  # `from sklearn.externals import six` in older scikit-learn
from abc import ABCMeta, abstractmethod

from sklearn.base import BaseEstimator
from sklearn.utils import check_array
# Tree and DepthFirstTreeBuilder are scikit-learn's Cython tree internals.
from sklearn.tree._tree import Tree, DepthFirstTreeBuilder


class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator)):
    """Base class for decision trees.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """

    @abstractmethod
    def __init__(self,
                 criterion,
                 splitter,
                 max_depth,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_fraction_leaf,
                 max_features,
                 max_leaf_nodes,
                 random_state,
                 min_impurity_decrease,
                 min_impurity_split,
                 class_weight=None,
                 presort=False):
        self.criterion = criterion
        self.splitter = splitter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.random_state = random_state
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.min_impurity_split = min_impurity_split
        self.class_weight = class_weight
        self.presort = presort

    def fit(self, X, y, sample_weight=None, check_input=True,
            X_idx_sorted=None):
        if check_input:
            # The Cython builder requires float32 (DTYPE) inputs.
            X = check_array(X, dtype=np.float32)
        n_samples, self.n_features_ = X.shape

        # The tree builder expects y as a float64 array of shape
        # (n_samples, n_outputs).
        y = np.atleast_1d(y)
        if y.ndim == 1:
            y = np.reshape(y, (-1, 1))
        self.n_outputs_ = y.shape[1]
        y = np.ascontiguousarray(y, dtype=np.float64)

        expanded_class_weight = None  # only used by the classification path
        # The classification path is disabled in this excerpt; it would
        # encode the labels as integer indices:
        # if is_classifier(self):
        #     y = np.copy(y)
        #     classes_k, y_encoded = np.unique(y, return_inverse=True)
        #     y = y_encoded.reshape(-1, 1)
        # else:
        self.classes_ = [None]
        self.n_classes_ = [1]  # regression: a single output "class"
        self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)

        # min_weight_fraction_leaf is a fraction of the total sample
        # weight; the builder needs the corresponding absolute weight.
        if sample_weight is None:
            self.min_weight_leaf = self.min_weight_fraction_leaf * n_samples
        else:
            self.min_weight_leaf = (self.min_weight_fraction_leaf *
                                    np.sum(sample_weight))

        # Build tree
        criterion = self.criterion
        splitter = self.splitter
        self.tree_ = Tree(self.n_features_, self.n_classes_, self.n_outputs_)
        builder = DepthFirstTreeBuilder(splitter,
                                        self.min_samples_split,
                                        self.min_samples_leaf,
                                        self.min_weight_leaf,
                                        self.max_depth,
                                        self.min_impurity_decrease,
                                        self.min_impurity_split)
        builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)
        return self

    def predict(self, X, check_input=True):
        # For a single-output tree, Tree.predict returns shape
        # (n_samples, max_n_classes); column 0 holds the regression values.
        proba = self.tree_.predict(X)
        return proba[:, 0]

    def apply(self, X, check_input=True):
        # Index of the leaf each sample ends up in.
        return self.tree_.apply(X)

    @property
    def feature_importances_(self):
        """Return the feature importances.

        The importance of a feature is computed as the (normalized)
        total reduction of the criterion brought by that feature.
        It is also known as the Gini importance.

        Returns
        -------
        feature_importances_ : array, shape = [n_features]
        """
        return self.tree_.compute_feature_importances()
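
# Usage sketch (an illustration, not part of the excerpt above):
# BaseDecisionTree is abstract, so training runs through a derived class.
# scikit-learn's public DecisionTreeRegressor inherits this same
# fit/predict machinery, so it shows the code path end to end.
import numpy as np
from sklearn.tree import DecisionTreeRegressor

X_demo = np.array([[0.0], [1.0], [2.0], [3.0]], dtype=np.float32)
y_demo = np.array([0.0, 0.0, 1.0, 1.0])

reg = DecisionTreeRegressor(max_depth=2, random_state=0)
reg.fit(X_demo, y_demo)            # dispatches to BaseDecisionTree.fit
print(reg.predict([[1.5]]))        # mean target of the matched leaf
print(reg.apply(X_demo))           # leaf index per sample
print(reg.feature_importances_)    # normalized impurity reduction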
# =============================================================================
# Decision Tree Algorithm - training and pruning
# =============================================================================
import numpy as np
from sklearn import metrics
from sklearn.utils import check_array
# Cython internals: dense splitters, the tree structure, the depth-first
# builder, cost-complexity pruning, and the float32 input dtype.
from sklearn.tree._splitter import BestSplitter, RandomSplitter
from sklearn.tree._tree import (Tree, DepthFirstTreeBuilder,
                                _build_pruned_tree_ccp, DTYPE)

# DENSE_SPLITTERS maps splitter names to their dense-input implementations.
DENSE_SPLITTERS = {"best": BestSplitter, "random": RandomSplitter}
SPLITTERS = DENSE_SPLITTERS

# criterion, splitter, the hyperparameters, classes_/n_classes_, and the
# train/test split are assumed to be defined earlier in the script.
splitter = SPLITTERS[splitter](criterion, max_features, min_samples_leaf,
                               min_weight_leaf, random_state)

tree_ = Tree(n_features_, n_classes_, n_outputs)
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
                                min_samples_leaf, min_weight_leaf,
                                max_depth, min_impurity_decrease,
                                min_impurity_split)
builder.build(tree_, X_train, y_train)

# For a single-output tree, unwrap the per-output lists.
classes_ = classes_[0]
n_classes_ = n_classes_[0]

# Prune tree with minimal cost-complexity pruning at strength ccp_alpha.
n_classes_ = np.atleast_1d(n_classes_)
pruned_tree = Tree(n_features_, n_classes_, n_outputs)
_build_pruned_tree_ccp(pruned_tree, tree_, ccp_alpha)
tree_ = pruned_tree

# =============================================================================
# Decision Tree Algorithm - predicting
# =============================================================================
X_test = check_array(X_test, dtype=DTYPE, accept_sparse="csr")
proba = tree_.predict(X_test)
n_samples = X_test.shape[0]

# Map the highest-probability column back to the original class labels.
predictions = classes_.take(np.argmax(proba, axis=1), axis=0)
print(metrics.accuracy_score(y_test, predictions))
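
# Public-API sketch (an illustration, for comparison with the script above):
# DecisionTreeClassifier runs the same build-then-CCP-prune flow internally
# when ccp_alpha > 0, and cost_complexity_pruning_path exposes the candidate
# alphas computed from the fitted tree.
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X_tr, X_te, y_tr, y_te = train_test_split(*load_iris(return_X_y=True),
                                          random_state=0)

path = DecisionTreeClassifier(random_state=0).cost_complexity_pruning_path(
    X_tr, y_tr)
print(path.ccp_alphas)  # effective alphas, weakest links first

clf = DecisionTreeClassifier(random_state=0, ccp_alpha=path.ccp_alphas[1])
clf.fit(X_tr, y_tr)
print(metrics.accuracy_score(y_te, clf.predict(X_te)))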