def _build_metafeatures(self, X, y):
    """
    Build the meta-features associated with a particular node.

    These are various features that can be used at training and
    prediction time, e.g. the number of training samples available for
    the classifier trained at that node, the number of targets (classes)
    to be predicted at that node, etc.

    Parameters
    ----------
    X : (sparse) array-like, shape = [n_samples, n_features]
        The training data matrix at current node.

    y : array-like, shape = [n_samples]
        The target classes corresponding to samples in X.

    Returns
    -------
    metafeatures : dict
        Python dictionary of meta-features. The following meta-features
        are computed by default:

        * 'n_samples' - Number of samples used to train classifier at
          given node.
        * 'n_targets' - Number of targets (classes) to classify into at
          given node.

    """
    # Indices of non-zero rows in X, i.e. rows corresponding to
    # relevant samples for this node.
    ix = nnz_rows_ix(X)

    return dict(
        n_samples=len(ix),
        n_targets=len(np.unique(y[ix])),
    )
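# Note: `nnz_rows_ix` is a helper defined elsewhere in this module. A
# minimal sketch of its assumed behavior (return the indices of rows in
# X that have at least one non-zero entry), working for both dense numpy
# arrays and scipy sparse matrices:
#
#     import numpy as np
#
#     def nnz_rows_ix(X):
#         # X.nonzero() yields (row_indices, col_indices) for both dense
#         # ndarrays and scipy sparse matrices; deduplicating the row
#         # indices gives the rows with at least one non-zero value.
#         return np.unique(X.nonzero()[0])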
def _train_local_classifier(self, X, y, node_id):
    if self.graph_.out_degree(node_id) == 0:
        # Leaf node
        if self.algorithm == "lcpn":
            # Leaf nodes do not get a classifier assigned in LCPN algorithm mode.
            self.logger.debug(
                "_train_local_classifier() - skipping leaf node %s when algorithm is 'lcpn'",
                node_id,
            )
            return

    X = self.graph_.node[node_id]["X"]
    nnz_rows = nnz_rows_ix(X)
    X_ = X[nnz_rows, :]

    y_rolled_up = rollup_nodes(
        graph=self.graph_,
        source=node_id,
        targets=[y[idx] for idx in nnz_rows],
    )

    if self.is_tree_:
        y_ = flatten_list(y_rolled_up)
    else:
        # Class hierarchy graph is a DAG
        X_, y_ = apply_rollup_Xy(X_, y_rolled_up)

    num_targets = len(np.unique(y_))

    self.logger.debug(
        "_train_local_classifier() - Training local classifier for node: %s, X_.shape: %s, len(y): %s, n_targets: %s",  # noqa:E501
        node_id,
        X_.shape,
        len(y_),
        num_targets,
    )

    if X_.shape[0] == 0:
        # No training data could be materialized for current node
        # TODO: support a 'strict' mode flag to explicitly enable/disable fallback logic here?
        self.logger.warning(
            "_train_local_classifier() - not enough training data available to train, classification in branch will terminate at node %s",  # noqa:E501
            node_id,
        )
        return
    elif num_targets == 1:
        # Training data could be materialized for only a single target at current node
        # TODO: support a 'strict' mode flag to explicitly enable/disable fallback logic here?
        constant = y_[0]
        self.logger.debug(
            "_train_local_classifier() - only a single target (child node) available to train classifier for node %s, will trivially predict %s",  # noqa:E501
            node_id,
            constant,
        )
        clf = DummyClassifier(strategy="constant", constant=constant)
    else:
        clf = self._base_estimator_for(node_id)

    clf.fit(X=X_, y=y_)
    self.graph_.node[node_id][CLASSIFIER] = clf
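# Note: `rollup_nodes` (along with `flatten_list` / `apply_rollup_Xy`) is
# defined elsewhere in the library. A hedged sketch of its assumed
# behavior: map each raw target label up to the direct children of
# `source` through which that label is reachable in the class hierarchy
# graph, returning one list of candidate children per sample (a
# single-element list in a tree, possibly several in a DAG):
#
#     import networkx as nx
#
#     def rollup_nodes(graph, source, targets):
#         # Direct children of the current node; these are the class
#         # labels the local classifier at `source` will predict.
#         children = list(graph.successors(source))
#         return [
#             [child for child in children
#              if child == target or nx.has_path(graph, child, target)]
#             for target in targets
#         ]
#
# In a tree, `flatten_list` then unwraps the single-element lists; in a
# DAG, `apply_rollup_Xy` is assumed to duplicate each row of X once per
# reachable child, so a sample can contribute to several classes.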