Exemplo n.º 1
0
 def _score_disc_bin():
     """Score a discrete attribute using its best binarization (MSE).

     An attribute with exactly two values is handed over to
     ``_score_disc``. Otherwise the binarization is searched for with
     ``_tree_scorers.find_binarization_MSE``.

     Returns:
         ``REJECT_ATTRIBUTE`` if the binarization score equals zero,
         else the tuple ``(score, node, branches, 2)``; the trailing 2
         is presumably the number of branches (confirm with the caller).
     """
     value_count = len(attr.values)
     if value_count == 2:
         return _score_disc()

     # No correction for missing attribute values is applied below: the
     # score coming from the scorer is already adjusted for them.
     bin_score, raw_mapping = _tree_scorers.find_binarization_MSE(
         col_x, col_y, value_count, self.min_samples_leaf)
     if bin_score == 0:
         return REJECT_ATTRIBUTE

     build_branches = MappedDiscreteNode.branches_from_mapping
     node_mapping, branches = build_branches(
         data.X[:, attr_no], raw_mapping, value_count)
     split_node = MappedDiscreteNode(attr, attr_no, node_mapping, None)
     return bin_score, split_node, branches, 2
Exemplo n.º 2
0
 def _score_disc_bin():
     """Score a discrete attribute using its best binarization (MSE).

     An attribute with exactly two values is handed over to
     ``_score_disc``. Otherwise the binarization is searched for with
     ``_tree_scorers.find_binarization_MSE`` and the branches are built
     from ``col_x``.

     Returns:
         ``REJECT_ATTRIBUTE`` if the binarization score equals zero,
         else the tuple ``(score, node, branches, 2)``; the trailing 2
         is presumably the number of branches (confirm with the caller).
     """
     value_count = len(attr.values)
     if value_count == 2:
         return _score_disc()

     # No correction for missing attribute values is applied below: the
     # score coming from the scorer is already adjusted for them.
     bin_score, raw_mapping = _tree_scorers.find_binarization_MSE(
         col_x, col_y, value_count, self.min_samples_leaf)
     if bin_score == 0:
         return REJECT_ATTRIBUTE

     build_branches = MappedDiscreteNode.branches_from_mapping
     node_mapping, branches = build_branches(
         col_x, raw_mapping, value_count)
     split_node = MappedDiscreteNode(attr, attr_no, node_mapping, None)
     return bin_score, split_node, branches, 2
Exemplo n.º 3
0
 def _score_disc_bin():
     """Score a discrete attribute with binarization (entropy-based).

     Attributes with at most two values are delegated to
     ``_score_disc``. Otherwise the binarization is searched for with
     ``_tree_scorers.find_binarization_entropy`` on the contingency
     of the attribute, and the branches are built from ``col_x``.

     Returns:
         ``REJECT_ATTRIBUTE`` if all attribute values are missing or the
         best score is not positive, else the tuple
         ``(score, node, branches, 2)``; the trailing 2 is presumably
         the number of branches (confirm with the caller).
     """
     value_count = len(attr.values)
     if value_count <= 2:
         return _score_disc()

     table = contingency.Discrete(data, attr)
     # Summing the contingency itself skips instances whose attribute
     # value is missing.
     class_totals = np.sum(table, axis=1)
     value_totals = np.sum(table, axis=0)
     if np.sum(value_totals) == 0:
         # Every instance has a missing value for this attribute.
         return REJECT_ATTRIBUTE

     split_score, split_mapping = _tree_scorers.find_binarization_entropy(
         table, class_totals, value_totals, self.min_samples_leaf)
     if split_score <= 0:
         return REJECT_ATTRIBUTE

     # Scale the score down by the share of instances with unknown values.
     known_fraction = 1 - np.sum(table.unknowns) / len(data)
     split_score *= known_fraction

     build_branches = MappedDiscreteNode.branches_from_mapping
     node_mapping, branches = build_branches(
         col_x, split_mapping, value_count)
     split_node = MappedDiscreteNode(attr, attr_no, node_mapping, None)
     return split_score, split_node, branches, 2
Exemplo n.º 4
0
 def _score_disc_bin():
     """Score a discrete attribute with binarization (entropy-based).

     Attributes with at most two values are delegated to
     ``_score_disc``. Otherwise the binarization is searched for with
     ``_tree_scorers.find_binarization_entropy`` on the contingency
     of the attribute, and the branches are built from the attribute's
     column of ``data.X``.

     Returns:
         ``REJECT_ATTRIBUTE`` if all attribute values are missing or the
         best score is not positive, else the tuple
         ``(score, node, branches, 2)``; the trailing 2 is presumably
         the number of branches (confirm with the caller).
     """
     value_count = len(attr.values)
     if value_count <= 2:
         return _score_disc()

     table = contingency.Discrete(data, attr)
     # Summing the contingency itself skips instances whose attribute
     # value is missing.
     class_totals = np.sum(table, axis=1)
     value_totals = np.sum(table, axis=0)
     if np.sum(value_totals) == 0:
         # Every instance has a missing value for this attribute.
         return REJECT_ATTRIBUTE

     split_score, split_mapping = _tree_scorers.find_binarization_entropy(
         table, class_totals, value_totals, self.min_samples_leaf)
     if split_score <= 0:
         return REJECT_ATTRIBUTE

     # Scale the score down by the share of instances with unknown values.
     known_fraction = 1 - np.sum(table.unknowns) / len(data)
     split_score *= known_fraction

     build_branches = MappedDiscreteNode.branches_from_mapping
     node_mapping, branches = build_branches(
         data.X[:, attr_no], split_mapping, value_count)
     split_node = MappedDiscreteNode(attr, attr_no, node_mapping, None)
     return split_score, split_node, branches, 2