Exemplo n.º 1
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False, median=False, parent=None):
        """Predicts for `input_data` combining the leaves of a subtree.

           When a single branch applies to the input, the prediction
           follows the first child whose predicate holds. Otherwise all
           the children are explored and their distributions are merged
           (this is the case when the splitting field has no value).

           Returns a tuple with: the merged distribution, the minimum,
           the maximum, the node where the evaluation ended, the
           accumulated instance count and a parent node (the `parent`
           argument for a leaf, the node itself when children are merged).

           `path` is modified in place: the rule of each followed
           predicate is appended while no split has been expanded.
           `median` is only handed down to the recursive calls.

        """
        if path is None:
            path = []

        # Leaf: report the node's own (possibly weighted) distribution.
        if not self.children:
            leaf_distribution = self.weighted_distribution if self.weighted \
                else self.distribution
            merged = merge_distributions(
                {}, {entry[0]: entry[1] for entry in leaf_distribution})
            return (merged, self.min, self.max, self, self.count, parent)

        # Splits on "text" or "items" fields are always treated as
        # single-branch, whatever one_branch says.
        follows_single_path = one_branch(self.children, input_data) or \
            self.fields[split(self.children)]["optype"] in ["text", "items"]

        if not follows_single_path:
            # missing value found: the unique path stops here and every
            # child contributes to the result
            lows = []
            highs = []
            total = 0
            combined = {}
            for child in self.children:
                (child_distribution, child_min, child_max,
                 _, child_count, _) = child.predict_proportional(
                     input_data, path, True, median, parent=self)
                if child_min is not None:
                    lows.append(child_min)
                if child_max is not None:
                    highs.append(child_max)
                total += child_count
                combined = merge_distributions(combined, child_distribution)
            lowest = min(lows) if lows else None
            highest = max(highs) if highs else None
            return (combined, lowest, highest, self, total, self)

        for child in self.children:
            if not child.predicate.apply(input_data, self.fields):
                continue
            rule = child.predicate.to_rule(self.fields)
            # The path only grows while it is still unique.
            if not (rule in path or missing_found):
                path.append(rule)
            return child.predict_proportional(input_data, path,
                                              missing_found, median,
                                              parent=self)
        # No child predicate matched the input: nothing is returned.
        return None
Exemplo n.º 2
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False, median=False, parent=None):
        """Predicts for `input_data` combining the leaves of a subtree.

           When a single branch applies to the input, the prediction
           follows the first child whose predicate holds. Otherwise all
           the children are explored and their distributions are merged
           (this is the case when the splitting field has no value).

           Returns a tuple with: the merged distribution, the minimum,
           the maximum, the node where the evaluation ended, the
           accumulated instance count and a parent node (the `parent`
           argument for a leaf, the node itself when children are merged).

           `path` is modified in place: the rule of each followed
           predicate is appended while no split has been expanded.
           `median` is only handed down to the recursive calls.

        """
        if path is None:
            path = []

        # Leaf: report the node's own (possibly weighted) distribution.
        if not self.children:
            leaf_distribution = self.weighted_distribution if self.weighted \
                else self.distribution
            merged = merge_distributions(
                {}, {entry[0]: entry[1] for entry in leaf_distribution})
            return (merged, self.min, self.max, self, self.count, parent)

        # Splits on "text" or "items" fields are always treated as
        # single-branch, whatever one_branch says.
        follows_single_path = one_branch(self.children, input_data) or \
            self.fields[split(self.children)]["optype"] in ["text", "items"]

        if not follows_single_path:
            # missing value found: the unique path stops here and every
            # child contributes to the result
            lows = []
            highs = []
            total = 0
            combined = {}
            for child in self.children:
                (child_distribution, child_min, child_max,
                 _, child_count, _) = child.predict_proportional(
                     input_data, path, True, median, parent=self)
                if child_min is not None:
                    lows.append(child_min)
                if child_max is not None:
                    highs.append(child_max)
                total += child_count
                combined = merge_distributions(combined, child_distribution)
            lowest = min(lows) if lows else None
            highest = max(highs) if highs else None
            return (combined, lowest, highest, self, total, self)

        for child in self.children:
            if not child.predicate.apply(input_data, self.fields):
                continue
            rule = child.predicate.to_rule(self.fields)
            # The path only grows while it is still unique.
            if not (rule in path or missing_found):
                path.append(rule)
            return child.predict_proportional(input_data, path,
                                              missing_found, median,
                                              parent=self)
        # No child predicate matched the input: nothing is returned.
        return None
Exemplo n.º 3
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False, median=False):
        """Predicts for `input_data` combining the leaves of a subtree.

           When a single branch applies to the input, the prediction
           follows the first child whose predicate holds. Otherwise all
           the children are explored and their distributions are merged
           (this is the case when the splitting field has no value).

           Returns a tuple with: the merged distribution, the minimum,
           the maximum and the node where the evaluation ended.

           `path` is modified in place: the rule of each followed
           predicate is appended while no split has been expanded.
           `median` is only handed down to the recursive calls.

        """
        if path is None:
            path = []

        # Leaf: report the node's own distribution.
        if not self.children:
            merged = merge_distributions(
                {}, {entry[0]: entry[1] for entry in self.distribution})
            return (merged, self.min, self.max, self)

        if not one_branch(self.children, input_data):
            # missing value found: the unique path stops here and every
            # child contributes to the result
            lows = []
            highs = []
            combined = {}
            for child in self.children:
                child_distribution, child_min, child_max, _ = \
                    child.predict_proportional(input_data, path,
                                               True, median)
                if child_min is not None:
                    lows.append(child_min)
                if child_max is not None:
                    highs.append(child_max)
                combined = merge_distributions(combined, child_distribution)
            lowest = min(lows) if lows else None
            highest = max(highs) if highs else None
            return (combined, lowest, highest, self)

        for child in self.children:
            if not child.predicate.apply(input_data, self.fields):
                continue
            rule = child.predicate.to_rule(self.fields)
            # The path only grows while it is still unique.
            if not (rule in path or missing_found):
                path.append(rule)
            return child.predict_proportional(input_data, path,
                                              missing_found, median)
        # No child predicate matched the input: nothing is returned.
        return None
Exemplo n.º 4
0
    def predict_proportional(self,
                             input_data,
                             path=None,
                             missing_found=False,
                             median=False):
        """Predicts for `input_data` combining the leaves of a subtree.

           When a single branch applies to the input, the prediction
           follows the first child whose predicate holds. Otherwise all
           the children are explored and their distributions are merged
           (this is the case when the splitting field has no value).

           Returns a tuple with the merged distribution and the node
           where the evaluation ended.

           `path` is modified in place: the rule of each followed
           predicate is appended while no split has been expanded.
           `median` is only handed down to the recursive calls.

        """
        if path is None:
            path = []

        # Leaf: report the node's own distribution.
        if not self.children:
            merged = merge_distributions(
                {}, {entry[0]: entry[1] for entry in self.distribution})
            return (merged, self)

        if not one_branch(self.children, input_data):
            # missing value found: the unique path stops here and every
            # child contributes to the result
            combined = {}
            for child in self.children:
                child_distribution = child.predict_proportional(
                    input_data, path, True, median)[0]
                combined = merge_distributions(combined, child_distribution)
            return combined, self

        for child in self.children:
            if not child.predicate.apply(input_data, self.fields):
                continue
            rule = child.predicate.to_rule(self.fields)
            # The path only grows while it is still unique.
            if not (rule in path or missing_found):
                path.append(rule)
            return child.predict_proportional(input_data, path,
                                              missing_found, median)
        # No child predicate matched the input: nothing is returned.
        return None
Exemplo n.º 5
0
def proportional_predict(tree, offsets, fields, input_data, path=None,
                         missing_found=False, median=False, parent=None):
    """Predicts for `input_data` combining the leaves of a subtree.

       The node obtained from `tree` is read positionally: `offsets` maps
       attribute names ("children#", "children", "distribution",
       "wdistribution", "min", "max", "count") to positions in the node.
       The "wdistribution" entry is preferred over "distribution" when
       `offsets` contains it.

       When a single branch applies to the input, the prediction follows
       the first child whose predicate holds. Otherwise all the children
       are explored and their distributions are merged (this is the case
       when the splitting field has no value).

       Returns a tuple with: the merged distribution, the minimum, the
       maximum, the node where the evaluation ended, the accumulated
       instance count, the `parent` argument and the path.

       `path` is modified in place: the rule of each followed predicate
       is appended while no split has been expanded. `median` is only
       handed down to the recursive calls.

    """
    if path is None:
        path = []

    node = get_node(tree)

    children_number = node[offsets["children#"]]
    distribution_key = "wdistribution" if "wdistribution" in offsets \
        else "distribution"
    distribution = node[offsets[distribution_key]]
    if children_number == 0:
        children = []
    else:
        children = node[offsets["children"]]
    # min and max are optional attributes of the node layout
    min_offset = offsets.get("min")
    t_min = None if min_offset is None else node[min_offset]
    max_offset = offsets.get("max")
    t_max = None if max_offset is None else node[max_offset]
    count = node[offsets["count"]]

    # Leaf: report the node's own distribution.
    if children_number == 0:
        merged = merge_distributions(
            {}, {entry[0]: entry[1] for entry in distribution})
        return (merged, t_min, t_max, node, count, parent, path)

    # Splits on "text" or "items" fields are always treated as
    # single-branch, whatever one_branch says.
    follows_single_path = one_branch(children, input_data) or \
        fields[children[0][FIELD_OFFSET]]["optype"] in ["text", "items"]

    if not follows_single_path:
        # missing value found: the unique path stops here and every
        # child contributes to the result
        lows = []
        highs = []
        total = 0
        combined = {}
        for child in children:
            (child_distribution, child_min, child_max,
             _, child_count, _, path) = proportional_predict(
                 child, offsets, fields, input_data, path, True, median,
                 parent=node)
            if child_min is not None:
                lows.append(child_min)
            if child_max is not None:
                highs.append(child_max)
            total += child_count
            combined = merge_distributions(combined, child_distribution)
        lowest = min(lows) if lows else None
        highest = max(highs) if highs else None
        return (combined, lowest, highest, node, total, parent, path)

    for child in children:
        [op_code, field_id, value, term, missing] = get_predicate(child)
        if not apply_predicate(op_code, field_id, value, term, missing,
                               input_data, fields[field_id]):
            continue
        rule = predicate_to_rule(op_code, fields[field_id], value,
                                 term, missing)
        # The path only grows while it is still unique.
        if not (rule in path or missing_found):
            path.append(rule)
        return proportional_predict(child, offsets, fields, input_data,
                                    path, missing_found, median,
                                    parent=node)
    # No child predicate matched the input: nothing is returned.
    return None