Пример #1
0
def one_branch(children, input_data):
    """Tell whether a single branch can be chosen for the input data

    The result is true when the value returned by `split(children)` is a
    key of `input_data`; otherwise it is whatever
    `missing_branch(children) or none_value(children)` evaluates to.

    """
    if split(children) in input_data:
        return True
    return missing_branch(children) or none_value(children)
Пример #2
0
    def predict_proportional(self, input_data, path=None):
        """Predicts by merging the distributions of the reachable leaves.

           A node without children contributes its own distribution. When
           the splitting field of the children has a value in `input_data`,
           the first child whose predicate applies is followed and its rule
           is stored in `path` (once). When the value is missing, every
           child is explored and their distributions are merged.

        """
        if path is None:
            path = []

        if not self.children:
            leaf_distribution = dict(
                (item[0], item[1]) for item in self.distribution)
            return merge_distributions({}, leaf_distribution)

        if split(self.children) not in input_data:
            # no value for the splitting field: explore all the branches
            merged = {}
            for child in self.children:
                merged = merge_distributions(
                    merged, child.predict_proportional(input_data, path))
            return merged

        for child in self.children:
            if not child.predicate.apply(input_data, self.fields):
                continue
            rule = child.predicate.to_rule(self.fields)
            if rule not in path:
                path.append(rule)
            return child.predict_proportional(input_data, path)
        # none of the children predicates applied to the input data
        return None
Пример #3
0
    def predict_proportional(self, input_data, path=None):
        """Returns the distribution obtained by combining the leaves that
           are compatible with the given field values.

           If the field used to split the children is in `input_data`, the
           prediction goes down the first child whose predicate is
           fulfilled, adding its rule to `path` when it is not there yet.
           If it is not, the distributions predicted by all the children
           are merged. Leaves return their own distribution.

        """
        path = [] if path is None else path

        if not self.children:
            own = dict((entry[0], entry[1]) for entry in self.distribution)
            return merge_distributions({}, own)

        if split(self.children) in input_data:
            for node in self.children:
                if node.predicate.apply(input_data, self.fields):
                    rule = node.predicate.to_rule(self.fields)
                    if rule not in path:
                        path.append(rule)
                    return node.predict_proportional(input_data, path)
            # no predicate matched the input data
            return None

        # the splitting value is missing: every branch is considered
        accumulated = {}
        for node in self.children:
            accumulated = merge_distributions(
                accumulated, node.predict_proportional(input_data, path))
        return accumulated
Пример #4
0
def one_branch(children, input_data):
    """Checks whether only one of the children branches has to be followed

    That is the case when `input_data` has a key for the value returned by
    `split(children)`, or when either `missing_branch(children)` or
    `none_value(children)` is true.

    """
    return (split(children) in input_data
            or missing_branch(children)
            or none_value(children))
Пример #5
0
    def predict(self, input_data, path=None):
        """Makes a prediction based on a number of field values.

        The input fields must be keyed by Id. While the splitting field of
        the children has a value in `input_data`, the first child whose
        predicate holds is followed and a "<name> <operator> <value>" rule
        is appended to `path`. When the node has no children, the splitting
        value is missing or no child matches, the prediction of the current
        node is returned.

        Returns the tuple (output, path, confidence, distribution,
        instances), where instances is the sum of the counts found in the
        node's distribution (0 when it is empty).

        """
        def get_instances(distribution):
            """Returns the total number of instances in a distribution

            """
            return sum(x[1] for x in distribution) if distribution else 0

        if path is None:
            path = []
        if self.children and split(self.children) in input_data:
            for child in self.children:
                predicate = child.predicate
                # the operator function is called directly: the `apply`
                # builtin is deprecated and was removed in Python 3
                if OPERATOR[predicate.operator](
                        input_data[predicate.field], predicate.value):
                    path.append(u"%s %s %s" % (
                                self.fields[predicate.field]['name'],
                                predicate.operator,
                                predicate.value))
                    return child.predict(input_data, path)
        return (self.output, path, self.confidence,
                self.distribution, get_instances(self.distribution))
Пример #6
0
    def python_body(self, depth=1, cmv=False):
        """Translate the model into a set of "if" python statements.

        `depth` controls the size of indentation. If `cmv` (control missing
        values) is set to True then as soon as a value is missing to
        evaluate a predicate the output at that node is returned without
        further evaluation.

        Returns the generated text, which is an empty string for a node
        that has no children.

        """
        body = ""
        if self.children:
            if cmv:
                field = split(self.children)
                body += ("%sif (%s is None):\n " %
                         (INDENT * depth,
                          self.fields[field]['slug']))
                if self.fields[self.objective_field]['optype'] == 'numeric':
                    body += ("%s return %s\n" %
                             (INDENT * (depth + 1),
                              self.output))
                else:
                    body += ("%s return '%s'\n" %
                             (INDENT * (depth + 1),
                              self.output))

            for child in self.children:
                # repr() replaces the backtick syntax, which is a syntax
                # error in Python 3
                body += ("%sif (%s %s %s):\n" %
                         (INDENT * depth,
                          self.fields[child.predicate.field]['slug'],
                          PYTHON_OPERATOR[child.predicate.operator],
                          repr(child.predicate.value)))
                # NOTE(review): `cmv` is not forwarded to the children, so
                # they are generated with cmv=False - confirm it is intended
                body += child.python_body(depth + 1)
        # the text has to be returned: the recursive call above concatenates
        # the result of the children
        # NOTE(review): a node without children adds no "return" statement
        # to the generated code - confirm whether that part is missing
        return body
Пример #7
0
    def predict(self, input_data, path=None):
        """Makes a prediction based on a number of field values.

        The input fields must be keyed by Id. The tree is walked down
        following, at each node, the first child whose predicate is
        fulfilled by `input_data`; each step appends a
        "<name> <operator> <value>" rule to `path`. The walk stops at the
        node that has no children, whose splitting field has no value in
        `input_data` or whose children do not match.

        Returns the tuple (output, path, confidence, distribution,
        instances) of the node where the walk stops; instances is the sum
        of the counts in its distribution (0 when it is empty).

        """
        def get_instances(distribution):
            """Returns the total number of instances in a distribution

            """
            return sum(x[1] for x in distribution) if distribution else 0

        if path is None:
            path = []
        if self.children and split(self.children) in input_data:
            for child in self.children:
                # direct call instead of the `apply` builtin, which is
                # deprecated and not available in Python 3
                compare = OPERATOR[child.predicate.operator]
                if compare(input_data[child.predicate.field],
                           child.predicate.value):
                    path.append(u"%s %s %s" % (
                                self.fields[child.predicate.field]['name'],
                                child.predicate.operator,
                                child.predicate.value))
                    return child.predict(input_data, path)
        return (self.output, path, self.confidence,
                self.distribution, get_instances(self.distribution))
Пример #8
0
    def tableau_body(self, body=u"", conditions=None, cmv=None,
                     ids_path=None, subtree=True):
        """Builds the IF / ELSEIF statements for this node in Tableau syntax

        `body` is the text generated so far and `conditions` the list of
        conditions accumulated on the way to this node (the statement
        starts with IF only when no list is given). `cmv` holds the names
        of the fields whose ISNULL check has already been generated, and
        `ids_path` and `subtree` are handed to `filter_nodes` to select the
        children to be translated. An empty string is returned as soon as
        one of the children splits on a text field.

        """
        def output_value():
            """Returns the node's output, using `tableau_string` when the
               objective field is not numeric
            """
            if self.fields[self.objective_field]['optype'] == 'numeric':
                return self.output
            return tableau_string(self.output)

        if cmv is None:
            cmv = []
        keyword = u"IF" if conditions is None else u"ELSEIF"
        if conditions is None:
            conditions = []

        nodes = filter_nodes(self.children, ids=ids_path,
                             subtree=subtree)
        if not nodes:
            # nothing to descend into: the node's own output is emitted
            value = output_value()
            body += (
                u"%s %s THEN" % (keyword, " AND ".join(conditions)))
            body += u" %s\n" % value
            return body

        split_name = self.fields[split(nodes)]['name']
        if split_name not in cmv:
            # the missing value case is emitted once per field
            conditions.append("ISNULL([%s])" % split_name)
            body += (u"%s %s THEN " %
                     (keyword, " AND ".join(conditions)))
            body += (u"%s\n" % output_value())
            cmv.append(split_name)
            conditions.pop()

        for child in nodes:
            predicate = child.predicate
            optype = self.fields[predicate.field]['optype']
            if optype == 'text':
                return u""
            if optype == 'numeric':
                operand = predicate.value
            else:
                operand = repr(predicate.value)
            conditions.append("[%s]%s%s" % (
                self.fields[predicate.field]['name'],
                PYTHON_OPERATOR[predicate.operator],
                operand))
            # children work on copies, so that siblings do not share state
            body = child.tableau_body(body, conditions[:], cmv=cmv[:],
                                      ids_path=ids_path, subtree=subtree)
            conditions.pop()

        return body
Пример #9
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False, median=False, parent=None):
        """Predicts by merging the distributions of all the leaves that
           are compatible with the input data.

           While a single branch can be chosen (see `one_branch`) or the
           splitting field has "text" or "items" optype, the first child
           whose predicate applies is followed; its rule is added to
           `path` unless it is already there or a missing value was found
           before. Otherwise every child is explored and the results are
           combined.

           Returns a tuple with the merged distribution, the minimum, the
           maximum, the node that produced the result, its population and
           a parent node (the `parent` argument for a leaf, the node itself
           after a merge).

        """
        if path is None:
            path = []

        if not self.children:
            if self.weighted:
                distribution = self.weighted_distribution
            else:
                distribution = self.distribution
            leaf_distribution = merge_distributions(
                {}, dict((x[0], x[1]) for x in distribution))
            return (leaf_distribution, self.min, self.max, self,
                    self.count, parent)

        if (one_branch(self.children, input_data)
                or self.fields[split(self.children)]["optype"]
                in ["text", "items"]):
            for child in self.children:
                if not child.predicate.apply(input_data, self.fields):
                    continue
                rule = child.predicate.to_rule(self.fields)
                if rule not in path and not missing_found:
                    path.append(rule)
                return child.predict_proportional(
                    input_data, path, missing_found, median, parent=self)
            # no child predicate applied to the input data
            return None

        # missing value found, the unique path stops
        missing_found = True
        merged = {}
        lowest = []
        highest = []
        population = 0
        for child in self.children:
            (child_distribution, child_min, child_max,
             _, child_population, _) = child.predict_proportional(
                 input_data, path, missing_found, median, parent=self)
            if child_min is not None:
                lowest.append(child_min)
            if child_max is not None:
                highest.append(child_max)
            population += child_population
            merged = merge_distributions(merged, child_distribution)
        return (merged,
                min(lowest) if lowest else None,
                max(highest) if highest else None,
                self, population, self)
Пример #10
0
    def python_body(self, depth=1, cmv=None, input_map=False):
        """Generates the python "if" statements that reproduce the subtree

        `depth` is the indentation level of the generated lines. `cmv`
        lists the slugs of the fields whose missing value check has already
        been written; the check returns the node's output. When `input_map`
        is true the fields are read from a `data` dictionary.

        """

        def map_data(field, missing=False):
            """Returns the text used to refer to the field: the field
               itself or, when `input_map` is set, the `data` lookup.
            """
            if not input_map:
                return field
            if missing:
                return "not '%s' in data or data['%s']" % (field, field)
            return "data['%s']" % field

        def output_literal():
            """Returns the node's output, repr-ed unless the objective
               field is numeric.
            """
            if self.fields[self.objective_field]['optype'] == 'numeric':
                return self.output
            return repr(self.output)

        if cmv is None:
            cmv = []

        if not self.children:
            value = output_literal()
            return u"%sreturn %s\n" % (INDENT * depth, value)

        pieces = []
        slug = self.fields[split(self.children)]['slug']
        if slug not in cmv:
            # missing value check, written only once per field
            pieces.append(u"%sif (%s is None):\n" %
                          (INDENT * depth, map_data(slug, True)))
            value = output_literal()
            pieces.append(u"%sreturn %s\n" %
                          (INDENT * (depth + 1), value))
            cmv.append(slug)

        for child in self.children:
            predicate = child.predicate
            if self.fields[predicate.field]['optype'] == 'numeric':
                operand = predicate.value
            else:
                operand = repr(predicate.value)
            pieces.append(u"%sif (%s %s %s):\n" %
                          (INDENT * depth,
                           map_data(self.fields[predicate.field]['slug'],
                                    False),
                           PYTHON_OPERATOR[predicate.operator],
                           operand))
            # each child gets its own copy of the checked fields
            pieces.append(child.python_body(depth + 1, cmv=cmv[:],
                                            input_map=input_map))
        return u"".join(pieces)
Пример #11
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False, median=False, parent=None):
        """Computes the prediction information by combining the leaves of
           the subtree that the input data can reach.

           A leaf returns its (weighted, if the node is weighted)
           distribution. A node where `one_branch` holds, or whose
           splitting field is of "text" or "items" type, delegates to the
           first child whose predicate applies. Any other node is
           considered to have a missing splitting value: all its children
           are evaluated and their results merged.

           The result is the tuple (distribution, minimum, maximum, node,
           population, parent node).

        """
        path = [] if path is None else path

        if not self.children:
            source = self.distribution if not self.weighted else \
                self.weighted_distribution
            pairs = dict((x[0], x[1]) for x in source)
            return (merge_distributions({}, pairs),
                    self.min, self.max, self, self.count, parent)

        unique_branch = one_branch(self.children, input_data) or \
            self.fields[split(self.children)]["optype"] in \
            ["text", "items"]

        if unique_branch:
            for node in self.children:
                if node.predicate.apply(input_data, self.fields):
                    rule = node.predicate.to_rule(self.fields)
                    # the path is frozen once a missing value is found
                    if rule not in path and not missing_found:
                        path.append(rule)
                    return node.predict_proportional(input_data, path,
                                                     missing_found, median,
                                                     parent=self)
            return None

        # missing value found, the unique path stops
        missing_found = True
        combined = {}
        minimums, maximums, total = [], [], 0
        for node in self.children:
            (partial, partial_min, partial_max, _, partial_count, _) = \
                node.predict_proportional(input_data, path,
                                          missing_found, median,
                                          parent=self)
            if partial_min is not None:
                minimums.append(partial_min)
            if partial_max is not None:
                maximums.append(partial_max)
            total += partial_count
            combined = merge_distributions(combined, partial)

        minimum = min(minimums) if minimums else None
        maximum = max(maximums) if maximums else None
        return (combined, minimum, maximum, self, total, self)
Пример #12
0
    def python_body(self, depth=1, cmv=None, input_map=False):
        """Writes the subtree as nested python "if" statements.

        Lines are indented `depth` times. For every splitting field whose
        slug is not yet in `cmv`, a check that returns the node's output
        when the value is None is written first. If `input_map` is true,
        the conditions read the values from the `data` dictionary.

        """

        def map_data(field, missing=False):
            """Returns the expression that stands for the field in a
               condition, using the `data` map when `input_map` is set.
            """
            if input_map and missing:
                return "not '%s' in data or data['%s']" % (field, field)
            if input_map:
                return "data['%s']" % field
            return field

        if cmv is None:
            cmv = []

        if not self.children:
            # leaf: only the prediction is returned
            if self.fields[self.objective_field]['optype'] == 'numeric':
                prediction = self.output
            else:
                prediction = repr(self.output)
            return u"%sreturn %s\n" % (INDENT * depth, prediction)

        code = u""
        split_slug = self.fields[split(self.children)]['slug']
        if split_slug not in cmv:
            code += (u"%sif (%s is None):\n" %
                     (INDENT * depth, map_data(split_slug, True)))
            if self.fields[self.objective_field]['optype'] == 'numeric':
                prediction = self.output
            else:
                prediction = repr(self.output)
            code += (u"%sreturn %s\n" %
                     (INDENT * (depth + 1), prediction))
            cmv.append(split_slug)

        for node in self.children:
            field_id = node.predicate.field
            threshold = node.predicate.value
            if self.fields[field_id]['optype'] != 'numeric':
                threshold = repr(threshold)
            code += (u"%sif (%s %s %s):\n" %
                     (INDENT * depth,
                      map_data(self.fields[field_id]['slug'], False),
                      PYTHON_OPERATOR[node.predicate.operator],
                      threshold))
            code += node.python_body(depth + 1, cmv=cmv[:],
                                     input_map=input_map)
        return code
Пример #13
0
    def predict(self, input_data, path=None, missing_strategy=LAST_PREDICTION):
        """Predicts the output for the given field values.

        The input fields must be keyed by Id. `missing_strategy` selects
        what to do when the value of a splitting field is missing:
            0 - LAST_PREDICTION: the prediction of the last node reached
                is returned.
            1 - PROPORTIONAL: both branches of the split are followed and
                the predictions of all the leaves reached are combined.

        Returns the tuple (prediction, path, confidence, distribution,
        instances).
        """

        if path is None:
            path = []

        if missing_strategy == PROPORTIONAL:
            final_distribution = self.predict_proportional(input_data,
                                                           path=path)
            if self.regression:
                # sort elements by their mean
                by_value = sorted(final_distribution.items(),
                                  key=lambda pair: pair[0])
                distribution = merge_bins(
                    [list(pair) for pair in by_value], BINS_LIMIT)
                prediction = mean(distribution)
                total_instances = sum(
                    instances for _, instances in distribution)
                confidence = regression_error(
                    unbiased_sample_variance(distribution, prediction),
                    total_instances)
                return (prediction, path, confidence, distribution,
                        total_instances)

            # categories sorted by decreasing count, then by name
            ranked = sorted(final_distribution.items(),
                            key=lambda pair: (-pair[1], pair[0]))
            distribution = [list(pair) for pair in ranked]
            winner = distribution[0][0]
            return (winner, path,
                    ws_confidence(winner, final_distribution),
                    distribution, get_instances(distribution))

        if self.children and split(self.children) in input_data:
            for child in self.children:
                if child.predicate.apply(input_data, self.fields):
                    path.append(child.predicate.to_rule(self.fields))
                    return child.predict(input_data, path)
        return (self.output, path, self.confidence, self.distribution,
                get_instances(self.distribution))
Пример #14
0
    def predict(self, input_data, path=None, missing_strategy=LAST_PREDICTION):
        """Returns the prediction for a set of field values.

        The input fields must be keyed by Id. When the value of the
        splitting field is missing, one of two strategies is used:
            0 - LAST_PREDICTION: the prediction of the node where the
                value is missing is returned.
            1 - PROPORTIONAL: all the branches that stem from the split
                are followed down to the leaves, and the predictions
                found there are used to build the final one.

        The result is a (prediction, path, confidence, distribution,
        instances) tuple.
        """

        path = [] if path is None else path

        if missing_strategy == PROPORTIONAL:
            merged = self.predict_proportional(input_data, path=path)

            if self.regression:
                # sort elements by their mean
                bins = [list(pair) for pair in
                        sorted(merged.items(), key=lambda pair: pair[0])]
                distribution = merge_bins(bins, BINS_LIMIT)
                prediction = mean(distribution)
                total_instances = sum(count for _, count in distribution)
                error = regression_error(
                    unbiased_sample_variance(distribution, prediction),
                    total_instances)
                return (prediction, path, error,
                        distribution, total_instances)

            # most frequent category first, ties broken by name
            distribution = [list(pair) for pair in
                            sorted(merged.items(),
                                   key=lambda pair: (-pair[1], pair[0]))]
            top_category = distribution[0][0]
            return (top_category, path,
                    ws_confidence(top_category, merged),
                    distribution, get_instances(distribution))

        if self.children and split(self.children) in input_data:
            for node in self.children:
                if node.predicate.apply(input_data, self.fields):
                    path.append(node.predicate.to_rule(self.fields))
                    return node.predict(input_data, path)
        return (self.output, path, self.confidence,
                self.distribution, get_instances(self.distribution))
Пример #15
0
    def predict(self, input_data, path=None):
        """Makes a prediction based on a number of field values.

        The input fields must be keyed by Id. While the splitting field of
        the children has a value in `input_data`, the first child whose
        predicate holds is followed and a "<name> <operator> <value>" rule
        is appended to `path`.

        Returns the tuple (output, path) of the node where the descent
        stops: a node without children, a node whose splitting value is
        missing or a node none of whose children match.

        """
        # a new list is created per call: a `[]` default is shared by all
        # the calls and would keep the rules of previous predictions
        if path is None:
            path = []
        if self.children and split(self.children) in input_data:
            for child in self.children:
                predicate = child.predicate
                # direct call: the `apply` builtin is deprecated and was
                # removed in Python 3
                if OPERATOR[predicate.operator](
                        input_data[predicate.field], predicate.value):
                    path.append("%s %s %s" % (
                                self.fields[predicate.field]['name'],
                                predicate.operator,
                                predicate.value))
                    return child.predict(input_data, path)
        # also reached when no child matches, so that callers always get a
        # tuple instead of an implicit None
        return self.output, path
Пример #16
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False):
        """Adds up the information of all the leaves of the subtree that
           are compatible with the input data.

           While a single branch can be chosen (see `one_branch`) or the
           splitting field has "text" or "items" optype, the first child
           whose predicate applies is followed; its rule is added to
           `path` unless it is already there or a missing value was found
           before. Otherwise all the children are explored and their
           values added.

           Returns the tuple (g_sum, h_sum, count, path).

        """
        if path is None:
            path = []

        if not self.children:
            return (self.g_sum, self.h_sum, self.count, path)

        follow_one = (
            one_branch(self.children, input_data)
            or self.fields[split(self.children)]["optype"]
            in ["text", "items"])

        if not follow_one:
            # missing value found, the unique path stops
            missing_found = True
            g_total = 0.0
            h_total = 0.0
            population = 0
            for child in self.children:
                g_part, h_part, child_count, _ = \
                    child.predict_proportional(input_data, path,
                                               missing_found)
                g_total += g_part
                h_total += h_part
                population += child_count
            return (g_total, h_total, population, path)

        for child in self.children:
            if not child.predicate.apply(input_data, self.fields):
                continue
            rule = child.predicate.to_rule(self.fields)
            if rule not in path and not missing_found:
                path.append(rule)
            return child.predict_proportional(input_data, path,
                                              missing_found)
        # no child predicate applied to the input data
        return None
Пример #17
0
    def predict_proportional(self, input_data, path=None,
                             missing_found=False):
        """Computes the (g_sum, h_sum, count, path) tuple for the input
           data, considering every leaf of the subtree that it can reach.

           A leaf returns its own values. A node where `one_branch` holds,
           or whose splitting field is of "text" or "items" type,
           delegates to the first child whose predicate applies. Any other
           node is handled as a missing value: the values returned by all
           its children are added.

        """
        path = [] if path is None else path

        if not self.children:
            return (self.g_sum, self.h_sum, self.count, path)

        if one_branch(self.children, input_data) or \
                self.fields[split(self.children)]["optype"] in \
                ["text", "items"]:
            for node in self.children:
                if node.predicate.apply(input_data, self.fields):
                    rule = node.predicate.to_rule(self.fields)
                    # rules are not recorded after a missing value
                    if rule not in path and not missing_found:
                        path.append(rule)
                    return node.predict_proportional(input_data, path,
                                                     missing_found)
            return None

        # missing value found, the unique path stops
        missing_found = True
        totals = [0.0, 0.0, 0]
        for node in self.children:
            g_sum, h_sum, count, _ = \
                node.predict_proportional(input_data, path,
                                          missing_found)
            totals[0] += g_sum
            totals[1] += h_sum
            totals[2] += count
        return (totals[0], totals[1], totals[2], path)
Пример #18
0
    def predict(self, input_data, path=None):
        """Predicts the output for the given field values.

        The input fields must be keyed by Id. While the splitting field of
        the children has a value in `input_data`, the first child whose
        predicate applies is followed and its rule is appended to `path`.
        Otherwise the tuple (output, path, confidence, distribution,
        instances) of the current node is returned.

        """
        def get_instances(distribution):
            """Adds up the counts stored in a distribution (0 if empty)

            """
            if not distribution:
                return 0
            return sum(item[1] for item in distribution)

        if path is None:
            path = []

        can_descend = self.children and split(self.children) in input_data
        if can_descend:
            for child in self.children:
                if not child.predicate.apply(input_data, self.fields):
                    continue
                path.append(child.predicate.to_rule(self.fields))
                return child.predict(input_data, path)

        instances = get_instances(self.distribution)
        return (self.output, path, self.confidence,
                self.distribution, instances)
Пример #19
0
    def predict(self, input_data, path=None):
        """Returns the prediction that the tree gives for the field values.

        The input fields must be keyed by Id. The result is the tuple
        (output, path, confidence, distribution, instances) of the node
        reached by following, at each split whose field has a value in
        `input_data`, the first child whose predicate applies. The rules
        of the children followed are accumulated in `path`.

        """
        def get_instances(distribution):
            """Returns the sum of the counts of a distribution, or 0 when
            there is none

            """
            return sum(pair[1] for pair in distribution) if distribution \
                else 0

        path = [] if path is None else path

        if self.children and split(self.children) in input_data:
            for node in self.children:
                predicate = node.predicate
                if predicate.apply(input_data, self.fields):
                    path.append(predicate.to_rule(self.fields))
                    return node.predict(input_data, path)

        # no child to follow: this node's own prediction is the answer
        return (self.output, path, self.confidence, self.distribution,
                get_instances(self.distribution))
Пример #20
0
    def python_body(self,
                    depth=1,
                    cmv=None,
                    input_map=False,
                    ids_path=None,
                    subtree=True):
        """Generates the python "if" statements equivalent to this subtree.

        Returns a tuple: the generated source text and the list of
        (field id, term) pairs used by the text predicates found.

        `depth` is the indentation level of the generated code and `cmv`
        the list of field slugs whose missing-value case has already been
        handled by the code generated so far. When `input_map` is set,
        fields are read from a `data` map instead of plain variables.
        `ids_path` and `subtree` are handed to `filter_nodes` to select
        the children to be translated.

        """
        def data_ref(slug, missing=False):
            """Expression that reads the field value: the slug itself or,
               when `input_map` is set, an access to the `data` map.
            """
            if not input_map:
                return slug
            if missing:
                return "data.get('%s')" % slug
            return "data['%s']" % slug

        def prediction_literal():
            """Node output as it is written in the generated code"""
            if self.fields[self.objective_id]['optype'] == 'numeric':
                return self.output
            return repr(self.output)

        if cmv is None:
            cmv = []
        indent = INDENT * depth
        term_analysis_fields = []
        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)

        # leaf node: the generated code just returns the node's output
        if not children:
            return (u"%sreturn %s\n" % (indent, prediction_literal()),
                    term_analysis_fields)

        chunks = []
        field = split(children)
        has_missing_branch = (missing_branch(children)
                              or none_value(children))
        # the missing case gets its own "is None" check only when none of
        # the children covers it
        if not has_missing_branch:
            split_slug = self.fields[field]['slug']
            if split_slug not in cmv:
                chunks.append(u"%sif (%s is None):\n" %
                              (indent, data_ref(split_slug, True)))
                chunks.append(u"%sreturn %s\n" %
                              (INDENT * (depth + 1), prediction_literal()))
                cmv.append(split_slug)

        for child in children:
            predicate = child.predicate
            field = predicate.field
            slug = self.fields[field]['slug']
            no_value = predicate.value is None

            pre_condition = u""
            if has_missing_branch and not no_value:
                if predicate.missing:
                    negation, connection = u"", u"or"
                else:
                    negation, connection = u" not", u"and"
                pre_condition = (u"%s is%s None %s " %
                                 (data_ref(slug, True), negation, connection))
                if not predicate.missing:
                    cmv.append(slug)

            optype = self.fields[field]['optype']
            if no_value or optype in ('numeric', 'text'):
                value = predicate.value
            else:
                value = repr(predicate.value)

            if optype == 'text':
                term = predicate.term
                unicode_mark = 'u' if isinstance(term, unicode) else ''
                chunks.append(
                    u'%sif (%sterm_matches(%s, "%s", %s"%s") %s %s):\n' %
                    (indent, pre_condition, data_ref(slug, False), slug,
                     unicode_mark, term.replace("\"", "\\\""),
                     PYTHON_OPERATOR[predicate.operator], value))
                term_analysis_fields.append((field, term))
            else:
                if no_value:
                    operator = MISSING_OPERATOR[predicate.operator]
                    cmv.append(slug)
                else:
                    operator = PYTHON_OPERATOR[predicate.operator]
                chunks.append(u"%sif (%s%s %s %s):\n" %
                              (indent, pre_condition, data_ref(slug, False),
                               operator, value))

            # each child works on its own copy of `cmv`
            next_level = child.python_body(depth + 1,
                                           cmv=cmv[:],
                                           input_map=input_map,
                                           ids_path=ids_path,
                                           subtree=subtree)
            chunks.append(next_level[0])
            term_analysis_fields.extend(next_level[1])

        return u"".join(chunks), term_analysis_fields
Пример #21
0
    def tableau_body(self,
                     body=u"",
                     conditions=None,
                     cmv=None,
                     ids_path=None,
                     subtree=True):
        """Translate the model into a set of "if" statements in Tableau syntax

        `body` is the Tableau code generated so far: the rules for this
        subtree are appended to it and the result is returned. `conditions`
        is the list of conditions accumulated on the way to this node and
        `cmv` the list of field names whose missing-value case has already
        been handled. `ids_path` and `subtree` are handed to `filter_nodes`
        to select the children to be translated.

        An empty string is returned as soon as one of this node's children
        has a predicate with a value on a text field.

        """

        if cmv is None:
            cmv = []
        # `conditions` has to be a list even when the caller provides a
        # non-empty `body`; otherwise the appends and joins below would
        # fail on None
        if conditions is None:
            conditions = []
        # the first rule written opens the statement, the rest extend it
        alternate = u"ELSEIF" if body else u"IF"

        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
        if children:
            field = split(children)
            has_missing_branch = (missing_branch(children)
                                  or none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            if (not has_missing_branch
                    and not self.fields[field]['name'] in cmv):
                conditions.append("ISNULL([%s])" % self.fields[field]['name'])
                body += (u"%s %s THEN " %
                         (alternate, " AND ".join(conditions)))
                if self.fields[self.objective_id]['optype'] == 'numeric':
                    value = self.output
                else:
                    value = tableau_string(self.output)
                body += (u"%s\n" % value)
                cmv.append(self.fields[field]['name'])
                # the ISNULL condition only applies to the rule just written
                del conditions[-1]

            for child in children:
                pre_condition = u""
                post_condition = u""
                if has_missing_branch and child.predicate.value is not None:
                    negation = u"" if child.predicate.missing else u"NOT "
                    connection = u"OR" if child.predicate.missing else u"AND"
                    pre_condition = (
                        u"(%sISNULL([%s]) %s " %
                        (negation, self.fields[field]['name'], connection))
                    if not child.predicate.missing:
                        cmv.append(self.fields[field]['name'])
                    post_condition = u")"
                optype = self.fields[child.predicate.field]['optype']
                if child.predicate.value is None:
                    value = ""
                elif optype == 'text':
                    # predicates on text fields are not translated
                    return u""
                elif optype == 'numeric':
                    value = child.predicate.value
                else:
                    value = repr(child.predicate.value)

                operator = ("" if child.predicate.value is None else
                            PYTHON_OPERATOR[child.predicate.operator])
                if child.predicate.value is None:
                    # the whole condition is the missing-value operator
                    # applied to the field
                    pre_condition = (
                        T_MISSING_OPERATOR[child.predicate.operator])
                    post_condition = ")"

                conditions.append(
                    "%s[%s]%s%s%s" %
                    (pre_condition, self.fields[child.predicate.field]['name'],
                     operator, value, post_condition))
                # children receive copies, so their changes stay local
                body = child.tableau_body(body,
                                          conditions[:],
                                          cmv=cmv[:],
                                          ids_path=ids_path,
                                          subtree=subtree)
                del conditions[-1]
        else:
            # leaf node: one rule with all the accumulated conditions
            if self.fields[self.objective_id]['optype'] == 'numeric':
                value = self.output
            else:
                value = tableau_string(self.output)
            body += (u"%s %s THEN" % (alternate, " AND ".join(conditions)))
            body += u" %s\n" % value

        return body
Пример #22
0
    def tableau_body(self, body=u"", conditions=None, cmv=None,
                     ids_path=None, subtree=True):
        """Translate the model into a set of "if" statements in Tableau syntax

        The rules for this subtree are appended to `body` (the Tableau code
        generated so far) and the result is returned. `conditions` is the
        list of conditions accumulated on the way to this node and `cmv`
        the list of field names whose missing-value case has already been
        handled. `ids_path` and `subtree` are handed to `filter_nodes` to
        select the children to be translated.

        An empty string is returned as soon as one of this node's children
        has a predicate with a value on a text field.

        """

        if cmv is None:
            cmv = []
        # a missing `conditions` list is replaced by an empty one in every
        # case: leaving it as None when `body` is not empty would break the
        # appends and joins below
        if conditions is None:
            conditions = []
        # the first rule written opens the statement, the rest extend it
        alternate = u"ELSEIF" if body else u"IF"

        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)
        if children:
            field = split(children)
            has_missing_branch = (missing_branch(children) or
                                  none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            if (not has_missing_branch and
                    not self.fields[field]['name'] in cmv):
                conditions.append("ISNULL([%s])" % self.fields[field]['name'])
                body += (u"%s %s THEN " %
                         (alternate, " AND ".join(conditions)))
                if self.fields[self.objective_id]['optype'] == 'numeric':
                    value = self.output
                else:
                    value = tableau_string(self.output)
                body += (u"%s\n" % value)
                cmv.append(self.fields[field]['name'])
                # the ISNULL condition only applies to the rule just written
                del conditions[-1]

            for child in children:
                pre_condition = u""
                post_condition = u""
                if has_missing_branch and child.predicate.value is not None:
                    negation = u"" if child.predicate.missing else u"NOT "
                    connection = u"OR" if child.predicate.missing else u"AND"
                    pre_condition = (
                        u"(%sISNULL([%s]) %s " % (
                            negation, self.fields[field]['name'], connection))
                    if not child.predicate.missing:
                        cmv.append(self.fields[field]['name'])
                    post_condition = u")"
                optype = self.fields[child.predicate.field]['optype']
                if child.predicate.value is None:
                    value = ""
                elif optype == 'text':
                    # predicates on text fields are not translated
                    return u""
                elif optype == 'numeric':
                    value = child.predicate.value
                else:
                    value = repr(child.predicate.value)

                operator = ("" if child.predicate.value is None else
                            PYTHON_OPERATOR[child.predicate.operator])
                if child.predicate.value is None:
                    # the whole condition is the missing-value operator
                    # applied to the field
                    pre_condition = (
                        T_MISSING_OPERATOR[child.predicate.operator])
                    post_condition = ")"

                conditions.append("%s[%s]%s%s%s" % (
                    pre_condition,
                    self.fields[child.predicate.field]['name'],
                    operator,
                    value,
                    post_condition))
                # children receive copies, so their changes stay local
                body = child.tableau_body(body, conditions[:], cmv=cmv[:],
                                          ids_path=ids_path, subtree=subtree)
                del conditions[-1]
        else:
            # leaf node: one rule with all the accumulated conditions
            if self.fields[self.objective_id]['optype'] == 'numeric':
                value = self.output
            else:
                value = tableau_string(self.output)
            body += (
                u"%s %s THEN" % (alternate, " AND ".join(conditions)))
            body += u" %s\n" % value

        return body
Пример #23
0
    def plug_in_body(self, depth=1, cmv=None, ids_path=None, subtree=True):
        """Builds the javascript "if" statements that reproduce this subtree.

        Returns a tuple: the generated code, the list of term analysis
        fields and the list of item analysis fields collected on the way.

        `depth` is the indentation level of the generated code and `cmv`
        the list handed to the missing-value helpers to keep track of the
        fields already checked. `ids_path` and `subtree` are handed to
        `filter_nodes` to select the children to be translated.

        """
        metric = "error" if self.regression else "confidence"
        if cmv is None:
            cmv = []
        term_analysis_fields = []
        item_analysis_fields = []
        objective = self.fields[self.objective_id]
        # fields are prefixed with `data.` once their number exceeds
        # MAX_ARGS_LENGTH
        prefix = u"data." if len(self.fields) > MAX_ARGS_LENGTH else u""
        indent = INDENT * depth

        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)

        # leaf node: the generated code returns prediction and metric
        if not children:
            leaf_code = u"%sreturn {prediction: %s, %s: %s};\n" % (
                indent,
                value_to_print(self.output, objective['optype']),
                metric,
                self.confidence)
            return leaf_code, term_analysis_fields, item_analysis_fields

        body = u""

        # field used in the split
        field = split(children)
        has_missing_branch = missing_branch(children)

        # a separate missing-value check is written when no child covers
        # the missing case or when the split field is a composed one
        check_missing = (not has_missing_branch or
                         self.fields[field]['optype'] in COMPOSED_FIELDS)
        if check_missing and self.fields[field]['camelCase'] not in cmv:
            body += self.missing_check_code(field, depth, prefix, cmv,
                                            metric)

        for child in children:
            field = child.predicate.field

            # code when missing_splits has been used
            if has_missing_branch and child.predicate.value is not None:
                pre_condition = self.missing_prefix_code(child, field,
                                                         prefix, cmv)
            else:
                pre_condition = u""

            # complete split condition code
            body += child.split_condition_code(
                field, depth, prefix, pre_condition,
                term_analysis_fields, item_analysis_fields)

            # value to be determined in next node; the child works on its
            # own copy of `cmv`
            next_level = child.plug_in_body(depth + 1, cmv=cmv[:],
                                            ids_path=ids_path,
                                            subtree=subtree)
            body += next_level[0]
            body += u"%s}\n" % indent
            term_analysis_fields.extend(next_level[1])
            item_analysis_fields.extend(next_level[2])

        return body, term_analysis_fields, item_analysis_fields
Пример #24
0
    def python_body(self, depth=1, cmv=None, input_map=False,
                    ids_path=None, subtree=True):
        """Writes this subtree as a sequence of python "if" statements.

        The result is a tuple with the generated source text and the list
        of (field id, term) pairs used by the text predicates found.

        `depth` sets the indentation level of the generated code. `cmv`
        lists the slugs of the fields whose missing-value case is already
        handled by the code generated so far. With `input_map` set, fields
        are read from a `data` map instead of plain variables. `ids_path`
        and `subtree` are handed to `filter_nodes` to select the children
        to be translated.

        """

        def subject(slug, missing=False):
            """Code used to read a field: its slug or, when `input_map`
               is set, the corresponding access to the `data` map.
            """
            if not input_map:
                return slug
            template = "data.get('%s')" if missing else "data['%s']"
            return template % slug

        def output_literal():
            """Node output as it is written in the generated code"""
            if self.fields[self.objective_id]['optype'] == 'numeric':
                return self.output
            return repr(self.output)

        if cmv is None:
            cmv = []
        margin = INDENT * depth
        term_analysis_fields = []
        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)

        # leaf node: the generated code just returns the node's output
        if not children:
            leaf_code = u"%sreturn %s\n" % (margin, output_literal())
            return leaf_code, term_analysis_fields

        pieces = []
        field = split(children)
        has_missing_branch = (missing_branch(children) or
                              none_value(children))
        # the missing case gets its own "is None" check only when none of
        # the children covers it
        if not has_missing_branch:
            split_slug = self.fields[field]['slug']
            if split_slug not in cmv:
                pieces.append(u"%sif (%s is None):\n" %
                              (margin, subject(split_slug, True)))
                pieces.append(u"%sreturn %s\n" %
                              (INDENT * (depth + 1), output_literal()))
                cmv.append(split_slug)

        for child in children:
            predicate = child.predicate
            field = predicate.field
            slug = self.fields[field]['slug']
            value_is_missing = predicate.value is None

            pre_condition = u""
            if has_missing_branch and not value_is_missing:
                if predicate.missing:
                    negation, connection = u"", u"or"
                else:
                    negation, connection = u" not", u"and"
                pre_condition = u"%s is%s None %s " % (
                    subject(slug, True), negation, connection)
                if not predicate.missing:
                    cmv.append(slug)

            optype = self.fields[field]['optype']
            if value_is_missing or optype in ('numeric', 'text'):
                value = predicate.value
            else:
                value = repr(predicate.value)

            if optype == 'text':
                term = predicate.term
                marker = 'u' if isinstance(term, unicode) else ''
                pieces.append(
                    u'%sif (%sterm_matches(%s, "%s", %s"%s") %s %s):\n' % (
                        margin, pre_condition, subject(slug, False), slug,
                        marker, term.replace("\"", "\\\""),
                        PYTHON_OPERATOR[predicate.operator], value))
                term_analysis_fields.append((field, term))
            else:
                if value_is_missing:
                    operator = MISSING_OPERATOR[predicate.operator]
                    cmv.append(slug)
                else:
                    operator = PYTHON_OPERATOR[predicate.operator]
                pieces.append(u"%sif (%s%s %s %s):\n" % (
                    margin, pre_condition, subject(slug, False),
                    operator, value))

            # each child works on its own copy of `cmv`
            next_level = child.python_body(depth + 1, cmv=cmv[:],
                                           input_map=input_map,
                                           ids_path=ids_path,
                                           subtree=subtree)
            pieces.append(next_level[0])
            term_analysis_fields.extend(next_level[1])

        return u"".join(pieces), term_analysis_fields
Пример #25
0
    def plug_in_body(self, depth=1, cmv=None, ids_path=None, subtree=True):
        """Writes this subtree as a sequence of javascript "if" statements.

        The result is a tuple with the generated code, the list of term
        analysis fields and the list of item analysis fields collected.

        `depth` sets the indentation level of the generated code. `cmv` is
        the list handed to the missing-value helpers to keep track of the
        fields already checked. `ids_path` and `subtree` are handed to
        `filter_nodes` to select the children to be translated.

        """
        metric = "error" if self.regression else "confidence"
        if cmv is None:
            cmv = []
        term_analysis_fields = []
        item_analysis_fields = []
        objective_field = self.fields[self.objective_id]

        # fields are prefixed with `data.` once their number exceeds
        # MAX_ARGS_LENGTH
        if len(self.fields) > MAX_ARGS_LENGTH:
            prefix = u"data."
        else:
            prefix = u""
        margin = INDENT * depth
        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)

        # leaf node: the generated code returns prediction and metric
        if not children:
            prediction = value_to_print(self.output,
                                        objective_field['optype'])
            code = u"%sreturn {prediction: %s, %s: %s};\n" % (
                margin, prediction, metric, self.confidence)
            return code, term_analysis_fields, item_analysis_fields

        code = u""

        # field used in the split
        field = split(children)
        has_missing_branch = missing_branch(children)

        # a separate missing-value check is written when no child covers
        # the missing case or when the split field is a composed one
        needs_missing_check = (
            not has_missing_branch
            or self.fields[field]['optype'] in COMPOSED_FIELDS)
        if (needs_missing_check
                and self.fields[field]['camelCase'] not in cmv):
            code += self.missing_check_code(field, depth, prefix, cmv,
                                            metric)

        for child in children:
            field = child.predicate.field

            # code when missing_splits has been used
            pre_condition = u""
            if has_missing_branch and child.predicate.value is not None:
                pre_condition = self.missing_prefix_code(
                    child, field, prefix, cmv)

            # complete split condition code
            code += child.split_condition_code(
                field, depth, prefix, pre_condition,
                term_analysis_fields, item_analysis_fields)

            # value to be determined in next node; the child works on its
            # own copy of `cmv`
            next_level = child.plug_in_body(depth + 1,
                                            cmv=cmv[:],
                                            ids_path=ids_path,
                                            subtree=subtree)
            code += next_level[0]
            code += u"%s}\n" % margin
            term_analysis_fields.extend(next_level[1])
            item_analysis_fields.extend(next_level[2])

        return code, term_analysis_fields, item_analysis_fields
Пример #26
0
    def python_body(self,
                    depth=1,
                    cmv=None,
                    input_map=False,
                    ids_path=None,
                    subtree=True):
        """Generates the python "if" statements equivalent to this subtree.

        Returns a tuple: the generated source text and the list of
        (field id, term) pairs used by the text predicates found.

        `depth` is the indentation level of the generated code and `cmv`
        the list of slugs of the split fields that already have an
        "is None" check in the code generated so far. When `input_map` is
        set, fields are read from a `data` map instead of plain variables.
        `ids_path` and `subtree` are handed to `filter_nodes` to select
        the children to be translated.

        """
        def data_ref(slug, missing=False):
            """Expression that reads the field value: the slug itself or,
               when `input_map` is set, an access to the `data` map.
            """
            if not input_map:
                return slug
            if missing:
                return "not '%s' in data or data['%s']" % (slug, slug)
            return "data['%s']" % slug

        def prediction_literal():
            """Node output as it is written in the generated code"""
            if self.fields[self.objective_field]['optype'] == 'numeric':
                return self.output
            return repr(self.output)

        if cmv is None:
            cmv = []
        indent = INDENT * depth
        term_analysis_fields = []
        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)

        # leaf node: the generated code just returns the node's output
        if not children:
            return (u"%sreturn %s\n" % (indent, prediction_literal()),
                    term_analysis_fields)

        chunks = []
        # the node's output is returned when the split field has no value,
        # unless that check was already written for this field
        split_slug = self.fields[split(children)]['slug']
        if split_slug not in cmv:
            chunks.append(u"%sif (%s is None):\n" %
                          (indent, data_ref(split_slug, True)))
            chunks.append(u"%sreturn %s\n" %
                          (INDENT * (depth + 1), prediction_literal()))
            cmv.append(split_slug)

        for child in children:
            predicate = child.predicate
            field_info = self.fields[predicate.field]
            optype = field_info['optype']
            if optype in ('numeric', 'text'):
                value = predicate.value
            else:
                value = repr(predicate.value)

            if optype == 'text':
                term = predicate.term
                unicode_mark = 'u' if isinstance(term, unicode) else ''
                chunks.append(
                    u'%sif (term_matches(%s, "%s", %s"%s") %s %s):\n' %
                    (indent, data_ref(field_info['slug'], False),
                     field_info['slug'], unicode_mark,
                     term.replace("\"", "\\\""),
                     PYTHON_OPERATOR[predicate.operator], value))
                term_analysis_fields.append((predicate.field, term))
            else:
                chunks.append(
                    u"%sif (%s %s %s):\n" %
                    (indent, data_ref(field_info['slug'], False),
                     PYTHON_OPERATOR[predicate.operator], value))

            # each child works on its own copy of `cmv`
            next_level = child.python_body(depth + 1,
                                           cmv=cmv[:],
                                           input_map=input_map,
                                           ids_path=ids_path,
                                           subtree=subtree)
            chunks.append(next_level[0])
            term_analysis_fields.extend(next_level[1])

        return u"".join(chunks), term_analysis_fields
Пример #27
0
    def tableau_body(self,
                     body=u"",
                     conditions=None,
                     cmv=None,
                     ids_path=None,
                     subtree=True):
        """Generates the Tableau "if" statements equivalent to this subtree.

        The rules for this subtree are appended to `body` (the Tableau code
        generated so far) and the result is returned. `conditions` is the
        list of conditions accumulated on the way to this node and `cmv`
        the list of names of the split fields that already have an ISNULL
        rule. `ids_path` and `subtree` are handed to `filter_nodes` to
        select the children to be translated.

        An empty string is returned as soon as one of this node's children
        has a predicate on a text field.

        """
        def prediction_literal():
            """Node output as it is written in the generated code"""
            if self.fields[self.objective_field]['optype'] == 'numeric':
                return self.output
            return tableau_string(self.output)

        if cmv is None:
            cmv = []
        # only a call that receives no conditions list writes an "IF" rule
        if conditions is None:
            alternate, conditions = u"IF", []
        else:
            alternate = u"ELSEIF"

        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)

        # leaf node: one rule with all the accumulated conditions
        if not children:
            rule = u"%s %s THEN" % (alternate, " AND ".join(conditions))
            return body + rule + u" %s\n" % prediction_literal()

        # the node's output is used when the split field has no value,
        # unless that rule was already written for this field
        split_name = self.fields[split(children)]['name']
        if split_name not in cmv:
            conditions.append("ISNULL([%s])" % split_name)
            body += u"%s %s THEN " % (alternate, " AND ".join(conditions))
            body += u"%s\n" % prediction_literal()
            cmv.append(split_name)
            conditions.pop()

        for child in children:
            predicate = child.predicate
            field_info = self.fields[predicate.field]
            optype = field_info['optype']
            # predicates on text fields are not translated
            if optype == 'text':
                return u""
            if optype == 'numeric':
                value = predicate.value
            else:
                value = repr(predicate.value)
            conditions.append(
                "[%s]%s%s" % (field_info['name'],
                              PYTHON_OPERATOR[predicate.operator], value))
            # children receive copies, so their changes stay local
            body = child.tableau_body(body,
                                      conditions[:],
                                      cmv=cmv[:],
                                      ids_path=ids_path,
                                      subtree=subtree)
            conditions.pop()

        return body
Пример #28
0
    def python_body(self, depth=1, cmv=None, input_map=False,
                    ids_path=None, subtree=True):
        """Writes this subtree as a sequence of python "if" statements.

        The result is a tuple with the generated source text and the list
        of (field id, term) pairs used by the text predicates found.

        `depth` sets the indentation level of the generated code. `cmv`
        lists the slugs of the split fields that already have an "is None"
        check in the code generated so far. With `input_map` set, fields
        are read from a `data` map instead of plain variables. `ids_path`
        and `subtree` are handed to `filter_nodes` to select the children
        to be translated.

        """

        def subject(slug, missing=False):
            """Code used to read a field: its slug or, when `input_map`
               is set, the corresponding access to the `data` map.
            """
            if not input_map:
                return slug
            if missing:
                return "not '%s' in data or data['%s']" % (slug, slug)
            return "data['%s']" % slug

        def output_literal():
            """Node output as it is written in the generated code"""
            if self.fields[self.objective_field]['optype'] == 'numeric':
                return self.output
            return repr(self.output)

        if cmv is None:
            cmv = []
        margin = INDENT * depth
        term_analysis_fields = []
        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)

        # leaf node: the generated code just returns the node's output
        if not children:
            leaf_code = u"%sreturn %s\n" % (margin, output_literal())
            return leaf_code, term_analysis_fields

        pieces = []
        # the node's output is returned when the split field has no value,
        # unless that check was already written for this field
        split_slug = self.fields[split(children)]['slug']
        if split_slug not in cmv:
            pieces.append(u"%sif (%s is None):\n" %
                          (margin, subject(split_slug, True)))
            pieces.append(u"%sreturn %s\n" %
                          (INDENT * (depth + 1), output_literal()))
            cmv.append(split_slug)

        for child in children:
            predicate = child.predicate
            child_field = self.fields[predicate.field]
            optype = child_field['optype']
            is_text = optype == 'text'
            if is_text or optype == 'numeric':
                value = predicate.value
            else:
                value = repr(predicate.value)

            if is_text:
                term = predicate.term
                marker = 'u' if isinstance(term, unicode) else ''
                pieces.append(
                    u'%sif (term_matches(%s, "%s", %s"%s") %s %s):\n' % (
                        margin, subject(child_field['slug'], False),
                        child_field['slug'], marker,
                        term.replace("\"", "\\\""),
                        PYTHON_OPERATOR[predicate.operator], value))
                term_analysis_fields.append((predicate.field, term))
            else:
                pieces.append(u"%sif (%s %s %s):\n" % (
                    margin, subject(child_field['slug'], False),
                    PYTHON_OPERATOR[predicate.operator], value))

            # each child works on its own copy of `cmv`
            next_level = child.python_body(depth + 1, cmv=cmv[:],
                                           input_map=input_map,
                                           ids_path=ids_path,
                                           subtree=subtree)
            pieces.append(next_level[0])
            term_analysis_fields.extend(next_level[1])

        return u"".join(pieces), term_analysis_fields