示例#1
0
        def check_name(name):
            """Look up *name* and require it to be a stateful aggregate UDF.

            The name must be registered in ``Parser.udf_functions``, the
            registered object must be a ``StatefulFunc``, and its ``sexpr``
            must contain an aggregate expression.

            :param name: the function name to look up
            :returns: the registered function object
            :raises NoSuchFunctionException: built from the enclosing scope's
                ``lineno``, when any of the three requirements fails
            """
            registry = Parser.udf_functions
            # None stands in for "not registered"; it can never pass the
            # isinstance check below, so it takes the same error path.
            func = registry[name] if name in registry else None

            is_stateful_aggregate = (
                isinstance(func, StatefulFunc)
                and sexpr.expression_contains_aggregate(func.sexpr))
            if not is_stateful_aggregate:
                raise NoSuchFunctionException(lineno)
            return func
示例#2
0
        def check_name(name):
            """Look up *name* and require it to be a stateful aggregate UDF.

            The name must be registered in ``Parser.udf_functions``, the
            registered object must be a ``StatefulFunc``, and its ``sexpr``
            must contain an aggregate expression.

            :param name: the function name to look up
            :returns: the registered function object
            :raises NoSuchFunctionException: built from the enclosing scope's
                ``lineno``, when any of the three requirements fails
            """
            registry = Parser.udf_functions
            # None stands in for "not registered"; it can never pass the
            # isinstance check below, so it takes the same error path.
            func = registry[name] if name in registry else None

            is_stateful_aggregate = (
                isinstance(func, StatefulFunc)
                and sexpr.expression_contains_aggregate(func.sexpr))
            if not is_stateful_aggregate:
                raise NoSuchFunctionException(lineno)
            return func
示例#3
0
    def toRA(self, program):
        """Emit a relational plan for this rule.

        The body terms are linked into a join graph.  Each connected
        component of that graph is planned on its own; edges removed to
        break cycles are re-applied as selections on top of the component's
        join plan.  Component plans are combined with cross products, and
        the head expressions are then resolved against the resulting scheme
        to build either a group-by (head contains an aggregate) or an Apply.

        :param program: the enclosing program.  It is asked whether this
            rule's head is already being compiled and is passed through to
            the leaf/plan construction of the body terms.
        :returns: an ``algebra.State`` placeholder when the head is already
            being compiled; the result of ``raco.myrial.groupby.groupby``
            when the head contains an aggregate; ``self.fixpoint`` (with the
            plan installed as its loop body) when a fixpoint was created
            for this rule; otherwise the Apply plan.
        :raises SyntaxError: if a variable used in the head or in the
            partitioning specification is not in the body plan's scheme.
        """
        if program.compiling(self.head):
            # recursive rule: hand back a State placeholder tied to this
            # rule's (lazily created) fixpoint operator
            if not self.fixpoint:
                self.fixpoint = algebra.Fixpoint()
            return algebra.State(self.head.name, self.fixpoint)

        self.compiling = True

        # get the terms, like A(X,Y,"foo")
        terms = [c for c in self.body if isinstance(c, Term)]

        # get the conditions, like Z=3
        conditions = [c for c in self.body
                      if isinstance(c, expression.BinaryBooleanOperator)]
        first_condition_type = type(conditions[0]) if conditions else None
        LOG.debug("found conditions: %s (type=%s) for program %s",
                  conditions, first_condition_type, program)

        # construct the join graph
        joingraph = nx.Graph()
        for i, term1 in enumerate(terms):
            # store the order for explaining queries later; it is also the
            # sort key used below to pick the cycle-breaking edge
            term1.originalorder = i

            # for each term, add it as a vertex,
            # and for each later term it joins to, add an edge
            joingraph.add_node(term1, term=term1)
            for term2 in terms[i + 1:]:
                LOG.debug("joinsto? %s %s", term1, term2)
                joins = term1.joinsto(term2, conditions)
                if joins:
                    conjunction = reduce(expression.AND, joins)
                    LOG.debug("add edge: %s --[%s]--> %s", term1, conjunction,
                              term2)
                    joingraph.add_edge(term1, term2, condition=conjunction,
                                       terms=(term1, term2))

        # find connected components (some non-determinism in the order here)
        comps = nx.connected_component_subgraphs(joingraph)

        component_plans = []

        # for each component, choose a join order
        for component in comps:
            cycleconditions = []
            # check for cycles
            cycles = nx.cycle_basis(component)
            while cycles:
                LOG.debug("found cycles: %s", cycles)

                # choose an edge to break the cycle; that edge will be a
                # selection condition after the final join.
                # Only pairs of consecutive cycle nodes are real edges, so
                # pick among those.  To keep the choice deterministic, take
                # the edge whose endpoints come latest in the original term
                # order.  When the two latest terms of the cycle are
                # adjacent (always true for a triangle) this is exactly the
                # pair formed by those two terms.
                cycle = cycles[0]
                candidates = [
                    sorted(edge, key=lambda v: v.originalorder)
                    for edge in zip(cycle, cycle[1:] + cycle[:1])]
                oneedge = max(
                    candidates,
                    key=lambda e: (e[1].originalorder, e[0].originalorder))

                data = component.get_edge_data(*oneedge)
                LOG.debug("picked edge: %s, data: %s", oneedge, data)
                cycleconditions.append(data)
                component.remove_edge(*oneedge)
                cycles = nx.cycle_basis(component)

            if len(component) == 1:
                # no joins to plan
                # NOTE(review): indexing nodes() assumes it returns a list
                # (networkx 1.x style API) -- confirm the targeted version
                onlyterm = component.nodes()[0]
                plan = onlyterm.makeLeaf(conditions, program)
            else:
                LOG.debug("component: %s", component)
                # TODO: clean this up.
                # joingraph -> joinsequence -> relational plan
                planner = BFSLeftDeepPlanner(component)

                joinsequence = planner.chooseplan()
                LOG.debug("join sequence: %s", joinsequence)

                # create a relational plan, finally
                # pass in the conditions to make the leaves of the plan
                plan = joinsequence.makePlan(conditions, program)

            LOG.debug("cycleconditions: %s", cycleconditions)
            # A cycle needs several nodes, so cycleconditions is only
            # non-empty when the join-planning branch above ran and
            # joinsequence is bound for this component.
            for condition_info in cycleconditions:
                predicate = condition_info["condition"]
                cycle_terms = condition_info["terms"]

                # change all UnnamedAttributes based on the
                # offset of its Term
                termsToOffset = dict((t, joinsequence.offset(t))
                                     for t in cycle_terms)

                LOG.debug("before add offset %s", predicate)
                predicate.add_offset_by_terms(termsToOffset)
                LOG.debug("after add offset %s", predicate)

                # create selections after each cycle
                plan = algebra.Select(predicate, plan)

            component_plans.append(plan)

        # link the components with a cross product
        # NOTE(review): if the body contains no Term at all there are
        # presumably no components, and this indexing raises IndexError
        plan = component_plans[0]
        for newplan in component_plans[1:]:
            plan = algebra.CrossProduct(plan, newplan)

        try:
            scheme = plan.scheme()
        except AttributeError:
            # the plan exposes no scheme(); derive one from the head instead
            scheme = Scheme([make_attr(i, r, self.head.name) for i, r in enumerate(self.head.valuerefs)])  # noqa

        # Helper function for the next two steps (TODO: move this to a method?)
        def findvar(variable):
            """Return a positional attribute reference for *variable*.

            Raises SyntaxError when the variable is not in the body scheme.
            """
            var = variable.var
            if var not in scheme:
                msg = "Head variable %s does not appear in rule body: %s" % (var, self)  # noqa
                raise SyntaxError(msg)
            return expression.UnnamedAttributeRef(scheme.getPosition(var))

        class FindVarExpressionVisitor(SimpleExpressionVisitor):
            """Rebuild an expression with variables replaced by attribute refs.

            Operands are taken from a stack, so this presumably relies on
            accept() visiting children before their parent -- TODO confirm.
            """

            def __init__(self):
                self.stack = []

            def getresult(self):
                assert len(self.stack) == 1
                return self.stack.pop()

            def visit_unary(self, unaryexpr):
                inputexpr = self.stack.pop()
                self.stack.append(unaryexpr.__class__(inputexpr))

            def visit_binary(self, binaryexpr):
                # operands were pushed left then right, so pop in reverse
                right = self.stack.pop()
                left = self.stack.pop()
                self.stack.append(binaryexpr.__class__(left, right))

            def visit_zeroary(self, zeroaryexpr):
                self.stack.append(zeroaryexpr.__class__())

            def visit_literal(self, literalexpr):
                self.stack.append(literalexpr.__class__(literalexpr.value))

            def visit_nary(self, naryexpr):
                raise NotImplementedError(
                    "TODO: implement findvar visit of nary expression")

            def visit_attr(self, attr):
                assert False, \
                    "FindVar should not be used on expressions with attributes"

            def visit_Case(self, caseExpr):
                raise NotImplementedError("Case now implemented for Datalog?")

            def visit_Var(self, var):
                asAttr = findvar(var)
                self.stack.append(asAttr)

            # TODO: add the other aggregates
            # TODO and move aggregates to expression-visitor
            def visit_SUM(self, x):
                self.visit_unary(x)

            def visit_COUNT(self, x):
                self.visit_unary(x)

        # if this Rule includes a server specification, add a partition
        # operator
        if self.isParallel():
            if isinstance(self.head.serverspec, Broadcast):
                plan = algebra.Broadcast(plan)
            if isinstance(self.head.serverspec, PartitionBy):
                positions = [findvar(v)
                             for v in self.head.serverspec.variables]
                plan = algebra.PartitionBy(positions, plan)

        def toAttrRef(e):
            """Resolve the variable references in head expression *e*.

            The expression is walked with a FindVarExpressionVisitor and
            the rebuilt expression is returned: variables become positional
            attribute references, and SUM/COUNT are rebuilt around their
            resolved input.
            """
            LOG.debug("find reference for %s", e)
            visitor = FindVarExpressionVisitor()
            e.accept(visitor)
            return visitor.getresult()

        columnlist = [toAttrRef(v) for v in self.head.valuerefs]
        LOG.debug("columnlist for Project (or group by) is %s", columnlist)

        # If any of the expressions in the head are aggregate expression,
        # construct a group by
        if any(expression.expression_contains_aggregate(v)
               for v in self.head.valuerefs):
            emit_clause = [(None, a_or_g) for a_or_g in columnlist]
            grouped = raco.myrial.groupby.groupby(plan, emit_clause, [])
            # This branch returns early, so clear the in-progress flag here
            # as the normal exit path does; otherwise the rule would stay
            # marked as being compiled after it has finished.
            # NOTE(review): a pending self.fixpoint is not wrapped around
            # the plan on this path -- confirm whether recursive rules with
            # aggregates are expected to reach it.
            self.compiling = False
            return grouped
        elif any(not isinstance(e, Var) for e in self.head.valuerefs):
            # If complex expressions in head, then precede Project with Apply
            # NOTE: should Apply actually just append emitters to schema
            # instead of doing column select?
            # we decided probably not in
            # https://github.com/uwescience/raco/pull/209
            plan = algebra.Apply([(None, e) for e in columnlist], plan)
        else:
            # otherwise every head expression is a plain variable; the
            # projection is still expressed as an Apply
            plan = algebra.Apply(emitters=[(None, c) for c in columnlist],
                                 input=plan)

        # If we found a cycle, the "root" of the plan is the fixpoint operator
        if self.fixpoint:
            self.fixpoint.loopBody(plan)
            plan = self.fixpoint
            self.fixpoint = None

        self.compiling = False

        return plan
示例#4
0
    def toRA(self, program):
        """Emit a relational plan for this rule.

        The body terms are linked into a join graph.  Each connected
        component of that graph is planned on its own; edges removed to
        break cycles are re-applied as selections on top of the component's
        join plan.  Component plans are combined with cross products, and
        the head expressions are then resolved against the resulting scheme
        to build either a group-by (head contains an aggregate) or an Apply.

        :param program: the enclosing program.  It is asked whether this
            rule's head is already being compiled and is passed through to
            the leaf/plan construction of the body terms.
        :returns: an ``algebra.State`` placeholder when the head is already
            being compiled; the result of ``raco.myrial.groupby.groupby``
            when the head contains an aggregate; ``self.fixpoint`` (with the
            plan installed as its loop body) when a fixpoint was created
            for this rule; otherwise the Apply plan.
        :raises SyntaxError: if a variable used in the head or in the
            partitioning specification is not in the body plan's scheme.
        """
        if program.compiling(self.head):
            # recursive rule: hand back a State placeholder tied to this
            # rule's (lazily created) fixpoint operator
            if not self.fixpoint:
                self.fixpoint = algebra.Fixpoint()
            return algebra.State(self.head.name, self.fixpoint)

        self.compiling = True

        # get the terms, like A(X,Y,"foo")
        terms = [c for c in self.body if isinstance(c, Term)]

        # get the conditions, like Z=3
        conditions = [c for c in self.body
                      if isinstance(c, expression.BinaryBooleanOperator)]
        first_condition_type = type(conditions[0]) if conditions else None
        LOG.debug("found conditions: %s (type=%s) for program %s",
                  conditions, first_condition_type, program)

        # construct the join graph
        joingraph = nx.Graph()
        for i, term1 in enumerate(terms):
            # store the order for explaining queries later; it is also the
            # sort key used below to pick the cycle-breaking edge
            term1.originalorder = i

            # for each term, add it as a vertex,
            # and for each later term it joins to, add an edge
            joingraph.add_node(term1, term=term1)
            for term2 in terms[i + 1:]:
                LOG.debug("joinsto? %s %s", term1, term2)
                joins = term1.joinsto(term2, conditions)
                if joins:
                    conjunction = reduce(expression.AND, joins)
                    LOG.debug("add edge: %s --[%s]--> %s", term1, conjunction,
                              term2)
                    joingraph.add_edge(term1, term2, condition=conjunction,
                                       terms=(term1, term2))

        # find connected components (some non-determinism in the order here)
        comps = nx.connected_component_subgraphs(joingraph)

        component_plans = []

        # for each component, choose a join order
        for component in comps:
            cycleconditions = []
            # check for cycles
            cycles = nx.cycle_basis(component)
            while cycles:
                LOG.debug("found cycles: %s", cycles)

                # choose an edge to break the cycle; that edge will be a
                # selection condition after the final join.
                # Only pairs of consecutive cycle nodes are real edges, so
                # pick among those.  To keep the choice deterministic, take
                # the edge whose endpoints come latest in the original term
                # order.  When the two latest terms of the cycle are
                # adjacent (always true for a triangle) this is exactly the
                # pair formed by those two terms.
                cycle = cycles[0]
                candidates = [
                    sorted(edge, key=lambda v: v.originalorder)
                    for edge in zip(cycle, cycle[1:] + cycle[:1])]
                oneedge = max(
                    candidates,
                    key=lambda e: (e[1].originalorder, e[0].originalorder))

                data = component.get_edge_data(*oneedge)
                LOG.debug("picked edge: %s, data: %s", oneedge, data)
                cycleconditions.append(data)
                component.remove_edge(*oneedge)
                cycles = nx.cycle_basis(component)

            if len(component) == 1:
                # no joins to plan
                # NOTE(review): indexing nodes() assumes it returns a list
                # (networkx 1.x style API) -- confirm the targeted version
                onlyterm = component.nodes()[0]
                plan = onlyterm.makeLeaf(conditions, program)
            else:
                LOG.debug("component: %s", component)
                # TODO: clean this up.
                # joingraph -> joinsequence -> relational plan
                planner = BFSLeftDeepPlanner(component)

                joinsequence = planner.chooseplan()
                LOG.debug("join sequence: %s", joinsequence)

                # create a relational plan, finally
                # pass in the conditions to make the leaves of the plan
                plan = joinsequence.makePlan(conditions, program)

            LOG.debug("cycleconditions: %s", cycleconditions)
            # A cycle needs several nodes, so cycleconditions is only
            # non-empty when the join-planning branch above ran and
            # joinsequence is bound for this component.
            for condition_info in cycleconditions:
                predicate = condition_info["condition"]
                cycle_terms = condition_info["terms"]

                # change all UnnamedAttributes based on the
                # offset of its Term
                termsToOffset = dict((t, joinsequence.offset(t))
                                     for t in cycle_terms)

                LOG.debug("before add offset %s", predicate)
                predicate.add_offset_by_terms(termsToOffset)
                LOG.debug("after add offset %s", predicate)

                # create selections after each cycle
                plan = algebra.Select(predicate, plan)

            component_plans.append(plan)

        # link the components with a cross product
        # NOTE(review): if the body contains no Term at all there are
        # presumably no components, and this indexing raises IndexError
        plan = component_plans[0]
        for newplan in component_plans[1:]:
            plan = algebra.CrossProduct(plan, newplan)

        try:
            scheme = plan.scheme()
        except AttributeError:
            # the plan exposes no scheme(); derive one from the head instead
            scheme = Scheme([make_attr(i, r, self.head.name) for i, r in enumerate(self.head.valuerefs)])  # noqa

        # Helper function for the next two steps (TODO: move this to a method?)
        def findvar(variable):
            """Return a positional attribute reference for *variable*.

            Raises SyntaxError when the variable is not in the body scheme.
            """
            var = variable.var
            if var not in scheme:
                msg = "Head variable %s does not appear in rule body: %s" % (var, self)  # noqa
                raise SyntaxError(msg)
            return expression.UnnamedAttributeRef(scheme.getPosition(var))

        class FindVarExpressionVisitor(SimpleExpressionVisitor):
            """Rebuild an expression with variables replaced by attribute refs.

            Operands are taken from a stack, so this presumably relies on
            accept() visiting children before their parent -- TODO confirm.
            """

            def __init__(self):
                self.stack = []

            def getresult(self):
                assert len(self.stack) == 1
                return self.stack.pop()

            def visit_unary(self, unaryexpr):
                inputexpr = self.stack.pop()
                self.stack.append(unaryexpr.__class__(inputexpr))

            def visit_binary(self, binaryexpr):
                # operands were pushed left then right, so pop in reverse
                right = self.stack.pop()
                left = self.stack.pop()
                self.stack.append(binaryexpr.__class__(left, right))

            def visit_zeroary(self, zeroaryexpr):
                self.stack.append(zeroaryexpr.__class__())

            def visit_literal(self, literalexpr):
                self.stack.append(literalexpr.__class__(literalexpr.value))

            def visit_nary(self, naryexpr):
                raise NotImplementedError(
                    "TODO: implement findvar visit of nary expression")

            def visit_attr(self, attr):
                assert False, \
                    "FindVar should not be used on expressions with attributes"

            def visit_Case(self, caseExpr):
                raise NotImplementedError("Case now implemented for Datalog?")

            def visit_Var(self, var):
                asAttr = findvar(var)
                self.stack.append(asAttr)

            # TODO: add the other aggregates
            # TODO and move aggregates to expression-visitor
            def visit_SUM(self, x):
                self.visit_unary(x)

            def visit_COUNT(self, x):
                self.visit_unary(x)

        # if this Rule includes a server specification, add a partition
        # operator
        if self.isParallel():
            if isinstance(self.head.serverspec, Broadcast):
                plan = algebra.Broadcast(plan)
            if isinstance(self.head.serverspec, PartitionBy):
                positions = [findvar(v)
                             for v in self.head.serverspec.variables]
                plan = algebra.PartitionBy(positions, plan)

        def toAttrRef(e):
            """Resolve the variable references in head expression *e*.

            The expression is walked with a FindVarExpressionVisitor and
            the rebuilt expression is returned: variables become positional
            attribute references, and SUM/COUNT are rebuilt around their
            resolved input.
            """
            LOG.debug("find reference for %s", e)
            visitor = FindVarExpressionVisitor()
            e.accept(visitor)
            return visitor.getresult()

        columnlist = [toAttrRef(v) for v in self.head.valuerefs]
        LOG.debug("columnlist for Project (or group by) is %s", columnlist)

        # If any of the expressions in the head are aggregate expression,
        # construct a group by
        if any(expression.expression_contains_aggregate(v)
               for v in self.head.valuerefs):
            emit_clause = [(None, a_or_g) for a_or_g in columnlist]
            grouped = raco.myrial.groupby.groupby(plan, emit_clause, [])
            # This branch returns early, so clear the in-progress flag here
            # as the normal exit path does; otherwise the rule would stay
            # marked as being compiled after it has finished.
            # NOTE(review): a pending self.fixpoint is not wrapped around
            # the plan on this path -- confirm whether recursive rules with
            # aggregates are expected to reach it.
            self.compiling = False
            return grouped
        elif any(not isinstance(e, Var) for e in self.head.valuerefs):
            # If complex expressions in head, then precede Project with Apply
            # NOTE: should Apply actually just append emitters to schema
            # instead of doing column select?
            # we decided probably not in
            # https://github.com/uwescience/raco/pull/209
            plan = algebra.Apply([(None, e) for e in columnlist], plan)
        else:
            # otherwise every head expression is a plain variable; the
            # projection is still expressed as an Apply
            plan = algebra.Apply(emitters=[(None, c) for c in columnlist],
                                 input=plan)

        # If we found a cycle, the "root" of the plan is the fixpoint operator
        if self.fixpoint:
            self.fixpoint.loopBody(plan)
            plan = self.fixpoint
            self.fixpoint = None

        self.compiling = False

        return plan