def check_name(name):
    """Return the registered stateful aggregate UDF called ``name``.

    The function is looked up in ``Parser.udf_functions``.  It is accepted
    only if it is a ``StatefulFunc`` whose ``sexpr`` contains an aggregate
    (as reported by ``sexpr.expression_contains_aggregate``).

    Raises ``NoSuchFunctionException(lineno)`` when the name is unknown or
    the registered function fails either check.  ``lineno`` is not a
    parameter; it is resolved from the surrounding scope.
    """
    registry = Parser.udf_functions
    if name not in registry:
        raise NoSuchFunctionException(lineno)

    candidate = registry[name]
    # Short-circuit: the sexpr is only inspected for stateful functions.
    acceptable = (
        isinstance(candidate, StatefulFunc)
        and sexpr.expression_contains_aggregate(candidate.sexpr)
    )
    if not acceptable:
        raise NoSuchFunctionException(lineno)
    return candidate
def toRA(self, program):
    """Emit a relational plan for this rule.

    Steps, in order:

    1. If ``program.compiling(self.head)`` is true the rule is recursive:
       lazily create ``self.fixpoint`` and return an ``algebra.State``
       that refers to it.
    2. Otherwise build a join graph over the ``Term`` objects in the rule
       body (one edge per pair of terms that ``joinsto`` each other).
    3. For every connected component, break cycles by removing edges (the
       removed join conditions become ``Select`` operators on top of the
       component plan) and plan the remaining joins with
       ``BFSLeftDeepPlanner``.
    4. Cross-product the component plans, add the optional
       broadcast/partition operator, and finish with a group-by (when the
       head contains an aggregate) or an ``Apply``.
    5. If a fixpoint was created while compiling, make it the plan root.

    :param program: the enclosing program; used for ``compiling(...)``
        and passed through to ``makeLeaf`` / ``makePlan``.
    :returns: the root operator of the plan.
    :raises SyntaxError: if a head variable does not appear in the body.
    :raises NotImplementedError: for n-ary and Case head expressions.
    """
    if program.compiling(self.head):
        # recursive rule
        if not self.fixpoint:
            self.fixpoint = algebra.Fixpoint()
        state = algebra.State(self.head.name, self.fixpoint)
        return state

    self.compiling = True

    # get the terms, like A(X,Y,"foo")
    terms = [c for c in self.body if isinstance(c, Term)]

    # get the conditions, like Z=3
    conditions = [c for c in self.body
                  if isinstance(c, expression.BinaryBooleanOperator)]
    condition_type = type(conditions[0]) if conditions else None
    LOG.debug("found conditions: %s (type=%s) for program %s",
              conditions, condition_type, program)

    # construct the join graph
    joingraph = nx.Graph()
    for i, term1 in enumerate(terms):
        # store the order for explaining queries later -- not strictly
        # necessary
        term1.originalorder = i

        # for each term, add it as a vertex,
        # and for each term it joins to, add an edge
        joingraph.add_node(term1, term=term1)
        for term2 in terms[i + 1:]:
            LOG.debug("joinsto? %s %s", term1, term2)
            joins = term1.joinsto(term2, conditions)
            if joins:
                conjunction = reduce(expression.AND, joins)
                LOG.debug("add edge: %s --[%s]--> %s",
                          term1, conjunction, term2)
                joingraph.add_edge(term1, term2, condition=conjunction,
                                   terms=(term1, term2))

    # find connected components (some non-determinism in the order here)
    comps = nx.connected_component_subgraphs(joingraph)

    component_plans = []

    # for each component, choose a join order
    for component in comps:
        cycleconditions = []
        # check for cycles
        cycles = nx.cycle_basis(component)
        while cycles:
            LOG.debug("found cycles: %s", cycles)

            # choose an edge to break the cycle
            # that edge will be a selection condition after the final join
            #
            # To keep the choice deterministic, prefer the nodes with the
            # highest original order.  The two highest-ordered nodes of a
            # cycle are not necessarily adjacent (a cycle of four or more
            # terms), so only pairs that really share an edge qualify;
            # otherwise there would be no edge data to record and
            # remove_edge would fail.  When the two highest-ordered nodes
            # are adjacent this selects exactly that pair.
            ordered = sorted(cycles[0], key=lambda v: v.originalorder)
            candidates = [[low, high]
                          for k, high in enumerate(ordered)
                          for low in ordered[:k]
                          if component.has_edge(low, high)]
            oneedge = candidates[-1]

            data = component.get_edge_data(*oneedge)
            LOG.debug("picked edge: %s, data: %s", oneedge, data)
            cycleconditions.append(data)
            component.remove_edge(*oneedge)
            cycles = nx.cycle_basis(component)

        if len(component) == 1:
            # no joins to plan
            onlyterm = next(iter(component))
            plan = onlyterm.makeLeaf(conditions, program)
        else:
            LOG.debug("component: %s", component)
            # TODO: clean this up.
            # joingraph -> joinsequence -> relational plan
            planner = BFSLeftDeepPlanner(component)

            joinsequence = planner.chooseplan()
            LOG.debug("join sequence: %s", joinsequence)

            # create a relational plan, finally
            # pass in the conditions to make the leaves of the plan
            plan = joinsequence.makePlan(conditions, program)

        LOG.debug("cycleconditions: %s", cycleconditions)
        # cycleconditions is only non-empty for multi-term components, so
        # joinsequence is always bound when this loop body runs.
        for condition_info in cycleconditions:
            predicate = condition_info["condition"]
            cycle_terms = condition_info["terms"]

            # change all UnnamedAttributes based on the
            # offset of its Term
            termsToOffset = dict((t, joinsequence.offset(t))
                                 for t in cycle_terms)

            LOG.debug("before add offset %s", predicate)
            predicate.add_offset_by_terms(termsToOffset)
            LOG.debug("after add offset %s", predicate)

            # create selections after each cycle
            plan = algebra.Select(predicate, plan)

        component_plans.append(plan)

    # link the components with a cross product
    plan = component_plans[0]
    for newplan in component_plans[1:]:
        plan = algebra.CrossProduct(plan, newplan)

    try:
        scheme = plan.scheme()
    except AttributeError:
        scheme = Scheme([make_attr(i, r, self.head.name)
                         for i, r in enumerate(self.head.valuerefs)])

    # Helper function for the next two steps (TODO: move this to a method?)
    def findvar(variable):
        """Map a head variable to its position in the body scheme."""
        var = variable.var
        if var not in scheme:
            msg = "Head variable %s does not appear in rule body: %s" % (var, self)  # noqa
            raise SyntaxError(msg)
        return expression.UnnamedAttributeRef(scheme.getPosition(var))

    class FindVarExpressionVisitor(SimpleExpressionVisitor):
        """Rebuild an expression bottom-up, replacing each Var with the
        attribute reference returned by ``findvar``."""

        def __init__(self):
            self.stack = []

        def getresult(self):
            assert len(self.stack) == 1
            return self.stack.pop()

        def visit_unary(self, unaryexpr):
            inputexpr = self.stack.pop()
            self.stack.append(unaryexpr.__class__(inputexpr))

        def visit_binary(self, binaryexpr):
            # operands were pushed left first, so pop right first
            right = self.stack.pop()
            left = self.stack.pop()
            self.stack.append(binaryexpr.__class__(left, right))

        def visit_zeroary(self, zeroaryexpr):
            self.stack.append(zeroaryexpr.__class__())

        def visit_literal(self, literalexpr):
            self.stack.append(literalexpr.__class__(literalexpr.value))

        def visit_nary(self, naryexpr):
            raise NotImplementedError(
                "TODO: implement findvar visit of nary expression")

        def visit_attr(self, attr):
            # explicit raise: a bare `assert False` disappears under -O
            raise AssertionError(
                "FindVar should not be used on expressions with attributes")

        def visit_Case(self, caseExpr):
            raise NotImplementedError("Case not implemented for Datalog")

        def visit_Var(self, var):
            asAttr = findvar(var)
            self.stack.append(asAttr)

        # TODO: add the other aggregates
        # TODO and move aggregates to expression-visitor
        def visit_SUM(self, x):
            self.visit_unary(x)

        def visit_COUNT(self, x):
            self.visit_unary(x)

    # if this Rule includes a server specification, add a partition
    # operator
    if self.isParallel():
        if isinstance(self.head.serverspec, Broadcast):
            plan = algebra.Broadcast(plan)
        if isinstance(self.head.serverspec, PartitionBy):
            positions = [findvar(v)
                         for v in self.head.serverspec.variables]
            plan = algebra.PartitionBy(positions, plan)

    def toAttrRef(e):
        """
        Resolve variable references in the head; pass through aggregate
        expressions.

        Returns the rewritten expression produced by
        FindVarExpressionVisitor.
        """
        LOG.debug("find reference for %s", e)
        visitor = FindVarExpressionVisitor()
        e.accept(visitor)
        return visitor.getresult()

    columnlist = [toAttrRef(v) for v in self.head.valuerefs]
    LOG.debug("columnlist for Project (or group by) is %s", columnlist)

    # If any of the expressions in the head are aggregate expression,
    # construct a group by
    if any(expression.expression_contains_aggregate(v)
           for v in self.head.valuerefs):
        emit_clause = [(None, a_or_g) for a_or_g in columnlist]
        grouped = raco.myrial.groupby.groupby(plan, emit_clause, [])
        # Clear the in-progress flag on this exit path too, as the normal
        # exit below does.
        # NOTE(review): self.fixpoint is deliberately left untouched here,
        # as before -- confirm whether recursive aggregate rules should
        # also be wrapped in the fixpoint.
        self.compiling = False
        return grouped
    elif any(not isinstance(e, Var) for e in self.head.valuerefs):
        # If complex expressions in head, then precede Project with Apply
        # NOTE: should Apply actually just append emitters to schema
        # instead of doing column select?
        # we decided probably not in
        # https://github.com/uwescience/raco/pull/209
        plan = algebra.Apply([(None, e) for e in columnlist], plan)
    else:
        # otherwise, just build a Project
        plan = algebra.Apply(emitters=[(None, c) for c in columnlist],
                             input=plan)

    # If we found a cycle, the "root" of the plan is the fixpoint operator
    if self.fixpoint:
        self.fixpoint.loopBody(plan)
        plan = self.fixpoint
        self.fixpoint = None

    self.compiling = False

    return plan
def toRA(self, program):
    """Emit a relational plan for this rule.

    Steps, in order:

    1. If ``program.compiling(self.head)`` is true the rule is recursive:
       lazily create ``self.fixpoint`` and return an ``algebra.State``
       that refers to it.
    2. Otherwise build a join graph over the ``Term`` objects in the rule
       body (one edge per pair of terms that ``joinsto`` each other).
    3. For every connected component, break cycles by removing edges (the
       removed join conditions become ``Select`` operators on top of the
       component plan) and plan the remaining joins with
       ``BFSLeftDeepPlanner``.
    4. Cross-product the component plans, add the optional
       broadcast/partition operator, and finish with a group-by (when the
       head contains an aggregate) or an ``Apply``.
    5. If a fixpoint was created while compiling, make it the plan root.

    :param program: the enclosing program; used for ``compiling(...)``
        and passed through to ``makeLeaf`` / ``makePlan``.
    :returns: the root operator of the plan.
    :raises SyntaxError: if a head variable does not appear in the body.
    :raises NotImplementedError: for n-ary and Case head expressions.
    """
    if program.compiling(self.head):
        # recursive rule
        if not self.fixpoint:
            self.fixpoint = algebra.Fixpoint()
        state = algebra.State(self.head.name, self.fixpoint)
        return state

    self.compiling = True

    # get the terms, like A(X,Y,"foo")
    terms = [c for c in self.body if isinstance(c, Term)]

    # get the conditions, like Z=3
    conditions = [c for c in self.body
                  if isinstance(c, expression.BinaryBooleanOperator)]
    condition_type = type(conditions[0]) if conditions else None
    LOG.debug("found conditions: %s (type=%s) for program %s",
              conditions, condition_type, program)

    # construct the join graph
    joingraph = nx.Graph()
    for i, term1 in enumerate(terms):
        # store the order for explaining queries later -- not strictly
        # necessary
        term1.originalorder = i

        # for each term, add it as a vertex,
        # and for each term it joins to, add an edge
        joingraph.add_node(term1, term=term1)
        for term2 in terms[i + 1:]:
            LOG.debug("joinsto? %s %s", term1, term2)
            joins = term1.joinsto(term2, conditions)
            if joins:
                conjunction = reduce(expression.AND, joins)
                LOG.debug("add edge: %s --[%s]--> %s",
                          term1, conjunction, term2)
                joingraph.add_edge(term1, term2, condition=conjunction,
                                   terms=(term1, term2))

    # find connected components (some non-determinism in the order here)
    comps = nx.connected_component_subgraphs(joingraph)

    component_plans = []

    # for each component, choose a join order
    for component in comps:
        cycleconditions = []
        # check for cycles
        cycles = nx.cycle_basis(component)
        while cycles:
            LOG.debug("found cycles: %s", cycles)

            # choose an edge to break the cycle
            # that edge will be a selection condition after the final join
            #
            # To keep the choice deterministic, prefer the nodes with the
            # highest original order.  The two highest-ordered nodes of a
            # cycle are not necessarily adjacent (a cycle of four or more
            # terms), so only pairs that really share an edge qualify;
            # otherwise there would be no edge data to record and
            # remove_edge would fail.  When the two highest-ordered nodes
            # are adjacent this selects exactly that pair.
            ordered = sorted(cycles[0], key=lambda v: v.originalorder)
            candidates = [[low, high]
                          for k, high in enumerate(ordered)
                          for low in ordered[:k]
                          if component.has_edge(low, high)]
            oneedge = candidates[-1]

            data = component.get_edge_data(*oneedge)
            LOG.debug("picked edge: %s, data: %s", oneedge, data)
            cycleconditions.append(data)
            component.remove_edge(*oneedge)
            cycles = nx.cycle_basis(component)

        if len(component) == 1:
            # no joins to plan
            onlyterm = next(iter(component))
            plan = onlyterm.makeLeaf(conditions, program)
        else:
            LOG.debug("component: %s", component)
            # TODO: clean this up.
            # joingraph -> joinsequence -> relational plan
            planner = BFSLeftDeepPlanner(component)

            joinsequence = planner.chooseplan()
            LOG.debug("join sequence: %s", joinsequence)

            # create a relational plan, finally
            # pass in the conditions to make the leaves of the plan
            plan = joinsequence.makePlan(conditions, program)

        LOG.debug("cycleconditions: %s", cycleconditions)
        # cycleconditions is only non-empty for multi-term components, so
        # joinsequence is always bound when this loop body runs.
        for condition_info in cycleconditions:
            predicate = condition_info["condition"]
            cycle_terms = condition_info["terms"]

            # change all UnnamedAttributes based on the
            # offset of its Term
            termsToOffset = dict((t, joinsequence.offset(t))
                                 for t in cycle_terms)

            LOG.debug("before add offset %s", predicate)
            predicate.add_offset_by_terms(termsToOffset)
            LOG.debug("after add offset %s", predicate)

            # create selections after each cycle
            plan = algebra.Select(predicate, plan)

        component_plans.append(plan)

    # link the components with a cross product
    plan = component_plans[0]
    for newplan in component_plans[1:]:
        plan = algebra.CrossProduct(plan, newplan)

    try:
        scheme = plan.scheme()
    except AttributeError:
        scheme = Scheme([make_attr(i, r, self.head.name)
                         for i, r in enumerate(self.head.valuerefs)])

    # Helper function for the next two steps (TODO: move this to a method?)
    def findvar(variable):
        """Map a head variable to its position in the body scheme."""
        var = variable.var
        if var not in scheme:
            msg = "Head variable %s does not appear in rule body: %s" % (var, self)  # noqa
            raise SyntaxError(msg)
        return expression.UnnamedAttributeRef(scheme.getPosition(var))

    class FindVarExpressionVisitor(SimpleExpressionVisitor):
        """Rebuild an expression bottom-up, replacing each Var with the
        attribute reference returned by ``findvar``."""

        def __init__(self):
            self.stack = []

        def getresult(self):
            assert len(self.stack) == 1
            return self.stack.pop()

        def visit_unary(self, unaryexpr):
            inputexpr = self.stack.pop()
            self.stack.append(unaryexpr.__class__(inputexpr))

        def visit_binary(self, binaryexpr):
            # operands were pushed left first, so pop right first
            right = self.stack.pop()
            left = self.stack.pop()
            self.stack.append(binaryexpr.__class__(left, right))

        def visit_zeroary(self, zeroaryexpr):
            self.stack.append(zeroaryexpr.__class__())

        def visit_literal(self, literalexpr):
            self.stack.append(literalexpr.__class__(literalexpr.value))

        def visit_nary(self, naryexpr):
            raise NotImplementedError(
                "TODO: implement findvar visit of nary expression")

        def visit_attr(self, attr):
            # explicit raise: a bare `assert False` disappears under -O
            raise AssertionError(
                "FindVar should not be used on expressions with attributes")

        def visit_Case(self, caseExpr):
            raise NotImplementedError("Case not implemented for Datalog")

        def visit_Var(self, var):
            asAttr = findvar(var)
            self.stack.append(asAttr)

        # TODO: add the other aggregates
        # TODO and move aggregates to expression-visitor
        def visit_SUM(self, x):
            self.visit_unary(x)

        def visit_COUNT(self, x):
            self.visit_unary(x)

    # if this Rule includes a server specification, add a partition
    # operator
    if self.isParallel():
        if isinstance(self.head.serverspec, Broadcast):
            plan = algebra.Broadcast(plan)
        if isinstance(self.head.serverspec, PartitionBy):
            positions = [findvar(v)
                         for v in self.head.serverspec.variables]
            plan = algebra.PartitionBy(positions, plan)

    def toAttrRef(e):
        """
        Resolve variable references in the head; pass through aggregate
        expressions.

        Returns the rewritten expression produced by
        FindVarExpressionVisitor.
        """
        LOG.debug("find reference for %s", e)
        visitor = FindVarExpressionVisitor()
        e.accept(visitor)
        return visitor.getresult()

    columnlist = [toAttrRef(v) for v in self.head.valuerefs]
    LOG.debug("columnlist for Project (or group by) is %s", columnlist)

    # If any of the expressions in the head are aggregate expression,
    # construct a group by
    if any(expression.expression_contains_aggregate(v)
           for v in self.head.valuerefs):
        emit_clause = [(None, a_or_g) for a_or_g in columnlist]
        grouped = raco.myrial.groupby.groupby(plan, emit_clause, [])
        # Clear the in-progress flag on this exit path too, as the normal
        # exit below does.
        # NOTE(review): self.fixpoint is deliberately left untouched here,
        # as before -- confirm whether recursive aggregate rules should
        # also be wrapped in the fixpoint.
        self.compiling = False
        return grouped
    elif any(not isinstance(e, Var) for e in self.head.valuerefs):
        # If complex expressions in head, then precede Project with Apply
        # NOTE: should Apply actually just append emitters to schema
        # instead of doing column select?
        # we decided probably not in
        # https://github.com/uwescience/raco/pull/209
        plan = algebra.Apply([(None, e) for e in columnlist], plan)
    else:
        # otherwise, just build a Project
        plan = algebra.Apply(emitters=[(None, c) for c in columnlist],
                             input=plan)

    # If we found a cycle, the "root" of the plan is the fixpoint operator
    if self.fixpoint:
        self.fixpoint.loopBody(plan)
        plan = self.fixpoint
        self.fixpoint = None

    self.compiling = False

    return plan