def union_subplans(self, plans):
    """Place an Xunion operator on top of ``plans`` and register it.

    Args:
        plans: Sized collection of sub-plans. Each one must expose
            ``variables``, ``height`` and ``total_res``.

    Returns:
        The TreePlan rooted at the new Xunion operator. Its variables are
        the union of the sub-plan variables, its height is one more than
        the tallest sub-plan and its ``total_res`` is the sum of the
        sub-plan results.
    """
    op_id = self.id_operator
    op_bit = 2 ** op_id

    union_op = Xunion(op_id, self.eddies, inputs=len(plans))
    self.operators.append(union_op)

    # Aggregate the metadata of all inputs.
    merged_vars = set()
    for sub_plan in plans:
        merged_vars.update(sub_plan.variables)
    depth = max([0] + [sub_plan.height for sub_plan in plans]) + 1
    result_count = sum(sub_plan.total_res for sub_plan in plans)

    # The plan is created with every source known to the planner
    # (self.sources), not only the ones reachable through ``plans``.
    tree_plan = TreePlan(union_op, merged_vars, None, self.sources, plans,
                         None, depth, result_count)

    # Update signature of tuples.
    self.operators_sym[op_id] = False
    inputs_desc = {}
    self.operators_desc[op_id] = inputs_desc
    for src in tree_plan.sources:
        inputs_desc[src] = 0
        self.eofs_desc[src] |= op_bit
        self.source_by_operator[src] |= op_bit

    self.plan_order[op_id] = tree_plan.height
    self.operators_vars[op_id] = tree_plan.vars

    self.id_operator += 1
    return tree_plan
def create_triple_pattern_plan(self, triple_pattern):
    """Build a plan for a single triple pattern.

    The pattern is wrapped in an IndependentOperator leaf, which is placed
    under a one-input Xunion operator. The new source and operator are
    recorded in the planner's bookkeeping dictionaries and the source,
    independent-source and operator counters are advanced.

    Args:
        triple_pattern: Pattern exposing ``variables`` and ``cardinality``.

    Returns:
        A TreePlan of height 1 whose only child is the new leaf.
    """
    src = self.source_id
    op_id = self.id_operator
    op_bit = 2 ** op_id

    # Register the source before the leaf is built: the leaf is handed
    # self.source_by_operator at construction time.
    self.eofs_desc[src] = 0
    self.sources[src] = triple_pattern.variables
    self.source_by_operator[src] = op_bit
    self.operators_desc.setdefault(op_id, {})[src] = 0

    leaf = IndependentOperator(src,
                               self.source,
                               triple_pattern,
                               self.source_by_operator,
                               triple_pattern.variables,
                               self.eddies,
                               self.source_by_operator,
                               sparql_limit=self.sparql_limit)
    leaf.total_res = triple_pattern.cardinality

    union_op = Xunion(op_id, self.eddies, inputs=1)
    self.operators.append(union_op)

    # Update signature of tuples.
    self.operators_sym[op_id] = False
    self.operators_desc[op_id] = {src: 0}
    self.eofs_desc[src] |= op_bit
    self.source_by_operator[src] |= op_bit
    self.plan_order[op_id] = 1
    self.operators_vars[op_id] = leaf.vars

    tree_plan = TreePlan(union_op, leaf.vars, leaf.vars, self.sources, leaf,
                         None, 1, leaf.total_res)

    self.source_id += 1
    self.independent_sources += 1
    self.id_operator += 1
    return tree_plan
def add_order_by(self, tree):
    """Wrap ``tree`` in an Xorderby operator built from ``self.query.order_by``.

    Args:
        tree: Plan to sort. Must expose ``vars``, ``join_vars``,
            ``sources``, ``height`` and ``total_res``.

    Returns:
        A new TreePlan, one level higher than ``tree``, rooted at the
        Xorderby operator.
    """
    op_id = self.id_operator
    op_bit = 2 ** op_id

    order_op = Xorderby(op_id, self.eddies, self.query.order_by)
    self.operators.append(order_op)

    ordered = TreePlan(order_op, tree.vars, tree.join_vars, tree.sources,
                       tree, None, tree.height + 1, tree.total_res)

    # Update signature of tuples.
    self.operators_sym[op_id] = False
    inputs_desc = {}
    self.operators_desc[op_id] = inputs_desc
    for src in ordered.sources:
        inputs_desc[src] = 0
        self.eofs_desc[src] |= op_bit
        # NOTE(review): the sibling planner methods update
        # self.source_by_operator here; confirm that self.sources_desc
        # exists on this class and is the intended mapping.
        self.sources_desc[src] |= op_bit

    self.plan_order[op_id] = ordered.height
    self.operators_vars[op_id] = ordered.vars

    self.id_operator += 1
    return ordered
def join_subplans(self, left, right, join_type=None, card=-1, logial_plan=None):
    """Join two sub-plans (or patterns) with a physical join operator.

    Args:
        left: Left input; a TreePlan, TriplePattern or BGP.
        right: Right input; a TreePlan, TriplePattern or BGP.
        join_type: Optional operator class that pre-decides the join. It is
            tested with ``issubclass`` against Xnjoin, Xnoptional and
            Xgoptional. When falsy, the join type is chosen from the input
            cardinalities.
        card: Cardinality stored as ``total_res`` of the resulting plan.
        logial_plan: Optional logical plan node; when given it is linked to
            the id of the operator created here.

    Returns:
        The TreePlan rooted at the newly placed join operator.

    Raises:
        Exception: If the placed operator is an Xnjoin and both of its
            inputs are TreePlans.
    """
    op_id = self.id_operator
    op_bit = pow(2, op_id)
    pattern_types = (TriplePattern, BGP)

    # Set Operator ID
    if logial_plan:
        self.operator_id2logical_plan[op_id] = logial_plan
        logial_plan.operator_id = op_id

    # Get cardinalities; query the source only if the count is unknown.
    if isinstance(left, TriplePattern):
        left_card = left.count if left.count is not None else get_metadata(
            self.source, left)
    else:
        left_card = left.total_res

    if isinstance(right, TriplePattern):
        right_card = right.count if right.count is not None else get_metadata(
            self.source, right)
    else:
        right_card = right.total_res

    # Both flags must be bound on every path: they are read below even
    # when the join type is decided heuristically.
    xn_optional = False
    xg_optional = False

    if join_type:
        # Pre-decided join type.
        xn_join = issubclass(join_type, Xnjoin)
        xn_optional = issubclass(join_type, Xnoptional)
        xg_optional = issubclass(join_type, Xgoptional)

        # Switch sides for NLJ so the smaller input ends up on the left.
        # The cardinalities are swapped together with the operands so that
        # each leaf below receives its own estimate.
        # NOTE(review): this also swaps the operands of an Xnoptional;
        # confirm that is intended.
        if (xn_join or xn_optional) and left_card > right_card:
            left, right = right, left
            left_card, right_card = right_card, left_card
    elif isinstance(left, IndependentOperator):
        # Decide based on heuristics: xn = NLJ, otherwise SHJ.
        xn_join = left_card < (right_card / 100.0)
    else:
        xn_join = left_card <= right_card

    # Join variable info
    join_vars = set(left.variables).intersection(right.variables)
    all_variables = set(left.variables).union(right.variables)

    # If the subplans have no variable in common, always place a
    # Hash Join to handle the cross-product.
    if not join_vars:
        xn_join = False

    # Tree Plans as leafs
    if isinstance(left, TreePlan):
        leaf_left = left
        for src in left.sources.keys():
            self.source_by_operator[src] = self.source_by_operator[src] | op_bit
            self.operators_desc.setdefault(op_id, {})[src] = 0
        # NOTE(review): this also registers the next, not yet allocated,
        # source id as a left input; confirm it is intentional.
        self.operators_desc.setdefault(op_id, {})[self.source_id] = 0

    if isinstance(right, TreePlan):
        leaf_right = right
        for src in right.sources.keys():
            self.source_by_operator[src] = self.source_by_operator[src] | op_bit
            self.operators_desc.setdefault(op_id, {})[src] = 1

    if xn_join and isinstance(left, TreePlan) and isinstance(right, TreePlan):
        print("Invalid plan")

    if xn_optional and isinstance(left, TreePlan) and isinstance(right, TreePlan):
        print("Invalid plan")

    # Operator leafs
    if isinstance(left, pattern_types):
        self.sources[self.source_id] = left.variables
        self.source_by_operator[self.source_id] = op_bit
        self.eofs_desc[self.source_id] = op_bit
        self.operators_desc.setdefault(op_id, {})[self.source_id] = 0

        # SHJ always reads the left pattern independently; an NLJ does so
        # only when the right side is a pattern as well.
        if (not xn_join) or isinstance(right, pattern_types):
            leaf_left = IndependentOperator(self.source_id,
                                            self.source,
                                            left,
                                            self.source_by_operator,
                                            left.variables,
                                            self.eddies,
                                            self.source_by_operator,
                                            sparql_limit=self.sparql_limit)
            self.independent_sources += 1
        elif (xn_join or xn_optional) and isinstance(right, TreePlan):
            leaf_left = DependentOperator(self.source_id, self.source, left,
                                          self.source_by_operator,
                                          left.variables,
                                          self.source_by_operator)
            self.dependent_sources += 1

        leaf_left.total_res = left_card
        self.source_id += 1

    if isinstance(right, pattern_types):
        self.sources[self.source_id] = right.variables
        self.source_by_operator[self.source_id] = op_bit
        self.eofs_desc[self.source_id] = op_bit
        self.operators_desc.setdefault(op_id, {})[self.source_id] = 1

        # Nested-loop style operators bind the right pattern per tuple.
        if xn_join or xn_optional:
            leaf_right = DependentOperator(self.source_id, self.source, right,
                                           self.source_by_operator,
                                           right.variables,
                                           self.source_by_operator)
            self.dependent_sources += 1
        else:
            leaf_right = IndependentOperator(self.source_id,
                                             self.source,
                                             right,
                                             self.source_by_operator,
                                             right.variables,
                                             self.eddies,
                                             self.source_by_operator,
                                             sparql_limit=self.sparql_limit)
            self.independent_sources += 1

        leaf_right.total_res = right_card
        self.source_id += 1

    self.operators_vars[op_id] = join_vars
    self.plan_order[op_id] = max(leaf_left.height, leaf_right.height)

    # Place join
    if xn_join:
        # NLJ
        if isinstance(right, pattern_types) and self.poly:
            logger.debug("Placing Poly XN Join")
            op = Poly_Xnjoin(op_id,
                             join_vars,
                             self.eddies,
                             brtpf_mappings=self.brtpf_mappings,
                             sparql_mappings=self.sparql_mappings)
        else:
            logger.debug("Placing XN Join")
            op = Poly_Xnjoin(op_id,
                             join_vars,
                             self.eddies,
                             brtpf_mappings=1,
                             sparql_mappings=1)
        self.operators_sym.update({op_id: True})

        # The right side has to be the DependentOperator.
        if not isinstance(leaf_right, DependentOperator):
            leaf_left, leaf_right = leaf_right, leaf_left

            # After the switch, the first source of the new right leaf is
            # input 1; every other source is routed to input 0.
            # list() keeps this working where keys() returns a view.
            right_source = list(leaf_right.sources.keys())[0]
            inputs_desc = self.operators_desc[op_id]
            for key in inputs_desc:
                inputs_desc[key] = 1 if key == right_source else 0

    elif not xn_optional and not xg_optional:
        # SHJ
        op = Fjoin(op_id, join_vars, self.eddies)
        self.operators_sym.update({op_id: False})

    elif not xg_optional:
        # XN Optional
        op = Xnoptional(op_id, left.variables, right.variables, self.eddies)
        self.operators_sym.update({op_id: True})

    else:
        # XG Optional
        op = Xgoptional(op_id, left.variables, right.variables, self.eddies)
        self.operators_sym.update({op_id: False})

    # Add operator
    self.operators.append(op)
    tree_height = max(leaf_left.height, leaf_right.height) + 1

    # Only the sources below this join are attached to the plan.
    tree_sources = dict(leaf_left.sources)
    tree_sources.update(dict(leaf_right.sources))

    # Create tree plan
    tree_plan = TreePlan(op, all_variables, join_vars, tree_sources,
                         leaf_left, leaf_right, tree_height, card)

    if isinstance(op, Xnjoin) and isinstance(
            leaf_left, TreePlan) and isinstance(leaf_right, TreePlan):
        raise Exception(
            "Invalid plan: nested loop join placed between two sub-plans")

    self.id_operator += 1
    return tree_plan
def create_plan_original(self, query, eddies, source):
    """Build a physical plan from the triple patterns of ``query``.

    The plan is assembled in two stages. Stage 1 greedily groups leaves
    that share a variable with the first leaf of each group. Stage 2 joins
    those groups with each other. Optional projection and distinct
    operators are placed on top.

    Args:
        query: Parsed query. ``query.where.left.triple_patterns``,
            ``query.projection`` and ``query.distinct`` are read.
        eddies: Passed through to every operator that is created.
        source: Passed through to every IndependentOperator leaf.

    Returns:
        A Plan describing the operator tree and its routing tables.

    Raises:
        IndexError: If the query contains no triple patterns.
    """
    # Plan structures.
    tree_height = 0
    id_operator = 0
    operators = []
    operators_desc = {}
    plan_order = {}
    operators_vars = {}
    independent_sources = 0
    operators_sym = {}
    sources_desc = {}
    eofs_desc = {}
    subtrees = []

    def connect(op_id, child, side, merged_sources, join_vars):
        # Route the sources of ``child`` into input ``side`` of ``op_id``.
        # NOTE(review): the routing bit is set only for sources that carry
        # a join variable, the EOF bit for every source; confirm this
        # nesting against the intended behaviour.
        op_bit = pow(2, op_id)
        for src in child.sources.keys():
            # A tuple must have the join variable instantiated to be
            # routed to this join.
            if set(merged_sources[src]) & join_vars:
                operators_desc[op_id][src] = side
                sources_desc[src] = sources_desc[src] | op_bit
            eofs_desc[src] = eofs_desc[src] | op_bit

    def attach_unary(op, op_id, child, height):
        # Place a one-input operator on top of ``child`` and register it.
        op_bit = pow(2, op_id)
        operators.append(op)
        parent = TreePlan(op, child.vars, child.join_vars, child.sources,
                          child, None, height + 1, child.total_res)
        operators_sym[op_id] = False
        operators_desc[op_id] = {}
        for src in parent.sources:
            operators_desc[op_id][src] = 0
            eofs_desc[src] = eofs_desc[src] | op_bit
            sources_desc[src] = sources_desc[src] | op_bit
        plan_order[op_id] = height
        operators_vars[op_id] = parent.vars
        return parent

    # Create initial signatures and leaves of the plan.
    for subquery in query.where.left.triple_patterns:
        sources_desc[id_operator] = 0
        eofs_desc[id_operator] = 0
        leaf = IndependentOperator(id_operator, source, subquery,
                                   sources_desc, subquery.get_variables(),
                                   eddies, eofs_desc)
        leaf.total_res = get_metadata(leaf.server, leaf.query)
        subtrees.append(leaf)
        id_operator += 1

    # Order leaves depending on the cardinality of fragments.
    subtrees.sort(key=lambda leaf: leaf.total_res)

    # Stage 1: Generate left-linear index nested stars.
    stars = []
    id_operator = 0
    while subtrees:
        to_delete = []
        star_tree = subtrees.pop(0)
        # Only the variables of the first leaf define the star.
        star_vars = star_tree.vars
        tree_height = 0
        independent_sources += 1

        for subtree_j in subtrees:
            join_variables = set(star_vars) & set(subtree_j.join_vars)
            all_variables = set(star_tree.vars) | set(subtree_j.vars)

            # Case: There is no join.
            if not join_variables:
                continue

            to_delete.append(subtree_j)

            # Update signatures: the current tree is the left argument,
            # subtree j the right argument.
            sources = {}
            sources.update(star_tree.sources)
            sources.update(subtree_j.sources)
            operators_desc[id_operator] = {}
            operators_vars[id_operator] = join_variables
            connect(id_operator, star_tree, 0, sources, join_variables)
            connect(id_operator, subtree_j, 1, sources, join_variables)

            plan_order[id_operator] = tree_height
            tree_height += 1

            # Place physical operator estimating cardinality.
            res = self.estimate_card(star_tree.total_res, subtree_j.total_res)
            star_is_leaf = isinstance(star_tree, IndependentOperator)
            if star_is_leaf:
                use_nlj = star_tree.total_res < (subtree_j.total_res / 100.0)
            else:
                # The ratio is tested last so that a right-hand cardinality
                # of 0 is caught by the first clause instead of raising
                # ZeroDivisionError.
                use_nlj = (
                    (subtree_j.total_res < 100 * 5)
                    or (subtree_j.total_res > 100 * 1000
                        and star_tree.total_res < 100 * 1000)
                    or (star_tree.total_res / float(subtree_j.total_res) < 0.30)
                )

            if use_nlj:
                # Place a Nested Loop join.
                subtree_j = DependentOperator(subtree_j.sources,
                                              subtree_j.server,
                                              subtree_j.query,
                                              subtree_j.sources_desc,
                                              subtree_j.vars,
                                              subtree_j.total_res)
                op = Xnjoin(id_operator, join_variables, eddies)
                operators.append(op)
                if star_is_leaf:
                    star_tree = TreePlan(op, all_variables, join_variables,
                                         sources, star_tree, subtree_j,
                                         tree_height, 0)
                else:
                    star_tree = TreePlan(op, all_variables, join_variables,
                                         sources, star_tree, subtree_j,
                                         tree_height)
                operators_sym[id_operator] = False
            else:
                # Place a Symmetric Hash join.
                op = Fjoin(id_operator, join_variables, eddies)
                operators.append(op)
                star_tree = TreePlan(op, all_variables, join_variables,
                                     sources, star_tree, subtree_j,
                                     tree_height, res)
                independent_sources += 1
                operators_sym[id_operator] = True

            id_operator += 1

        # Add current tree to the list of stars and
        # remove from the list of subtrees to process.
        stars.append(star_tree)
        for elem in to_delete:
            subtrees.remove(elem)

    # Stage 2: Build bushy tree to combine SSGs with common variables.
    # NOTE(review): a star that shares no join variable with any remaining
    # star is popped and never re-added, i.e. it is dropped from the plan.
    while len(stars) > 1:
        subtree_i = stars.pop(0)

        for j, subtree_j in enumerate(stars):
            all_variables = set(subtree_i.vars) | set(subtree_j.vars)
            join_variables = set(subtree_i.join_vars) & set(subtree_j.join_vars)

            # Case: There is no join between these stars.
            if not join_variables:
                continue

            # Update signatures.
            sources = {}
            sources.update(subtree_i.sources)
            sources.update(subtree_j.sources)
            operators_desc[id_operator] = {}
            operators_vars[id_operator] = join_variables
            connect(id_operator, subtree_i, 0, sources, join_variables)
            connect(id_operator, subtree_j, 1, sources, join_variables)

            joined_height = max(subtree_i.height, subtree_j.height)
            plan_order[id_operator] = joined_height
            stars.pop(j)

            # Place physical operators between stars.
            if isinstance(subtree_j, IndependentOperator):
                # This case models a satellite, therefore apply
                # cardinality estimation on the two stars being joined.
                res = self.estimate_card(subtree_i.total_res,
                                         subtree_j.total_res)
                if subtree_i.total_res < (subtree_j.total_res / 100.0):
                    subtree_j = DependentOperator(subtree_j.sources,
                                                  subtree_j.server,
                                                  subtree_j.query,
                                                  subtree_j.sources_desc,
                                                  subtree_j.vars,
                                                  subtree_j.total_res)
                    op = Xnjoin(id_operator, join_variables, eddies)
                    # Adjust number of asynchronous leaves.
                    independent_sources -= 1
                    operators_sym[id_operator] = False
                else:
                    op = Fjoin(id_operator, join_variables, eddies)
                    operators_sym[id_operator] = True
            else:
                res = (subtree_i.total_res + subtree_j.total_res) / 2
                op = Fjoin(id_operator, join_variables, eddies)
                operators_sym[id_operator] = True

            operators.append(op)
            # Height and estimated cardinality are separate arguments.
            stars.append(TreePlan(op, all_variables, join_variables, sources,
                                  subtree_i, subtree_j, joined_height, res))
            id_operator += 1
            break

        # NOTE(review): ``subtrees`` is always empty once Stage 1 has
        # finished, so this condition holds on every iteration; confirm the
        # intended height bookkeeping.
        if len(subtrees) % 2 == 0:
            tree_height += 1

    tree_height += 1
    tree = stars.pop()

    # Adds the projection operator to the plan.
    if query.projection:
        op = Xproject(id_operator, query.projection, eddies)
        tree = attach_unary(op, id_operator, tree, tree_height)
        id_operator += 1
        tree_height += 1

    # Adds the distinct operator to the plan.
    if query.distinct:
        op = Xdistinct(id_operator, eddies)
        tree = attach_unary(op, id_operator, tree, tree_height)
        id_operator += 1
        tree_height += 1

    physical_plan = Plan(query_tree=tree,
                         tree_height=tree.height,
                         operators_desc=operators_desc,
                         sources_desc=sources_desc,
                         plan_order=plan_order,
                         operators_vars=operators_vars,
                         independent_sources=independent_sources,
                         operators_sym=operators_sym,
                         operators=operators)
    return physical_plan