Пример #1
0
    def union_subplans(self, plans):
        """Place an ``Xunion`` operator on top of ``plans`` and return the tree.

        The resulting tree exposes the union of the variables of all
        sub-plans, is one level higher than the tallest sub-plan and carries
        the sum of the sub-plans' ``total_res`` values.

        Args:
            plans: sized collection of sub-plans; each must provide
                ``variables``, ``height`` and ``total_res``.

        Returns:
            The new ``TreePlan`` rooted at the union operator.
        """
        op_id = self.id_operator
        op_bit = pow(2, op_id)

        union_op = Xunion(op_id, self.eddies, inputs=len(plans))
        self.operators.append(union_op)

        # Aggregate variables, height and cardinality over all inputs.
        merged_vars = set()
        tallest = 0
        result_count = 0
        for subplan in plans:
            merged_vars.update(subplan.variables)
            tallest = max(tallest, subplan.height)
            result_count += subplan.total_res

        tree_plan = TreePlan(union_op, merged_vars, None, self.sources, plans,
                             None, tallest + 1, result_count)

        # Update signature of tuples: register the operator and flag its bit
        # on every source of the new tree.
        self.operators_sym[op_id] = False
        self.operators_desc[op_id] = {}
        for source in tree_plan.sources:
            self.operators_desc[op_id][source] = 0
            self.eofs_desc[source] = self.eofs_desc[source] | op_bit
            self.source_by_operator[source] = (
                self.source_by_operator[source] | op_bit)

        self.plan_order[op_id] = tree_plan.height
        self.operators_vars[op_id] = tree_plan.vars
        self.id_operator += 1

        return tree_plan
Пример #2
0
    def create_triple_pattern_plan(self, triple_pattern):
        """Build a plan consisting of a single triple pattern.

        An ``IndependentOperator`` leaf is created for ``triple_pattern`` and
        wrapped in a one-input ``Xunion`` so that the result is a ``TreePlan``.
        The source, independent-source and operator counters are each
        advanced by one.

        Args:
            triple_pattern: pattern providing ``variables`` and
                ``cardinality``.

        Returns:
            The ``TreePlan`` of height 1 rooted at the union operator.
        """
        src_id = self.source_id
        op_id = self.id_operator
        op_bit = pow(2, op_id)

        # Register the new source before the leaf is constructed; the leaf
        # receives ``self.source_by_operator`` as an argument.
        self.eofs_desc[src_id] = 0
        self.sources[src_id] = triple_pattern.variables
        self.source_by_operator[src_id] = op_bit
        self.operators_desc.setdefault(op_id, {})[src_id] = 0

        leaf = IndependentOperator(src_id,
                                   self.source,
                                   triple_pattern,
                                   self.source_by_operator,
                                   triple_pattern.variables,
                                   self.eddies,
                                   self.source_by_operator,
                                   sparql_limit=self.sparql_limit)
        leaf.total_res = triple_pattern.cardinality

        union_op = Xunion(op_id, self.eddies, inputs=1)
        self.operators.append(union_op)

        # Update signature of tuples.
        self.operators_sym[op_id] = False
        self.operators_desc[op_id] = {src_id: 0}
        self.eofs_desc[src_id] = self.eofs_desc[src_id] | op_bit
        self.source_by_operator[src_id] = (
            self.source_by_operator[src_id] | op_bit)

        self.plan_order[op_id] = 1
        self.operators_vars[op_id] = leaf.vars

        tree_plan = TreePlan(union_op, leaf.vars, leaf.vars, self.sources,
                             leaf, None, 1, leaf.total_res)

        self.source_id += 1
        self.independent_sources += 1
        self.id_operator += 1
        return tree_plan
Пример #3
0
    def add_order_by(self, tree):
        """Stack an ``Xorderby`` operator on top of ``tree``.

        The operator is configured with ``self.query.order_by``. The new tree
        keeps the variables, join variables, sources and ``total_res`` of
        ``tree`` and is one level higher.

        Args:
            tree: the plan to be ordered.

        Returns:
            The new ``TreePlan`` rooted at the order-by operator.
        """
        op_id = self.id_operator
        op_bit = pow(2, op_id)

        order_op = Xorderby(op_id, self.eddies, self.query.order_by)
        self.operators.append(order_op)
        ordered = TreePlan(order_op, tree.vars, tree.join_vars, tree.sources,
                           tree, None, tree.height + 1, tree.total_res)

        # Update signature of tuples.
        self.operators_sym[op_id] = False
        self.operators_desc[op_id] = {}
        for source in ordered.sources:
            self.operators_desc[op_id][source] = 0
            self.eofs_desc[source] = self.eofs_desc[source] | op_bit
            # NOTE(review): sibling methods update ``source_by_operator``
            # here; confirm that ``sources_desc`` is the intended attribute.
            self.sources_desc[source] = self.sources_desc[source] | op_bit

        self.plan_order[op_id] = ordered.height
        self.operators_vars[op_id] = ordered.vars
        self.id_operator += 1
        return ordered
Пример #4
0
    def join_subplans(self,
                      left,
                      right,
                      join_type=None,
                      card=-1,
                      logial_plan=None):
        """Join two sub-plans with a physical operator and return the tree.

        Each input may be an already built ``TreePlan`` or a
        ``TriplePattern`` / ``BGP`` for which a leaf operator is created here.
        The physical operator is either derived from ``join_type`` or, when
        no type is given, chosen by comparing the input cardinalities.

        Args:
            left: left input of the join.
            right: right input of the join.
            join_type: optional operator class; tested with ``issubclass``
                against ``Xnjoin``, ``Xnoptional`` and ``Xgoptional``.
            card: stored as ``total_res`` of the resulting tree.
            logial_plan: optional logical plan node; when given it is
                recorded under the current operator id and receives that id
                as ``operator_id``.

        Returns:
            The ``TreePlan`` rooted at the newly placed operator.

        Raises:
            Exception: if an ``Xnjoin`` instance ends up with a ``TreePlan``
                on both sides.
        """
        # Set Operator ID
        if logial_plan:
            self.operator_id2logical_plan[self.id_operator] = logial_plan
            logial_plan.operator_id = self.id_operator

        # Get Metadata for operator
        if isinstance(left, TriplePattern):
            # Get cardinality; query only if necessary
            if left.count is not None:
                left_card = left.count
            else:
                left_card = get_metadata(self.source, left)
        else:
            left_card = left.total_res

        if isinstance(right, TriplePattern):
            # Get cardinality; query only if necessary
            if right.count is not None:
                right_card = right.count
            else:
                right_card = get_metadata(self.source, right)
        else:
            right_card = right.total_res

        # The optional flags are read unconditionally further down, so they
        # need a value even when the join type is chosen heuristically.
        xn_optional = False
        xg_optional = False

        # Pre-decided Join Type
        if join_type:
            xn_join = issubclass(join_type, Xnjoin)
            xn_optional = issubclass(join_type, Xnoptional)
            xg_optional = issubclass(join_type, Xgoptional)
            if (xn_join or xn_optional) and left_card > right_card:
                # Switch sides for NLJ. The cardinalities are switched as
                # well so that each leaf is labelled with its own value.
                # NOTE(review): this also switches the sides of an
                # Xnoptional; confirm that this is intended.
                left, right = right, left
                left_card, right_card = right_card, left_card

        # Decide based on heuristics
        else:
            # Decide Join Type: xn = NLJ, FJ = SHJ
            if isinstance(left, IndependentOperator):
                xn_join = left_card < (right_card / 100.0)
            else:
                xn_join = left_card <= right_card

        # Joins Variable info
        join_vars = set(left.variables).intersection(right.variables)
        all_variables = set(left.variables).union(right.variables)

        # If the subplans have no variable in common,
        # always place a Hash Join to handle the Cross-Product
        if len(join_vars) == 0:
            xn_join = False

        # Tree Plans as Leafs
        if isinstance(left, TreePlan):
            leaf_left = left
            for source in left.sources.keys():
                self.source_by_operator[
                    source] = self.source_by_operator[source] | pow(
                        2, self.id_operator)
                self.operators_desc.setdefault(self.id_operator,
                                               {})[source] = 0

            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 0

        if isinstance(right, TreePlan):
            leaf_right = right
            for source in right.sources.keys():
                self.source_by_operator[
                    source] = self.source_by_operator[source] | pow(
                        2, self.id_operator)
                self.operators_desc.setdefault(self.id_operator,
                                               {})[source] = 1

        if xn_join and isinstance(left, TreePlan) and isinstance(
                right, TreePlan):
            print("Invalid plan")
        if xn_optional and isinstance(left, TreePlan) and isinstance(
                right, TreePlan):
            print("Invalid plan")

        # Operator Leafs
        if isinstance(left, TriplePattern) or isinstance(left, BGP):

            self.eofs_desc.update({self.source_id: 0})
            self.sources[self.source_id] = left.variables
            self.source_by_operator[self.source_id] = pow(2, self.id_operator)
            self.eofs_desc[self.source_id] = pow(2, self.id_operator)
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 0

            # Create the leaf depending on the join operator:
            # for a SHJ (FJ) use an IndependentOperator; for a NLJ (XN)
            # whose right side is a pattern as well, also use an
            # IndependentOperator.
            if (not xn_join) or (xn_join and (isinstance(right, TriplePattern)
                                              or isinstance(right, BGP))):
                leaf_left = IndependentOperator(self.source_id,
                                                self.source,
                                                left,
                                                self.source_by_operator,
                                                left.variables,
                                                self.eddies,
                                                self.source_by_operator,
                                                sparql_limit=self.sparql_limit)
                self.independent_sources += 1

            elif (xn_join or xn_optional) and isinstance(right, TreePlan):
                leaf_left = DependentOperator(self.source_id, self.source,
                                              left, self.source_by_operator,
                                              left.variables,
                                              self.source_by_operator)
                self.dependent_sources += 1

            leaf_left.total_res = left_card
            self.source_id += 1

        if isinstance(right, TriplePattern) or isinstance(right, BGP):

            self.eofs_desc.update({self.source_id: 0})
            self.sources[self.source_id] = right.variables
            self.source_by_operator[self.source_id] = pow(2, self.id_operator)
            self.eofs_desc[self.source_id] = pow(2, self.id_operator)
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 1

            # Create the leaf depending on the join operator
            if xn_join or xn_optional:
                leaf_right = DependentOperator(self.source_id, self.source,
                                               right, self.source_by_operator,
                                               right.variables,
                                               self.source_by_operator)
                self.dependent_sources += 1

            else:
                leaf_right = IndependentOperator(
                    self.source_id,
                    self.source,
                    right,
                    self.source_by_operator,
                    right.variables,
                    self.eddies,
                    self.source_by_operator,
                    sparql_limit=self.sparql_limit)
                self.independent_sources += 1

            leaf_right.total_res = right_card
            self.source_id += 1

        self.operators_vars[self.id_operator] = join_vars

        self.plan_order[self.id_operator] = max(leaf_left.height,
                                                leaf_right.height)

        # Place Join
        if xn_join:  # NLJ
            if (isinstance(right, TriplePattern)
                    or isinstance(right, BGP)) and self.poly:
                logger.debug("Placing Poly XN Join")
                op = Poly_Xnjoin(self.id_operator,
                                 join_vars,
                                 self.eddies,
                                 brtpf_mappings=self.brtpf_mappings,
                                 sparql_mappings=self.sparql_mappings)
            else:
                logger.debug("Placing XN Join")
                op = Poly_Xnjoin(self.id_operator,
                                 join_vars,
                                 self.eddies,
                                 brtpf_mappings=1,
                                 sparql_mappings=1)

            self.operators_sym.update({self.id_operator: True})

            # The right side has to be the DependentOperator
            if not isinstance(leaf_right, DependentOperator):
                # Switch Leafs
                leaf_left, leaf_right = leaf_right, leaf_left

                # Update operators_desc for the current operator id: the
                # first source of the new right leaf becomes input 1, every
                # other source goes to input 0. ``next(iter(...))`` is used
                # because dict views cannot be indexed on Python 3.
                dependent_source = next(iter(leaf_right.sources), None)
                op_desc = self.operators_desc[self.id_operator]
                for key in op_desc:
                    op_desc[key] = 1 if key == dependent_source else 0

        elif not xn_optional and not xg_optional:  # SHJ
            op = Fjoin(self.id_operator, join_vars, self.eddies)
            self.operators_sym.update({self.id_operator: False})

        elif not xg_optional:  # XN Optional
            op = Xnoptional(self.id_operator, left.variables, right.variables,
                            self.eddies)
            self.operators_sym.update({self.id_operator: True})

        else:  # XG Optional
            op = Xgoptional(self.id_operator, left.variables, right.variables,
                            self.eddies)
            self.operators_sym.update({self.id_operator: False})

        # Add Operator
        self.operators.append(op)

        tree_height = max(leaf_left.height, leaf_right.height) + 1
        # 2020-03-04: Changed here to route everything properly; only the
        # sources of the two leaves are attached to the new tree.
        tree_sources = dict(leaf_left.sources)
        tree_sources.update(dict(leaf_right.sources))
        # Create Tree Plan
        join_card = card
        tree_plan = TreePlan(op, all_variables, join_vars, tree_sources,
                             leaf_left, leaf_right, tree_height, join_card)

        if isinstance(op, Xnjoin) and isinstance(
                leaf_left, TreePlan) and isinstance(leaf_right, TreePlan):
            raise Exception(
                "Invalid plan: Xnjoin placed between two TreePlan inputs")

        self.id_operator += 1
        return tree_plan
Пример #5
0
    def create_plan_original(self, query, eddies, source):
        """Build a physical plan for ``query`` in two stages.

        Stage 1 turns every triple pattern of ``query.where.left`` into an
        ``IndependentOperator`` leaf, orders the leaves by cardinality and
        groups leaves that share join variables into left-linear "star"
        trees. Stage 2 combines stars that share join variables. Projection
        and distinct operators are stacked on top when the query asks for
        them.

        Args:
            query: parsed query; ``where.left.triple_patterns``,
                ``projection`` and ``distinct`` are read.
            eddies: handed to every operator created here.
            source: handed to every ``IndependentOperator`` leaf.

        Returns:
            The ``Plan`` wrapping the final tree and its bookkeeping
            structures.
        """
        # Plan structures.
        tree_height = 0
        id_operator = 0
        operators = []
        operators_desc = {}
        plan_order = {}
        operators_vars = {}
        ordered_subtrees = []
        independent_sources = 0
        eofs_operators_desc = {}
        operators_sym = {}
        sources_desc = {}
        eofs_desc = {}
        subtrees = []

        # Create initial signatures and leaves of the plan. ``id_operator``
        # doubles as the source id of each leaf here.
        for subquery in query.where.left.triple_patterns:
            sources_desc.update({id_operator: 0})
            eofs_desc.update({id_operator: 0})
            leaf = IndependentOperator(id_operator, source, subquery,
                                       sources_desc, subquery.get_variables(),
                                       eddies, eofs_desc)
            leaf.total_res = get_metadata(leaf.server, leaf.query)
            subtrees.append(leaf)
            ordered_subtrees.append(leaf.total_res)
            id_operator += 1

        # Order leaves depending on the cardinality of fragments.
        keydict = dict(zip(subtrees, ordered_subtrees))
        subtrees.sort(key=keydict.get)

        # Stage 1: Generate left-linear index nested stars.
        stars = []
        id_operator = 0
        while len(subtrees) > 0:

            to_delete = []
            star_tree = subtrees.pop(0)
            star_vars = star_tree.vars
            tree_height = 0
            independent_sources = independent_sources + 1

            for subtree_j in subtrees:
                join_variables = set(star_vars) & set(subtree_j.join_vars)

                # Only leaves that join with the star are merged into it.
                if not join_variables:
                    continue

                all_variables = set(star_tree.vars) | set(subtree_j.vars)
                to_delete.append(subtree_j)

                # Update signatures.
                sources = {}
                sources.update(star_tree.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables
                eofs_operators_desc[id_operator] = {}

                # The current tree is the left argument of the plan.
                for src in star_tree.sources.keys():
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 0 for len of something
                        operators_desc[id_operator].update({src: 0})
                    sources_desc[src] = sources_desc[src] | pow(2, id_operator)

                    # TODO: check this.
                    eofs_operators_desc[id_operator].update({src: 0})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                # The subtree j is the right argument of the plan.
                for src in subtree_j.sources.keys():
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 1 for len of something
                        operators_desc[id_operator].update({src: 1})
                    sources_desc[src] = sources_desc[src] | pow(2, id_operator)

                    # TODO: check this.
                    eofs_operators_desc[id_operator].update({src: 1})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                plan_order[id_operator] = tree_height
                tree_height = tree_height + 1

                # Place physical operator estimating cardinality.
                res = self.estimate_card(star_tree.total_res,
                                         subtree_j.total_res)
                if isinstance(star_tree, IndependentOperator):
                    use_nested_loop = (
                        star_tree.total_res < (subtree_j.total_res / 100.0))
                    nested_loop_res = (0,)
                else:
                    # The small-cardinality check comes first so that an
                    # empty right side never reaches the division below.
                    use_nested_loop = (
                        (subtree_j.total_res < 100 * 5)
                        or (star_tree.total_res /
                            float(subtree_j.total_res) < 0.30)
                        or (subtree_j.total_res > 100 * 1000
                            and star_tree.total_res < 100 * 1000))
                    # This branch leaves ``total_res`` at TreePlan's default.
                    nested_loop_res = ()

                if use_nested_loop:
                    # Place a Nested Loop join.
                    subtree_j = DependentOperator(
                        subtree_j.sources, subtree_j.server, subtree_j.query,
                        subtree_j.sources_desc, subtree_j.vars,
                        subtree_j.total_res)
                    op = Xnjoin(id_operator, join_variables, eddies)
                    operators.append(op)
                    star_tree = TreePlan(op, all_variables, join_variables,
                                         sources, star_tree, subtree_j,
                                         tree_height, *nested_loop_res)
                    operators_sym.update({id_operator: False})
                else:
                    # Place a Symmetric Hash join.
                    op = Fjoin(id_operator, join_variables, eddies)
                    operators.append(op)
                    star_tree = TreePlan(op, all_variables, join_variables,
                                         sources, star_tree, subtree_j,
                                         tree_height, res)
                    independent_sources = independent_sources + 1
                    operators_sym.update({id_operator: True})
                id_operator += 1

            # Add current tree to the list of stars and
            # remove from the list of subtrees to process.
            stars.append(star_tree)
            for elem in to_delete:
                subtrees.remove(elem)

        # Stage 2: Build bushy tree to combine SSGs with common variables.
        # NOTE(review): a star that joins with no remaining star is popped
        # and never re-added; confirm that this is intended.
        while len(stars) > 1:

            subtree_i = stars.pop(0)

            for j, subtree_j in enumerate(stars):
                all_variables = set(subtree_i.vars) | set(subtree_j.vars)
                join_variables = (set(subtree_i.join_vars)
                                  & set(subtree_j.join_vars))

                # Only stars that join with each other are combined.
                if not join_variables:
                    continue

                # Update signatures.
                sources = {}
                sources.update(subtree_i.sources)
                sources.update(subtree_j.sources)

                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables
                eofs_operators_desc[id_operator] = {}

                for src in subtree_i.sources.keys():
                    # This models the restriction: a tuple must have the join
                    # variable instantiated to be routed to a certain join.
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 0 for len of something
                        operators_desc[id_operator].update({src: 0})
                    sources_desc[src] = sources_desc[src] | pow(2, id_operator)

                    # TODO: Check this.
                    eofs_operators_desc[id_operator].update({src: 0})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                for src in subtree_j.sources.keys():
                    # This models the restriction: a tuple must have the join
                    # variable instantiated to be routed to a certain join.
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 1 for len of something
                        operators_desc[id_operator].update({src: 1})
                    sources_desc[src] = sources_desc[src] | pow(2, id_operator)

                    # TODO: Check this.
                    eofs_operators_desc[id_operator].update({src: 1})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                joined_height = max(subtree_i.height, subtree_j.height)
                plan_order[id_operator] = joined_height
                # Safe while iterating: the loop is left right after the
                # joined tree has been appended.
                stars.pop(j)

                # Place physical operators between stars. The estimate is
                # passed to TreePlan as ``total_res``; it must not take part
                # in the height computation.
                if isinstance(subtree_j, IndependentOperator):
                    # Estimate from the two stars actually being joined.
                    res = self.estimate_card(subtree_i.total_res,
                                             subtree_j.total_res)

                    # This case models a satellite, therefore apply
                    # cardinality estimation.
                    if subtree_i.total_res < (subtree_j.total_res / 100.0):
                        subtree_j = DependentOperator(
                            subtree_j.sources, subtree_j.server,
                            subtree_j.query, subtree_j.sources_desc,
                            subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(TreePlan(op, all_variables,
                                              join_variables, sources,
                                              subtree_i, subtree_j,
                                              joined_height, res))
                        # Adjust number of asynchronous leaves.
                        independent_sources = independent_sources - 1
                        operators_sym.update({id_operator: False})
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(TreePlan(op, all_variables,
                                              join_variables, sources,
                                              subtree_i, subtree_j,
                                              joined_height, res))
                        operators_sym.update({id_operator: True})
                else:
                    res = (subtree_i.total_res + subtree_j.total_res) / 2
                    op = Fjoin(id_operator, join_variables, eddies)
                    operators.append(op)
                    stars.append(TreePlan(op, all_variables, join_variables,
                                          sources, subtree_i, subtree_j,
                                          joined_height, res))
                    operators_sym.update({id_operator: True})
                id_operator += 1
                break

            # ``subtrees`` has been emptied by stage 1, so this check holds
            # on every pass.
            if len(subtrees) % 2 == 0:
                tree_height += 1

        tree_height += 1
        tree = stars.pop()

        # Adds the projection operator to the plan.
        if query.projection:
            op = Xproject(id_operator, query.projection, eddies)
            operators.append(op)
            tree = TreePlan(op, tree.vars, tree.join_vars, tree.sources,
                            tree, None, tree_height + 1, tree.total_res)

            # Update signature of tuples.
            operators_sym.update({id_operator: False})
            operators_desc[id_operator] = {}
            eofs_operators_desc[id_operator] = {}
            for src in tree.sources:
                operators_desc[id_operator].update({src: 0})
                eofs_operators_desc[id_operator].update({src: 0})
                eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)
                sources_desc[src] = sources_desc[src] | pow(2, id_operator)
            plan_order[id_operator] = tree_height
            operators_vars[id_operator] = tree.vars
            id_operator += 1
            tree_height += 1

        # Adds the distinct operator to the plan.
        if query.distinct:
            op = Xdistinct(id_operator, eddies)
            operators.append(op)
            tree = TreePlan(op, tree.vars, tree.join_vars, tree.sources,
                            tree, None, tree_height + 1, tree.total_res)

            # Update signature of tuples.
            operators_sym.update({id_operator: False})
            operators_desc[id_operator] = {}
            eofs_operators_desc[id_operator] = {}
            for src in tree.sources:
                operators_desc[id_operator].update({src: 0})
                eofs_operators_desc[id_operator].update({src: 0})
                eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)
                sources_desc[src] = sources_desc[src] | pow(2, id_operator)
            plan_order[id_operator] = tree_height
            operators_vars[id_operator] = tree.vars
            id_operator += 1
            tree_height += 1

        physical_plan = Plan(query_tree=tree, tree_height=tree.height,
                             operators_desc=operators_desc,
                             sources_desc=sources_desc,
                             plan_order=plan_order,
                             operators_vars=operators_vars,
                             independent_sources=independent_sources,
                             operators_sym=operators_sym,
                             operators=operators)

        return physical_plan