Пример #1
0
def create_plan(query, eddies, server):
    """Build a fixed plan that joins the first two triple patterns.

    One ``IndependentOperator`` leaf is created for every triple pattern in
    ``query.where.left.triple_patterns``; the first two leaves are then
    combined with a single ``Fjoin`` (operator id 0).

    Args:
        query: Parsed query exposing ``where.left.triple_patterns``.
        eddies: Passed through to the leaf and join operators.
        server: Source handed to every leaf operator.

    Returns:
        The 10-tuple ``(tree, tree.height, operators_desc, sources_desc,
        plan_order, operators_vars, independent_sources, operators_desc,
        operators_sym, operators)``.

    Raises:
        IndexError: If the query has fewer than two triple patterns.
    """
    # Plan structures.
    operators = []
    operators_desc = {}
    plan_order = {}
    operators_vars = {}
    independent_sources = 0
    operators_sym = {}
    sources_desc = {}
    # Must be a dict: it is updated per leaf and shared with the leaves.
    eofs_desc = {}
    subtrees = []

    # Create initial signatures and leaves of the plan.
    for id_operator, subquery in enumerate(query.where.left.triple_patterns):
        sources_desc.update({id_operator: 0})
        eofs_desc.update({id_operator: 0})
        leaf = IndependentOperator(id_operator, server, subquery, sources_desc,
                                   subquery.get_variables(), eddies, eofs_desc)
        leaf.total_res = get_metadata_ldf(leaf.server, leaf.query)
        subtrees.append(leaf)

    # Now we are going to create a plan with a Join.
    id_operator = 0
    left = subtrees[0]
    right = subtrees[1]
    join_variables = set(left.join_vars) & set(right.join_vars)
    all_variables = set(left.vars) | set(right.vars)
    sources = {}
    sources.update(left.sources)
    sources.update(right.sources)
    res = left.total_res + right.total_res

    # ``sources`` maps source id -> variables, so the id of a leaf is its
    # first key (indexing the mapping with 0 would look up source id 0).
    # NOTE(review): the left input is tagged -1 and only one independent
    # source is counted, while plan_order / operators_vars stay empty;
    # other planners tag the left input 0 - confirm the intended values.
    operators_desc[id_operator] = {}
    operators_desc[id_operator].update({list(left.sources)[0]: -1})
    operators_desc[id_operator].update({list(right.sources)[0]: 1})

    op = Fjoin(id_operator, join_variables, eddies)
    operators.append(op)
    tree = TreePlan(op, all_variables, join_variables, sources, left,
                    right, 1, res)
    independent_sources = independent_sources + 1
    operators_sym.update({id_operator: True})

    return tree, tree.height, operators_desc, sources_desc, plan_order, operators_vars, independent_sources, operators_desc, operators_sym, operators
Пример #2
0
    def join_subplans(self,
                      left,
                      right,
                      join_type=None,
                      card=-1,
                      logial_plan=None):
        """Join two sub-plans with a physical operator and return the tree.

        Each input is either a ``TreePlan`` (used as-is) or a
        ``TriplePattern`` / ``BGP`` (wrapped into an ``IndependentOperator``
        or ``DependentOperator`` leaf). The routing tables on ``self``
        (``operators_desc``, ``source_by_operator``, ``eofs_desc``,
        ``sources``, ``operators_vars``, ``plan_order``, ``operators_sym``,
        ``operators``) are updated for the new operator, and
        ``self.id_operator`` is incremented before returning.

        Args:
            left: Left input (``TreePlan``, ``TriplePattern`` or ``BGP``).
            right: Right input (``TreePlan``, ``TriplePattern`` or ``BGP``).
            join_type: Optional operator class. When given, it is tested
                with ``issubclass`` against ``Xnjoin``, ``Xnoptional`` and
                ``Xgoptional``; otherwise the join is chosen by comparing
                the input cardinalities.
            card: Cardinality stored on the resulting ``TreePlan``.
            logial_plan: Optional logical plan node; it is registered under
                the current operator id and receives ``operator_id``.

        Returns:
            The ``TreePlan`` rooted at the newly placed operator.

        Raises:
            Exception: If an ``Xnjoin`` ends up with a ``TreePlan`` on both
                sides.
        """
        # Set Operator ID
        if logial_plan:
            self.operator_id2logical_plan[self.id_operator] = logial_plan
            logial_plan.operator_id = self.id_operator

        # Get Metadata for operator
        if isinstance(left, TriplePattern):
            # Get cardinality; Query only if necessary
            left_card = (left.count if left.count is not None else
                         get_metadata(self.source, left))
        else:
            left_card = left.total_res

        if isinstance(right, TriplePattern):
            # Get cardinality; Query only if necessary
            right_card = (right.count if right.count is not None else
                          get_metadata(self.source, right))
        else:
            right_card = right.total_res

        # Every flag needs a value on every path: the heuristic branch only
        # decides xn_join, but the optional flags are read further down.
        xn_join = False
        xn_optional = False
        xg_optional = False

        # Pre-decided Join Type
        if join_type:
            xn_join = issubclass(join_type, Xnjoin)
            xn_optional = issubclass(join_type, Xnoptional)
            xg_optional = issubclass(join_type, Xgoptional)
            # Switch sides for NLJ. The cardinalities are swapped together
            # with the plans so each leaf is later labelled with its own.
            if (xn_join or xn_optional) and left_card > right_card:
                left, right = right, left
                left_card, right_card = right_card, left_card

        # Decide based on heuristics: xn = NLJ, FJ = SHJ
        elif isinstance(left, IndependentOperator):
            xn_join = left_card < (right_card / 100.0)
        else:
            xn_join = left_card <= right_card

        # Joins Variable info
        join_vars = set(left.variables).intersection(right.variables)
        all_variables = set(left.variables).union(right.variables)

        # If the subplans have no variable in common,
        # always place a Hash Join to handle the Cross-Product
        if len(join_vars) == 0:
            xn_join = False

        # Tree Plans as Leafs
        if isinstance(left, TreePlan):
            leaf_left = left
            for source in left.sources.keys():
                self.source_by_operator[
                    source] = self.source_by_operator[source] | pow(
                        2, self.id_operator)
                self.operators_desc.setdefault(self.id_operator,
                                               {})[source] = 0

            # NOTE(review): this also registers the not-yet-allocated
            # self.source_id as a left input - confirm this is intended.
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 0

        if isinstance(right, TreePlan):
            leaf_right = right
            for source in right.sources.keys():
                self.source_by_operator[
                    source] = self.source_by_operator[source] | pow(
                        2, self.id_operator)
                self.operators_desc.setdefault(self.id_operator,
                                               {})[source] = 1

        if xn_join and isinstance(left, TreePlan) and isinstance(
                right, TreePlan):
            print("Invalid plan")
        if xn_optional and isinstance(left, TreePlan) and isinstance(
                right, TreePlan):
            print("Invalid plan")

        # Operator Leafs
        if isinstance(left, (TriplePattern, BGP)):

            self.eofs_desc.update({self.source_id: 0})
            self.sources[self.source_id] = left.variables
            self.source_by_operator[self.source_id] = pow(2, self.id_operator)
            self.eofs_desc[self.source_id] = pow(2, self.id_operator)
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 0

            # If SHJ(FJ), use an independent operator; likewise for a
            # NLJ(XN) whose right side is itself a pattern.
            if not xn_join or isinstance(right, (TriplePattern, BGP)):
                leaf_left = IndependentOperator(self.source_id,
                                                self.source,
                                                left,
                                                self.source_by_operator,
                                                left.variables,
                                                self.eddies,
                                                self.source_by_operator,
                                                sparql_limit=self.sparql_limit)
                self.independent_sources += 1

            elif (xn_join or xn_optional) and isinstance(right, TreePlan):
                leaf_left = DependentOperator(self.source_id, self.source,
                                              left, self.source_by_operator,
                                              left.variables,
                                              self.source_by_operator)
                self.dependent_sources += 1

            leaf_left.total_res = left_card
            self.source_id += 1

        if isinstance(right, (TriplePattern, BGP)):

            self.eofs_desc.update({self.source_id: 0})
            self.sources[self.source_id] = right.variables
            self.source_by_operator[self.source_id] = pow(2, self.id_operator)
            self.eofs_desc[self.source_id] = pow(2, self.id_operator)
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 1

            # Base on operator, create operator
            if xn_join or xn_optional:
                leaf_right = DependentOperator(self.source_id, self.source,
                                               right, self.source_by_operator,
                                               right.variables,
                                               self.source_by_operator)
                self.dependent_sources += 1

            else:
                leaf_right = IndependentOperator(
                    self.source_id,
                    self.source,
                    right,
                    self.source_by_operator,
                    right.variables,
                    self.eddies,
                    self.source_by_operator,
                    sparql_limit=self.sparql_limit)
                self.independent_sources += 1

            leaf_right.total_res = right_card
            self.source_id += 1

        self.operators_vars[self.id_operator] = join_vars

        self.plan_order[self.id_operator] = max(leaf_left.height,
                                                leaf_right.height)

        # Place Join
        if xn_join:  # NLJ
            if isinstance(right, (TriplePattern, BGP)) and self.poly:
                logger.debug("Placing Poly XN Join")
                op = Poly_Xnjoin(self.id_operator,
                                 join_vars,
                                 self.eddies,
                                 brtpf_mappings=self.brtpf_mappings,
                                 sparql_mappings=self.sparql_mappings)
            else:
                logger.debug("Placing XN Join")
                op = Poly_Xnjoin(self.id_operator,
                                 join_vars,
                                 self.eddies,
                                 brtpf_mappings=1,
                                 sparql_mappings=1)

            self.operators_sym.update({self.id_operator: True})

            # The right side has to be the dependent operator.
            if not isinstance(leaf_right, DependentOperator):
                # Switch Leafs
                leaf_left, leaf_right = leaf_right, leaf_left

                # The dependent leaf becomes input 1; every other source of
                # this operator is routed to input 0. Dict views cannot be
                # indexed on Python 3, so take the first key via iter().
                dependent_source = next(iter(leaf_right.sources), None)
                inputs = self.operators_desc[self.id_operator]
                for key in inputs:
                    inputs[key] = 1 if key == dependent_source else 0

        elif not xn_optional and not xg_optional:  # SHJ
            op = Fjoin(self.id_operator, join_vars, self.eddies)
            self.operators_sym.update({self.id_operator: False})

        elif not xg_optional:  # XN Optional
            op = Xnoptional(self.id_operator, left.variables, right.variables,
                            self.eddies)
            self.operators_sym.update({self.id_operator: True})

        else:  # XG Optional
            op = Xgoptional(self.id_operator, left.variables, right.variables,
                            self.eddies)
            self.operators_sym.update({self.id_operator: False})

        # Add Operator
        self.operators.append(op)

        tree_height = max(leaf_left.height, leaf_right.height) + 1
        # 2020-03-04: only the sources of the two inputs are attached to the
        # tree (not all of self.sources) to route everything properly.
        tree_sources = dict(leaf_left.sources)
        tree_sources.update(dict(leaf_right.sources))
        # Create Tree Plan
        join_card = card
        tree_plan = TreePlan(op, all_variables, join_vars, tree_sources,
                             leaf_left, leaf_right, tree_height, join_card)

        if isinstance(op, Xnjoin) and isinstance(
                leaf_left, TreePlan) and isinstance(leaf_right, TreePlan):
            raise Exception(
                "Invalid plan: Xnjoin placed between two TreePlan inputs")

        self.id_operator += 1
        return tree_plan
Пример #3
0
    def create_plan_original(self, query, eddies, source):
        """Build a physical plan from the triple patterns of ``query``.

        Stage 1 sorts the leaves by ``total_res`` and greedily groups them
        into left-linear trees ("stars") around the variables of the first
        leaf of each group. Stage 2 joins stars that share join variables.
        Projection and distinct operators are added on top when requested.

        Args:
            query: Parsed query exposing ``where.left.triple_patterns``,
                ``projection`` and ``distinct``.
            eddies: Passed through to every operator that is created.
            source: Source handed to every leaf operator.

        Returns:
            A ``Plan`` wrapping the final tree and the routing tables.

        Raises:
            IndexError: If no star remains to pop (e.g. the query has no
                triple patterns).
        """
        # Plan structures.
        tree_height = 0
        id_operator = 0
        operators = []
        operators_desc = {}
        plan_order = {}
        operators_vars = {}
        independent_sources = 0
        operators_sym = {}
        sources_desc = {}
        eofs_desc = {}
        subtrees = []

        def link_inputs(input_tree, merged_sources, join_vars, op_id, side):
            """Route the sources of ``input_tree`` to ``side`` of ``op_id``."""
            for src in input_tree.sources.keys():
                # A tuple must have a join variable instantiated to be
                # routed to this join.
                if len(set(merged_sources[src]) & join_vars) > 0:
                    operators_desc[op_id].update({src: side})
                sources_desc[src] = sources_desc[src] | pow(2, op_id)
                eofs_desc[src] = eofs_desc[src] | pow(2, op_id)

        def wrap_unary(op, op_id, child, height):
            """Put the unary operator ``op`` on top of ``child``."""
            operators.append(op)
            wrapped = TreePlan(op, child.vars, child.join_vars, child.sources,
                               child, None, height + 1, child.total_res)

            # Update signature of tuples.
            operators_sym.update({op_id: False})
            operators_desc[op_id] = {}
            for src in wrapped.sources:
                operators_desc[op_id].update({src: 0})
                eofs_desc[src] = eofs_desc[src] | pow(2, op_id)
                sources_desc[src] = sources_desc[src] | pow(2, op_id)
            plan_order[op_id] = height
            operators_vars[op_id] = wrapped.vars
            return wrapped

        # Create initial signatures and leaves of the plan.
        for subquery in query.where.left.triple_patterns:
            sources_desc.update({id_operator: 0})
            eofs_desc.update({id_operator: 0})
            leaf = IndependentOperator(id_operator, source, subquery, sources_desc,
                                       subquery.get_variables(), eddies, eofs_desc)
            leaf.total_res = get_metadata(leaf.server, leaf.query)
            subtrees.append(leaf)
            id_operator += 1

        # Order leaves depending on the cardinality of fragments.
        subtrees.sort(key=lambda leaf: leaf.total_res)

        # Stage 1: Generate left-linear index nested stars.
        stars = []
        id_operator = 0
        while len(subtrees) > 0:

            to_delete = []
            star_tree = subtrees.pop(0)
            star_vars = star_tree.vars
            tree_height = 0
            independent_sources = independent_sources + 1

            for subtree_j in subtrees:
                join_variables = set(star_vars) & set(subtree_j.join_vars)
                all_variables = set(star_tree.vars) | set(subtree_j.vars)

                # Only leaves sharing a variable with the star are joined.
                if len(join_variables) == 0:
                    continue

                to_delete.append(subtree_j)

                # Update signatures.
                sources = {}
                sources.update(star_tree.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables

                # The current tree is the left argument of the plan and
                # the subtree j is the right argument.
                link_inputs(star_tree, sources, join_variables, id_operator, 0)
                link_inputs(subtree_j, sources, join_variables, id_operator, 1)

                plan_order[id_operator] = tree_height
                tree_height = tree_height + 1

                # Place physical operator estimating cardinality.
                res = self.estimate_card(star_tree.total_res, subtree_j.total_res)
                if isinstance(star_tree, IndependentOperator):
                    # Place a Nested Loop join.
                    if star_tree.total_res < (subtree_j.total_res / 100.0):
                        subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query,
                                                      subtree_j.sources_desc, subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables, join_variables, sources,
                                             star_tree, subtree_j, tree_height, 0)
                        operators_sym.update({id_operator: False})

                    # Place a Symmetric Hash join.
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables, join_variables, sources,
                                             star_tree, subtree_j, tree_height, res)
                        independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                else:
                    # The small-fragment test comes first so that an empty
                    # fragment (total_res == 0) short-circuits before the
                    # ratio is computed and cannot divide by zero.
                    if (subtree_j.total_res < 100*5) or (subtree_j.total_res > 100*1000 and star_tree.total_res < 100*1000) or (star_tree.total_res / float(subtree_j.total_res) < 0.30):
                        subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query,
                                                      subtree_j.sources_desc, subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables, join_variables, sources,
                                             star_tree, subtree_j, tree_height)
                        operators_sym.update({id_operator: False})
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables,
                                             join_variables, sources, star_tree, subtree_j, tree_height, res)
                        independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                id_operator += 1

            # Add current tree to the list of stars and
            # remove from the list of subtrees to process.
            stars.append(star_tree)
            for elem in to_delete:
                subtrees.remove(elem)

        # Stage 2: Build bushy tree to combine SSGs with common variables.
        # NOTE(review): a star that shares no join variable with any other
        # star is popped and never re-added - confirm that is intended.
        while len(stars) > 1:

            subtree_i = stars.pop(0)

            for j, subtree_j in enumerate(stars):

                all_variables = set(subtree_i.vars) | set(subtree_j.vars)
                join_variables = set(subtree_i.join_vars) & set(subtree_j.join_vars)

                # Only stars sharing a join variable are combined.
                if len(join_variables) == 0:
                    continue

                # Update signatures.
                sources = {}
                sources.update(subtree_i.sources)
                sources.update(subtree_j.sources)

                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables

                link_inputs(subtree_i, sources, join_variables, id_operator, 0)
                link_inputs(subtree_j, sources, join_variables, id_operator, 1)

                joined_height = max(subtree_i.height, subtree_j.height)
                plan_order[id_operator] = joined_height
                stars.pop(j)

                # Place physical operators between stars. The estimate is
                # passed to TreePlan as its own argument (after the height),
                # matching the stage 1 calls, not folded into max().
                if isinstance(subtree_j, IndependentOperator):
                    # Estimate from the two inputs actually being joined.
                    res = self.estimate_card(subtree_i.total_res, subtree_j.total_res)

                    # This case models a satellite, therefore apply cardinality estimation.
                    if subtree_i.total_res < (subtree_j.total_res/100.0):
                        subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query,
                                                      subtree_j.sources_desc, subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(TreePlan(op, all_variables,
                                              join_variables, sources, subtree_i, subtree_j,
                                              joined_height, res))
                        # Adjust number of asynchronous leaves.
                        independent_sources = independent_sources - 1
                        operators_sym.update({id_operator: False})
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(TreePlan(op, all_variables, join_variables,
                                              sources, subtree_i, subtree_j,
                                              joined_height, res))
                        operators_sym.update({id_operator: True})
                else:
                    res = (subtree_i.total_res + subtree_j.total_res) / 2
                    op = Fjoin(id_operator, join_variables, eddies)
                    operators.append(op)
                    stars.append(TreePlan(op, all_variables, join_variables,
                                          sources, subtree_i, subtree_j,
                                          joined_height, res))
                    operators_sym.update({id_operator: True})
                id_operator += 1
                break

            if len(subtrees) % 2 == 0:
                tree_height += 1

        tree_height += 1
        tree = stars.pop()

        # Adds the projection operator to the plan.
        if query.projection:
            op = Xproject(id_operator, query.projection, eddies)
            tree = wrap_unary(op, id_operator, tree, tree_height)
            id_operator += 1
            tree_height += 1

        # Adds the distinct operator to the plan.
        if query.distinct:
            op = Xdistinct(id_operator, eddies)
            tree = wrap_unary(op, id_operator, tree, tree_height)
            id_operator += 1
            tree_height += 1

        physical_plan = Plan(query_tree=tree, tree_height=tree.height,
                             operators_desc=operators_desc, sources_desc=sources_desc,
                             plan_order=plan_order, operators_vars=operators_vars,
                             independent_sources=independent_sources,
                             operators_sym=operators_sym, operators=operators)

        return physical_plan
Пример #4
0
def create_plan(query, eddies, source):
    """Hand-build a plan that joins the first three triple patterns.

    A leaf is created for each triple pattern of
    ``query.where.left.triple_patterns``. Leaves 0 and 1 are combined by
    ``Fjoin`` operator 0, and that tree is combined with leaf 2 by ``Fjoin``
    operator 1. The routing values written to ``sources_desc`` are the
    hard-coded ones for exactly this shape.

    Args:
        query: Parsed query exposing ``where.left.triple_patterns``.
        eddies: Passed through to the leaf and join operators.
        source: Source handed to every leaf operator.

    Returns:
        The 10-tuple ``(tree, tree.height, operators_desc, sources_desc,
        plan_order, operators_vars, independent_sources, operators_desc,
        operators_sym, operators)``.

    Raises:
        IndexError: If the query has fewer than three triple patterns.
    """
    operators = []
    operators_desc = {}
    operators_vars = {}
    operators_sym = {}
    plan_order = {}
    sources_desc = {}

    # One independent leaf per triple pattern; the leaf id is its position.
    leaves = []
    for leaf_id, pattern in enumerate(query.where.left.triple_patterns):
        sources_desc[leaf_id] = 0
        leaf = IndependentOperator(leaf_id, source, pattern, sources_desc,
                                   pattern.get_variables(), eddies,
                                   sources_desc)
        leaf.total_res = get_metadata_ldf(leaf.server, leaf.query)
        leaves.append(leaf)

    # --- Operator 0: join the first two triple patterns. ---
    first = leaves[0]
    second = leaves[1]

    shared_vars = set(first.vars) & set(second.vars)
    combined_vars = set(first.vars) | set(second.vars)
    merged_sources = dict(first.sources)
    merged_sources.update(second.sources)
    estimate = (first.total_res + second.total_res) / 2

    first_key = list(first.sources)[0]
    second_key = list(second.sources)[0]
    operators_desc[0] = {first_key: 0}
    operators_desc[0][second_key] = 1
    sources_desc[first_key] = 3  # The ready is 11
    sources_desc[second_key] = 3  # The ready is 11

    join_op = Fjoin(0, shared_vars, eddies)
    operators.append(join_op)
    # Example here of an array of left plans.
    tree = TreePlan(join_op, combined_vars, shared_vars, merged_sources,
                    [first], second, 0, estimate)
    operators_vars[0] = shared_vars
    operators_sym[0] = True
    plan_order[0] = 0
    independent_sources = 2

    # --- Operator 1: add the third triple pattern to the plan. ---
    third = leaves[2]

    shared_vars = set(tree.vars) & set(third.vars)
    combined_vars = set(tree.vars) | set(third.vars)
    merged_sources = dict(tree.sources)
    merged_sources.update(third.sources)
    estimate = (tree.total_res + third.total_res) / 2

    tree_keys = list(tree.sources)
    third_key = list(third.sources)[0]
    operators_desc[1] = {}
    operators_desc[1].update({tree_keys[0]: 0, tree_keys[1]: 0})
    operators_desc[1][third_key] = 1
    sources_desc[third_key] = 2  # The ready is 10

    join_op = Fjoin(1, shared_vars, eddies)
    operators.append(join_op)
    tree = TreePlan(join_op, combined_vars, shared_vars, merged_sources,
                    tree, third, 1, estimate)
    operators_vars[1] = shared_vars
    operators_sym[1] = True
    plan_order[1] = 1
    independent_sources += 1

    return (tree, tree.height, operators_desc, sources_desc, plan_order,
            operators_vars, independent_sources, operators_desc,
            operators_sym, operators)
Пример #5
0
def createBushyTreeOLD(subtrees, sources_desc):

    tree_height = 0
    id_operator = 0
    operators_desc = {}
    plan_order = {}
    ordered_subtrees = []
    operators_vars = {}

    #print "here"

    for leaf in subtrees:
        #print "leaf", type(leaf)
        #total_res =
        leaf.total_res = getMetadataLDF(leaf.server, leaf.query)
        ordered_subtrees.append(leaf.total_res)

    # Order leaves depeding on the selectivity
    keydict = dict(zip(subtrees, ordered_subtrees))
    subtrees.sort(key=keydict.get)

    while (len(subtrees) > 1):
        #print "len(subtrees)", len(subtrees)
        subtree_i = subtrees.pop(0)

        for j in range(0, len(subtrees)):
            subtree_j = subtrees[j]

            #print "join_vars", set(subtree_i.join_vars), set(subtree_j.join_vars)

            all_variables = set(subtree_i.vars) | set(subtree_j.vars)
            join_variables = (set(subtree_i.join_vars)
                              & set(subtree_j.join_vars)) & all_variables

            if (join_variables != set([])):
                sources = {}
                sources.update(subtree_i.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables

                for source in subtree_i.sources.keys():
                    if (set(sources[source]) & join_variables != set([])):
                        operators_desc[id_operator].update({source: -1})
                        sources_desc[source] = sources_desc[source] | pow(
                            2, id_operator)

                for source in subtree_j.sources.keys():
                    if (set(sources[source]) & join_variables != set([])):
                        operators_desc[id_operator].update({source: 1})
                        sources_desc[source] = sources_desc[source] | pow(
                            2, id_operator)

                plan_order[id_operator] = tree_height

                subtrees.pop(j)

                # TODO: new, to detect the physical operators.
                if isinstance(subtree_i, IndependentOperator) and isinstance(
                        subtree_j, IndependentOperator):
                    if subtree_i.total_res < (subtree_j.total_res /
                                              100.0):  # TODO: fixed page size
                        subtree_j = DependentOperator(subtree_j.sources,
                                                      subtree_j.server,
                                                      subtree_j.query,
                                                      subtree_j.sources_desc,
                                                      subtree_j.vars)
                        subtrees.append(
                            TreePlan(Xnjoin(id_operator, join_variables),
                                     all_variables, join_variables, sources,
                                     subtree_i, subtree_j))
                        print "Xnjoin", subtree_i.sources, subtree_j.sources
                    else:
                        subtrees.append(
                            TreePlan(Fjoin(id_operator, join_variables),
                                     all_variables, join_variables, sources,
                                     subtree_i, subtree_j))
                        print "Xgjoin", subtree_i.sources, subtree_j.sources
                else:
                    subtrees.append(
                        TreePlan(Fjoin(id_operator, join_variables),
                                 all_variables, join_variables, sources,
                                 subtree_i, subtree_j))
                    print "Xgjoin", subtree_i.sources, subtree_j.sources

                #TODO: Check whether this forbids Cartesian product.
                #subtrees.append(TreePlan(Fjoin(id_operator, join_variables), all_variables, join_variables, sources, subtree_i, subtree_j))
                #subtrees.append(TreePlan(Fjoin(id_operator, join_variables), join_variables, sources, subtree_i, subtree_j))
                #subtrees.append(TreePlan(SymmetricHashJoin(id_operator, join_variables), all_variables, sources, subtree_i, subtree_j))
                #print "Join", subtree_i.vars, "and", subtree_j.vars, "join for:", join_variables
                id_operator += 1
                break

        if ((len(subtrees) % 2) == 0):
            tree_height += 1

    tree_height += 1
    return (subtrees.pop(), tree_height, operators_desc, sources_desc,
            plan_order, operators_vars)
Пример #6
0
def createBushyTree(subtrees, sources_desc, eddies, eofs_desc):

    tree_height = 0
    id_operator = 0
    operators_desc = {}
    plan_order = {}
    operators_vars = {}
    ordered_subtrees = []
    stars = []
    independent_sources = 0
    eofs_operators_desc = {}
    operators_sym = {}

    for leaf in subtrees:
        leaf.total_res = getMetadataLDF(leaf.server, leaf.query)
        ordered_subtrees.append(leaf.total_res)

    # Order leaves depending on the cardinality of fragments.
    keydict = dict(zip(subtrees, ordered_subtrees))
    subtrees.sort(key=keydict.get)

    # Generate left-linear index nested stars.
    while (len(subtrees) > 0):
        to_delete = []
        star_tree = subtrees.pop(0)
        star_vars = star_tree.vars
        tree_height = 0
        independent_sources = independent_sources + 1

        #print
        #print "root of star", star_tree.sources

        for j in range(0, len(subtrees)):
            subtree_j = subtrees[j]
            join_variables = (set(star_vars) & set(subtree_j.join_vars))
            all_variables = set(star_tree.vars) | set(subtree_j.vars)

            #print "processing", subtree_j.sources, type(subtree_j)
            #print "join_variables", join_variables

            if (len(join_variables) == 1 and addTriplePatternToCurrentStar(
                    subtree_j, star_tree, subtrees, 100)):
                #if (len(join_variables) == 1):
                #if (join_variables != set([])):

                #if (addTriplePatternToCurrentStar(subtree_j, star_tree, subtrees, 100)):
                #    pass

                star_vars = set(star_vars) | set(subtree_j.join_vars)

                to_delete.append(subtree_j)

                sources = {}
                sources.update(star_tree.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables
                eofs_operators_desc[id_operator] = {}

                for source in star_tree.sources.keys():
                    if (set(sources[source]) & join_variables != set([])):
                        operators_desc[id_operator].update({source: -1})
                        sources_desc[source] = sources_desc[source] | pow(
                            2, id_operator)

                    eofs_operators_desc[id_operator].update({source: -1})
                    eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                for source in subtree_j.sources.keys():
                    if (set(sources[source]) & join_variables != set([])):
                        operators_desc[id_operator].update({source: 1})
                        sources_desc[source] = sources_desc[source] | pow(
                            2, id_operator)

                    eofs_operators_desc[id_operator].update({source: 1})
                    eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                plan_order[id_operator] = tree_height
                operators_vars[id_operator] = join_variables

                tree_height = tree_height + 1

                res = star_tree.total_res + subtree_j.total_res
                if isinstance(star_tree, IndependentOperator):
                    #print
                    #print "first triple", star_tree, star_tree.total_res < (subtree_j.total_res /100.0)
                    #print
                    #if True:
                    if star_tree.total_res < (subtree_j.total_res / 100.0):
                        #print "-------------------------"
                        print "Xnjoin < 100", id_operator, star_tree.sources, subtree_j.sources, "cost", star_tree.total_res
                        subtree_j = DependentOperator(subtree_j.sources,
                                                      subtree_j.server,
                                                      subtree_j.query,
                                                      subtree_j.sources_desc,
                                                      subtree_j.vars,
                                                      star_tree.total_res)
                        star_tree = TreePlan(
                            Xnjoin(id_operator, join_variables,
                                   eddies), all_variables, join_variables,
                            sources, star_tree, subtree_j, tree_height, res)
                        operators_sym.update({id_operator: False})
                    else:
                        #print "HERE!", star_tree.sources, subtree_j.sources
                        print "Xgjoin", id_operator, star_tree.sources, subtree_j.sources, "cost", res
                        star_tree = TreePlan(
                            Fjoin(id_operator, join_variables,
                                  eddies), all_variables, join_variables,
                            sources, star_tree, subtree_j, tree_height, res)
                        independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                else:
                    if (star_tree.total_res <= subtree_j.total_res):
                        print "Xnjoin", id_operator, star_tree.sources, subtree_j.sources, "cost", star_tree.total_res
                        subtree_j = DependentOperator(subtree_j.sources,
                                                      subtree_j.server,
                                                      subtree_j.query,
                                                      subtree_j.sources_desc,
                                                      subtree_j.vars,
                                                      star_tree.total_res)
                        star_tree = TreePlan(
                            Xnjoin(id_operator, join_variables,
                                   eddies), all_variables, join_variables,
                            sources, star_tree, subtree_j, tree_height)
                        operators_sym.update({id_operator: False})
                    else:
                        print "Xgjoin", id_operator, star_tree.sources, subtree_j.sources, "cost", res
                        star_tree = TreePlan(
                            Fjoin(id_operator, join_variables,
                                  eddies), all_variables, join_variables,
                            sources, star_tree, subtree_j, tree_height, res)
                        independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                id_operator += 1
                #print "id_operator in loop", id_operator

        stars.append(star_tree)
        for elem in to_delete:
            subtrees.remove(elem)

    #print "independent_sources", independent_sources
    print "stars", len(stars), stars
    #id_operator += 1
    #print "id_orpera", id_operator
    #for s in stars:

    # Stage 2: Add joins
    while len(stars) > 1:
        subtree_i = stars.pop(0)

        print "HeRE", len(stars)
        for j in range(0, len(stars)):
            subtree_j = stars[j]

            all_variables = set(subtree_i.vars) | set(subtree_j.vars)
            join_variables = (set(subtree_i.join_vars)
                              & set(subtree_j.join_vars))  # & all_variables

            print "join_vars", join_variables, subtree_i.join_vars, subtree_j.join_vars
            print "joiny_vars", join_variables
            if (join_variables != set([])):

                sources = {}
                sources.update(subtree_i.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables
                eofs_operators_desc[id_operator] = {}

                for source in subtree_i.sources.keys():

                    # This models the restriction: a tuple must have the join
                    # variable instantiated to be routed to a certain join.
                    if (set(sources[source]) & join_variables != set([])):
                        operators_desc[id_operator].update({source: -1})
                        sources_desc[source] = sources_desc[source] | pow(
                            2, id_operator)

                    eofs_operators_desc[id_operator].update({source: -1})
                    eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                for source in subtree_j.sources.keys():
                    # This models the restriction: a tuple must have the join
                    # variable instantiated to be routed to a certain join.
                    if (set(sources[source]) & join_variables != set([])):
                        operators_desc[id_operator].update({source: 1})
                        sources_desc[source] = sources_desc[source] | pow(
                            2, id_operator)

                    eofs_operators_desc[id_operator].update({source: 1})
                    eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                plan_order[id_operator] = max(subtree_i.height,
                                              subtree_j.height)

                stars.pop(j)
                #

                #                # TODO: new, to detect the physical operators.
                #                if isinstance(subtree_i, IndependentOperator) and isinstance(subtree_j, IndependentOperator):
                #                    if subtree_i.total_res < subtree_j.total_res / 100.0: # TODO: fixed page size
                #                        subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query, subtree_j.sources_desc, subtree_j.vars)
                #                        subtrees.append(TreePlan(Xnjoin(id_operator, join_variables), all_variables, join_variables, sources, subtree_i, subtree_j))
                #                        print "Xnjoin",  subtree_i.sources, subtree_j.sources
                #                    else:
                #                        subtrees.append(TreePlan(Fjoin(id_operator, join_variables), all_variables, join_variables, sources, subtree_i, subtree_j))
                #                        print "Xgjoin",  subtree_i.sources, subtree_j.sources
                #               else:

                if isinstance(subtree_j, IndependentOperator):
                    print "SATELLITE!"
                    #print "first triple", star_tree, star_tree.total_res < (subtree_j.total_res /100.0)
                    #print
                    if subtree_i.total_res < (subtree_j.total_res / 100.0):
                        #print "-------------------------"
                        print "Xnjoin < 100", id_operator, subtree_i.sources, subtree_j.sources, subtree_j.query
                        res = subtree_i.total_res + subtree_j.total_res
                        subtree_j = DependentOperator(subtree_j.sources,
                                                      subtree_j.server,
                                                      subtree_j.query,
                                                      subtree_j.sources_desc,
                                                      subtree_j.vars,
                                                      star_tree.total_res)
                        stars.append(
                            TreePlan(
                                Xnjoin(id_operator, join_variables,
                                       eddies), all_variables, join_variables,
                                sources, subtree_i, subtree_j,
                                max(subtree_i.height, subtree_j.height, res)))
                        independent_sources = independent_sources - 1
                        operators_sym.update({id_operator: False})
                    else:
                        #print "HERE!", star_tree.sources, subtree_j.sources
                        print "Xgjoin", id_operator, star_tree.sources, subtree_j.sources
                        res = subtree_i.total_res + subtree_j.total_res
                        stars.append(
                            TreePlan(
                                Fjoin(id_operator, join_variables,
                                      eddies), all_variables, join_variables,
                                sources, subtree_i, subtree_j,
                                max(subtree_i.height, subtree_j.height, res)))
                        #independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                else:

                    res = subtree_i.total_res + subtree_j.total_res
                    stars.append(
                        TreePlan(Fjoin(id_operator, join_variables,
                                       eddies), all_variables, join_variables,
                                 sources, subtree_i, subtree_j,
                                 max(subtree_i.height, subtree_j.height, res)))
                    print "Fjoin", id_operator, subtree_i.sources, subtree_j.sources
                    operators_sym.update({id_operator: True})
#
#                #TODO: Check whether this forbids Cartesian product.
#                #subtrees.append(TreePlan(Xgjoin(id_operator, join_variables), join_variables, sources, subtree_i, subtree_j))
#                #subtrees.append(TreePlan(SymmetricHashJoin(id_operator, join_variables), all_variables, sources, subtree_i, subtree_j))
#print "Join", subtree_i.vars, "and", subtree_j.vars, "join for:", join_variables
                id_operator += 1
                break
#
        if ((len(subtrees) % 2) == 0):
            tree_height += 1


#
    tree_height += 1
    tree = stars.pop()
    return (tree, tree.height, operators_desc, sources_desc, plan_order,
            operators_vars, independent_sources, eofs_operators_desc,
            operators_sym)