Пример #1
0
  def pickJoinOrder(self, plan):
    """Build a join order bottom-up over subsets of the plan's relations.

    Every relation starts as a prepared TableScan.  For each subset size
    from 2 up to the number of relations, each subset ("union") is split
    into a single relation ("right") and the remainder ("left"); for every
    join operator of the input plan registered under a relation of "left"
    whose relation set is contained in the subset, a block-nested-loops
    join of the best known plans for "left" and "right" is built and kept
    if its estimated cost beats the plan currently stored for the subset.

    Side effects: resets and updates self.combsTried and
    self.plansProcessed, and records costs through self.addPlanCost.

    Returns a Plan rooted at the plan stored for the full relation set.
    Raises KeyError when no plan could be assembled for the full set.
    """
    rels = set(plan.relations())
    optPlans = {}  # Map a frozenset of relations to its best plan so far.

    self.combsTried = 0
    self.plansProcessed = 0

    # Base case: every single relation is answered by a table scan.
    for r in rels:
      scan = TableScan(r, self.db.relationSchema(r))
      scan.prepare(self.db)
      optPlans[frozenset({r})] = scan

    # Map each single relation to the (relation set, join operator) pairs
    # reported for it by relativeRelations (presumably the relations the
    # join expression touches -- confirm against that helper).
    joinMap = {}
    for (_, op) in plan.flatten():
      if isinstance(op, Join):
        relativeR = self.relativeRelations(rels, op)
        for r in relativeR:
          joinMap.setdefault(frozenset({r}), []).append((relativeR, op))

    for size in range(2, len(rels) + 1):
      for combo in self.kRelsComb(size, rels):
        union = frozenset(combo)
        for single in self.kRelsComb(1, union):
          right = frozenset(single)
          left = union - right
          # Neither key is written inside the loops below (only `union`
          # is), so this availability check is invariant for the split.
          operandsReady = left in optPlans and right in optPlans

          for t in left:
            self.combsTried += 1
            # A relation that takes part in no join has no entry; treat it
            # as "no candidates" instead of failing with KeyError.
            for (joinRels, joinOp) in joinMap.get(frozenset({t}), ()):
              if not (operandsReady and set(joinRels).issubset(union)):
                continue

              self.plansProcessed += 1
              newJoin = Join(optPlans[left], optPlans[right],
                             expr=joinOp.joinExpr, method="block-nested-loops")
              newJoin.prepare(self.db)
              newCost = newJoin.cost(estimated=True)

              # Keep the candidate if it is the first plan for this subset
              # or cheaper than the plan recorded so far.
              if union not in optPlans or newCost < self.getPlanCost(optPlans[union]):
                optPlans[union] = newJoin
                self.addPlanCost(newJoin, newCost)

    return Plan(root=optPlans[frozenset(rels)])

    '''
Пример #2
0
    def pickJoinOrder(self, plan):
        """Grow a join plan greedily, extending the best plan one step at a time.

        Prepares a TableScan per relation (stored in self.tableScans) and
        indexes the input plan's Join operators by relation (self.joinMap).
        The first step tries self.processJoin from every single-relation
        scan and keeps the cheapest result; each later step calls
        self.processJoin once on the plan kept by the previous step.

        Side effects: resets self.combsTried and self.plansProcessed and
        overwrites self.rels, self.tableScans and self.joinMap.

        Returns a Plan rooted at the last plan kept.  With exactly one
        relation that is its table scan; the root is None when there are no
        relations or when a step produced no plan with finite cost.
        """
        self.combsTried = 0
        self.plansProcessed = 0

        self.rels = set(plan.relations())

        # One prepared scan per relation, keyed by the singleton relation set.
        self.tableScans = {}
        for r in self.rels:
            ts = TableScan(r, self.db.relationSchema(r))
            ts.prepare(self.db)
            self.tableScans[frozenset({r})] = ts

        # Map each single relation to the (relation set, join operator) pairs
        # reported for it by relativeRelations.
        self.joinMap = {}
        for (_, op) in plan.flatten():
            if isinstance(op, Join):
                relativeR = self.relativeRelations(self.rels, op)
                for r in relativeR:
                    key = frozenset({r})
                    self.joinMap.setdefault(key, []).append((relativeR, op))

        n = len(self.rels)
        formerBestPlan = None
        formerRels = None

        # A single relation needs no join: its scan is already the plan.
        # Without this the loop below never runs and the root would be None.
        if n == 1:
            (formerRels, formerBestPlan), = self.tableScans.items()

        for i in range(2, n + 1):
            if i == 2:
                # First step: seed from every single relation.
                seeds = [(self.tableScans[frozenset({r})], frozenset({r}))
                         for r in self.rels]
            else:
                # Later steps: extend only the plan kept by the previous step.
                seeds = [(formerBestPlan, formerRels)]

            currBestCost = float('inf')
            currBestPlan = None
            currRels = None
            for (seedPlan, seedRels) in seeds:
                (newCost, newJoin, newRels) = self.processJoin(seedPlan, seedRels)
                if newCost < currBestCost:
                    currRels = newRels
                    currBestPlan = newJoin
                    currBestCost = newCost

            formerBestPlan = currBestPlan
            formerRels = currRels

        return Plan(root=formerBestPlan)
Пример #3
0
  def pickJoinOrder(self, plan):
    """Greedily rebuild the plan's joins, merging the cheapest pair each round.

    Starts from one sub-plan per relation (a TableScan, wrapped in a Select
    when single-table predicates exist for it).  While more than one
    sub-plan remains, every pair is tried in both operand orders with both
    the block-nested-loops and nested-loops methods; each candidate is
    prepared, sampled and costed, and the cheapest candidate of the round
    replaces its two source sub-plans.  A GroupBy root of the input plan is
    re-applied on top, and a Project is added when the resulting schema
    differs from the input plan's schema.

    Side effects: resets self.reportPlanCount and adds 4 per pair examined;
    calls self.clearSampleFiles() after each pair.

    Returns the rebuilt Plan.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Dicts mapping a tuple of relations to the expressions involving them;
    # when building opt(A,B) Join C the join expressions involving A,C and
    # B,C are used, topped by the select expressions over those pairs.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # Initial sub-plans: one per relation, with its local selections applied.
    worklist = []
    for r in relations:
      table = TableScan(r, self.db.relationSchema(r))
      table.prepare(self.db)
      if (r,) in selectTablesDict:
        selectString = self.combineSelects(selectTablesDict[(r,)])
        select = Select(table, selectString)
        select.prepare(self.db)
        worklist.append(Plan(root=select))
      else:
        worklist.append(Plan(root=table))

    while len(worklist) > 1:
      bestJoin = None
      bestCost = None
      sourcePair = None

      for pair in itertools.combinations(worklist, 2):
        op1 = pair[0].root
        op2 = pair[1].root

        selectExpr = self.createExpression(pair[0].relations(), pair[1].relations(), selectTablesDict)
        joinExpr = self.createExpression(pair[0].relations(), pair[1].relations(), joinTablesDict)

        # Candidate order: BNLJ(op1,op2), BNLJ(op2,op1), NLJ(op1,op2),
        # NLJ(op2,op1).
        candidates = [
          Join(lhs, rhs, expr=joinExpr, method=method)
          for method in ("block-nested-loops", "nested-loops")
          for (lhs, rhs) in ((op1, op2), (op2, op1))
        ]
        # "True" is how createExpression signals "no predicate" here; only
        # wrap the joins in a Select when there is a real predicate.
        if selectExpr != "True":
          candidates = [Select(j, selectExpr) for j in candidates]

        for j in candidates:
          joinplan = Plan(root=j)
          joinplan.prepare(self.db)
          joinplan.sample(100)

          # Remember the cost measured right after sampling rather than
          # re-costing the incumbent later: by then clearSampleFiles() has
          # already run for its pair (presumably discarding its samples).
          cost = joinplan.cost(True)
          if bestJoin is None or cost < bestCost:
            bestJoin = joinplan
            bestCost = cost
            sourcePair = pair

        self.reportPlanCount += 4
        self.clearSampleFiles()

      # Replace the two merged sub-plans by their join.
      worklist.remove(sourcePair[0])
      worklist.remove(sourcePair[1])
      worklist.append(bestJoin)

    # after System R algorithm
    newPlan = worklist[0]

    if isGroupBy:
      # Re-apply the original aggregation on top of the reordered joins.
      newGroupBy = GroupBy(newPlan.root,
                           groupSchema=plan.root.groupSchema,
                           aggSchema=plan.root.aggSchema,
                           groupExpr=plan.root.groupExpr,
                           aggExprs=plan.root.aggExprs,
                           groupHashFn=plan.root.groupHashFn)
      newGroupBy.prepare(self.db)
      newPlan = Plan(root=newGroupBy)

    # Project back to the original output fields if the field set changed.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
      projectDict = {f: (f, t) for f, t in outputSchema.schema()}
      project = Project(newPlan.root, projectDict)
      project.prepare(self.db)
      newPlan = Plan(root=project)

    return newPlan
Пример #4
0
    def pickJoinOrder(self, plan):
        """Greedily rebuild the plan's joins, merging the cheapest pair each round.

        Starts from one sub-plan per relation (a TableScan, wrapped in a
        Select when single-table predicates exist for it).  While more than
        one sub-plan remains, every pair is tried in both operand orders
        with both the block-nested-loops and nested-loops methods; each
        candidate is prepared, sampled and costed, and the cheapest
        candidate of the round replaces its two source sub-plans.  A GroupBy
        root of the input plan is re-applied on top, and a Project is added
        when the resulting schema differs from the input plan's schema.

        Side effects: resets self.reportPlanCount and adds 4 per pair
        examined; calls self.clearSampleFiles() after each pair.

        Returns the rebuilt Plan.
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)
        # Dicts mapping a tuple of relations to the expressions involving
        # them; when building opt(A,B) Join C the join expressions involving
        # A,C and B,C are used, topped by the select expressions over those
        # pairs.
        (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        self.reportPlanCount = 0

        # Initial sub-plans: one per relation, with local selections applied.
        worklist = []
        for r in relations:
            table = TableScan(r, self.db.relationSchema(r))
            table.prepare(self.db)
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                select = Select(table, selectString)
                select.prepare(self.db)
                worklist.append(Plan(root=select))
            else:
                worklist.append(Plan(root=table))

        while len(worklist) > 1:
            bestJoin = None
            bestCost = None
            sourcePair = None

            for pair in itertools.combinations(worklist, 2):
                op1 = pair[0].root
                op2 = pair[1].root

                selectExpr = self.createExpression(pair[0].relations(),
                                                   pair[1].relations(),
                                                   selectTablesDict)
                joinExpr = self.createExpression(pair[0].relations(),
                                                 pair[1].relations(),
                                                 joinTablesDict)

                # Candidate order: BNLJ(op1,op2), BNLJ(op2,op1),
                # NLJ(op1,op2), NLJ(op2,op1).
                candidates = [
                    Join(lhs, rhs, expr=joinExpr, method=method)
                    for method in ("block-nested-loops", "nested-loops")
                    for (lhs, rhs) in ((op1, op2), (op2, op1))
                ]
                # "True" is how createExpression signals "no predicate"
                # here; only wrap the joins in a Select for a real predicate.
                if selectExpr != "True":
                    candidates = [Select(j, selectExpr) for j in candidates]

                for j in candidates:
                    joinplan = Plan(root=j)
                    joinplan.prepare(self.db)
                    joinplan.sample(100)

                    # Remember the cost measured right after sampling rather
                    # than re-costing the incumbent later: by then
                    # clearSampleFiles() has already run for its pair
                    # (presumably discarding its samples).
                    cost = joinplan.cost(True)
                    if bestJoin is None or cost < bestCost:
                        bestJoin = joinplan
                        bestCost = cost
                        sourcePair = pair

                self.reportPlanCount += 4
                self.clearSampleFiles()

            # Replace the two merged sub-plans by their join.
            worklist.remove(sourcePair[0])
            worklist.remove(sourcePair[1])
            worklist.append(bestJoin)

        # after System R algorithm
        newPlan = worklist[0]

        if isGroupBy:
            # Re-apply the original aggregation on top of the reordered joins.
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=plan.root.groupSchema,
                                 aggSchema=plan.root.aggSchema,
                                 groupExpr=plan.root.groupExpr,
                                 aggExprs=plan.root.aggExprs,
                                 groupHashFn=plan.root.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        # Project back to the original output fields if the field set changed.
        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}
            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan