Exemplo n.º 1
0
  def pickJoinOrder(self, plan):
    """Pick a join order for ``plan`` by dynamic programming over relation subsets.

    A prepared TableScan is created for every relation of ``plan``. Then, for
    relation subsets of increasing size, the best known plan for
    ``subset - {r}`` (left input) is joined with the scan of the single
    relation ``r`` (right input) using block-nested-loops, and the candidate
    with the lowest estimated cost is kept per subset.

    Side effects: resets and updates ``self.combsTried`` and
    ``self.plansProcessed``; records candidate costs via ``self.addPlanCost``.

    Returns:
      A new Plan rooted at the best plan found for the full relation set.

    Raises:
      KeyError: if no plan covering every relation could be built.
    """
    rels = set(plan.relations())
    optPlans = {}  # frozenset of relations -> best plan found so far

    self.combsTried = 0
    self.plansProcessed = 0

    # Base case: one prepared scan per relation.
    for r in rels:
      newScan = TableScan(r, self.db.relationSchema(r))
      newScan.prepare(self.db)
      optPlans[frozenset({r})] = newScan

    # Singleton relation set -> [(relations the join touches, join operator)]
    joinMap = {}
    for (_, op) in plan.flatten():
      if isinstance(op, Join):
        relativeR = self.relativeRelations(rels, op)
        for r in relativeR:
          joinMap.setdefault(frozenset({r}), []).append((relativeR, op))

    n = len(rels)
    for i in range(2, n + 1):
      for union in [frozenset(union) for union in self.kRelsComb(i, rels)]:
        for right in [frozenset(right) for right in self.kRelsComb(1, union)]:
          left = frozenset(union - right)
          for t in left:
            self.combsTried += 1

            # A relation that appears in no join has no joinMap entry; treat
            # that as "no candidates" instead of failing with KeyError.
            for (joinRels, joinOp) in joinMap.get(frozenset({t}), ()):
              # NOTE(review): the join's relations only have to lie inside
              # `union`; nothing requires the predicate to actually connect
              # `left` with `right` -- confirm this is intended.
              if not (set(joinRels).issubset(union) and left in optPlans and right in optPlans):
                continue

              self.plansProcessed += 1
              newJoin = Join(optPlans[left], optPlans[right], expr=joinOp.joinExpr, method="block-nested-loops")
              newJoin.prepare(self.db)
              newCost = newJoin.cost(estimated=True)

              # Keep the candidate if it is the first plan for this subset or
              # cheaper than the one recorded so far.
              if union not in optPlans or newCost < self.getPlanCost(optPlans[union]):
                optPlans[union] = newJoin
                self.addPlanCost(newJoin, newCost)

    newRoot = optPlans[frozenset(rels)]
    return Plan(root=newRoot)

    '''
Exemplo n.º 2
0
    def pickJoinOrder(self, plan):
        """Pick a join order for ``plan`` greedily, one join per round.

        A prepared TableScan is built for every relation and the Join
        operators of ``plan`` are indexed by the relations they touch. In the
        first round every single relation is offered to ``self.processJoin``
        as the left input and the cheapest result is kept; each later round
        offers only the previous round's winner.

        ``self.processJoin`` is expected to return a
        ``(cost, joinPlan, relations)`` triple (that is how its result is
        unpacked here).

        Side effects: resets ``self.combsTried`` and ``self.plansProcessed``
        and (re)populates ``self.rels``, ``self.tableScans`` and
        ``self.joinMap``.

        Returns:
            A Plan rooted at the last round's best join, or at the lone table
            scan when the query has exactly one relation.
        """
        self.combsTried = 0
        self.plansProcessed = 0

        self.rels = set(plan.relations())

        # Singleton relation set -> prepared scan of that relation.
        self.tableScans = {}
        for rel in self.rels:
            scan = TableScan(rel, self.db.relationSchema(rel))
            scan.prepare(self.db)
            self.tableScans[frozenset({rel})] = scan

        # Singleton relation set -> [(relations the join touches, operator)]
        self.joinMap = {}
        for (_, op) in plan.flatten():
            if not isinstance(op, Join):
                continue
            relativeR = self.relativeRelations(self.rels, op)
            for rel in relativeR:
                self.joinMap.setdefault(frozenset({rel}), []).append(
                    (relativeR, op))

        n = len(self.rels)
        if n == 1:
            # The join rounds below never run for a single relation; return
            # the bare scan instead of a plan without a root.
            (onlyScan, ) = self.tableScans.values()
            return Plan(root=onlyScan)

        bestPlan = None
        bestRels = None

        for i in range(2, n + 1):
            if i == 2:
                # First round: any single relation may start the join chain.
                seeds = [frozenset({rel}) for rel in self.rels]
                candidates = [(self.tableScans[seed], seed) for seed in seeds]
            else:
                # Later rounds: extend only the previous round's winner.
                candidates = [(bestPlan, bestRels)]

            roundCost = float('inf')
            roundPlan = None
            roundRels = None
            for (leftPlan, leftRels) in candidates:
                (newCost, newJoin,
                 newRels) = self.processJoin(leftPlan, leftRels)

                if newCost < roundCost:
                    roundCost = newCost
                    roundPlan = newJoin
                    roundRels = newRels

            bestPlan = roundPlan
            bestRels = roundRels

        return Plan(root=bestPlan)
Exemplo n.º 3
0
 def fromTable(self, relId):
     """Start a plan builder from a scan of relation ``relId``.

     Looks up the relation's schema in ``self.database`` and wraps a
     TableScan over it in a PlanBuilder. Returns None when ``self.database``
     is falsy.
     """
     if not self.database:
         return None

     tableSchema = self.database.relationSchema(relId)
     scanOp = TableScan(relId, tableSchema)
     return PlanBuilder(operator=scanOp)
Exemplo n.º 4
0
  def pickJoinOrder(self, plan):
    """Pick a join order for ``plan`` by repeatedly merging the cheapest pair.

    Every relation starts as its own prepared plan (a TableScan, wrapped in a
    Select when ``selectTablesDict`` holds predicates for that relation
    alone). While more than one plan remains, every pair of remaining plans
    is tried in both input orders with both "block-nested-loops" and
    "nested-loops"; each candidate is prepared and sampled, and the candidate
    with the lowest estimated cost replaces its two source plans.

    Afterwards the original GroupBy root is re-created on top of the result
    (when ``plan`` had one) and a Project is added when the resulting schema
    differs from the schema of ``plan``.

    Side effects: resets and updates ``self.reportPlanCount`` and calls
    ``self.clearSampleFiles()`` after each pair.

    Returns:
      The rewritten Plan.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Dicts mapping a tuple of relations to the expressions involving them:
    # join predicates and selection predicates respectively.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # One prepared leaf plan per relation, with its single-table selects applied.
    worklist = []
    for r in relations:
      leaf = TableScan(r, self.db.relationSchema(r))
      leaf.prepare(self.db)
      if (r,) in selectTablesDict:
        selectString = self.combineSelects(selectTablesDict[(r,)])
        leaf = Select(leaf, selectString)
        leaf.prepare(self.db)
      worklist.append(Plan(root=leaf))

    while len(worklist) > 1:
      bestJoin = None
      bestCost = None
      sourcePair = None

      for pair in itertools.combinations(worklist, 2):
        op1 = pair[0].root
        op2 = pair[1].root

        selectExpr = self.createExpression(pair[0].relations(), pair[1].relations(), selectTablesDict)
        joinExpr = self.createExpression(pair[0].relations(), pair[1].relations(), joinTablesDict)

        # Both input orders for both join methods, in the order they are costed.
        candidates = [
          Join(op1, op2, expr=joinExpr, method="block-nested-loops"),
          Join(op2, op1, expr=joinExpr, method="block-nested-loops"),
          Join(op1, op2, expr=joinExpr, method="nested-loops"),
          Join(op2, op1, expr=joinExpr, method="nested-loops"),
        ]
        # "True" means there is no extra selection to apply over this pair.
        if selectExpr != "True":
          candidates = [Select(joinOp, selectExpr) for joinOp in candidates]

        for candidate in candidates:
          joinplan = Plan(root=candidate)
          joinplan.prepare(self.db)
          joinplan.sample(100)

          # Remember the cost measured right after sampling so the incumbent
          # does not have to be re-costed on every comparison.
          cost = joinplan.cost(True)
          if bestJoin is None or cost < bestCost:
            bestJoin = joinplan
            bestCost = cost
            sourcePair = pair

        self.reportPlanCount += len(candidates)
        self.clearSampleFiles()

      # Replace the two merged plans with their join.
      worklist.remove(sourcePair[0])
      worklist.remove(sourcePair[1])
      worklist.append(bestJoin)

    newPlan = worklist[0]

    if isGroupBy:
      # Re-create the original GroupBy root over the reordered joins.
      newGroupBy = GroupBy(newPlan.root,
                           groupSchema=plan.root.groupSchema,
                           aggSchema=plan.root.aggSchema,
                           groupExpr=plan.root.groupExpr,
                           aggExprs=plan.root.aggExprs,
                           groupHashFn=plan.root.groupHashFn)
      newGroupBy.prepare(self.db)
      newPlan = Plan(root=newGroupBy)

    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
      # Project back onto the fields of the original plan's schema.
      projectDict = {f: (f, t) for f, t in outputSchema.schema()}
      project = Project(newPlan.root, projectDict)
      project.prepare(self.db)
      newPlan = Plan(root=project)

    return newPlan
Exemplo n.º 5
0
  def hashJoin(self):
    """Partitioned hash join of ``self.lhsPlan`` with ``self.rhsPlan``.

    Visible steps:
      1. If no join expression is set, default it to equality between the
         first field of ``self.lhsKeySchema`` and of ``self.rhsKeySchema``.
      2. Feed every page of both inputs to ``buildPartitionL`` /
         ``buildPartitionR``, which receive the local ``tmpFilesL`` /
         ``tmpFilesR`` dicts.
      3. For every key present in both dicts, scan the paired temporary
         relations, group left tuples by join key in ``hasher``, probe it
         with right tuples and insert each packed output tuple into the
         relation file of ``self.relationId()``.

    NOTE(review): this snippet is truncated. The lines that define
    ``pageBlock`` and ``hasher`` (and the loop enclosing the probe phase) are
    missing between the ``self.rhsPlan`` assignment and the first
    ``for lPageId in pageBlock`` loop, so the indentation below does not
    parse as-is.
    """
    if self.joinExpr == None:
      # Default predicate: first lhs key field == first rhs key field.
      self.joinExpr = self.lhsKeySchema.fields[0] + "==" + self.rhsKeySchema.fields[0];
    
    # NOTE(review): these attributes are set to empty lists here, while the
    # partition builders below are handed the local dicts of the same name.
    self.tmpFilesL = list();
    self.tmpFilesR = list();
    bufPool        = self.storage.bufferPool;
    
    self.logger("start...");
    self.cleanBufferPool(bufPool);
    
    # Partition key -> 2-tuple whose second element is used below as the id
    # of a temporary relation (presumably filled in by buildPartitionL/R).
    tmpFilesL = dict();
    tmpFilesR = dict();
    
    self.logger("building L partition");
    for (PageId, Page) in iter(self.lhsPlan):
      self.buildPartitionL(PageId, Page, tmpFilesL);
    
    self.logger("building R partition");
    for (PageId, Page) in iter(self.rhsPlan):
      self.buildPartitionR(PageId, Page, tmpFilesR);
      
    # Schema prep
    lSchema = self.inputSchemas()[0];
    rSchema = self.inputSchemas()[1];
      
    for relIdLKey in tmpFilesL.keys():
       
      # Only keys present on both sides are joined; others are skipped.
      if relIdLKey in tmpFilesR:
          (_, relIdTmpR) = tmpFilesR[ relIdLKey ];
          (_, relIdTmpL) = tmpFilesL[ relIdLKey ];
      else:
          continue;
      
      self.cleanBufferPool( bufPool );
      
      # Scan the two temporary relations of this partition.
      lhsPlan = TableScan(relIdTmpL, self.inputSchemas()[0]);
      rhsPlan = TableScan(relIdTmpR, self.inputSchemas()[1]);
        
      lhsPlan.storage = self.storage;
      rhsPlan.storage = self.storage;
      
      # The operator's own inputs are replaced by the partition scans.
      self.lhsPlan = lhsPlan;
      self.rhsPlan = rhsPlan;
      
      # NOTE(review): source is missing here -- `pageBlock` and `hasher` are
      # used below but never defined in the visible text.
         # Build phase: group the packed left tuples of this block by join key.
         for lPageId in pageBlock:
            lhsPage = bufPool.getPage(lPageId);
            for ltuple in iter( lhsPage ):
              tupleObj = lSchema.unpack( ltuple );
              key      = lSchema.project( tupleObj, self.lhsKeySchema )[0];
              if key in hasher:
                hasher[ key ].append( ltuple );
              else:
                hasher[ key ] = [ ltuple ];

          # Probe phase: iterate all right tuples and pack the joined output.
          for (rPageId, rhsPage) in iter(rhsPlan):
            print( rPageId.pageIndex );
            for rTuple in iter( rhsPage ):
              tupleObj = rSchema.unpack( rTuple );
              print( tupleObj );
              key      = rSchema.project( tupleObj, self.rhsKeySchema )[0];
              if key in hasher:
                for lTuple in hasher[ key ]:
                  # Merge the fields of both tuples, then instantiate them in
                  # the field order of the join schema.
                  joinIns = self.loadSchema( lSchema, lTuple )
                  joinIns.update( self.loadSchema( rSchema, rTuple ) );
                  outputTuple = self.joinSchema.instantiate(*[joinIns[f] for f in self.joinSchema.fields]);
                  print( outputTuple );
                  outputTupleP = self.joinSchema.pack(outputTuple);
                  self.storage.fileMgr.relationFile(self.relationId())[1].insertTuple(outputTupleP);
                  
          # Release the left pages of this block before moving on.
          for lPageId in pageBlock:
            bufPool.unpinPage(lPageId);
            bufPool.discardPage(lPageId);
          
          self.cleanBufferPool(bufPool);
          del hasher;
Exemplo n.º 6
0
    def pickJoinOrder(self, plan):
        """Pick a join order for ``plan`` by repeatedly merging the cheapest pair.

        Each relation becomes a prepared leaf plan: a TableScan, wrapped in a
        Select when ``selectTablesDict`` has predicates for that relation
        alone. While several plans remain, every pair is joined in both input
        orders with both "block-nested-loops" and "nested-loops"; each
        candidate is prepared and sampled, and the one with the lowest
        estimated cost replaces its two source plans in the worklist.

        Finally the GroupBy root of ``plan`` (if it had one) is re-created on
        top, and a Project is added when the resulting schema differs from
        the schema of ``plan``.

        Side effects: resets and updates ``self.reportPlanCount`` and calls
        ``self.clearSampleFiles()`` after each pair.

        Returns:
            The rewritten Plan.
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)
        # Dicts mapping a tuple of relations to the expressions involving
        # them: join predicates and selection predicates respectively.
        (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        self.reportPlanCount = 0

        worklist = []
        for r in relations:
            root = TableScan(r, self.db.relationSchema(r))
            root.prepare(self.db)
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                root = Select(root, selectString)
                root.prepare(self.db)
            worklist.append(Plan(root=root))

        joinMethods = ("block-nested-loops", "nested-loops")

        while len(worklist) > 1:
            bestJoin = None
            bestCost = None
            sourcePair = None

            for pair in itertools.combinations(worklist, 2):
                op1 = pair[0].root
                op2 = pair[1].root

                selectExpr = self.createExpression(pair[0].relations(),
                                                   pair[1].relations(),
                                                   selectTablesDict)
                joinExpr = self.createExpression(pair[0].relations(),
                                                 pair[1].relations(),
                                                 joinTablesDict)

                # Both input orders per method: BNLJ first, then NLJ.
                joinList = [
                    Join(lhs, rhs, expr=joinExpr, method=method)
                    for method in joinMethods
                    for (lhs, rhs) in ((op1, op2), (op2, op1))
                ]
                # "True" means no extra selection applies to this pair.
                if selectExpr != "True":
                    joinList = [Select(j, selectExpr) for j in joinList]

                for j in joinList:
                    joinplan = Plan(root=j)
                    joinplan.prepare(self.db)
                    joinplan.sample(100)

                    # Keep the cost measured right after sampling so the
                    # incumbent is not re-costed on every comparison.
                    cost = joinplan.cost(True)
                    if bestJoin is None or cost < bestCost:
                        bestJoin = joinplan
                        bestCost = cost
                        sourcePair = pair

                self.reportPlanCount += len(joinList)
                self.clearSampleFiles()

            # Replace the two merged plans with their join.
            worklist.remove(sourcePair[0])
            worklist.remove(sourcePair[1])
            worklist.append(bestJoin)

        newPlan = worklist[0]

        if isGroupBy:
            # Re-create the original GroupBy root over the reordered joins.
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=plan.root.groupSchema,
                                 aggSchema=plan.root.aggSchema,
                                 groupExpr=plan.root.groupExpr,
                                 aggExprs=plan.root.aggExprs,
                                 groupHashFn=plan.root.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            # Project back onto the fields of the original plan's schema.
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}
            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan
Exemplo n.º 7
0
    def pickJoinOrder(self, plan):
        """Pick a join order for ``plan`` by dynamic programming over subsets.

        ``optDict`` maps a sorted tuple of relations to the best plan found
        for exactly that set. Single relations are seeded with a TableScan
        (wrapped in a Select when ``selectTablesDict`` has predicates for
        that relation alone). For each larger combination, every split
        returned by ``self.getCombos`` is joined with its complement using
        both "block-nested-loops" and "nested-loops"; the candidates are
        prepared and sampled and the one with the lowest estimated cost is
        stored.

        Finally the GroupBy root of ``plan`` (if it had one) is re-created on
        top, and a Project is added when the resulting schema differs from
        the schema of ``plan``.

        Side effects: resets and updates ``self.reportPlanCount`` and calls
        ``self.clearSampleFiles()`` after each split.

        Returns:
            The rewritten Plan.

        Raises:
            KeyError: if ``optDict`` has no entry for a needed relation set
                (for example when ``plan`` has no relations).
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)

        # Dicts mapping a tuple of relations to the expressions involving
        # them: join predicates and selection predicates respectively.
        (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        optDict = {}
        self.reportPlanCount = 0

        # Base case: one plan per single relation.
        for r in relations:
            table = TableScan(r, self.db.relationSchema(r))
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                optDict[(r, )] = Plan(root=Select(table, selectString))
            else:
                optDict[(r, )] = Plan(root=table)
            self.reportPlanCount += 1

        for npass in range(2, len(relations) + 1):
            for c in itertools.combinations(relations, npass):
                fullList = sorted(c)
                bestJoin = None
                bestCost = None

                for subcombo in self.getCombos(fullList):
                    complement = self.getComplement(fullList, subcombo)

                    leftOps = optDict[tuple(complement)].root
                    rightOps = optDict[tuple(subcombo)].root

                    selectExpr = self.createExpression(
                        complement, subcombo, selectTablesDict)
                    joinExpr = self.createExpression(
                        complement, subcombo, joinTablesDict)
                    # "True" means no extra selection applies to this split,
                    # so the Select wrapper is only built when needed.
                    needsSelect = selectExpr != "True"

                    joinBnljOp = Join(leftOps,
                                      rightOps,
                                      expr=joinExpr,
                                      method="block-nested-loops")
                    joinBnlj = Plan(root=Select(joinBnljOp, selectExpr)
                                    if needsSelect else joinBnljOp)
                    joinBnlj.prepare(self.db)
                    joinBnlj.sample(100)

                    joinNljOp = Join(leftOps,
                                     rightOps,
                                     expr=joinExpr,
                                     method="nested-loops")
                    joinNlj = Plan(root=Select(joinNljOp, selectExpr)
                                   if needsSelect else joinNljOp)
                    joinNlj.prepare(self.db)
                    joinNlj.sample(100)

                    # Cost both once, after both have been sampled.
                    bnljCost = joinBnlj.cost(True)
                    nljCost = joinNlj.cost(True)

                    # Block-nested-loops wins only when strictly cheaper.
                    if bnljCost < nljCost:
                        candidate, candidateCost = joinBnlj, bnljCost
                    else:
                        candidate, candidateCost = joinNlj, nljCost

                    if bestJoin is None or candidateCost < bestCost:
                        bestJoin = candidate
                        bestCost = candidateCost

                    self.reportPlanCount += 2
                    self.clearSampleFiles()

                optDict[tuple(fullList)] = bestJoin

        newPlan = optDict[tuple(sorted(relations))]

        if isGroupBy:
            # Re-create the original GroupBy root over the reordered joins.
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=plan.root.groupSchema,
                                 aggSchema=plan.root.aggSchema,
                                 groupExpr=plan.root.groupExpr,
                                 aggExprs=plan.root.aggExprs,
                                 groupHashFn=plan.root.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            # Project back onto the fields of the original plan's schema.
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}
            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan