Exemplo n.º 1
0
    def singlePushDown(self, operator):
        """Push Select and Project operators below joins and unions.

        The plan rooted at ``operator`` is rewritten in place, bottom-up, and
        the (possibly different) root of the rewritten subtree is returned.

        * A Select above a join is split into its CNF conjuncts. A conjunct
          whose attributes all belong to one join input is pushed onto that
          input; every other conjunct stays in a Select above the join.
        * A Select above a UnionAll is copied onto both union inputs.
        * A Project above a Select is moved below it when every attribute
          used by the selection is one of the projection's output keys.
        * A Project above a join is split between the two inputs only when
          every output key is a field of one input and both inputs receive
          a projection; otherwise it is left where it is.
        * A Project above a UnionAll is copied onto both union inputs.
        * UnionAll, joins and GroupBy only have their inputs rewritten; any
          other operator is returned untouched.
        """

        def conjoin(exprs):
            # Parenthesise each conjunct: `and` binds tighter than `or`, so
            # joining "a or b" with "c" unparenthesised would change meaning.
            return ' and '.join('(' + expr + ')' for expr in exprs)

        opType = operator.operatorType()

        if opType == 'Select':
            selectOperator = operator
            # Rewrite the input first so the selection is pushed through an
            # already-optimized subtree.
            selectOperator.subPlan = self.singlePushDown(
                selectOperator.subPlan)

            subPlan = selectOperator.subPlan
            subplanType = subPlan.operatorType()

            if subplanType.endswith('Join'):
                lhsFields = set(subPlan.lhsPlan.schema().fields)
                rhsFields = set(subPlan.rhsPlan.schema().fields)

                sendToLeft = []
                sendToRight = []
                kept = []

                selectExprs = ExpressionInfo(
                    selectOperator.selectExpr).decomposeCNF()

                for selectExpr in selectExprs:
                    attributes = set(
                        ExpressionInfo(selectExpr).getAttributes())
                    # Route each conjunct exactly once. A conjunct may only
                    # move below the join if *all* of its attributes come
                    # from the same input; attribute-free conjuncts stay put.
                    if attributes and attributes <= lhsFields:
                        sendToLeft.append(selectExpr)
                    elif attributes and attributes <= rhsFields:
                        sendToRight.append(selectExpr)
                    else:
                        kept.append(selectExpr)

                if sendToLeft:
                    subPlan.lhsPlan = self.singlePushDown(
                        Select(subPlan.lhsPlan, conjoin(sendToLeft)))
                if sendToRight:
                    subPlan.rhsPlan = self.singlePushDown(
                        Select(subPlan.rhsPlan, conjoin(sendToRight)))
                if kept:
                    # Predicates spanning both inputs remain above the join.
                    return Select(subPlan, conjoin(kept))
                return subPlan

            if subplanType == 'UnionAll':
                subPlan.lhsPlan = self.singlePushDown(
                    Select(subPlan.lhsPlan, selectOperator.selectExpr))
                subPlan.rhsPlan = self.singlePushDown(
                    Select(subPlan.rhsPlan, selectOperator.selectExpr))
                return subPlan

            # We only push down selects through joins and unions.
            return selectOperator

        elif opType == 'Project':
            projectOperator = operator
            projectOperator.subPlan = self.singlePushDown(
                projectOperator.subPlan)

            subPlan = projectOperator.subPlan
            subplanType = subPlan.operatorType()

            if subplanType == 'Select':
                selectCriteria = ExpressionInfo(
                    subPlan.selectExpr).getAttributes()

                # The selection must still find all of its attributes in the
                # projection's output once the projection runs first.
                for selection in selectCriteria:
                    if selection not in projectOperator.projectExprs:
                        return projectOperator

                # Swap the two operators: the select becomes the root and the
                # projection keeps sinking through the select's old input.
                projectOperator.subPlan = subPlan.subPlan
                subPlan.subPlan = self.singlePushDown(projectOperator)
                return subPlan

            if subplanType.endswith('Join'):
                lhsFields = subPlan.lhsPlan.schema().fields
                rhsFields = subPlan.rhsPlan.schema().fields

                sendToLeft = {}
                sendToRight = {}

                projectExprs = projectOperator.projectExprs

                for key in projectExprs:
                    if key in lhsFields:
                        sendToLeft[key] = projectExprs[key]
                    elif key in rhsFields:
                        sendToRight[key] = projectExprs[key]
                    else:
                        # This output cannot be attributed to one input.
                        # Pushing the other outputs down could drop fields it
                        # needs, so leave the projection above the join.
                        return projectOperator

                if not (sendToLeft and sendToRight):
                    # Removing the projection while one input is unprojected
                    # would leak that input's whole schema into the output.
                    return projectOperator

                # NOTE(review): the pushed projections keep only the
                # requested output fields; confirm the join predicate does
                # not rely on fields that are projected away here.
                subPlan.lhsPlan = self.singlePushDown(
                    Project(subPlan.lhsPlan, sendToLeft))
                subPlan.rhsPlan = self.singlePushDown(
                    Project(subPlan.rhsPlan, sendToRight))
                return subPlan

            if subplanType == 'UnionAll':
                subPlan.lhsPlan = self.singlePushDown(
                    Project(subPlan.lhsPlan, projectOperator.projectExprs))
                subPlan.rhsPlan = self.singlePushDown(
                    Project(subPlan.rhsPlan, projectOperator.projectExprs))
                return subPlan

            return projectOperator

        elif opType == 'UnionAll' or opType.endswith('Join'):
            operator.lhsPlan = self.singlePushDown(operator.lhsPlan)
            operator.rhsPlan = self.singlePushDown(operator.rhsPlan)
            return operator

        elif opType == 'GroupBy':
            operator.subPlan = self.singlePushDown(operator.subPlan)
            return operator

        else:
            return operator
Exemplo n.º 2
0
    def pushdownProject(self, op):
        """Push the projection ``op`` as far below its input as possible.

        The input of ``op`` is optimized first through
        ``self.pushdownOperator``; ``op`` is then moved, copied, split or
        left in place depending on the type of that input. The root of the
        rewritten subtree is returned.

        Raises:
            NotImplementedError: if the input operator type is not handled.
        """
        # First push down the operators below this projection.
        op.subPlan = self.pushdownOperator(op.subPlan)
        subPlan = op.subPlan
        subplanType = subPlan.operatorType()

        if subplanType in ("GroupBy", "TableScan"):
            return op

        if subplanType == "Project":
            # The lower projection is redundant when every output of op maps
            # to a plain attribute in it. A key missing from the lower
            # projection counts as "not redundant" instead of raising.
            removable = all(
                key in subPlan.projectExprs
                and subPlan.projectExprs[key][0].isAttribute()
                for key in op.projectExprs)
            if not removable:
                # Nothing changed below op; re-running the push-down on the
                # very same tree would never terminate.
                return op
            op.subPlan = subPlan.subPlan
            return self.pushdownOperator(op)

        if subplanType == "Select":
            # Move op below the select if op provides every attribute the
            # select expression needs.
            selectAttrs = ExpressionInfo(subPlan.selectExpr).getAttributes()
            outputAttrs = set(op.projectExprs.keys())
            if not selectAttrs.issubset(outputAttrs):
                return op
            op.subPlan = subPlan.subPlan
            subPlan.subPlan = self.pushdownOperator(op)
            return subPlan

        if subplanType == "Sort":
            # TODO: projections are not pushed through sorts yet.
            return op

        if subplanType == "UnionAll":
            # Place a copy of op on each side of the union.
            subPlan.lhsPlan = self.pushdownOperator(
                Project(subPlan.lhsPlan, op.projectExprs))
            subPlan.rhsPlan = self.pushdownOperator(
                Project(subPlan.rhsPlan, op.projectExprs))
            return subPlan

        if "Join" in subplanType:
            # Partition the projections among the join inputs.
            lhsAttrs = set(subPlan.lhsPlan.schema().fields)
            rhsAttrs = set(subPlan.rhsPlan.schema().fields)
            lhsProjectExprs = {}
            rhsProjectExprs = {}

            for attr, exprInfo in op.projectExprs.items():
                requiredAttrs = ExpressionInfo(exprInfo[0]).getAttributes()
                if requiredAttrs.issubset(lhsAttrs):
                    lhsProjectExprs[attr] = exprInfo
                elif requiredAttrs.issubset(rhsAttrs):
                    rhsProjectExprs[attr] = exprInfo
                else:
                    # This expression needs attributes from both inputs.
                    # Pushing the others down could drop what it reads, so
                    # keep op above the join untouched.
                    return op

            if not (lhsProjectExprs and rhsProjectExprs):
                # op can only be removed when both inputs get a projection;
                # otherwise the unprojected side would leak its full schema.
                return op

            # NOTE(review): the pushed projections keep only the requested
            # outputs; confirm the join predicate does not need attributes
            # that are projected away here.
            subPlan.lhsPlan = self.pushdownOperator(
                Project(subPlan.lhsPlan, lhsProjectExprs))
            subPlan.rhsPlan = self.pushdownOperator(
                Project(subPlan.rhsPlan, rhsProjectExprs))
            return subPlan

        # Report the operator that was actually unmatched: op's input.
        raise NotImplementedError(
            "Unmatched operatorType in pushdownProject(): " + subplanType)
Exemplo n.º 3
0
 def select(self, projectExprs):
     """Wrap the builder's current operator in a projection.

     Returns a new PlanBuilder whose operator is a Project of
     ``self.operator`` over ``projectExprs``.

     Raises:
         ValueError: if this builder has no operator to project from.
     """
     if not self.operator:
         raise ValueError("Invalid select list")

     projection = Project(self.operator, projectExprs)
     return PlanBuilder(operator=projection)
Exemplo n.º 4
0
  def projectPushDown(self, plan):
    """Walk ``plan`` from the root and sink projections towards the leaves.

    Project operators met on the way down are not emitted; their
    expressions are accumulated and re-materialised as a new Project:
      * above a Select whose attributes are not all covered by the
        accumulated projection (as reported by ``self.getProjectAttrs``),
      * above a Join, a GroupBy or any other operator with inputs (the
        projection is not decomposed across their inputs),
      * above a leaf operator.

    Returns a new Plan rooted at the first operator emitted.

    NOTE(review): ``result`` records (operator, parent) pairs, but this
    method only reads the first operator; nothing here re-links existing
    operators to a newly created Project. Confirm whether that re-linking
    is expected to happen elsewhere.
    """
    root = plan.root
    result = []

    # Each entry is (current operator, parent, accumulated projection).
    queue = deque([(root, None, None)])

    while queue:
      (curr, parent, accuProject) = queue.popleft()
      children = curr.inputs()

      if not children:
        # Leaf operator: any pending projection lands directly above it.
        if accuProject:
          newProject = Project(curr, accuProject)
          result.append((newProject, parent))
          result.append((curr, newProject))
        else:
          result.append((curr, parent))
        continue

      if isinstance(curr, Project):
        # Fold this projection into the accumulated one. Work on a copy so
        # the projectExprs dictionary of an existing operator is not mutated.
        if not accuProject:
          accuProject = dict(curr.projectExprs)
        else:
          accuProject = dict(accuProject)
          accuProject.update(curr.projectExprs)

        queue.extendleft([(children[0], curr, accuProject)])

      elif isinstance(curr, Select):
        if accuProject:
          selectAttrs = ExpressionInfo(curr.selectExpr).getAttributes()
          projectAttrs = self.getProjectAttrs(accuProject)
          if set(selectAttrs).issubset(set(projectAttrs)):
            # The projection keeps everything the select reads, so it may
            # continue below the select.
            result.append((curr, parent))
            queue.extendleft([(children[0], curr, accuProject)])
          else:
            # The select reads attributes the projection would drop:
            # the projection has to stop here.
            newProject = Project(curr, accuProject)
            result.append((newProject, parent))
            result.append((curr, newProject))
            queue.extendleft([(children[0], curr, None)])
        else:
          result.append((curr, parent))
          queue.extendleft([(children[0], curr, accuProject)])

      elif isinstance(curr, GroupBy):
        # Projections are not pushed through a group-by.
        if accuProject:
          newProject = Project(curr, accuProject)
          result.append((newProject, parent))
          result.append((curr, newProject))
        else:
          result.append((curr, parent))

        queue.extendleft([(children[0], curr, None)])

      else:
        # Join, or any other operator with inputs (read through
        # lhsPlan/rhsPlan): the projection is kept whole above the operator
        # rather than decomposed across its two inputs.
        if accuProject:
          newProject = Project(curr, accuProject)
          result.append((newProject, parent))
          result.append((curr, newProject))
        else:
          result.append((curr, parent))
        queue.extendleft([(curr.lhsPlan, curr, None)])
        queue.extendleft([(curr.rhsPlan, curr, None)])

    newRoot = result[0][0]
    return Plan(root=newRoot)
Exemplo n.º 5
0
    def pushdownProjections(self, operator):
        """Push projections in the plan rooted at ``operator`` downwards.

        TableScan is returned as is. Select, GroupBy, UnionAll and join
        operators only have their inputs rewritten. Any other operator is
        treated as a projection and handled according to its input:

        * TableScan / GroupBy: the projection stays where it is.
        * Select: the projection is moved below the select when every
          attribute read by the select is passed through unchanged.
        * Project: the two projections are merged into one.
        * UnionAll: the projection is copied onto both union inputs.
        * otherwise (join): narrowing projections keeping only the fields
          needed by this projection and by the join condition are inserted
          on the join inputs.

        Returns the root of the rewritten subtree.

        Raises:
            NotImplementedError: for a join whose method is not
                "nested-loops", "block-nested-loops" or "hash".
        """
        import re  # local import: only the projection-merging step needs it

        opType = operator.operatorType()

        if opType == "TableScan":
            return operator

        if opType in ("Select", "GroupBy"):
            operator.subPlan = self.pushdownProjections(operator.subPlan)
            return operator

        if opType == "UnionAll" or opType[-4:] == "Join":
            newlPlan = self.pushdownProjections(operator.lhsPlan)
            newrPlan = self.pushdownProjections(operator.rhsPlan)
            operator.lhsPlan = newlPlan
            operator.rhsPlan = newrPlan
            return operator

        # From here on the operator is handled as a projection.
        subPlan = operator.subPlan
        subType = subPlan.operatorType()

        if subType == "TableScan":
            return operator

        if subType == "Select":
            selectAttrs = ExpressionInfo(subPlan.selectExpr).getAttributes()
            # Once swapped, the select runs on the projection's output, so
            # each attribute it reads must survive the projection unchanged.
            passThrough = all(
                name in operator.projectExprs
                and operator.projectExprs[name][0].strip() == name
                for name in selectAttrs)
            if not passThrough:
                operator.subPlan = self.pushdownProjections(subPlan)
                return operator

            operator.subPlan = subPlan.subPlan
            subPlan.subPlan = operator
            return self.pushdownProjections(subPlan)

        if subType == "GroupBy":
            subPlan.subPlan = self.pushdownProjections(subPlan.subPlan)
            return operator

        if subType == "Project":
            # Combine the two projections. The upper projection is assumed to
            # be written against the output schema of the lower one.
            subRepExp = {
                k: v1
                for (k, (v1, _)) in subPlan.projectExprs.items()
            }

            if subRepExp:
                # Match whole names only and substitute in a single pass, so
                # a name is never rewritten inside a longer identifier and a
                # substituted expression is never rewritten a second time.
                names = sorted(subRepExp, key=len, reverse=True)
                namePattern = re.compile(
                    r"(?<![\w.])(?:" +
                    "|".join(re.escape(name) for name in names) +
                    r")(?!\w)")

                def substitute(match):
                    value = subRepExp[match.group(0)]
                    # Parenthesise compound expressions to keep precedence.
                    return value if value.isidentifier() else "(" + value + ")"
            else:
                namePattern = None

            newExpr = dict()
            for (k, (v1, v2)) in operator.projectExprs.items():
                newV1 = namePattern.sub(substitute, v1) if namePattern else v1
                newExpr[k] = (newV1, v2)

            # Replace the two projections by the merged one.
            operator.projectExprs = newExpr
            operator.outputSchema = DBSchema(
                operator.relationId(),
                [(k, v[1]) for (k, v) in operator.projectExprs.items()])
            operator.subPlan = subPlan.subPlan
            return self.pushdownProjections(operator)

        if subType == "UnionAll":
            # A projection over a union is simply applied to both inputs.
            subPlan.lhsPlan = Project(subPlan.lhsPlan, operator.projectExprs)
            subPlan.rhsPlan = Project(subPlan.rhsPlan, operator.projectExprs)
            subPlan.validateSchema()
            return self.pushdownProjections(subPlan)

        # Join case. First collect the input fields the projection reads
        # directly, restricted to the projection's input schema.
        fields = set()
        inputNames = operator.inputSchemas()[0].fields
        lhsPlanNames = subPlan.lhsPlan.schema().fields
        rhsPlanNames = subPlan.rhsPlan.schema().fields

        for (v1, _) in operator.projectExprs.values():
            attributes = ExpressionInfo(v1).getAttributes()
            # Build a filtered collection instead of removing from
            # `attributes` while iterating over it.
            fields.update(name for name in attributes if name in inputNames)

        # Then add the fields the join condition itself needs.
        if subPlan.joinMethod in ("nested-loops", "block-nested-loops"):
            fields.update(ExpressionInfo(subPlan.joinExpr).getAttributes())
        elif subPlan.joinMethod == "hash":
            fields.update(subPlan.lhsKeySchema.fields +
                          subPlan.rhsKeySchema.fields)
        else:
            # Indexed joins are not supported.
            raise NotImplementedError

        # Build the narrowing projections for the left and right inputs.
        lprojectExpr = dict()
        rprojectExpr = dict()
        for (f, v) in subPlan.lhsPlan.schema().schema():
            if f in fields:
                lprojectExpr[f] = (f, v)
        for (f, v) in subPlan.rhsPlan.schema().schema():
            if f in fields:
                rprojectExpr[f] = (f, v)

        # Only insert a projection where it actually drops something.
        if len(lprojectExpr) != len(lhsPlanNames):
            subPlan.lhsPlan = Project(subPlan.lhsPlan, lprojectExpr)
            subPlan.lhsPlan.outputSchema = DBSchema(
                subPlan.lhsPlan.relationId(),
                [(k, v[1]) for (k, v) in subPlan.lhsPlan.projectExprs.items()])

        if len(rprojectExpr) != len(rhsPlanNames):
            subPlan.rhsPlan = Project(subPlan.rhsPlan, rprojectExpr)
            subPlan.rhsPlan.outputSchema = DBSchema(
                subPlan.rhsPlan.relationId(),
                [(k, v[1]) for (k, v) in subPlan.rhsPlan.projectExprs.items()])

        if subPlan.validateJoin():
            subPlan.initializeSchema()

        # Keep pushing the new projections down through the join inputs.
        operator.subPlan = self.pushdownProjections(subPlan)
        return operator
Exemplo n.º 6
0
  def pickJoinOrder(self, plan):
    """Greedily pick a join order for ``plan`` and return the new Plan.

    Starts from one sub-plan per base relation (with its single-relation
    selections applied). While more than one sub-plan remains, every pair
    is tried as a block-nested-loops and a nested-loops join in both input
    orders; the cheapest candidate by ``cost(True)`` after sampling
    replaces its two source sub-plans. A GroupBy root of the original plan
    is re-applied on top, and a Project is added when the resulting schema
    differs from the original output schema.

    ``self.reportPlanCount`` is reset and then counts the join candidates
    that were considered.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Dictionaries mapping a group of relations to the expressions that
    # involve that group; used to build the join and select expressions
    # for every pair of sub-plans.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # One starting sub-plan per base relation.
    worklist = []
    for r in relations:
      table = TableScan(r, self.db.relationSchema(r))
      table.prepare(self.db)
      if (r,) in selectTablesDict:
        selectString = self.combineSelects(selectTablesDict[(r,)])
        select = Select(table, selectString)
        select.prepare(self.db)
        worklist.append(Plan(root=select))
      else:
        worklist.append(Plan(root=table))

    while len(worklist) > 1:
      bestJoin = None
      bestCost = None
      sourcePair = None

      for pair in itertools.combinations(worklist, 2):
        op1 = pair[0].root
        op2 = pair[1].root

        selectExpr = self.createExpression(
            pair[0].relations(), pair[1].relations(), selectTablesDict)
        joinExpr = self.createExpression(
            pair[0].relations(), pair[1].relations(), joinTablesDict)

        # Both join methods, each in both input orders.
        candidates = []
        for method in ("block-nested-loops", "nested-loops"):
          for (lhs, rhs) in ((op1, op2), (op2, op1)):
            joinOp = Join(lhs, rhs, expr=joinExpr, method=method)
            if selectExpr == "True":
              candidates.append(joinOp)
            else:
              candidates.append(Select(joinOp, selectExpr))

        for candidate in candidates:
          joinplan = Plan(root=candidate)
          joinplan.prepare(self.db)
          joinplan.sample(100)

          # Cost every candidate once, right after sampling, and remember
          # the best cost rather than re-costing the incumbent plan after
          # its sample files have been cleared.
          cost = joinplan.cost(True)
          if bestCost is None or cost < bestCost:
            bestJoin = joinplan
            bestCost = cost
            sourcePair = pair

        self.reportPlanCount += 4
        self.clearSampleFiles()

      # Replace the two joined sub-plans by their join.
      worklist.remove(sourcePair[0])
      worklist.remove(sourcePair[1])
      worklist.append(bestJoin)

    # The single remaining sub-plan joins all relations.
    newPlan = worklist[0]

    if isGroupBy:
      newGroupBy = GroupBy(
          newPlan.root,
          groupSchema=plan.root.groupSchema,
          aggSchema=plan.root.aggSchema,
          groupExpr=plan.root.groupExpr,
          aggExprs=plan.root.aggExprs,
          groupHashFn=plan.root.groupHashFn)
      newGroupBy.prepare(self.db)
      newPlan = Plan(root=newGroupBy)

    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
      # Restore the original output schema with a final projection.
      projectDict = {f: (f, t) for f, t in outputSchema.schema()}
      project = Project(newPlan.root, projectDict)
      project.prepare(self.db)
      newPlan = Plan(root=project)

    return newPlan
Exemplo n.º 7
0
  def pickJoinOrder(self, plan):
    """Pick a join order for ``plan`` by dynamic programming.

    Pass 1 stores one sub-plan per base relation (with its single-relation
    selections applied) in ``optDict``. Pass n considers every combination
    of n relations: each split produced by ``self.getCombos`` /
    ``self.getComplement`` is tried as a block-nested-loops and a
    nested-loops join of the two previously stored sub-plans, and the
    cheapest candidate by ``cost(True)`` after sampling is stored for that
    combination. A GroupBy root of the original plan is re-applied on top,
    and a Project is added when the resulting schema differs from the
    original output schema.

    ``self.reportPlanCount`` is reset and then counts the sub-plans that
    were considered.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)

    # Dictionaries mapping a group of relations to the expressions that
    # involve that group; opt(A,B) join C is built from the join
    # expressions on A,C and B,C, with the select expressions on top.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    optDict = {}
    self.reportPlanCount = 0

    for npass in range(1, len(relations) + 1):
      if npass == 1:
        for r in relations:
          table = TableScan(r, self.db.relationSchema(r))
          if (r,) in selectTablesDict:
            selectString = self.combineSelects(selectTablesDict[(r,)])
            optDict[(r,)] = Plan(root=Select(table, selectString))
          else:
            optDict[(r,)] = Plan(root=table)
          self.reportPlanCount += 1
        continue

      for c in itertools.combinations(relations, npass):
        fullList = sorted(c)
        bestJoin = None
        bestCost = None

        for subcombo in self.getCombos(fullList):
          complement = self.getComplement(fullList, subcombo)

          leftOps = optDict[tuple(complement)].root
          rightOps = optDict[tuple(subcombo)].root

          selectExpr = self.createExpression(
              complement, subcombo, selectTablesDict)
          joinExpr = self.createExpression(
              complement, subcombo, joinTablesDict)

          # Build, prepare, sample and cost each join method once.
          costed = []
          for method in ("block-nested-loops", "nested-loops"):
            joinOp = Join(leftOps, rightOps, expr=joinExpr, method=method)
            if selectExpr == "True":
              candidate = Plan(root=joinOp)
            else:
              candidate = Plan(root=Select(joinOp, selectExpr))
            candidate.prepare(self.db)
            candidate.sample(100)
            costed.append((candidate, candidate.cost(True)))

          # Compare nested-loops first: on a cost tie it is preferred over
          # block-nested-loops, and the incumbent is kept over both.
          for (candidate, cost) in reversed(costed):
            if bestCost is None or cost < bestCost:
              bestJoin = candidate
              bestCost = cost

          self.reportPlanCount += 2
          self.clearSampleFiles()

        optDict[tuple(fullList)] = bestJoin

    # The entry covering every relation is the chosen join plan.
    newPlan = optDict[tuple(sorted(relations))]

    if isGroupBy:
      newGroupBy = GroupBy(
          newPlan.root,
          groupSchema=plan.root.groupSchema,
          aggSchema=plan.root.aggSchema,
          groupExpr=plan.root.groupExpr,
          aggExprs=plan.root.aggExprs,
          groupHashFn=plan.root.groupHashFn)
      newGroupBy.prepare(self.db)
      newPlan = Plan(root=newGroupBy)

    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
      # Restore the original output schema with a final projection.
      projectDict = {f: (f, t) for f, t in outputSchema.schema()}
      project = Project(newPlan.root, projectDict)
      project.prepare(self.db)
      newPlan = Plan(root=project)

    return newPlan
Exemplo n.º 8
0
    def pickJoinOrder(self, plan):
        """Greedily pick a join order for ``plan`` and return the new Plan.

        Starts from one sub-plan per base relation (with its single-relation
        selections applied). While more than one sub-plan remains, every
        pair is tried as a block-nested-loops and a nested-loops join in
        both input orders; the cheapest candidate by ``cost(True)`` after
        sampling replaces its two source sub-plans. A GroupBy root of the
        original plan is re-applied on top, and a Project is added when the
        resulting schema differs from the original output schema.

        ``self.reportPlanCount`` is reset and then counts the join
        candidates that were considered.
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)
        # Dictionaries mapping a group of relations to the expressions that
        # involve that group; used to build the join and select expressions
        # for every pair of sub-plans.
        (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        self.reportPlanCount = 0

        # One starting sub-plan per base relation.
        worklist = []
        for r in relations:
            table = TableScan(r, self.db.relationSchema(r))
            table.prepare(self.db)
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                select = Select(table, selectString)
                select.prepare(self.db)
                worklist.append(Plan(root=select))
            else:
                worklist.append(Plan(root=table))

        joinMethods = ("block-nested-loops", "nested-loops")

        while len(worklist) > 1:
            bestJoin = None
            bestCost = None
            sourcePair = None

            for pair in itertools.combinations(worklist, 2):
                (first, second) = pair
                op1 = first.root
                op2 = second.root

                selectExpr = self.createExpression(first.relations(),
                                                   second.relations(),
                                                   selectTablesDict)
                joinExpr = self.createExpression(first.relations(),
                                                 second.relations(),
                                                 joinTablesDict)

                # Both join methods, each in both input orders.
                joinOps = [
                    Join(lhs, rhs, expr=joinExpr, method=method)
                    for method in joinMethods
                    for (lhs, rhs) in ((op1, op2), (op2, op1))
                ]
                if selectExpr == "True":
                    joinList = joinOps
                else:
                    joinList = [Select(j, selectExpr) for j in joinOps]

                for j in joinList:
                    joinplan = Plan(root=j)
                    joinplan.prepare(self.db)
                    joinplan.sample(100)

                    # Cost every candidate once, right after sampling, and
                    # remember the best cost rather than re-costing the
                    # incumbent after its sample files have been cleared.
                    cost = joinplan.cost(True)
                    if bestCost is None or cost < bestCost:
                        bestJoin = joinplan
                        bestCost = cost
                        sourcePair = pair

                self.reportPlanCount += 4
                self.clearSampleFiles()

            # Replace the two joined sub-plans by their join.
            worklist.remove(sourcePair[0])
            worklist.remove(sourcePair[1])
            worklist.append(bestJoin)

        # The single remaining sub-plan joins all relations.
        newPlan = worklist[0]

        if isGroupBy:
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=plan.root.groupSchema,
                                 aggSchema=plan.root.aggSchema,
                                 groupExpr=plan.root.groupExpr,
                                 aggExprs=plan.root.aggExprs,
                                 groupHashFn=plan.root.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            # Restore the original output schema with a final projection.
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}
            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan
Exemplo n.º 9
0
    def pickJoinOrder(self, plan):
        """Pick a join order for `plan` with a bottom-up (System R style) search.

        Pass 1 records, for every relation, a table scan (wrapped in a Select
        when single-relation predicates exist for it). Each later pass `n`
        visits every n-relation combination, splits it into a sub-combination
        and its complement, joins the best plans already recorded for the two
        halves with both "block-nested-loops" and "nested-loops", samples each
        candidate and keeps the one with the lowest `cost(True)`.

        If the root of `plan` is a GroupBy it is re-applied on top of the
        chosen join tree, and a Project is added when the resulting schema
        differs from the original output schema.

        Side effects: resets and increments `self.reportPlanCount` and calls
        `self.clearSampleFiles()` after each evaluated split.

        :param plan: the plan whose relations should be re-ordered.
        :return: a new Plan over the chosen join order.
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)

        # Both dicts map a list of relations to the expressions involving that
        # list. When building opt(A,B) Join C we use the join exprs involving
        # A,C and B,C, and put on top of it the select exprs that involve two
        # tables A,C or B,C.
        joinTablesDict, selectTablesDict = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        optDict = {}
        self.reportPlanCount = 0

        # Pass 1: the best "plan" for a single relation is its (filtered) scan.
        for r in relations:
            table = TableScan(r, self.db.relationSchema(r))
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                optDict[(r, )] = Plan(root=Select(table, selectString))
            else:
                optDict[(r, )] = Plan(root=table)
            self.reportPlanCount += 1

        # Passes 2..n: combine the best sub-plans of smaller relation sets.
        for npass in range(2, len(relations) + 1):
            for c in itertools.combinations(relations, npass):
                fullList = sorted(c)
                bestJoin = None

                for subcombo in self.getCombos(fullList):
                    complement = self.getComplement(fullList, subcombo)

                    leftOps = optDict[tuple(complement)].root
                    rightOps = optDict[tuple(subcombo)].root

                    selectExpr = self.createExpression(
                        complement, subcombo, selectTablesDict)
                    joinExpr = self.createExpression(
                        complement, subcombo, joinTablesDict)

                    # Build, prepare and sample one candidate per join method.
                    # The Select wrapper is only needed when there is a real
                    # predicate to apply ("True" marks the absence of one).
                    candidates = []
                    for method in ("block-nested-loops", "nested-loops"):
                        joinOp = Join(leftOps,
                                      rightOps,
                                      expr=joinExpr,
                                      method=method)
                        if selectExpr == "True":
                            root = joinOp
                        else:
                            root = Select(joinOp, selectExpr)

                        candidate = Plan(root=root)
                        candidate.prepare(self.db)
                        candidate.sample(100)
                        candidates.append(candidate)

                    joinBnlj, joinNlj = candidates

                    # On a cost tie the nested-loops plan is preferred.
                    if joinBnlj.cost(True) < joinNlj.cost(True):
                        cheaper = joinBnlj
                    else:
                        cheaper = joinNlj

                    if bestJoin is None or cheaper.cost(True) < bestJoin.cost(
                            True):
                        bestJoin = cheaper

                    self.reportPlanCount += 2
                    self.clearSampleFiles()

                optDict[tuple(fullList)] = bestJoin

        # after System R algorithm
        newPlan = optDict[tuple(sorted(relations))]

        if isGroupBy:
            # Re-apply the original aggregation on top of the new join tree.
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=plan.root.groupSchema,
                                 aggSchema=plan.root.aggSchema,
                                 groupExpr=plan.root.groupExpr,
                                 aggExprs=plan.root.aggExprs,
                                 groupHashFn=plan.root.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            # Restore the original output schema with an identity projection.
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}

            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan
Exemplo n.º 10
0
  def pushdownHelper(self, operator):
    """Recursively push Project and Select operators towards the leaves.

    Handling by operator type (as reported by `operator.operatorType()`):

    * UnionAll / *Join: both inputs are optimised in place.
    * GroupBy / Sort: the single input is optimised in place.
    * Project: after optimising its input, the projection is moved below a
      Select (when every attribute the Select reads is a projection key),
      split across the two sides of a Join (only when every projection
      expression reads attributes from a single side), or copied onto both
      inputs of a UnionAll.
    * Select: after optimising its input, the selection is moved below a
      Sort, or its CNF conjuncts are distributed to the sides of a Join;
      conjuncts that need both sides stay above the Join.
    * anything else is returned untouched.

    The operator tree is mutated in place.

    :param operator: root of the (sub)plan to rewrite.
    :return: the operator that is now the root of the rewritten (sub)plan.
    """
    operatorType = operator.operatorType()

    if operatorType == "UnionAll" or operatorType.endswith("Join"):
      operator.lhsPlan = self.pushdownHelper(operator.lhsPlan)
      operator.rhsPlan = self.pushdownHelper(operator.rhsPlan)
      return operator

    if operatorType in ("GroupBy", "Sort"):
      operator.subPlan = self.pushdownHelper(operator.subPlan)
      return operator

    if operatorType not in ("Project", "Select"):
      # Not an operator this rewrite knows how to handle: leave it alone.
      return operator

    # Project and Select: optimise the input first, then try to move this
    # operator below it.
    operator.subPlan = self.pushdownHelper(operator.subPlan)
    child = operator.subPlan
    subplanType = child.operatorType()

    if operatorType == "Project":
      if subplanType == "Select":
        # The Select can only run above the Project if every attribute it
        # reads is still produced by the Project.
        keys = operator.projectExprs.keys()
        for attr in ExpressionInfo(child.selectExpr).getAttributes():
          if attr not in keys:
            return operator

        # Swap: Select(Project(grandchild)), then keep pushing the Project.
        operator.subPlan = child.subPlan
        child.subPlan = self.pushdownHelper(operator)
        return child

      if subplanType.endswith("Join"):
        left = child.lhsPlan.schema().fields
        right = child.rhsPlan.schema().fields
        leftProject = {}
        rightProject = {}

        for attribute, (expr, _) in operator.projectExprs.items():
          attrs = set(ExpressionInfo(expr).getAttributes())
          if attrs.issubset(left):
            leftProject[attribute] = operator.projectExprs[attribute]
          elif attrs.issubset(right):
            rightProject[attribute] = operator.projectExprs[attribute]

        # Only push down when every expression fits on one side. Check this
        # before touching the Join: a partial push would leave this Project
        # reading attributes that the child projections no longer produce.
        if len(leftProject) + len(rightProject) != len(operator.projectExprs):
          return operator

        # NOTE(review): attributes used only by the join predicate are not
        # added to the child projections; confirm the Join does not need
        # fields that this drops.
        if leftProject:
          child.lhsPlan = self.pushdownHelper(
            Project(child.lhsPlan, leftProject))
        if rightProject:
          child.rhsPlan = self.pushdownHelper(
            Project(child.rhsPlan, rightProject))
        return child

      if subplanType == "UnionAll":
        # The same projection is applied to both inputs of the union.
        child.lhsPlan = self.pushdownHelper(
          Project(child.lhsPlan, operator.projectExprs))
        child.rhsPlan = self.pushdownHelper(
          Project(child.rhsPlan, operator.projectExprs))
        return child

      # Any other input type: the Project stays where it is.
      return operator

    # operatorType == "Select"
    if subplanType == "Sort":
      # Swap: Sort(Select(grandchild)), then keep pushing the Select.
      operator.subPlan = child.subPlan
      child.subPlan = self.pushdownHelper(operator)
      return child

    if subplanType.endswith("Join"):
      leftAttributes = set(child.lhsPlan.schema().fields)
      rightAttributes = set(child.rhsPlan.schema().fields)
      leftExpress = []
      rightExpress = []
      unpushedExpress = []

      # Route every conjunct to the side that provides all its attributes.
      for expr in ExpressionInfo(operator.selectExpr).decomposeCNF():
        attrs = set(ExpressionInfo(expr).getAttributes())
        if attrs.issubset(leftAttributes):
          leftExpress.append(expr)
        elif attrs.issubset(rightAttributes):
          rightExpress.append(expr)
        else:
          unpushedExpress.append(expr)

      if leftExpress:
        child.lhsPlan = self.pushdownHelper(
          Select(child.lhsPlan, ' and '.join(leftExpress)))
      if rightExpress:
        child.rhsPlan = self.pushdownHelper(
          Select(child.rhsPlan, ' and '.join(rightExpress)))

      if unpushedExpress:
        # Conjuncts spanning both sides must remain above the Join.
        return Select(child, ' and '.join(unpushedExpress))
      return child

    # Any other input type: the Select stays where it is.
    return operator