def pickJoinOrder(self, plan):
    """Pick a join order bottom-up, keeping the cheapest plan per relation set.

    One prepared TableScan is created for every relation of ``plan``. Then,
    for relation subsets of increasing size, each subset is split into
    (remaining relations, one single relation) and the two best sub-plans
    are combined with a block-nested-loops Join, using the join expression
    of every Join operator in ``plan`` whose relations (as reported by
    ``self.relativeRelations``) all lie inside the subset. The candidate
    with the lowest ``cost(estimated=True)`` is kept for each subset.

    Side effects: resets and updates ``self.combsTried`` and
    ``self.plansProcessed``; records plan costs via ``self.addPlanCost``.

    Args:
        plan: the query plan whose relations and Join operators are used.

    Returns:
        A Plan rooted at the best operator found for the full relation set.

    Raises:
        KeyError: if no plan covering every relation was built (for example
            when the relations are not all connected by join operators).
    """
    rels = set(plan.relations())
    optPlans = {}  # Maps a frozenset of relations to the best plan found so far
    self.combsTried = 0
    self.plansProcessed = 0

    # Base case: each single relation is read with a table scan.
    for r in rels:
        newScan = TableScan(r, self.db.relationSchema(r))
        newScan.prepare(self.db)
        optPlans[frozenset({r})] = newScan

    # Index the join operators by every relation they involve:
    # frozenset({relation}) -> [(relations of the join, join operator), ...]
    joinMap = {}
    for (_, op) in plan.flatten():
        if isinstance(op, Join):
            relativeR = self.relativeRelations(rels, op)
            for r in relativeR:
                joinMap.setdefault(frozenset({r}), []).append((relativeR, op))

    n = len(rels)
    for i in range(2, n + 1):
        # NOTE(review): kRelsComb(k, s) is assumed to yield the k-element
        # combinations of s -- confirm against its definition.
        for union in [frozenset(comb) for comb in self.kRelsComb(i, rels)]:
            for right in [frozenset(comb) for comb in self.kRelsComb(1, union)]:
                left = frozenset(union - right)
                # Neither sub-plan entry changes while this split is processed,
                # because only optPlans[union] is written below.
                subPlansReady = left in optPlans and right in optPlans
                for t in left:
                    self.combsTried += 1
                    # A relation that no join touches has no entry; treat it
                    # as "no candidates" instead of failing with KeyError.
                    for (joinRels, joinOp) in joinMap.get(frozenset({t}), []):
                        if not (subPlansReady and set(joinRels).issubset(union)):
                            continue
                        self.plansProcessed += 1
                        newJoin = Join(optPlans[left], optPlans[right],
                                       expr=joinOp.joinExpr,
                                       method="block-nested-loops")
                        newJoin.prepare(self.db)
                        newCost = newJoin.cost(estimated=True)
                        if (union not in optPlans
                                or newCost < self.getPlanCost(optPlans[union])):
                            optPlans[union] = newJoin
                            self.addPlanCost(newJoin, newCost)

    newRoot = optPlans[frozenset(rels)]
    return Plan(root=newRoot)
# NOTE(review): the original line ended with a bare ''' whose matching
# delimiter is not visible in this part of the file; restore it here if it
# opens or closes a commented-out region.
def pickJoinOrder(self, plan):
    """Pick a join order greedily, extending the best plan one step at a time.

    One prepared TableScan per relation is stored in ``self.tableScans`` and
    the Join operators of ``plan`` are indexed per relation in
    ``self.joinMap``. For the first step every single-relation scan is
    offered to ``self.processJoin`` and the cheapest result is kept; every
    later step calls ``self.processJoin`` on the previous step's winner.

    NOTE(review): ``processJoin(plan, rels)`` is assumed to return a
    ``(cost, joinPlan, relations)`` triple -- confirm against its definition.

    Side effects: resets ``self.combsTried`` and ``self.plansProcessed`` and
    (re)builds ``self.rels``, ``self.tableScans`` and ``self.joinMap``.

    Args:
        plan: the query plan whose relations and Join operators are used.

    Returns:
        A Plan rooted at the operator chosen in the last step. With exactly
        one relation this is that relation's table scan.
    """
    self.combsTried = 0
    self.plansProcessed = 0
    self.rels = set(plan.relations())

    self.tableScans = {}
    for r in self.rels:
        ts = TableScan(r, self.db.relationSchema(r))
        ts.prepare(self.db)
        self.tableScans[frozenset({r})] = ts

    # frozenset({relation}) -> [(relations of the join, join operator), ...]
    self.joinMap = {}
    for (_, op) in plan.flatten():
        if isinstance(op, Join):
            relativeR = self.relativeRelations(self.rels, op)
            for r in relativeR:
                self.joinMap.setdefault(frozenset({r}), []).append((relativeR, op))

    n = len(self.rels)
    if n == 1:
        # Nothing to join: previously the loop below never ran and the
        # method returned Plan(root=None). Return the lone scan instead.
        return Plan(root=self.tableScans[frozenset(self.rels)])

    formerBestPlan = None
    formerRels = None
    for i in range(2, n + 1):
        currBestCost = float('inf')
        currBestPlan = None
        currRels = None
        if i == 2:
            # First step: try every single relation as the starting point.
            for left in [frozenset({rel}) for rel in self.rels]:
                (newCost, newJoin, newRels) = self.processJoin(self.tableScans[left], left)
                if newCost < currBestCost:
                    currRels = newRels
                    currBestPlan = newJoin
                    currBestCost = newCost
        else:
            # Later steps: extend the winner of the previous step.
            (newCost, newJoin, newRels) = self.processJoin(formerBestPlan, formerRels)
            if newCost < currBestCost:
                currRels = newRels
                currBestPlan = newJoin
                currBestCost = newCost
        formerBestPlan = currBestPlan
        formerRels = currRels

    newRoot = formerBestPlan
    return Plan(root=newRoot)
def pickJoinOrder(self, plan):
    """Pick a join order greedily by repeatedly merging the cheapest pair.

    Every relation of ``plan`` starts as its own sub-plan: a prepared
    TableScan, wrapped in a Select when ``selectTablesDict`` holds
    single-table predicates for it. While more than one sub-plan remains,
    every pair of sub-plans is joined in both input orders with both
    "block-nested-loops" and "nested-loops"; each candidate is prepared and
    sampled, and the candidate with the lowest ``cost(True)`` replaces its
    two inputs in the worklist. Afterwards the GroupBy of the original plan
    root is re-applied (if there was one) and a Project is added when the
    resulting schema differs from the original output schema.

    Side effects: resets and updates ``self.reportPlanCount`` (4 per pair
    examined) and calls ``self.clearSampleFiles()``.

    Args:
        plan: the query plan to re-order.

    Returns:
        The new Plan.

    Raises:
        IndexError: if ``plan`` has no relations (the worklist is empty).
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Dicts mapping a list of relations to the exprs involving that list.
    # To build opt(A,B) Join C we use the join exprs involving A,C and B,C,
    # and on top of it the select exprs that involve the two tables A,C or B,C.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # Seed the worklist with one sub-plan per relation.
    worklist = []
    for r in relations:
        table = TableScan(r, self.db.relationSchema(r))
        table.prepare(self.db)
        if (r,) in selectTablesDict:
            selectString = self.combineSelects(selectTablesDict[(r,)])
            select = Select(table, selectString)
            select.prepare(self.db)
            worklist.append(Plan(root=select))
        else:
            worklist.append(Plan(root=table))

    while len(worklist) > 1:
        bestJoin = None
        sourcePair = None
        for pair in itertools.combinations(worklist, 2):
            (firstPlan, secondPlan) = pair
            op1 = firstPlan.root
            op2 = secondPlan.root
            selectExpr = self.createExpression(firstPlan.relations(),
                                               secondPlan.relations(),
                                               selectTablesDict)
            joinExpr = self.createExpression(firstPlan.relations(),
                                             secondPlan.relations(),
                                             joinTablesDict)

            # Both input orders for each join method, built in the order
            # BNLJ(1,2), BNLJ(2,1), NLJ(1,2), NLJ(2,1).
            joinOps = [Join(lhs, rhs, expr=joinExpr, method=method)
                       for method in ("block-nested-loops", "nested-loops")
                       for (lhs, rhs) in ((op1, op2), (op2, op1))]
            # createExpression yields the string "True" when there is no
            # predicate to apply on top of the join.
            if selectExpr == "True":
                joinList = joinOps
            else:
                joinList = [Select(joinOp, selectExpr) for joinOp in joinOps]

            for j in joinList:
                joinplan = Plan(root=j)
                joinplan.prepare(self.db)
                joinplan.sample(100)
                if bestJoin is None or joinplan.cost(True) < bestJoin.cost(True):
                    bestJoin = joinplan
                    sourcePair = pair

            # NOTE(review): the original indentation was lost; these two
            # statements are assumed to run once per pair (4 candidates each).
            self.reportPlanCount += 4
            self.clearSampleFiles()

        # Replace the two merged sub-plans by their best join.
        worklist.remove(sourcePair[0])
        worklist.remove(sourcePair[1])
        worklist.append(bestJoin)

    # After the join ordering: restore the operators that sat above the joins.
    newPlan = worklist[0]
    if isGroupBy:
        newGroupBy = GroupBy(newPlan.root,
                             groupSchema=plan.root.groupSchema,
                             aggSchema=plan.root.aggSchema,
                             groupExpr=plan.root.groupExpr,
                             aggExprs=plan.root.aggExprs,
                             groupHashFn=plan.root.groupHashFn)
        newGroupBy.prepare(self.db)
        newPlan = Plan(root=newGroupBy)

    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
        # Project back to exactly the fields of the original output schema.
        projectDict = {f: (f, t) for f, t in outputSchema.schema()}
        project = Project(newPlan.root, projectDict)
        project.prepare(self.db)
        newPlan = Plan(root=project)

    return newPlan
def pickJoinOrder(self, plan):
    """Pick a join order greedily by repeatedly merging the cheapest pair.

    Every relation of ``plan`` starts as its own sub-plan: a prepared
    TableScan, wrapped in a Select when ``selectTablesDict`` holds
    single-table predicates for it. While more than one sub-plan remains,
    every pair of sub-plans is joined in both input orders with both
    "block-nested-loops" and "nested-loops"; each candidate is prepared and
    sampled, and the candidate with the lowest ``cost(True)`` replaces its
    two inputs in the worklist. Afterwards the GroupBy of the original plan
    root is re-applied (if there was one) and a Project is added when the
    resulting schema differs from the original output schema.

    Side effects: resets and updates ``self.reportPlanCount`` (4 per pair
    examined) and calls ``self.clearSampleFiles()``.

    Args:
        plan: the query plan to re-order.

    Returns:
        The new Plan.

    Raises:
        IndexError: if ``plan`` has no relations (the worklist is empty).
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Dicts mapping a list of relations to the exprs involving that list.
    # To build opt(A,B) Join C we use the join exprs involving A,C and B,C,
    # and on top of it the select exprs that involve the two tables A,C or B,C.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # Seed the worklist with one sub-plan per relation.
    worklist = []
    for r in relations:
        table = TableScan(r, self.db.relationSchema(r))
        table.prepare(self.db)
        if (r, ) in selectTablesDict:
            selectString = self.combineSelects(selectTablesDict[(r, )])
            select = Select(table, selectString)
            select.prepare(self.db)
            worklist.append(Plan(root=select))
        else:
            worklist.append(Plan(root=table))

    while len(worklist) > 1:
        bestJoin = None
        sourcePair = None
        for pair in itertools.combinations(worklist, 2):
            (firstPlan, secondPlan) = pair
            op1 = firstPlan.root
            op2 = secondPlan.root
            selectExpr = self.createExpression(firstPlan.relations(),
                                               secondPlan.relations(),
                                               selectTablesDict)
            joinExpr = self.createExpression(firstPlan.relations(),
                                             secondPlan.relations(),
                                             joinTablesDict)

            # Both input orders for each join method, built in the order
            # BNLJ(1,2), BNLJ(2,1), NLJ(1,2), NLJ(2,1).
            joinOps = [Join(lhs, rhs, expr=joinExpr, method=method)
                       for method in ("block-nested-loops", "nested-loops")
                       for (lhs, rhs) in ((op1, op2), (op2, op1))]
            # createExpression yields the string "True" when there is no
            # predicate to apply on top of the join.
            if selectExpr == "True":
                joinList = joinOps
            else:
                joinList = [Select(joinOp, selectExpr) for joinOp in joinOps]

            for j in joinList:
                joinplan = Plan(root=j)
                joinplan.prepare(self.db)
                joinplan.sample(100)
                if bestJoin is None or joinplan.cost(True) < bestJoin.cost(True):
                    bestJoin = joinplan
                    sourcePair = pair

            # NOTE(review): the original indentation was lost; these two
            # statements are assumed to run once per pair (4 candidates each).
            self.reportPlanCount += 4
            self.clearSampleFiles()

        # Replace the two merged sub-plans by their best join.
        worklist.remove(sourcePair[0])
        worklist.remove(sourcePair[1])
        worklist.append(bestJoin)

    # After the join ordering: restore the operators that sat above the joins.
    newPlan = worklist[0]
    if isGroupBy:
        newGroupBy = GroupBy(newPlan.root,
                             groupSchema=plan.root.groupSchema,
                             aggSchema=plan.root.aggSchema,
                             groupExpr=plan.root.groupExpr,
                             aggExprs=plan.root.aggExprs,
                             groupHashFn=plan.root.groupHashFn)
        newGroupBy.prepare(self.db)
        newPlan = Plan(root=newGroupBy)

    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
        # Project back to exactly the fields of the original output schema.
        projectDict = {f: (f, t) for f, t in outputSchema.schema()}
        project = Project(newPlan.root, projectDict)
        project.prepare(self.db)
        newPlan = Plan(root=project)

    return newPlan