Пример #1
0
def query2Hash_test(db, printOutput):
    query2 = db.query().fromTable('part') \
        .join(db.query().fromTable('lineitem').where('L_RETURNFLAG == \'R\''),
              rhsSchema=db.relationSchema('lineitem'),
              method='hash',
              lhsHashFn='hash(P_PARTKEY) % 111', lhsKeySchema=DBSchema('P_PARTKEY', [('P_PARTKEY', 'int')]),
              rhsHashFn='hash(L_PARTKEY) % 111', rhsKeySchema=DBSchema('L_PARTKEY', [('L_PARTKEY', 'int')])) \
            .groupBy(groupSchema=DBSchema('P_NAME', [('P_NAME', 'char(55)')]),
                    aggSchema=DBSchema('COUNT', [('COUNT', 'int')]),
                    groupExpr=(lambda e: e.P_NAME),
                    aggExprs=[(0, lambda acc, e: acc + 1, lambda x: x)],
                    groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
        .finalize()

    start = time.time()
    processedQuery = [
        query2.schema().unpack(tup) for page in db.processQuery(query2)
        for tup in page[1]
    ]
    end = time.time()
    duration = end - start
    print('Query: Query 2, Hash')
    if printOutput:
        print('Results: ', processedQuery)
    print('Time', duration)
    return
Пример #2
0
def query1(db):
    query = db.query().fromTable('lineitem')\
        .where(
            "(L_SHIPDATE >= 19940101) and (L_SHIPDATE < 19950101) and (0.06-0.01 <= L_DISCOUNT <= 0.06 + 0.01) and (L_QUANTITY < 24")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('ONE', 'int')]),
            groupExpr=(lambda e: 1),
            aggSchema=DBSchema('groupBy', [('revenue', 'float')]),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * e.L_DISCOUNT), lambda x: x)], \
            groupHashFn=(lambda gbVal: hash(gbVal) % 1))\
        .select(
            {'revenue': ('revenue', 'float')})\
        .finalize()
    return query
Пример #3
0
 def testHashJoin(self):
     schema = self.db.relationSchema('employee')
     e2schema = schema.rename('employee2', {'id': 'id2', 'age': 'age2', 'dept_id': 'dept_id2'})
     keySchema = DBSchema('employeeKey', [('id', 'int')])
     keySchema2 = DBSchema('employeeKey2', [('id2', 'int')])
     hashJoin = self.db.query().fromTable('employee').join( \
         self.db.query().fromTable('employee'), \
         rhsSchema=e2schema, \
         method='hash', \
         lhsHashFn='hash(id) % 4', lhsKeySchema=keySchema, \
         rhsHashFn='hash(id2) % 4', rhsKeySchema=keySchema2, \
         ).finalize()
     results = self.getResults(hashJoin)
     self.assertEqual(len(results), self.numEmployees)
Пример #4
0
 def validateSchema(self):
     if self.lhsPlan.schema().match(self.rhsPlan.schema()):
         schemaName = self.operatorType() + str(self.id())
         schemaFields = self.lhsPlan.schema().schema()
         self.unionSchema = DBSchema(schemaName, schemaFields)
     else:
         raise ValueError("Union operator type error, mismatched input schemas")
Пример #5
0
 def createRelation(self, relationName, relationFields):
     if relationName not in self.relationMap:
         schema = DBSchema(relationName, relationFields)
         self.relationMap[relationName] = schema
         self.storage.createRelation(relationName, schema)
         self.checkpoint()
     else:
         raise ValueError("Relation '" + relationName + "' already exists")
Пример #6
0
def query2(db):
    query = db.query().fromTable('part')\
        .join(
            db.query().fromTable('lineitem').where("(L_SHIPDATE >= 19950901) and (L_SHIPDATE < 19951001)"),
            method='hash',
            lhsHashFn='hash(P_PARTKEY) % 7', lhsKeySchema=DBSchema('partkey2',[('P_PARTKEY', 'int')]),
            rhsHashFn='hash(L_PARTKEY) % 7', rhsKeySchema=DBSchema('partkey1', [('L_PARTKEY', 'int')])) \
        .groupBy(
            groupSchema=DBSchema('groupKey', [('ONE', 'int')]),
            aggSchema=DBSchema('groupBy', [('promo_revenue', 'float')]),
            groupExpr=(lambda e: 1),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 1))\
        .select(
            {'promo_revenue' : ('promo_revenue', 'float')}).finalize()

    return query
Пример #7
0
def query3bBNL_test(db, printOutput):
    query3 = db.query().fromTable('part') \
        .join(db.query().fromTable('lineitem'),
              rhsSchema=db.relationSchema('lineitem'),
              method='block-nested-loops',
              expr='L_PARTKEY == P_PARTKEY') \
        .join(db.query().fromTable('orders'),
              rhsSchema=db.relationSchema('orders'),
              method='block-nested-loops',
              expr='L_ORDERKEY == O_ORDERKEY') \
        .join(db.query().fromTable('customer'),
              rhsSchema=db.relationSchema('customer'),
              method='block-nested-loops',
              expr='O_CUSTKEY == C_CUSTKEY') \
        .join(db.query().fromTable('nation'),
              rhsSchema=db.relationSchema('nation'),
              method='block-nested-loops',
              expr='C_NATIONKEY == N_NATIONKEY') \
        .groupBy(groupSchema=DBSchema('FIRST', [('N_NAME', 'char(25)'), ('P_NAME', 'char(55)')]),
                 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
                 groupExpr=(lambda e: (e.N_NAME, e.P_NAME)),
                 aggExprs=[(0, lambda acc, e: acc + e.L_QUANTITY, lambda x: x)],
                 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
        .groupBy(groupSchema=DBSchema('SECOND', [('N_NAME', 'char(25)')]),
                 aggSchema=DBSchema('MAX', [('MAX', 'double')]),
                 groupExpr=(lambda e: e.N_NAME),
                 aggExprs=[(0, lambda acc, e: max(acc, e.SUM), lambda x: x)],
                 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
        .select({'NATION': ('N_NAME', 'char(25)'), 'MAX': ('MAX', 'double')}) \
        .finalize()

    start = time.time()
    processedQuery = [
        query3.schema().unpack(tup) for page in db.processQuery(query3)
        for tup in page[1]
    ]
    end = time.time()
    duration = end - start
    print('Query: Query 3b (Order Reversed), BNL')
    if printOutput:
        print('Results: ', processedQuery)
    print('Time', duration)
    return
Пример #8
0
 def testGroupBy(self):
     # SELECT id, min(age), max(age) FROM Employee GROUP BY (id % 2)
     aggMinMaxSchema = DBSchema('minmax', [('minAge', 'int'), ('maxAge', 'int')])
     keySchema = DBSchema('employeeKey', [('id', 'int')])
     groupBy = self.db.query().fromTable('employee').groupBy( \
         groupSchema=keySchema, \
         aggSchema=aggMinMaxSchema, \
         groupExpr=(lambda e: e.id % 2), \
         aggExprs=[(sys.maxsize, lambda acc, e: min(acc, e.age), lambda x: x), \
                   (0, lambda acc, e: max(acc, e.age), lambda x: x)], \
         groupHashFn=(lambda gbVal: hash(gbVal[0]) % 2) \
         ).finalize()
     results = self.getResults(groupBy)
     self.assertEqual(len(results), 2)
     for result in results:
         if result.id == 0:
             self.assertEqual(result.minAge, 20)
             self.assertEqual(result.maxAge, 56)
         else:
             self.assertEqual(result.minAge, 22)
             self.assertEqual(result.maxAge, 58)
Пример #9
0
def query3(db):
    query = db.query().fromTable('customer')\
        .join(
            db.query().fromTable('orders'),
            method='hash',
            lhsHashFn='hash(C_CUSTKEY) % 5', lhsKeySchema=DBSchema('customerKey1', [('C_CUSTKEY', 'int')]),
            rhsHashFn='hash(O_CUSTKEY) % 5', rhsKeySchema=DBSchema('customerKey2', [('O_CUSTKEY', 'int')]))\
        .join(
            db.query().fromTable('lineitem'),
            method='hash',
            lhsHashFn='hash(O_ORDERKEY) % 5', lhsKeySchema=DBSchema('orderKey1', [('O_ORDERKEY', 'int')]),
            rhsHashFn='hash(L_ORDERKEY) % 5', rhsKeySchema=DBSchema('orderkey2', [('L_ORDERKEY', 'int')]))\
        .where(
            "C_MKTSEGMENT == 'BUILDING' and O_ORDERDATE < 19950315 and L_SHIPDATE > 19950315")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('L_ORDERKEY', 'int'), ('O_ORDERDATE', 'int'), ('O_SHIPPRIORITY', 'int')]),
            aggSchema=DBSchema('groupAgg', [('revenue', 'float')]),
            groupExpr=(lambda e: (e.L_ORDERKEY, e.O_ORDERDATE, e.O_SHIPPRIORITY)),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 10))\
        .select(
            {'l_orderkey': ('L_ORDERKEY', 'int'),
            'revenue': ('revenue', 'float'),
            'o_orderdate': ('O_ORDERDATE', 'int'),
            'o_shippriority': ('O_SHIPPRIORITY', 'int')})\
        .finalize()
    return query
Пример #10
0
 def testIndexJoin(self):
     schema = self.db.relationSchema('employee')
     e2schema = schema.rename('employee2', {'id': 'id2', 'age': 'age2', 'dept_id': 'dept_id2'})
     lhsKeySchema = DBSchema('employeeAge', [('age2', 'int')])
     join = self.db.query().fromTable('employee').join( \
         self.db.query().fromTable('employee'), \
         lhsSchema=e2schema, \
         lhsKeySchema=lhsKeySchema, \
         method='indexed', \
         indexId=self.idxId, \
         expr='True'
     ).finalize()
     results = self.getResults(join)
     self.assertEqual(len(results), self.numEmployees)
Пример #11
0
    def setUp(self):
        warnings.simplefilter("ignore", ResourceWarning)

        # Start with a fresh 'employee' table
        self.db = Database.Database()
        self.db.createRelation('employee', [('id', 'int'), ('age', 'int'), ('dept_id', 'int')])

        # Populate employees with department 0 or 1
        self.numEmployees = 20
        empSchema = self.db.relationSchema('employee')

        # Create an index over employee age
        keySchema = DBSchema('employeeAge', [('age', 'int')])
        self.idxId = self.db.storageEngine().createIndex('employee', empSchema, keySchema, False)

        for tup in [empSchema.pack(empSchema.instantiate(i, 2 * i + 20, i % 2)) for i in range(self.numEmployees)]:
            self.db.insertTuple(empSchema.name, tup)
Пример #12
0
def query1Hash_test(db, printOutput):
    query1 = db.query().fromTable('part') \
        .join(db.query().fromTable('partsupp').where('PS_AVAILQTY == 1'),
              rhsSchema=db.relationSchema('partsupp'),
              method='hash',
              lhsHashFn='hash(P_PARTKEY) % 111', lhsKeySchema=DBSchema('P_PARTKEY', [('P_PARTKEY', 'int')]),
              rhsHashFn='hash(PS_PARTKEY) % 111', rhsKeySchema=DBSchema('PS_PARTKEY', [('PS_PARTKEY', 'int')])) \
        .join(db.query().fromTable('supplier'),
              rhsSchema=db.relationSchema('supplier'),
              method='hash',
              lhsHashFn='hash(PS_SUPPKEY) % 111', lhsKeySchema=DBSchema('PS_SUPPKEY', [('PS_SUPPKEY', 'int')]),
              rhsHashFn='hash(S_SUPPKEY) % 111', rhsKeySchema=DBSchema('S_SUPPKEY', [('S_SUPPKEY', 'int')])) \
        .union(
        db.query().fromTable('part')
            .join(db.query().fromTable('partsupp').where('PS_SUPPLYCOST < 5'),
                  rhsSchema=db.relationSchema('partsupp'),
                  method='hash',
                  lhsHashFn='hash(P_PARTKEY) % 111', lhsKeySchema=DBSchema('P_PARTKEY', [('P_PARTKEY', 'int')]),
                  rhsHashFn='hash(PS_PARTKEY) % 111', rhsKeySchema=DBSchema('PS_PARTKEY', [('PS_PARTKEY', 'int')])) \
            .join(db.query().fromTable('supplier'),
                  rhsSchema=db.relationSchema('supplier'),
                  method='hash',
                  lhsHashFn='hash(PS_SUPPKEY) % 111', lhsKeySchema=DBSchema('PS_SUPPKEY', [('PS_SUPPKEY', 'int')]),
                  rhsHashFn='hash(S_SUPPKEY) % 111', rhsKeySchema=DBSchema('S_SUPPKEY', [('S_SUPPKEY', 'int')]))) \
        .select({'P_NAME': ('P_NAME', 'char(55)'), 'S_NAME': ('S_NAME', 'char(25)')}) \
        .finalize()

    start = time.time()
    processedQuery = [
        query1.schema().unpack(tup) for page in db.processQuery(query1)
        for tup in page[1]
    ]
    end = time.time()
    duration = end - start
    print('Query: Query 1, Hash')
    if printOutput:
        print('Results: ', processedQuery)
    print('Time', duration)
    return
Пример #13
0
    def processAllPages(self):
        schema = self.operatorType() + str(self.id())
        fields = self.groupSchema.schema() + self.aggSchema.schema()
        outputSchema = DBSchema(schema, fields)

        relIds = []
        for (pageId, page) in iter(self.subPlan):
            for tpl in page:
                group = self.groupExpr(self.subSchema.unpack(tpl))
                key = self.groupHashFn((group, None))
                relId = str(self.id) + "u" + str(key)
                self.storage.createRelation(relId, self.subSchema)
                self.storage.insertTuple(relId, tpl)
                if relId not in relIds:
                    relIds.append(relId)

        for rid in relIds:
            groupDict = {}
            for (pageId, page) in self.storage.pages(rid):
                for tpl in page:
                    groupKey = self.groupExpr(self.subSchema.unpack(tpl))
                    if groupKey not in groupDict:
                        groupDict[groupKey] = []
                        for trio in self.aggExprs:
                            groupDict[groupKey].append(trio[0])

                    for i in range(len(self.aggExprs)):
                        groupDict[groupKey][i] = self.aggExprs[i][1](
                            groupDict[groupKey][i], self.subSchema.unpack(tpl))

            for key in groupDict:
                for i in range(len(self.aggExprs)):
                    groupDict[key][i] = self.aggExprs[i][2](groupDict[key][i])

            for key in groupDict:
                outTuple = outputSchema.instantiate(
                    key, *[f for f in groupDict[key]])
                self.emitOutputTuple(self.outputSchema.pack(outTuple))
        return self.storage.pages(self.relationId())
Пример #14
0
def query4(db):
    query = db.query().fromTable('nation')\
        .join(
            db.query().fromTable('customer'),
            method='hash',
            lhsHashFn='hash(N_NATIONKEY) % 11', lhsKeySchema=DBSchema('nationKey1', [('N_NATIONKEY', 'int')]),
            rhsHashFn='hash(C_NATIONKEY) % 11', rhsKeySchema=DBSchema('nationKey2', [('C_NATIONKEY', 'int')]))\
        .join(
            db.query().fromTable('orders'),
            method='hash',
            lhsHashFn='hash(C_CUSTKEY) % 11', lhsKeySchema=DBSchema('custKey1', [('C_CUSTKEY', 'int')]),
            rhsHashFn='hash(O_CUSTKEY) % 11', rhsKeySchema=DBSchema('custKey2', [('O_CUSTKEY', 'int')])) \
        .join(
            db.query().fromTable('lineitem'),
            method='hash',
            lhsHashFn='hash(O_ORDERKEY) % 11', lhsKeySchema=DBSchema('orderKey1', [('O_ORDERKEY', 'int')]),
            rhsHashFn='hash(L_ORDERKEY) % 11', rhsKeySchema=DBSchema('orderKey2', [('L_ORDERKEY', 'int')])) \
        .where(
            "O_ORDERDATE >= 19931001 and O_ORDERDATE < 19940101 and L_RETURNFLAG == 'R'")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('C_CUSTKEY','int'),('C_NAME','char(25)'),('C_ACCTBAL','float'),('C_PHONE','char(15)'),('N_NAME','char(25)'),('C_ADDRESS','char(40)'),('C_COMMENT','char(117)')]),
            aggSchema=DBSchema('groupAgg', [('revenue', 'float')]),
            groupExpr=(lambda e: (e.C_CUSTKEY, e.C_NAME, e.C_ACCTBAL, e.C_PHONE, e.N_NAME, e.N_NAME, e.C_ADDRESS, e.C_COMMENT)),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 11))\
        .select(
            {'c_custkey': ('C_CUSTKEY', 'int'),
             'c_name': ('C_NAME', 'char(25)'),
             'revenue': ('revenue', 'float'),
             'c_acctbal': ('C_ACCTBAL', 'float'),
             'n_name': ('N_NAME', 'char(25)'),
             'c_address': ('C_ADDRESS', 'char(40)'),
             'c_phone': ('C_PHONE', 'char(15)'),
             'c_comment': ('C_COMMENT', 'char(117)')})\
        .finalize()
    return query
Пример #15
0
 def validate(self, operator, storage):
     if operator.operatorType() == "TableScan":
         return operator.schema()
     elif operator.operatorType() == "Select":
         operator.storage = storage
         return self.validate(operator.subPlan, storage)
     elif operator.operatorType() == "Project":
         operator.storage = storage
         self.validate(operator.subPlan, storage)
         return DBSchema( operator.relationId(), \
                             [(k, v[1]) for (k,v) in operator.projectExprs.items()])
     elif operator.operatorType() == "GroupBy":
         self.validate(operator.subPlan, storage)
         operator.storage = storage
         return operator.schema()
     elif operator.operatorType() == "UnionAll":
         operator.storage = storage
         return self.validate(operator.subPlan, storage)
     else:
         operator.lhsSchema = self.validate(operator.lhsPlan, storage)
         operator.rhsSchema = self.validate(operator.rhsPlan, storage)
         operator.initializeSchema()
         operator.storage = storage
         return operator.schema()
Пример #16
0
def tests(printOutput):
	db = setup()
	'''select
			sum(l_extendedprice * l_discount) as revenue
		from
			lineitem
		where
			l_shipdate >= 19940101
			and l_shipdate < 19950101
			and l_discount between 0.06 - 0.01 and 0.06 + 0.01
			and l_quantity < 24'''
	query1 = db.query().fromTable('lineitem') \
				.where('L_SHIPDATE >= 19940101') \
		.where('L_SHIPDATE < 19950101') \
		.where('L_DISCOUNT > 0.06 - 0.01') \
		.where('L_DISCOUNT < 0.06 + 0.01') \
		.where('L_QUANTITY < 24') \
		.groupBy(groupSchema=DBSchema('REV', [('dum', 'int')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (1)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * e.L_DISCOUNT, lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'REVENUE': ('SUM', 'double')}) \
		.finalize()
		
	'''select
				sum(l_extendedprice * (1 - l_discount)) as promo_revenue
	from
				lineitem,
				part
	where
				l_partkey = p_partkey
				and l_shipdate >= 19950901
				and l_shipdate < 19951001'''
	query2 = db.query().fromTable('lineitem') \
		.join(db.query().fromTable('part'),
							rhsSchema=db.relationSchema('part'),
							method='block-nested-loops',
							expr='L_PARTKEY == P_PARTKEY') \
		.where('L_SHIPDATE >= 19950901') \
		.where('L_SHIPDATE < 19951001') \
		.groupBy(groupSchema=DBSchema('REV', [('dum', 'int')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (1)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'PROMO_REVENUE': ('SUM', 'double')}) \
		.finalize()
		
	'''select
				l_orderkey,
				sum(l_extendedprice * (1 - l_discount)) as revenue,
				o_orderdate,
				o_shippriority
	from
				customer,
				orders,
				lineitem
	where
				c_mktsegment = 'BUILDING'
				and c_custkey = o_custkey
				and l_orderkey = o_orderkey
				and o_orderdate < 19950315
				and l_shipdate > 19950315
	group by
		l_orderkey,
				o_orderdate,
				o_shippriority'''
	query3 = db.query().fromTable('customer') \
		.join(db.query().fromTable('orders'),
							rhsSchema=db.relationSchema('orders'),
							method='block-nested-loops',
							expr='C_CUSTKEY == O_CUSTKEY') \
		.join(db.query().fromTable('lineitem'),
							rhsSchema=db.relationSchema('lineitem'),
							method='block-nested-loops',
							expr='L_ORDERKEY == O_ORDERKEY') \
		.where('L_SHIPDATE == \'BUILDING\'') \
		.where('O_ORDERDATE < 19950315') \
		.where('L_SHIPDATE > 19950315') \
		.groupBy(groupSchema=DBSchema('FIRST', [('L_ORDERKEY', 'int'), ('O_ORDERDATE', 'char(10)'), ('O_SHIPPRIORITY', 'int')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (e.L_ORDERKEY, e.O_ORDERDATE, e.O_SHIPPRIORITY)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'L_ORDERKEY': ('L_ORDERKEY', 'int'), 'REVENUE': ('SUM', 'double'),'O_ORDERDATE': ('O_ORDERDATE', 'char(10)'),'O_SHIPPRIORITY': ('O_SHIPPRIORITY', 'int')}) \
		.finalize()
		
	'''select
				c_custkey,
				c_name,
				sum(l_extendedprice * (1 - l_discount)) as revenue,
				c_acctbal,
				n_name,
				c_address,
				c_phone,
				c_comment
	from
				customer,
				orders,
				lineitem,
				nation
	where
				c_custkey = o_custkey
				and l_orderkey = o_orderkey
				and o_orderdate >= 19931001
				and o_orderdate < 19940101
				and l_returnflag = 'R'
				and c_nationkey = n_nationkey
	group by
				c_custkey,
				c_name,
				c_acctbal,
				c_phone,
				n_name,
				c_address,
				c_comment'''
	query4 = db.query().fromTable('customer') \
		.join(db.query().fromTable('orders'),
							rhsSchema=db.relationSchema('orders'),
							method='block-nested-loops',
							expr='C_CUSTKEY == O_CUSTKEY') \
		.join(db.query().fromTable('lineitem'),
							rhsSchema=db.relationSchema('lineitem'),
							method='block-nested-loops',
							expr='L_ORDERKEY == O_ORDERKEY') \
		.where('L_SHIPDATE == \'BUILDING\'') \
		.where('O_ORDERDATE >= 19931001') \
		.where('O_ORDERDATE < 19940101') \
		.where('L_RETURNFLAG == \'R\'') \
		.join(db.query().fromTable('nation'),
							rhsSchema=db.relationSchema('nation'),
							method='block-nested-loops',
							expr='C_NATIONKEY == N_NATIONKEY') \
		.groupBy(groupSchema=DBSchema('FIRST', [('C_CUSTKEY', 'int'), ('C_NAME', 'char(25)'), ('C_ACCTBAL', 'double'), ('C_PHONE', 'char(15)'), ('N_NAME', 'char(25)'),('C_ADDRESS', 'char(40)'),('C_COMMENT', 'char(117)')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (e.C_CUSTKEY, e.C_NAME, e.C_ACCTBAL, e.C_PHONE, e.N_NAME, e.C_ADDRESS, e.C_PHONE, e.C_COMMENT)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'C_CUSTKEY' : ('C_CUSTKEY', 'int'), 'C_NAME' : ('C_NAME', 'char(25)'),'SUM' : ('SUM', 'double'), 'C_ACCTBAL' : ('C_ACCTBAL', 'double'), 'C_PHONE' : ('C_PHONE', 'char(15)'), 'N_NAME' : ('N_NAME', 'char(25)'),'C_ADDRESS' : ('C_ADDRESS', 'char(40)'),'C_COMMENT' : ('C_COMMENT', 'char(117)')}) \
		.finalize()
		
	'''select
				n_name,
				sum(l_extendedprice * (1 - l_discount)) as revenue
	from
				customer,
				orders,
				lineitem,
				supplier,
				nation,
				region
	where
				c_custkey = o_custkey
				and l_orderkey = o_orderkey
				and l_suppkey = s_suppkey
				and c_nationkey = s_nationkey
				and s_nationkey = n_nationkey
				and n_regionkey = r_regionkey
				and r_name = 'ASIA'
				and o_orderdate >= 19940101
				and o_orderdate < 19950101
	group by
				n_name'''
	query5 = db.query().fromTable('customer') \
		.join(db.query().fromTable('orders'),
							rhsSchema=db.relationSchema('orders'),
							method='block-nested-loops',
							expr='C_CUSTKEY == O_CUSTKEY') \
		.join(db.query().fromTable('lineitem'),
							rhsSchema=db.relationSchema('lineitem'),
							method='block-nested-loops',
							expr='L_ORDERKEY == O_ORDERKEY') \
		.join(db.query().fromTable('supplier'),
							rhsSchema=db.relationSchema('supplier'),
							method='block-nested-loops',
							expr='L_SUPPKEY == S_SUPPKEY and C_NATIONKEY == S_NATIONKEY') \
		.join(db.query().fromTable('nation'),
							rhsSchema=db.relationSchema('nation'),
							method='block-nested-loops',
							expr='S_NATIONKEY == N_NATIONKEY') \
		.join(db.query().fromTable('region'),
							rhsSchema=db.relationSchema('region'),
							method='block-nested-loops',
							expr='N_REGIONKEY == R_REGIONKEY') \
		.where('L_SHIPDATE == \'BUILDING\'') \
		.where('R_NAME == \'ASIA\'') \
		.where('O_ORDERDATE >= 19940101') \
		.where('O_ORDERDATE < 19950101') \
		.groupBy(groupSchema=DBSchema('FIRST', [('N_NAME', 'char(25)')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (e.L_ORDERKEY, e.O_ORDERDATE, e.O_SHIPPRIORITY)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'N_NAME': ('N_NAME', 'char(25)'), 'SUM' : ('SUM', 'double')}) \
		.finalize()
	

	queries = [query1, query2, query3, query4, query5]
	names = ['query1', 'query2', 'query3', 'query4', 'query5']
	opt = Optimizer.Optimizer(db)
	for i,q in enumerate(queries):
		querytest(db, printOutput, q, names[i], opt)
Пример #17
0
    def initializeSchemas(self):
        tpchNamesAndFields = [
            ('part', [('P_PARTKEY', 'int'), ('P_NAME', 'char(55)'),
                      ('P_MFGR', 'char(25)'), ('P_BRAND', 'char(10)'),
                      ('P_TYPE', 'char(25)'), ('P_SIZE', 'int'),
                      ('P_CONTAINER', 'char(10)'), ('P_RETAILPRICE', 'double'),
                      ('P_COMMENT', 'char(23)')], "issssisds"),
            ('supplier', [('S_SUPPKEY', 'int'), ('S_NAME', 'char(25)'),
                          ('S_ADDRESS', 'char(40)'), ('S_NATIONKEY', 'int'),
                          ('S_PHONE', 'char(15)'), ('S_ACCTBAL', 'double'),
                          ('S_COMMENT', 'char(101)')], "issisds"),
            ('partsupp', [('PS_PARTKEY', 'int'), ('PS_SUPPKEY', 'int'),
                          ('PS_AVAILQTY', 'int'), ('PS_SUPPLYCOST', 'double'),
                          ('PS_COMMENT', 'char(199)')], "iiids"),
            ('customer', [('C_CUSTKEY', 'int'), ('C_NAME', 'char(25)'),
                          ('C_ADDRESS', 'char(40)'), ('C_NATIONKEY', 'int'),
                          ('C_PHONE', 'char(15)'), ('C_ACCTBAL', 'double'),
                          ('C_MKTSEGMENT', 'char(10)'),
                          ('C_COMMENT', 'char(117)')], "issisdss"),
            (
                'orders',
                [
                    ('O_ORDERKEY', 'int'),
                    ('O_CUSTKEY', 'int'),
                    ('O_ORDERSTATUS', 'char(1)'),
                    ('O_TOTALPRICE', 'double'),
                    ('O_ORDERDATE', 'int'),  # date
                    ('O_ORDERPRIORITY', 'char(15)'),
                    ('O_CLERK', 'char(15)'),
                    ('O_SHIPPRIORITY', 'int'),
                    ('O_COMMENT', 'char(79)')
                ],
                "iisdtssis"),
            (
                'lineitem',
                [
                    ('L_ORDERKEY', 'int'),
                    ('L_PARTKEY', 'int'),
                    ('L_SUPPKEY', 'int'),
                    ('L_LINENUMBER', 'int'),
                    ('L_QUANTITY', 'double'),
                    ('L_EXTENDEDPRICE', 'double'),
                    ('L_DISCOUNT', 'double'),
                    ('L_TAX', 'double'),
                    ('L_RETURNFLAG', 'char(1)'),
                    ('L_LINESTATUS', 'char(1)'),
                    ('L_SHIPDATE', 'int'),  # date
                    ('L_COMMITDATE', 'int'),  # date
                    ('L_RECEIPTDATE', 'int'),  # date
                    ('L_SHIPINSTRUCT', 'char(25)'),
                    ('L_SHIPMODE', 'char(10)'),
                    ('L_COMMENT', 'char(44)')
                ],
                "iiiiddddsstttsss"),
            ('nation', [('N_NATIONKEY', 'int'), ('N_NAME', 'char(25)'),
                        ('N_REGIONKEY', 'int'),
                        ('N_COMMENT', 'char(152)')], "isis"),
            ('region', [('R_REGIONKEY', 'int'), ('R_NAME', 'char(25)'),
                        ('R_COMMENT', 'char(152)')], "iss")
        ]

        self.schemas = dict(
            map(lambda x: (x[0], DBSchema(x[0], x[1])), tpchNamesAndFields))
        self.parsers = dict(
            map(lambda x: (x[0], self.buildParser(x[2])), tpchNamesAndFields))
Пример #18
0
 def initializeSchema(self):
     schema = self.operatorType() + str(self.id())
     fields = self.groupSchema.schema() + self.aggSchema.schema()
     self.outputSchema = DBSchema(schema, fields)
Пример #19
0
    def checkJoins(self, joins, allAttributes, cachedLHS, cachedRHS):
        for join in joins:
            if join.joinMethod == 'hash':
                lhsCheck = join.lhsKeySchema.fields  # This is a list
                rhsCheck = join.rhsKeySchema.fields  # This is a list
                joinAttr = lhsCheck + rhsCheck

                contains = all(x in allAttributes for x in joinAttr) and any(
                    y in cachedRHS.schema().fields for y in joinAttr)

                # print(lhsCheck, rhsCheck, contains)
                if contains:
                    keySchema1 = DBSchema('keyschema1', [(lhsCheck[0], 'int')])
                    keySchema2 = DBSchema('keyschema2', [(rhsCheck[0], 'int')])
                    hashFn1 = 'hash(' + lhsCheck[0] + ') % 7'
                    hashFn2 = 'hash(' + rhsCheck[0] + ') % 7'
                    joinExpr = lhsCheck[0] + ' == ' + rhsCheck[0]

                    if lhsCheck[0] in cachedLHS.schema().fields:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn1,
                            'lhsKeySchema': keySchema1,
                            'rhsHashFn': hashFn2,
                            'rhsKeySchema': keySchema2,
                            'joinExpr': joinExpr
                        }
                    else:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn2,
                            'lhsKeySchema': keySchema2,
                            'rhsHashFn': hashFn1,
                            'rhsKeySchema': keySchema1,
                            'joinExpr': joinExpr
                        }

            elif join.joinExpr:
                joinAttr = ExpressionInfo(join.joinExpr).getAttributes()
                contains = all(x in allAttributes for x in joinAttr)

                if contains:
                    joinAttr1 = joinAttr.pop()
                    joinAttr2 = joinAttr.pop()

                    try:
                        joinAttr.pop()
                    except KeyError:
                        pass
                        # print('There are only two attributes in this, as expected')

                    keySchema1 = DBSchema('keySchema1', [([joinAttr1, 'int'])])
                    keySchema2 = DBSchema('keySchema1', [([joinAttr2, 'int'])])
                    hashFn1 = 'hash(' + joinAttr1 + ') % 7'
                    hashFn2 = 'hash(' + joinAttr2 + ') % 7'
                    joinExpr = join.joinExpr

                    if joinAttr1 in cachedLHS.schema().fields:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn1,
                            'lhsKeySchema': keySchema1,
                            'rhsHashFn': hashFn2,
                            'rhsKeySchema': keySchema2,
                            'joinExpr': joinExpr
                        }
                    else:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn2,
                            'lhsKeySchema': keySchema2,
                            'rhsHashFn': hashFn1,
                            'rhsKeySchema': keySchema1,
                            'joinExpr': joinExpr
                        }
        else:
            return False, None
Пример #20
0
 def initializeSchema(self):
     schema = self.operatorType() + str(self.id())
     fields = self.lhsSchema.schema() + self.rhsSchema.schema()
     self.joinSchema = DBSchema(schema, fields)
Пример #21
0
from
        customer,
        orders,
        lineitem
where
        c_mktsegment = 'BUILDING'
        and c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate < 19950315
        and l_shipdate > 19950315
group by
        l_orderkey,
        o_orderdate,
        o_shippriority
'''
ls1 = DBSchema('customerKey1', [('C_CUSTKEY', 'int')])
rs1 = DBSchema('customerKey2', [('O_CUSTKEY', 'int')])

ls2 = DBSchema('orderKey1', [('O_ORDERKEY', 'int')])
rs2 = DBSchema('orderkey2', [('L_ORDERKEY', 'int')])

groupKeySchema = DBSchema('groupKey', [('L_ORDERKEY', 'int'),
                                       ('O_ORDERDATE', 'int'),
                                       ('O_SHIPPRIORITY', 'int')])
groupAggSchema = DBSchema('groupAgg', [('revenue', 'float')])

query3 = db.query().fromTable('customer').join( \
            db.query().fromTable('orders'),
            method = 'hash', \
            lhsHashFn = 'hash(C_CUSTKEY) % 5', lhsKeySchema = ls1, \
            rhsHashFn = 'hash(O_CUSTKEY) % 5', rhsKeySchema = rs1).join( \
Пример #22
0
from Storage.BufferPool import BufferPool
from Catalog.Schema import DBSchema
from Storage.StorageEngine import StorageEngine

schemas = [
        DBSchema('part', 
           [('p_partkey', 'int'),
           ('p_name',    'char(55)'),
           ('p_mfgr',    'char(25)'),
           ('p_brand',   'char(10)'),
           ('p_type',    'char(25)'),
           ('p_size',    'int'),
           ('p_container', 'char(10)'),
           ('p_retailprice', 'float'),
           ('p_comment', 'char(23)')])
        ,
        DBSchema('supplier',
            [('s_suppkey', 'int'),
            ('s_name', 'char(25)'),
            ('s_address', 'char(40)'),
            ('s_nationkey', 'int'),
            ('s_phone', 'char(15)'),
            ('s_acctbal', 'float'),
            ('s_comment', 'char(101)')])
        ,
        DBSchema('partsupp',
            [('ps_partkey', 'int'),
            ('ps_suppkey', 'int'),
            ('ps_availqty', 'int'),
            ('ps_supplycost', 'float'),
            ('ps_comment', 'char(199)')])
Пример #23
0
part = db.query().fromTable('part').select({
    'P_NAME': ('P_NAME', 'char(55)'),
    'P_PARTKEY': ('P_PARTKEY', 'int')
})
partsupp = db.query().fromTable('partsupp').where('PS_AVAILQTY == 1').select({
    'PS_PARTKEY': ('PS_PARTKEY', 'int'),
    'PS_SUPPKEY': ('PS_SUPPKEY', 'int')
})
supplier = db.query().fromTable('supplier').select({
    'S_NAME': ('S_NAME', 'char(25)'),
    'S_SUPPKEY': ('S_SUPPKEY', 'int')
})

join_ps_p = partsupp.join(\
              part, \
              rhsSchema = DBSchema('part', [('P_NAME','char(55)'), ('P_PARTKEY','int')]), \
              method = 'block-nested-loops', \
              expr = 'PS_PARTKEY == P_PARTKEY')

join_three = join_ps_p.join(\
               supplier, \
               rhsSchema = DBSchema('supplier', [('S_NAME','char(25)'), ('S_SUPPKEY', 'int')]), \
               method = 'block-nested-loops',
               expr = 'PS_SUPPKEY == S_SUPPKEY'
               ).select({'P_NAME':('P_NAME','char(55)'), 'S_NAME':('S_NAME','char(25)')})

partsupp2 = db.query().fromTable('partsupp').where('PS_SUPPLYCOST < 5').select(
    {
        'PS_PARTKEY': ('PS_PARTKEY', 'int'),
        'PS_SUPPKEY': ('PS_SUPPKEY', 'int')
    })
Пример #24
0
exp 1

TPC-H Query 6: a 4-chain filter and aggregate query

select
        sum(l_extendedprice * l_discount) as revenue
from
        lineitem
where
        l_shipdate >= 19940101
        and l_shipdate < 19950101
        and l_discount between 0.06 - 0.01 and 0.06 + 0.01
        and l_quantity < 24
'''

groupKeySchema = DBSchema('groupKey', [('ONE', 'int')])
groupAggSchema = DBSchema('groupBy', [('revenue', 'float')])

query1 = db.query().fromTable('lineitem').where( \
            "(L_SHIPDATE >= 19940101) and (L_SHIPDATE < 19950101) and \
            (0.06 - 0.01 <= L_DISCOUNT <= 0.06 + 0.01) and (L_QUANTITY < 24)"                                                                             ).groupBy( \
            groupSchema=groupKeySchema, \
            aggSchema=groupAggSchema, \
            groupExpr=(lambda e: 1), \
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * e.L_DISCOUNT), lambda x: x)], \
            groupHashFn=(lambda gbVal: hash(gbVal) % 1)).select( \
            {'revenue' : ('revenue', 'float')}).finalize()
'''
exp 2

TPC-H Query 14: a 2-way join and aggregate query
Пример #25
0
        schema1.pack(
            schema1.instantiate(i, random.randint(0, 50),
                                'This is a testing tuple.'))
        for i in range(0, 10000)
]:
    _ = db.insertTuple(schema1.name, tup)

for tup in [
        schema2.pack(
            schema2.instantiate(i, random.randint(0, 50),
                                'This is a testing tuple.'))
        for i in range(0, 10000)
]:
    _ = db.insertTuple(schema2.name, tup)

keySchema = DBSchema('employeeKey', [('id', 'int')])
keySchema2 = DBSchema('employeeKey2', [('id2', 'int')])

query5 = db.query().fromTable('employeeeee').join( \
          db.query().fromTable('employeeeee2'), \
          rhsSchema=schema2, \
          method='hash', \
          lhsHashFn=(lambda e : e.id % 4),  lhsKeySchema=keySchema, \
          rhsHashFn=(lambda e : e.id2 % 4), rhsKeySchema=keySchema2, \
        ).finalize()

print(query5.explain())


def readResult():
    for page in db.processQuery(query5):
Пример #26
0
    def joinsOptimizer(self, operator, aPaths):
        defaultScaleFactor = 10
        defaultPartiNumber = 5

        n = len(aPaths)
        planList = []
        costList = []
        # i = 1
        for aPath in aPaths:
            # Here we define cost by number of pages.
            cards = Plan(root=aPath).sample(defaultScaleFactor)
            pageSize, _, _ = self.db.storage.relationStats(aPath.relationId())
            numPages = cards / (pageSize / aPath.schema().size)
            # Here we only consider reorganize joins
            # so that we simple put accessPaths' total cost as 0.
            planList.append(aPath)
            costList.append((numPages, 0))
        # i = 2...n
        for i in range(1, n):
            # find all possible two way join in current planList
            # put the potential joins in potentialP
            # put the potential joins cost in potentialC
            m = len(planList)
            potentialP = []
            potentialC = []
            for j in range(0, m - 1):
                for k in range(j + 1, m):
                    self.pcntr += 1
                    potentialP.append((planList[j], planList[k]))
                    potentialC.append(3 * (costList[j][0] + costList[k][0]) +
                                      costList[j][1] + costList[k][1])
            # find the cheapest joinable join (total cost)
            # build the join, remove the used two base plan and add the new join to planList
            # modify the costList as well
            while (potentialC):
                currC = min(potentialC)
                currP = potentialP[potentialC.index(currC)]
                potentialC.remove(currC)
                potentialP.remove(currP)
                if (self.joinable(operator, currP)):
                    (lField, rField) = self.joinable(operator, currP)
                    lhsSchema = currP[0].schema()
                    rhsSchema = currP[1].schema()
                    lKeySchema = DBSchema(
                        'left',
                        [(f, t)
                         for (f, t) in lhsSchema.schema() if f == lField])
                    rKeySchema = DBSchema(
                        'right',
                        [(f, t)
                         for (f, t) in rhsSchema.schema() if f == rField])
                    lHashFn = 'hash(' + lField + ') % ' + str(
                        defaultPartiNumber)
                    rHashFn = 'hash(' + rField + ') % ' + str(
                        defaultPartiNumber)
                    newJoin = Join(currP[0], currP[1], method='hash', \
                                   lhsHashFn=lHashFn, lhsKeySchema=lKeySchema, \
                                   rhsHashFn=rHashFn, rhsKeySchema=rKeySchema)

                    newJoin.prepare(self.db)
                    totalCost = currC
                    cards = Plan(root=newJoin).sample(defaultScaleFactor)
                    pageSize, _, _ = self.db.storage.relationStats(
                        newJoin.relationId())
                    pages = cards / (pageSize / newJoin.schema().size)

                    id1 = planList.index(currP[0])
                    _ = planList.pop(id1)
                    id2 = planList.index(currP[1])
                    _ = planList.pop(id2)
                    planList.append(newJoin)
                    _ = costList.pop(id1)
                    _ = costList.pop(id2)
                    costList.append((pages, totalCost))
                    break
        print("GreedyOptimizer plan considered: ", self.pcntr)
        return planList[0]
Пример #27
0
import io, math, os, os.path, pickle, struct
from struct import Struct

from Catalog.Identifiers import PageId, FileId, TupleId
from Catalog.Schema      import DBSchema
from Storage.Page        import PageHeader, Page
from Storage.SlottedPage import SlottedPageHeader, SlottedPage

from Storage.File import *

import shutil, Storage.BufferPool, Storage.FileManager

schema = DBSchema('employee', [('id', 'int'), ('age', 'int')])

bp = Storage.BufferPool.BufferPool()

fm = Storage.FileManager.FileManager(bufferPool=bp)

bp.setFileManager(fm)

fm.createRelation(schema.name, schema)

(fId, f) = fm.relationFile(schema.name)

f.numPages() == 0

pId  = PageId(fId, 0)
pId1 = PageId(fId, 1)

p    = Page(pageId=pId,  buffer=bytes(f.pageSize()), schema=schema)
p1   = Page(pageId=pId1, buffer=bytes(f.pageSize()), schema=schema)
Пример #28
0
import Database
from Query.Optimizer import Optimizer
from Query.BushyOptimizer import BushyOptimizer
from Catalog.Schema import DBSchema
from time import time

db = Database.Database()
deptSchema = DBSchema('department', [('d_id', 'int'), ('d_name', 'char(30)')])
emplSchema = DBSchema('employee', [('e_id', 'int'), ('e_name', 'char(30)'),
                                   ('e_projectid', 'int')])
projSchema = DBSchema('project', [('p_id', 'int'), ('p_name', 'char(30)')])
gratSchema = DBSchema('grant', [('g_id', 'int'), ('g_projectid', 'int'),
                                ('g_source', 'char(30)')])
synSchema1 = DBSchema('syn1', [('a', 'int'), ('b', 'char(30)')])
synSchema2 = DBSchema('syn2', [('c', 'int'), ('d', 'char(30)'), ('e', 'int')])

db.createRelation('department', [('d_id', 'int'), ('d_name', 'char(30)')])
db.createRelation('employee', [('e_id', 'int'), ('e_name', 'char(30)'),
                               ('e_projectid', 'int')])
db.createRelation('project', [('p_id', 'int'), ('p_name', 'char(30)')])
db.createRelation('grant', [('g_id', 'int'), ('g_projectid', 'int'),
                            ('g_source', 'char(30)')])
db.createRelation('syn1', [('a', 'int'), ('b', 'char(30)')])
db.createRelation('syn2', [('c', 'int'), ('d', 'char(30)'), ('e', 'int')])

for tup in [
        deptSchema.pack(deptSchema.instantiate(i, "Nature" + str(i)))
        for i in range(4000)
]:
    _ = db.insertTuple('department', tup)
for tup in [
Пример #29
0
 def __init__(self, subPlan, projectExprs, **kwargs):
     super().__init__(**kwargs)
     self.subPlan = subPlan
     self.projectExprs = projectExprs
     self.outputSchema = DBSchema(self.relationId(), \
                           [(k, v[1]) for (k,v) in self.projectExprs.items()])
Пример #30
0
def query5(db):
    query = db.query().fromTable('region')\
        .join(
            db.query().fromTable('nation'),
            method='hash',
            lhsHashFn='hash(R_REGIONKEY) % 11', lhsKeySchema=DBSchema('regionKey1', [('R_REGIONKEY', 'int')]),
            rhsHashFn='hash(N_REGIONKEY) % 11', rhsKeySchema=DBSchema('regionKey2', [('N_REGIONKEY', 'int')]))\
        .join(
            db.query().fromTable('supplier'),
            method='hash',
            lhsHashFn='hash(N_NATIONKEY) % 11', lhsKeySchema=DBSchema('nationKey1', [('N_NATIONKEY', 'int')]),
            rhsHashFn='hash(S_NATIONKEY) % 11', rhsKeySchema=DBSchema('nationKey2', [('S_NATIONKEY', 'int')])) \
        .join(
            db.query().fromTable('lineitem'),
            method='hash',
            lhsHashFn='hash(S_SUPPKEY) % 11', lhsKeySchema=DBSchema('suppKey1', [('S_SUPPKEY', 'int')]),
            rhsHashFn='hash(L_SUPPKEY) % 11', rhsKeySchema=DBSchema('suppKey2', [('L_SUPPKEY', 'int')])) \
        .join(
            db.query().fromTable('orders'),
            method='hash',
            lhsHashFn='hash(L_ORDERKEY) % 11', lhsKeySchema=DBSchema('orderKey1', [('L_ORDERKEY', 'int')]),
            rhsHashFn='hash(O_ORDERKEY) % 11', rhsKeySchema=DBSchema('orderKey2', [('O_ORDERKEY', 'int')])) \
        .join(
            db.query().fromTable('customer'),
            method='hash',
            lhsHashFn='hash(O_CUSTKEY) % 11', lhsKeySchema=DBSchema('custKey1', [('O_CUSTKEY', 'int')]),
            rhsHashFn='hash(C_CUSTKEY) % 11', rhsKeySchema=DBSchema('custKey2', [('C_CUSTKEY', 'int')])) \
        .where(
            "R_NAME == 'ASIA' and O_ORDERDATE >= 19940101 and O_ORDERDATE < 19950101")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('N_NAME','char(25)')]),
            aggSchema=DBSchema('groupAgg', [('revenue', 'float')]),
            groupExpr=(lambda e: e.N_NAME),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 11))\
        .select(
            {'n_name': ('n_name', 'char(25)'),
             'revenue': ('revenue', 'float')})\
        .finalize()
    return query