def join(self, arg1, arg2):
    """Build a JOIN operation over the relations named by the two arguments.

    Args:
        arg1: Left join argument; must expose ``id`` (a key into
            ``self.symbols``) and ``column_names``.
        arg2: Right join argument, with the same attributes as ``arg1``.

    Returns:
        A ``db.Operation`` of type ``'JOIN'`` whose children are the two
        looked-up operations and whose ``join_attributes`` is a list of
        ``(left_index, right_index + left_column_count)`` pairs.

    Raises:
        AssertionError: If the two arguments name a different number of
            columns.
    """
    left_op = self.symbols[arg1.id]
    right_op = self.symbols[arg2.id]
    left_schema = left_op.schema
    right_schema = right_op.schema

    # The parser is expected to have rejected mismatched column counts
    # already; this is only a sanity check on that invariant.
    assert len(arg1.column_names) == len(arg2.column_names)

    # Columns of the right relation are addressed after all columns of the
    # left relation, so their indexes are shifted by the left column count.
    shift = left_schema.num_columns()

    join_attributes = []
    for left_name, right_name in zip(arg1.column_names, arg2.column_names):
        left_index = left_schema.column_index(left_name)
        right_index = right_schema.column_index(right_name)
        # Type-safety check on each pair of join columns
        # (presumably raises on incompatible columns -- TODO confirm).
        relation.Schema.check_columns_compatible(
            left_schema, left_index, right_schema, right_index)
        join_attributes.append((left_index, right_index + shift))

    # Schema of the merged relation.
    merged_schema = relation.Schema.join(
        [left_schema, right_schema], [arg1.id, arg2.id])

    return db.Operation(
        'JOIN',
        merged_schema,
        children=[left_op, right_op],
        join_attributes=join_attributes)
def assign(self, _id, expr):
    """Evaluate ``expr`` and bind the result to ``_id`` in the symbol table.

    When eager evaluation is enabled and the evaluated operation reports
    itself as a non-leaf, the query is executed immediately as a
    ``'REPLACE'`` into a relation named after ``_id``; the symbol is then
    bound to a ``'SCAN'`` of that relation. Otherwise the symbol is bound
    directly to the evaluated operation.

    Args:
        _id: Name of the symbol being assigned.
        expr: Expression handed to ``self.ep.evaluate``.
    """
    op = self.ep.evaluate(expr)

    # Lazy path: just remember the operation under its name.
    if not (self.eager_evaluation and op.is_non_leaf()):
        self.symbols[_id] = op
        return

    # Eager path: turn the query into a database insertion ...
    target_key = db.RelationKey(
        user='******', program=self.program_name, relation=_id)
    materialize = db.Operation(
        'REPLACE', schema=None, children=[op], relation_key=target_key)
    self.db.evaluate(materialize)

    # ... and rewrite the symbol to scan the materialized table.
    scan = db.Operation(
        'SCAN', schema=op.schema, children=[], relation_key=target_key)
    self.symbols[_id] = scan
def foreach(self, _id, column_names, rename_schema):
    """Build a FOREACH operation selecting columns of a named relation.

    Args:
        _id: Key into ``self.symbols`` naming the input operation.
        column_names: Names of the input columns to keep.
        rename_schema: Optional replacement output schema. When truthy it
            is checked against the projected schema and then used as the
            output schema.

    Returns:
        A ``db.Operation`` of type ``'FOREACH'`` with the input operation as
        its only child and ``column_indexes`` set to the positions of
        ``column_names`` in the input schema.
    """
    source_op = self.symbols[_id]
    input_schema = source_op.schema
    projected_schema = input_schema.project(column_names)

    # Rename the columns, if requested.
    if rename_schema:
        projected_schema.check_compatible(rename_schema)
        output_schema = rename_schema
    else:
        output_schema = projected_schema

    column_indexes = [input_schema.column_index(name)
                      for name in column_names]

    return db.Operation(
        'FOREACH',
        output_schema,
        children=[source_op],
        column_indexes=column_indexes)
def limit(self, _id, count):
    """Build a LIMIT operation over a named relation.

    Args:
        _id: Key into ``self.symbols`` naming the input operation.
        count: The row limit requested by the query.

    Returns:
        A ``db.Operation`` of type ``'LIMIT'`` that shares the input
        operation's schema, has the input operation as its only child, and
        carries the limit as its ``count`` attribute.
    """
    child_op = self.symbols[_id]
    # The limit must travel with the operation; without it the resulting
    # LIMIT node has no way to know how many rows were requested.
    # NOTE(review): assumes db.Operation accepts arbitrary keyword
    # attributes, as its other call sites in this file suggest -- confirm.
    return db.Operation(
        'LIMIT', child_op.schema, children=[child_op], count=count)
def __process_bitop(self, _type, id1, id2):
    """Build a binary operation of the given type over two named relations.

    Args:
        _type: Operation type passed through to ``db.Operation``.
        id1: Key into ``self.symbols`` for the first operand.
        id2: Key into ``self.symbols`` for the second operand.

    Returns:
        A ``db.Operation`` of type ``_type`` using the first operand's
        schema, with both operands as children.
    """
    first_op, second_op = self.symbols[id1], self.symbols[id2]
    result_schema = first_op.schema
    # Both inputs must have compatible schemas
    # (presumably raises otherwise -- TODO confirm).
    result_schema.check_compatible(second_op.schema)
    return db.Operation(
        _type, result_schema, children=[first_op, second_op])
def distinct(self, expr):
    """Build a DISTINCT operation over the result of evaluating ``expr``.

    Args:
        expr: Expression handed to ``self.evaluate``.

    Returns:
        A ``db.Operation`` of type ``'DISTINCT'`` that keeps the evaluated
        operation's schema and has that operation as its only child.
    """
    evaluated_op = self.evaluate(expr)
    unchanged_schema = evaluated_op.schema
    return db.Operation(
        'DISTINCT', unchanged_schema, children=[evaluated_op])
def table(self, tuple_list, schema):
    """Build a TABLE operation from literal tuples.

    Every tuple is passed to ``schema.validate_tuple`` before the operation
    is created.

    Args:
        tuple_list: The tuples making up the table.
        schema: Schema the tuples are validated against; also used as the
            operation's schema.

    Returns:
        A ``db.Operation`` of type ``'TABLE'`` carrying ``tuple_list``.
    """
    # Validate first so a bad tuple is reported before any operation exists
    # (validate_tuple presumably raises on mismatch -- TODO confirm).
    for row in tuple_list:
        schema.validate_tuple(row)

    table_op = db.Operation('TABLE', schema, tuple_list=tuple_list)
    return table_op
def load(self, path, schema):
    """Build a LOAD operation for the data at ``path``.

    Args:
        path: Location of the data to load; passed through unchanged.
        schema: Schema assigned to the resulting operation.

    Returns:
        A ``db.Operation`` of type ``'LOAD'`` carrying ``path``.
    """
    load_op = db.Operation('LOAD', schema, path=path)
    return load_op