# Standard-library dependencies of the code below.
import collections
import csv
import itertools
import random
import unittest

# The remaining names used in this file (DBConnection, FakeData, Catalog,
# relation_key, types, State, StoreTemp, BuiltinAggregateExpression, AND, EQ,
# RepresentationProperties, DEFAULT_CARDINALITY, debug) are defined in the
# surrounding package.


class SQLLiteTest(unittest.TestCase, FakeData):

    def setUp(self):
        self.conn1 = DBConnection()
        self.conn2 = DBConnection()
        self.conn1.add_table("emp", FakeData.emp_schema, FakeData.emp_table)
        self.conn1.add_table("dept", FakeData.dept_schema,
                             FakeData.dept_table)
        self.conn2.add_table("num", FakeData.numbers_schema,
                             FakeData.numbers_table)

    def test_empty_relation(self):
        self.conn1.add_table("emp2", FakeData.emp_schema,
                             collections.Counter())
        emp_out = collections.Counter(self.conn1.get_table('emp2'))
        self.assertEquals(emp_out, collections.Counter())

        scheme_out = self.conn1.get_scheme('emp2')
        self.assertEquals(scheme_out, FakeData.emp_schema)

    def test_scan(self):
        emp_out = collections.Counter(self.conn1.get_table('emp'))
        self.assertEquals(emp_out, FakeData.emp_table)

        dept_out = collections.Counter(self.conn1.get_table('dept'))
        self.assertEquals(dept_out, FakeData.dept_table)

        num_out = collections.Counter(self.conn2.get_table('num'))
        self.assertEquals(num_out, FakeData.numbers_table)

    def test_schema_lookup(self):
        self.assertEquals(self.conn1.get_scheme('emp'), FakeData.emp_schema)
        self.assertEquals(self.conn1.get_scheme('dept'), FakeData.dept_schema)
        self.assertEquals(self.conn2.get_scheme('num'),
                          FakeData.numbers_schema)

    def test_num_tuples(self):
        self.assertEquals(self.conn1.num_tuples('emp'),
                          len(FakeData.emp_table))
        self.assertEquals(self.conn1.num_tuples('dept'),
                          len(FakeData.dept_table))
        self.assertEquals(self.conn2.num_tuples('num'),
                          len(FakeData.numbers_table))

    def test_schema_lookup_key_error(self):
        with self.assertRaises(KeyError):
            self.conn2.get_scheme("emp")

    def test_scan_key_error(self):
        with self.assertRaises(KeyError):
            self.conn1.get_table("num")

    def test_delete_table(self):
        # The relation exists before deletion...
        self.conn1.get_scheme("emp")
        self.conn1.delete_table("emp")
        # ...and its schema lookup fails afterwards.
        with self.assertRaises(KeyError):
            self.conn1.get_scheme("emp")

    def test_append_table(self):
        self.conn1.append_table("emp", FakeData.emp_table)
        it = itertools.chain(iter(FakeData.emp_table),
                             iter(FakeData.emp_table))
        expected = collections.Counter(it)
        actual = collections.Counter(self.conn1.get_table('emp'))
        self.assertEquals(actual, expected)
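# A minimal sketch of the DBConnection surface exercised by the tests above.
# This is illustrative only; it assumes the FakeData fixtures are available,
# and the expected values are taken from the assertions in SQLLiteTest:
#
#     conn = DBConnection()
#     conn.add_table("emp", FakeData.emp_schema, FakeData.emp_table)
#     bag = collections.Counter(conn.get_table("emp"))  # bag of tuples
#     conn.get_scheme("emp")      # -> FakeData.emp_schema
#     conn.num_tuples("emp")      # -> len(FakeData.emp_table)
#     conn.append_table("emp", FakeData.emp_table)  # bag-union more tuples
#     conn.delete_table("emp")    # subsequent lookups raise KeyError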
class FakeDatabase(Catalog):
    """An in-memory implementation of relational algebra operators"""

    def __init__(self):
        # Persistent tables, identified by RelationKey
        self.tables = DBConnection()

        # Temporary tables, identified by string name
        self.temp_tables = DBConnection()

        # partitionings
        self.partitionings = {}

    def get_num_servers(self):
        return 1

    def num_tuples(self, rel_key):
        try:
            return self.tables.num_tuples(rel_key)
        except KeyError:
            return DEFAULT_CARDINALITY

    def partitioning(self, rel_key):
        """Get fake partitioning metadata for a relation.

        This has no effect on query evaluation in the FakeDatabase.
        """
        return self.partitionings[rel_key]

    def evaluate(self, op):
        """Evaluate a relational algebra operation.

        For "query-type" operators, return a tuple iterator.
        For store queries, the return value is None.
        """
        method = getattr(self, op.opname().lower())
        return method(op)

    def evaluate_to_bag(self, op):
        """Return a bag (collections.Counter instance) for the operation."""
        return collections.Counter(self.evaluate(op))

    def ingest(self, rel_key, contents, scheme,
               partitioning=RepresentationProperties()):
        """Directly load raw data into the database."""
        if isinstance(rel_key, basestring):
            rel_key = relation_key.RelationKey.from_string(rel_key)
        assert isinstance(rel_key, relation_key.RelationKey)
        self.tables.add_table(rel_key, scheme, contents.elements())
        self.partitionings[rel_key] = partitioning

    def add_function(self, tup):
        print "added function"
        return self.tables.register_function(tup)

    def get_function(self, name):
        if name == "":
            raise ValueError("Invalid UDF name.")
        return self.tables.get_function(name)

    def get_scheme(self, rel_key):
        if isinstance(rel_key, basestring):
            rel_key = relation_key.RelationKey.from_string(rel_key)
        assert isinstance(rel_key, relation_key.RelationKey)
        return self.tables.get_scheme(rel_key)

    def get_table(self, rel_key):
        """Retrieve the contents of a table.

        :param rel_key: The key of the relation
        :type rel_key: relation_key.RelationKey
        :returns: A collections.Counter instance containing tuples.
        """
        if isinstance(rel_key, basestring):
            rel_key = relation_key.RelationKey.from_string(rel_key)
        assert isinstance(rel_key, relation_key.RelationKey)
        return self.tables.get_table(rel_key)

    def get_temp_table(self, key):
        return self.temp_tables.get_table(key)

    def delete_temp_table(self, key):
        self.temp_tables.delete_table(key)

    def dump_all(self):
        for key, val in self.tables.iteritems():
            bag = val[0]
            print '%s: (%s)' % (key, bag)

        for key, bag in self.temp_tables.iteritems():
            print '__%s: (%s)' % (key, bag)

    def scan(self, op):
        assert isinstance(op.relation_key, relation_key.RelationKey)
        return self.tables.get_table(op.relation_key).elements()

    def calculatesamplingdistribution(self, op):
        if op.is_pct:
            tup_cnt = sum(t[1] for t in list(self.evaluate(op.input)))
            sample_size = int(round(tup_cnt * (op.sample_size / 100.0)))
        else:
            sample_size = op.sample_size
        return (t + (sample_size, op.sample_type)
                for t in self.evaluate(op.input))

    def sample(self, op):
        sample_info = list(self.evaluate(op.left))
        assert len(sample_info) == 1
        sample_type = sample_info[0][3]
        sample_size = sample_info[0][2]
        tuples = list(self.evaluate(op.right))
        if sample_type == 'WR':
            # Add unique index to make them appear like different tuples.
            sample = [(i,) + random.choice(tuples)
                      for i in range(sample_size)]
        elif sample_type == 'WoR':
            sample = random.sample(tuples, sample_size)
        else:
            raise ValueError("Invalid sample type")
        return iter(sample)

    def filescan(self, op):
        type_list = op.scheme().get_types()
        with open(op.path, 'r') as fh:
            if not op.options:
                # No options given: let the csv module guess the dialect
                # from an initial sample of the file.
                sample = fh.read(1024)
                dialect = csv.Sniffer().sniff(sample)
                fh.seek(0)
                reader = csv.reader(fh, dialect)
            else:
                options = {'delimiter': ",", 'quote': '"',
                           'escape': None, 'skip': 0}
                options.update(op.options)
                reader = csv.reader(fh,
                                    delimiter=options['delimiter'],
                                    quotechar=options['quote'],
                                    escapechar=options['escape'])
                if options['skip']:
                    for _ in xrange(options['skip']):
                        next(fh)
            for row in reader:
                pairs = zip(row, type_list)
                cols = [types.parse_string(s, t) for s, t in pairs]
                yield tuple(cols)

    def select(self, op):
        child_it = self.evaluate(op.input)

        def filter_func(_tuple):
            # Note: this implicitly uses python truthiness rules for
            # interpreting non-boolean expressions.
            # TODO: Is this the right semantics here?
            return op.condition.evaluate(_tuple, op.scheme())

        return itertools.ifilter(filter_func, child_it)

    def apply(self, op):
        child_it = self.evaluate(op.input)
        scheme = op.input.scheme()

        def make_tuple(input_tuple):
            ls = [colexpr.evaluate(input_tuple, scheme)
                  for (_, colexpr) in op.emitters]
            return tuple(ls)

        return (make_tuple(t) for t in child_it)

    def statefulapply(self, op):
        child_it = self.evaluate(op.input)
        scheme = op.input.scheme()
        state = State(scheme, op.state_scheme, op.inits)

        def make_tuple(input_tuple, state):
            # Update state variables
            state.update(input_tuple, op.updaters)

            # Extract a result for each emit expression
            return tuple([colexpr.evaluate(input_tuple, scheme, state)
                          for (_, colexpr) in op.emitters])

        return (make_tuple(t, state) for t in child_it)

    def join(self, op):
        # Compute the cross product of the children and flatten
        left_it = self.evaluate(op.left)
        right_it = self.evaluate(op.right)
        p1 = itertools.product(left_it, right_it)
        p2 = (x + y for (x, y) in p1)

        # Return tuples that match on the join conditions
        return (tpl for tpl in p2
                if op.condition.evaluate(tpl, op.scheme()))

    def projectingjoin(self, op):
        # standard join, projecting the output columns
        return (tuple(t[x.position] for x in op.output_columns)
                for t in self.join(op))

    def naryjoin(self, op):
        def eval_conditions(conditions, tpl):
            """Turn the NaryJoin condition set into a proper expression,
            then evaluate it."""
            cond = reduce(lambda a, b: AND(a, b),
                          map(lambda (a, b): EQ(a, b), conditions))
            return cond.evaluate(tpl, op.scheme())

        # Elements of prod are tuples of tuples like ((1, 2), (3, 4))
        prod = itertools.product(*(self.evaluate(child)
                                   for child in op.children()))
        # Elements of tuples have been flattened like (1, 2, 3, 4)
        tuples = (sum(x, ()) for x in prod)
        return (tpl for tpl in tuples
                if eval_conditions(op.conditions, tpl))

    def crossproduct(self, op):
        left_it = self.evaluate(op.left)
        right_it = self.evaluate(op.right)
        p1 = itertools.product(left_it, right_it)
        return (x + y for (x, y) in p1)

    def distinct(self, op):
        it = self.evaluate(op.input)
        s = set(it)
        return iter(s)

    def project(self, op):
        if not op.columnlist:
            return self.distinct(op)
        return set(tuple(t[x.position] for x in op.columnlist)
                   for t in self.evaluate(op.input))

    def limit(self, op):
        it = self.evaluate(op.input)
        return itertools.islice(it, op.count)

    def orderby(self, op):
        it = self.evaluate(op.input)
        # Sort by the least-significant column first; Python's sort is
        # stable, so earlier (more significant) sort columns win ties.
        oList = reversed(zip(op.sort_columns, op.ascending))
        sortedList = list(it)
        for o in oList:
            sortedList = sorted(sortedList, key=lambda x: x[o[0]],
                                reverse=not o[1])
        return iter(sortedList)

    @staticmethod
    def singletonrelation(op):
        return iter([()])

    @staticmethod
    def emptyrelation(op):
        return iter([])

    def union(self, op):
        return set(self.evaluate(op.left)).union(
            set(self.evaluate(op.right)))

    def unionall(self, op):
        return itertools.chain.from_iterable(
            self.evaluate(arg) for arg in op.args)

    def difference(self, op):
        its = [self.evaluate(op.left), self.evaluate(op.right)]
        sets = [set(it) for it in its]
        return sets[0].difference(sets[1])

    def intersection(self, op):
        its = [self.evaluate(op.left), self.evaluate(op.right)]
        sets = [set(it) for it in its]
        return sets[0].intersection(sets[1])

    def groupby(self, op):
        child_it = self.evaluate(op.input)
        input_scheme = op.input.scheme()

        def process_grouping_columns(_tuple):
            ls = [sexpr.evaluate(_tuple, input_scheme)
                  for sexpr in op.grouping_list]
            return tuple(ls)

        # Calculate groups of matching input tuples.
        # If there are no grouping terms, then all tuples are added
        # to a single bin.
        results = collections.defaultdict(list)
        if len(op.grouping_list) == 0:
            results[()] = list(child_it)
        else:
            for input_tuple in child_it:
                grouped_tuple = process_grouping_columns(input_tuple)
                results[grouped_tuple].append(input_tuple)

        # resolve aggregate functions
        for key, tuples in results.iteritems():
            state = State(input_scheme, op.state_scheme, op.inits)
            for tpl in tuples:
                state.update(tpl, op.updaters)

            # For now, built-in aggregates are handled differently than UDA
            # aggregates.  TODO: clean this up!
            agg_fields = []
            for expr in op.aggregate_list:
                if isinstance(expr, BuiltinAggregateExpression):
                    # Old-style aggregate: pass all tuples to the eval func
                    agg_fields.append(
                        expr.evaluate_aggregate(tuples, input_scheme))
                else:
                    # UDA-style aggregate: evaluate a normal expression that
                    # can reference only the state tuple
                    agg_fields.append(expr.evaluate(None, None, state))
            yield key + tuple(agg_fields)

    def sequence(self, op):
        for child_op in op.children():
            self.evaluate(child_op)
        return None

    def parallel(self, op):
        for child_op in op.children():
            self.evaluate(child_op)
        return None

    def dowhile(self, op):
        i = 0
        children = op.children()
        body_ops = children[:-1]
        term_op = children[-1]
        if isinstance(term_op, StoreTemp):
            term_op = term_op.input

        if debug:
            print '---------- Values at top of do/while -----'
            self.dump_all()

        while True:
            for op in body_ops:
                self.evaluate(op)
            result_iterator = self.evaluate(term_op)

            if debug:
                i += 1
                print '-------- Iteration %d ------------' % i
                self.dump_all()

            try:
                tpl = result_iterator.next()
                if debug:
                    print 'Term: %s' % str(tpl)
                # XXX should we use python truthiness here?
                if not tpl[0]:
                    break
            except StopIteration:
                break
            except IndexError:
                break

    def debroadcast(self, op):
        return self.evaluate(op.input)

    def store(self, op):
        assert isinstance(op.relation_key, relation_key.RelationKey)
        scheme = op.input.scheme()
        self.tables.add_table(op.relation_key, scheme,
                              self.evaluate(op.input))
        return None

    def sink(self, op):
        scheme = op.input.scheme()
        self.tables.add_table(relation_key.RelationKey("OUTPUT"), scheme,
                              self.evaluate(op.input))
        return None

    def dump(self, op):
        for tpl in self.evaluate(op.input):
            print ','.join(tpl)
        return None

    def storetemp(self, op):
        scheme = op.input.scheme()
        self.temp_tables.add_table(op.name, scheme, self.evaluate(op.input))

    def appendtemp(self, op):
        self.temp_tables.append_table(op.name, self.evaluate(op.input))

    def scantemp(self, op):
        return self.temp_tables.get_table(op.name).elements()

    def myriascan(self, op):
        return self.scan(op)

    def myriacalculatesamplingdistribution(self, op):
        return self.calculatesamplingdistribution(op)

    def myriasample(self, op):
        return self.sample(op)

    def myriafilescan(self, op):
        return self.filescan(op)

    def myriasink(self, op):
        return self.sink(op)

    def myriascantemp(self, op):
        return self.scantemp(op)

    def myrialimit(self, op):
        return self.limit(op)

    def myriasymmetrichashjoin(self, op):
        return self.projectingjoin(op)

    def myrialeapfrogjoin(self, op):
        # standard naryjoin, projecting the output columns
        return (tuple(t[x.position] for x in op.output_columns)
                for t in self.naryjoin(op))

    def myriainmemoryorderby(self, op):
        return self.orderby(op)

    def myriahypercubeshuffleconsumer(self, op):
        return self.evaluate(op.input)

    def myriahypercubeshuffleproducer(self, op):
        return self.evaluate(op.input)

    def myriasplitconsumer(self, op):
        return self.evaluate(op.input)

    def myriasplitproducer(self, op):
        return self.evaluate(op.input)

    def myriastore(self, op):
        return self.store(op)

    def myriastoretemp(self, op):
        return self.storetemp(op)

    def myriaappendtemp(self, op):
        return self.appendtemp(op)

    def myriaapply(self, op):
        return self.apply(op)

    def myriastatefulapply(self, op):
        return self.statefulapply(op)

    def myriadupelim(self, op):
        return self.distinct(op)

    def myriaselect(self, op):
        return self.select(op)

    def myriacrossproduct(self, op):
        return self.crossproduct(op)

    def myriagroupby(self, op):
        return self.groupby(op)

    def myriashuffleconsumer(self, op):
        return self.evaluate(op.input)

    def myriashuffleproducer(self, op):
        return self.evaluate(op.input)

    def myriacollectconsumer(self, op):
        return self.evaluate(op.input)

    def myriacollectproducer(self, op):
        return self.evaluate(op.input)

    def myriabroadcastconsumer(self, op):
        return self.evaluate(op.input)

    def myriabroadcastproducer(self, op):
        return self.evaluate(op.input)

    def myriasingleton(self, op):
        return self.singletonrelation(op)

    def myriaemptyrelation(self, op):
        return self.emptyrelation(op)

    def myriaunionall(self, op):
        return self.unionall(op)

    def myriadifference(self, op):
        return self.difference(op)

    def myriaqueryscan(self, op):
        return self.tables.get_sql_output(op.sql).elements()
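# Minimal usage sketch for FakeDatabase (illustrative, not part of the
# class). evaluate() dispatches on op.opname().lower(), so any operator
# whose lowercased name is "scan" and that carries a relation_key attribute
# is handled by scan() above. The Scan constructor, relation name, scheme,
# and tuples below are assumptions for illustration only:
#
#     db = FakeDatabase()
#     key = relation_key.RelationKey.from_string("public:adhoc:emp")
#     db.ingest(key, collections.Counter([(1, 25), (2, 40)]), emp_scheme)
#     scan_op = Scan(key, emp_scheme)    # hypothetical operator instance
#     bag = db.evaluate_to_bag(scan_op)  # -> Counter of the ingested tuples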