def test_create_view(self): query = ''' CREATE VIEW view1 AS SELECT col1, col2 FROM example.com-simple-simple1;''' view = parse_view(query) self.assertEqual(view.name, 'view1') self.assertEqual(view.sources[0].name, 'example.com-simple-simple1') self.assertEqual([x.name for x in view.columns], ['col1', 'col2'])
def test_create_view_having_column_aliases(self): query = ''' CREATE VIEW view1 AS SELECT col1 as c1, col2 as c2 FROM example.com-simple-simple1;''' view = parse_view(query) self.assertEqual(view.name, 'view1') self.assertEqual(view.sources[0].name, 'example.com-simple-simple1') self.assertEqual(view.columns[0].name, 'col1') self.assertEqual(view.columns[0].alias, 'c1') self.assertEqual(view.columns[1].name, 'col2') self.assertEqual(view.columns[1].alias, 'c2')
def test_create_view_having_table_alias(self): query = ''' CREATE VIEW view1 AS SELECT t1.col1 AS t1_col1, t1.col2 AS t1_col2 FROM example.com-simple-simple1 AS t1''' view = parse_view(query) self.assertEqual(view.name, 'view1') self.assertEqual(view.sources[0].name, 'example.com-simple-simple1') self.assertEqual(view.sources[0].alias, 't1') self.assertEqual(view.columns[0].name, 't1.col1') self.assertEqual(view.columns[0].alias, 't1_col1') self.assertEqual(view.columns[1].name, 't1.col2') self.assertEqual(view.columns[1].alias, 't1_col2')
def test_create_view_having_join(self): query = ''' CREATE VIEW view1 AS SELECT t1.col1 AS t1_col1, t1.col2 AS t1_col2 FROM example.com-simple-simple1 AS t1 LEFT JOIN example.com-simple-simple2 AS t2 ON t1.id = t2.id; ''' view = parse_view(query) self.assertEqual(view.name, 'view1') self.assertEqual(view.sources[0].name, 'example.com-simple-simple1') self.assertEqual(view.joins[0].source.name, 'example.com-simple-simple2') self.assertEqual(view.columns[0].name, 't1.col1') self.assertEqual(view.columns[0].alias, 't1_col1') self.assertEqual(view.columns[1].name, 't1.col2') self.assertEqual(view.columns[1].alias, 't1_col2')
def test_parser_basic(self): from ambry.bundle.asql_parser import parse_view, parse_select view = parse_view('CREATE VIEW view1 AS SELECT col1 as c1, col2 as c2 FROM table1 WHERE foo is None and bar is baz;') self.assertEquals('view1', view.name) self.assertEquals('col1', view.columns[0].name) self.assertEquals('c1', view.columns[0].alias) self.assertEquals('col2', view.columns[1].name) self.assertEquals('c2', view.columns[1].alias) select = parse_select('SELECT col1 as c1, col2 as c2 FROM table1;') self.assertEquals('col1', select.columns[0].name) self.assertEquals('c1', select.columns[0].alias) self.assertEquals('col2', select.columns[1].name) self.assertEquals('c2', select.columns[1].alias) select = parse_select( '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c FROM table1 AS t1 JOIN table2 AS t2 JOIN table3 AS t3;''') self.assertEquals('t1.col', select.columns[0].name) self.assertEquals('t1_c', select.columns[0].alias) self.assertEquals('t2.col', select.columns[1].name) self.assertEquals('t2_c', select.columns[1].alias) self.assertEquals('t3.col', select.columns[2].name) self.assertEquals('t3_c', select.columns[2].alias) self.assertEquals('table1', select.sources[0].name) self.assertEquals('t1', select.sources[0].alias) self.assertEquals('table2', select.joins[0].source.name) self.assertEquals('t2', select.joins[0].source.alias) self.assertEquals('table3', select.joins[1].source.name) self.assertEquals('t3', select.joins[1].source.alias) select = parse_select( '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c FROM cdph.ca.gov-hci-high_school_ed-county AS t1 JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c; ''') self.assertEquals('t1.col', select.columns[0].name) self.assertEquals('t1_c', select.columns[0].alias) self.assertEquals('t2.col', select.columns[1].name) self.assertEquals('t2_c', select.columns[1].alias) self.assertEquals('t3.col', select.columns[2].name) self.assertEquals('t3_c', select.columns[2].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-county', select.sources[0].name) self.assertEquals('t1', select.sources[0].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-city', select.joins[0].source.name) self.assertEquals('t2', select.joins[0].source.alias) self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols)) self.assertEquals('cdph.ca.gov-hci-high_school_ed-state', select.joins[1].source.name) self.assertEquals('t3', select.joins[1].source.alias) self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols) select = parse_view( '''CREATE VIEW foobar AS SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c FROM cdph.ca.gov-hci-high_school_ed-county AS t1 JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c; ''') self.assertEquals('t1.col', select.columns[0].name) self.assertEquals('t1_c', select.columns[0].alias) self.assertEquals('t2.col', select.columns[1].name) self.assertEquals('t2_c', select.columns[1].alias) self.assertEquals('t3.col', select.columns[2].name) self.assertEquals('t3_c', select.columns[2].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-county', select.sources[0].name) self.assertEquals('t1', select.sources[0].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-city', select.joins[0].source.name) self.assertEquals('t2', select.joins[0].source.alias) self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols)) self.assertEquals('cdph.ca.gov-hci-high_school_ed-state', select.joins[1].source.name) self.assertEquals('t3', select.joins[1].source.alias) self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols) stmt = """ SELECT t1.uuid AS t1_uuid, t2.float_a AS t2_float_a, t3.a AS t3_a FROM build.example.com-casters-simple AS t1 JOIN build.example.com-casters-simple_stats AS t2 ON t1.id = t2.index JOIN build.example.com-casters-integers AS t3 ON t3_a = t2.index WHERE foo = bar """ select = parse_select(stmt) self.assertEqual(2, len(select.joins))
def _preprocess_sqlite_view(asql_query, library, backend, connection): """ Finds view or materialized view in the asql query and converts it to create table/insert rows. Note: Assume virtual tables for all partitions already created. Args: asql_query (str): asql query library (ambry.Library): backend (SQLiteBackend): connection (apsw.Connection): Returns: str: valid sql query containing create table and insert into queries if asql_query contains 'create materialized view'. If asql_query does not contain 'create materialized view' returns asql_query as is. """ new_query = None if 'create materialized view' in asql_query.lower( ) or 'create view' in asql_query.lower(): logger.debug( '_preprocess_sqlite_view: materialized view found.\n asql query: {}' .format(asql_query)) view = parse_view(asql_query) tablename = view.name.replace('-', '_').lower().replace('.', '_') create_query_columns = {} for column in view.columns: create_query_columns[column.name] = column.alias ref_to_partition_map = { } # key is ref found in the query, value is Partition instance. alias_to_partition_map = { } # key is alias of ref found in the query, value is Partition instance. # collect sources from select statement of the view. for source in view.sources: partition = library.partition(source.name) ref_to_partition_map[source.name] = partition if source.alias: alias_to_partition_map[source.alias] = partition # collect sources from joins of the view. for join in view.joins: partition = library.partition(join.source.name) ref_to_partition_map[join.source.name] = partition if join.source.alias: alias_to_partition_map[join.source.alias] = partition # collect and convert columns. TYPE_MAP = { 'int': 'INTEGER', 'float': 'REAL', six.binary_type.__name__: 'TEXT', six.text_type.__name__: 'TEXT', 'date': 'DATE', 'datetime': 'TIMESTAMP WITHOUT TIME ZONE' } column_types = [] column_names = [] for column in view.columns: if '.' in column.name: source_alias, column_name = column.name.split('.') else: # TODO: Test that case. source_alias = None column_name = column.name # find column specification in the mpr file. if source_alias: partition = alias_to_partition_map[source_alias] for part_column in partition.datafile.reader.columns: if part_column['name'] == column_name: sqlite_type = TYPE_MAP.get(part_column['type']) if not sqlite_type: raise Exception( 'Do not know how to convert {} to sql column.'. format(column['type'])) column_types.append(' {} {}'.format( column.alias if column.alias else column.name, sqlite_type)) column_names.append( column.alias if column.alias else column.name) column_types_str = ',\n'.join(column_types) column_names_str = ', '.join(column_names) create_query = 'CREATE TABLE IF NOT EXISTS {}(\n{});'.format( tablename, column_types_str) # drop 'create materialized view' part _, select_part = asql_query.split(view.name) select_part = select_part.strip() assert select_part.lower().startswith('as') # drop 'as' keyword select_part = select_part.strip()[2:].strip() assert select_part.lower().strip().startswith('select') # Create query to copy data from mpr to just created table. copy_query = 'INSERT INTO {table}(\n{columns})\n {select}'.format( table=tablename, columns=column_names_str, select=select_part) if not copy_query.strip().lower().endswith(';'): copy_query = copy_query + ';' new_query = '{}\n\n{}'.format(create_query, copy_query) logger.debug( '_preprocess_sqlite_view: preprocess finished.\n asql query: {}\n\n new query: {}' .format(asql_query, new_query)) return new_query or asql_query
def test_parser_basic(self): from ambry.bundle.asql_parser import parse_view, parse_select view = parse_view( 'CREATE VIEW view1 AS SELECT col1 as c1, col2 as c2 FROM table1 WHERE foo is None and bar is baz;' ) self.assertEquals('view1', view.name) self.assertEquals('col1', view.columns[0].name) self.assertEquals('c1', view.columns[0].alias) self.assertEquals('col2', view.columns[1].name) self.assertEquals('c2', view.columns[1].alias) select = parse_select('SELECT col1 as c1, col2 as c2 FROM table1;') self.assertEquals('col1', select.columns[0].name) self.assertEquals('c1', select.columns[0].alias) self.assertEquals('col2', select.columns[1].name) self.assertEquals('c2', select.columns[1].alias) select = parse_select( '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c FROM table1 AS t1 JOIN table2 AS t2 JOIN table3 AS t3;''') self.assertEquals('t1.col', select.columns[0].name) self.assertEquals('t1_c', select.columns[0].alias) self.assertEquals('t2.col', select.columns[1].name) self.assertEquals('t2_c', select.columns[1].alias) self.assertEquals('t3.col', select.columns[2].name) self.assertEquals('t3_c', select.columns[2].alias) self.assertEquals('table1', select.sources[0].name) self.assertEquals('t1', select.sources[0].alias) self.assertEquals('table2', select.joins[0].source.name) self.assertEquals('t2', select.joins[0].source.alias) self.assertEquals('table3', select.joins[1].source.name) self.assertEquals('t3', select.joins[1].source.alias) select = parse_select( '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c FROM cdph.ca.gov-hci-high_school_ed-county AS t1 JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c; ''') self.assertEquals('t1.col', select.columns[0].name) self.assertEquals('t1_c', select.columns[0].alias) self.assertEquals('t2.col', select.columns[1].name) self.assertEquals('t2_c', select.columns[1].alias) self.assertEquals('t3.col', select.columns[2].name) self.assertEquals('t3_c', select.columns[2].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-county', select.sources[0].name) self.assertEquals('t1', select.sources[0].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-city', select.joins[0].source.name) self.assertEquals('t2', select.joins[0].source.alias) self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols)) self.assertEquals('cdph.ca.gov-hci-high_school_ed-state', select.joins[1].source.name) self.assertEquals('t3', select.joins[1].source.alias) self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols) select = parse_view( '''CREATE VIEW foobar AS SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c FROM cdph.ca.gov-hci-high_school_ed-county AS t1 JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c; ''') self.assertEquals('t1.col', select.columns[0].name) self.assertEquals('t1_c', select.columns[0].alias) self.assertEquals('t2.col', select.columns[1].name) self.assertEquals('t2_c', select.columns[1].alias) self.assertEquals('t3.col', select.columns[2].name) self.assertEquals('t3_c', select.columns[2].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-county', select.sources[0].name) self.assertEquals('t1', select.sources[0].alias) self.assertEquals('cdph.ca.gov-hci-high_school_ed-city', select.joins[0].source.name) self.assertEquals('t2', select.joins[0].source.alias) self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols)) self.assertEquals('cdph.ca.gov-hci-high_school_ed-state', select.joins[1].source.name) self.assertEquals('t3', select.joins[1].source.alias) self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols) stmt = """ SELECT t1.uuid AS t1_uuid, t2.float_a AS t2_float_a, t3.a AS t3_a FROM build.example.com-casters-simple AS t1 JOIN build.example.com-casters-simple_stats AS t2 ON t1.id = t2.index JOIN build.example.com-casters-integers AS t3 ON t3_a = t2.index WHERE foo = bar """ select = parse_select(stmt) self.assertEqual(2, len(select.joins))