示例#1
0
 def test_create_view(self):
     query = '''
         CREATE VIEW view1 AS
             SELECT col1, col2
             FROM example.com-simple-simple1;'''
     view = parse_view(query)
     self.assertEqual(view.name, 'view1')
     self.assertEqual(view.sources[0].name, 'example.com-simple-simple1')
     self.assertEqual([x.name for x in view.columns], ['col1', 'col2'])
示例#2
0
    def test_create_view_having_column_aliases(self):
        query = '''
            CREATE VIEW view1 AS
                SELECT col1 as c1, col2 as c2
                FROM example.com-simple-simple1;'''
        view = parse_view(query)
        self.assertEqual(view.name, 'view1')
        self.assertEqual(view.sources[0].name, 'example.com-simple-simple1')

        self.assertEqual(view.columns[0].name, 'col1')
        self.assertEqual(view.columns[0].alias, 'c1')

        self.assertEqual(view.columns[1].name, 'col2')
        self.assertEqual(view.columns[1].alias, 'c2')
示例#3
0
    def test_create_view_having_table_alias(self):
        query = '''
            CREATE VIEW view1 AS
                SELECT t1.col1 AS t1_col1, t1.col2 AS t1_col2
                FROM example.com-simple-simple1 AS t1'''
        view = parse_view(query)
        self.assertEqual(view.name, 'view1')
        self.assertEqual(view.sources[0].name, 'example.com-simple-simple1')
        self.assertEqual(view.sources[0].alias, 't1')

        self.assertEqual(view.columns[0].name, 't1.col1')
        self.assertEqual(view.columns[0].alias, 't1_col1')

        self.assertEqual(view.columns[1].name, 't1.col2')
        self.assertEqual(view.columns[1].alias, 't1_col2')
示例#4
0
    def test_create_view_having_join(self):
        query = '''
            CREATE VIEW view1 AS
                SELECT t1.col1 AS t1_col1, t1.col2 AS t1_col2
                FROM example.com-simple-simple1 AS t1
                LEFT JOIN example.com-simple-simple2 AS t2 ON t1.id = t2.id;
        '''
        view = parse_view(query)
        self.assertEqual(view.name, 'view1')
        self.assertEqual(view.sources[0].name, 'example.com-simple-simple1')

        self.assertEqual(view.joins[0].source.name,
                         'example.com-simple-simple2')

        self.assertEqual(view.columns[0].name, 't1.col1')
        self.assertEqual(view.columns[0].alias, 't1_col1')

        self.assertEqual(view.columns[1].name, 't1.col2')
        self.assertEqual(view.columns[1].alias, 't1_col2')
示例#5
0
    def test_parser_basic(self):

        from ambry.bundle.asql_parser import parse_view, parse_select

        view = parse_view('CREATE VIEW view1 AS SELECT col1 as c1, col2 as c2 FROM table1 WHERE foo is None and bar is baz;')

        self.assertEquals('view1', view.name)

        self.assertEquals('col1', view.columns[0].name)
        self.assertEquals('c1', view.columns[0].alias)
        self.assertEquals('col2', view.columns[1].name)
        self.assertEquals('c2', view.columns[1].alias)

        select = parse_select('SELECT col1 as c1, col2 as c2 FROM table1;')

        self.assertEquals('col1', select.columns[0].name)
        self.assertEquals('c1', select.columns[0].alias)
        self.assertEquals('col2', select.columns[1].name)
        self.assertEquals('c2', select.columns[1].alias)

        select = parse_select(
            '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c
            FROM table1 AS t1
            JOIN table2 AS t2
            JOIN table3 AS t3;''')

        self.assertEquals('t1.col', select.columns[0].name)
        self.assertEquals('t1_c', select.columns[0].alias)

        self.assertEquals('t2.col', select.columns[1].name)
        self.assertEquals('t2_c', select.columns[1].alias)

        self.assertEquals('t3.col', select.columns[2].name)
        self.assertEquals('t3_c', select.columns[2].alias)

        self.assertEquals('table1', select.sources[0].name)
        self.assertEquals('t1', select.sources[0].alias)

        self.assertEquals('table2', select.joins[0].source.name)
        self.assertEquals('t2', select.joins[0].source.alias)

        self.assertEquals('table3', select.joins[1].source.name)
        self.assertEquals('t3', select.joins[1].source.alias)

        select = parse_select(
            '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c
            FROM cdph.ca.gov-hci-high_school_ed-county AS t1
            JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c
            JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c;
            ''')

        self.assertEquals('t1.col', select.columns[0].name)
        self.assertEquals('t1_c', select.columns[0].alias)

        self.assertEquals('t2.col', select.columns[1].name)
        self.assertEquals('t2_c', select.columns[1].alias)

        self.assertEquals('t3.col', select.columns[2].name)
        self.assertEquals('t3_c', select.columns[2].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-county', select.sources[0].name)
        self.assertEquals('t1', select.sources[0].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-city', select.joins[0].source.name)
        self.assertEquals('t2', select.joins[0].source.alias)
        self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols))

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-state', select.joins[1].source.name)
        self.assertEquals('t3', select.joins[1].source.alias)
        self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols)

        select = parse_view(
            '''CREATE VIEW foobar AS SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c
            FROM cdph.ca.gov-hci-high_school_ed-county AS t1
            JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c
            JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c;
            ''')

        self.assertEquals('t1.col', select.columns[0].name)
        self.assertEquals('t1_c', select.columns[0].alias)

        self.assertEquals('t2.col', select.columns[1].name)
        self.assertEquals('t2_c', select.columns[1].alias)

        self.assertEquals('t3.col', select.columns[2].name)
        self.assertEquals('t3_c', select.columns[2].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-county', select.sources[0].name)
        self.assertEquals('t1', select.sources[0].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-city', select.joins[0].source.name)
        self.assertEquals('t2', select.joins[0].source.alias)
        self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols))

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-state', select.joins[1].source.name)
        self.assertEquals('t3', select.joins[1].source.alias)
        self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols)

        stmt = """
            SELECT t1.uuid AS t1_uuid, t2.float_a AS t2_float_a, t3.a AS t3_a
                FROM build.example.com-casters-simple AS t1
                JOIN build.example.com-casters-simple_stats AS t2 ON t1.id = t2.index
                JOIN build.example.com-casters-integers AS t3 ON t3_a = t2.index
            WHERE foo = bar
            """

        select = parse_select(stmt)

        self.assertEqual(2, len(select.joins))
示例#6
0
def _preprocess_sqlite_view(asql_query, library, backend, connection):
    """ Finds view or materialized view in the asql query and converts it to create table/insert rows.

    Note:
        Assume virtual tables for all partitions already created.

    Args:
        asql_query (str): asql query
        library (ambry.Library):
        backend (SQLiteBackend):
        connection (apsw.Connection):

    Returns:
        str: valid sql query containing create table and insert into queries if asql_query contains
            'create materialized view'. If asql_query does not contain 'create materialized view' returns
            asql_query as is.
    """

    new_query = None

    if 'create materialized view' in asql_query.lower(
    ) or 'create view' in asql_query.lower():

        logger.debug(
            '_preprocess_sqlite_view: materialized view found.\n    asql query: {}'
            .format(asql_query))

        view = parse_view(asql_query)

        tablename = view.name.replace('-', '_').lower().replace('.', '_')
        create_query_columns = {}
        for column in view.columns:
            create_query_columns[column.name] = column.alias

        ref_to_partition_map = {
        }  # key is ref found in the query, value is Partition instance.
        alias_to_partition_map = {
        }  # key is alias of ref found in the query, value is Partition instance.

        # collect sources from select statement of the view.
        for source in view.sources:
            partition = library.partition(source.name)
            ref_to_partition_map[source.name] = partition
            if source.alias:
                alias_to_partition_map[source.alias] = partition

        # collect sources from joins of the view.
        for join in view.joins:
            partition = library.partition(join.source.name)
            ref_to_partition_map[join.source.name] = partition
            if join.source.alias:
                alias_to_partition_map[join.source.alias] = partition

        # collect and convert columns.
        TYPE_MAP = {
            'int': 'INTEGER',
            'float': 'REAL',
            six.binary_type.__name__: 'TEXT',
            six.text_type.__name__: 'TEXT',
            'date': 'DATE',
            'datetime': 'TIMESTAMP WITHOUT TIME ZONE'
        }
        column_types = []
        column_names = []
        for column in view.columns:
            if '.' in column.name:
                source_alias, column_name = column.name.split('.')
            else:
                # TODO: Test that case.
                source_alias = None
                column_name = column.name

            # find column specification in the mpr file.
            if source_alias:
                partition = alias_to_partition_map[source_alias]
                for part_column in partition.datafile.reader.columns:
                    if part_column['name'] == column_name:
                        sqlite_type = TYPE_MAP.get(part_column['type'])
                        if not sqlite_type:
                            raise Exception(
                                'Do not know how to convert {} to sql column.'.
                                format(column['type']))

                        column_types.append('    {} {}'.format(
                            column.alias if column.alias else column.name,
                            sqlite_type))
                        column_names.append(
                            column.alias if column.alias else column.name)

        column_types_str = ',\n'.join(column_types)
        column_names_str = ', '.join(column_names)

        create_query = 'CREATE TABLE IF NOT EXISTS {}(\n{});'.format(
            tablename, column_types_str)

        # drop 'create materialized view' part
        _, select_part = asql_query.split(view.name)
        select_part = select_part.strip()
        assert select_part.lower().startswith('as')

        # drop 'as' keyword
        select_part = select_part.strip()[2:].strip()
        assert select_part.lower().strip().startswith('select')

        # Create query to copy data from mpr to just created table.
        copy_query = 'INSERT INTO {table}(\n{columns})\n  {select}'.format(
            table=tablename, columns=column_names_str, select=select_part)
        if not copy_query.strip().lower().endswith(';'):
            copy_query = copy_query + ';'
        new_query = '{}\n\n{}'.format(create_query, copy_query)
    logger.debug(
        '_preprocess_sqlite_view: preprocess finished.\n    asql query: {}\n\n    new query: {}'
        .format(asql_query, new_query))

    return new_query or asql_query
示例#7
0
    def test_parser_basic(self):

        from ambry.bundle.asql_parser import parse_view, parse_select

        view = parse_view(
            'CREATE VIEW view1 AS SELECT col1 as c1, col2 as c2 FROM table1 WHERE foo is None and bar is baz;'
        )

        self.assertEquals('view1', view.name)

        self.assertEquals('col1', view.columns[0].name)
        self.assertEquals('c1', view.columns[0].alias)
        self.assertEquals('col2', view.columns[1].name)
        self.assertEquals('c2', view.columns[1].alias)

        select = parse_select('SELECT col1 as c1, col2 as c2 FROM table1;')

        self.assertEquals('col1', select.columns[0].name)
        self.assertEquals('c1', select.columns[0].alias)
        self.assertEquals('col2', select.columns[1].name)
        self.assertEquals('c2', select.columns[1].alias)

        select = parse_select(
            '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c
            FROM table1 AS t1
            JOIN table2 AS t2
            JOIN table3 AS t3;''')

        self.assertEquals('t1.col', select.columns[0].name)
        self.assertEquals('t1_c', select.columns[0].alias)

        self.assertEquals('t2.col', select.columns[1].name)
        self.assertEquals('t2_c', select.columns[1].alias)

        self.assertEquals('t3.col', select.columns[2].name)
        self.assertEquals('t3_c', select.columns[2].alias)

        self.assertEquals('table1', select.sources[0].name)
        self.assertEquals('t1', select.sources[0].alias)

        self.assertEquals('table2', select.joins[0].source.name)
        self.assertEquals('t2', select.joins[0].source.alias)

        self.assertEquals('table3', select.joins[1].source.name)
        self.assertEquals('t3', select.joins[1].source.alias)

        select = parse_select(
            '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c
            FROM cdph.ca.gov-hci-high_school_ed-county AS t1
            JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c
            JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c;
            ''')

        self.assertEquals('t1.col', select.columns[0].name)
        self.assertEquals('t1_c', select.columns[0].alias)

        self.assertEquals('t2.col', select.columns[1].name)
        self.assertEquals('t2_c', select.columns[1].alias)

        self.assertEquals('t3.col', select.columns[2].name)
        self.assertEquals('t3_c', select.columns[2].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-county',
                          select.sources[0].name)
        self.assertEquals('t1', select.sources[0].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-city',
                          select.joins[0].source.name)
        self.assertEquals('t2', select.joins[0].source.alias)
        self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols))

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-state',
                          select.joins[1].source.name)
        self.assertEquals('t3', select.joins[1].source.alias)
        self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols)

        select = parse_view(
            '''CREATE VIEW foobar AS SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c
            FROM cdph.ca.gov-hci-high_school_ed-county AS t1
            JOIN cdph.ca.gov-hci-high_school_ed-city AS t2 ON t1_c = t2_c
            JOIN cdph.ca.gov-hci-high_school_ed-state AS t3 ON t1_c = t3_c;
            ''')

        self.assertEquals('t1.col', select.columns[0].name)
        self.assertEquals('t1_c', select.columns[0].alias)

        self.assertEquals('t2.col', select.columns[1].name)
        self.assertEquals('t2_c', select.columns[1].alias)

        self.assertEquals('t3.col', select.columns[2].name)
        self.assertEquals('t3_c', select.columns[2].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-county',
                          select.sources[0].name)
        self.assertEquals('t1', select.sources[0].alias)

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-city',
                          select.joins[0].source.name)
        self.assertEquals('t2', select.joins[0].source.alias)
        self.assertEquals(['t1_c', 't2_c'], list(select.joins[0].join_cols))

        self.assertEquals('cdph.ca.gov-hci-high_school_ed-state',
                          select.joins[1].source.name)
        self.assertEquals('t3', select.joins[1].source.alias)
        self.assertEquals(['t1_c', 't3_c'], select.joins[1].join_cols)

        stmt = """
            SELECT t1.uuid AS t1_uuid, t2.float_a AS t2_float_a, t3.a AS t3_a
                FROM build.example.com-casters-simple AS t1
                JOIN build.example.com-casters-simple_stats AS t2 ON t1.id = t2.index
                JOIN build.example.com-casters-integers AS t3 ON t3_a = t2.index
            WHERE foo = bar
            """

        select = parse_select(stmt)

        self.assertEqual(2, len(select.joins))