示例#1
0
    def test_ast_with_projection_join_filter(self):
        table = self.con.table('test1')
        table2 = self.con.table('test2')

        filter_pred = table['f'] > 0

        table3 = table[filter_pred]

        join_pred = table3['g'] == table2['key']

        joined = table2.inner_join(table3, [join_pred])
        result = joined[[table3, table2['value']]]

        ast = build_ast(result)
        stmt = ast.queries[0]

        def foo():
            table3 = table[filter_pred]
            joined = table2.inner_join(table3, [join_pred])
            result = joined[[table3, table2['value']]]
            return result

        assert len(stmt.select_set) == 2
        assert len(stmt.where) == 1
        assert stmt.where[0] is filter_pred

        # Check that the join has been rebuilt to only include the root tables
        tbl = stmt.table_set
        tbl_node = tbl.op()
        assert isinstance(tbl_node, ops.InnerJoin)
        assert tbl_node.left is table2
        assert tbl_node.right is table

        # table expression substitution has been made in the predicate
        assert tbl_node.predicates[0].equals(table['g'] == table2['key'])
示例#2
0
    def test_multiple_limits(self):
        t = self.con.table('functional_alltypes')

        expr = t.limit(20).limit(10)
        stmt = build_ast(expr).queries[0]

        assert stmt.limit['n'] == 10
示例#3
0
    def insert(self, table_name, expr, database=None, overwrite=False,
               validate=True):
        """
        Insert into existing table

        Parameters
        ----------
        table_name : string
        expr : TableExpr
        database : string, default None
        overwrite : boolean, default False
          If True, will replace existing contents of table
        validate : boolean, default True
          If True, do more rigorous validation that schema of table being
          inserted is compatible with the existing table

        Examples
        --------
        con.insert('my_table', table_expr)

        # Completely overwrite contents
        con.insert('my_table', table_expr, overwrite=True)
        """
        if validate:
            existing_schema = self.get_schema(table_name, database=database)
            insert_schema = expr.schema()
            if not insert_schema.equals(existing_schema):
                _validate_compatible(insert_schema, existing_schema)

        ast = sql.build_ast(expr)
        select = ast.queries[0]
        statement = ddl.InsertSelect(table_name, select,
                                     database=database,
                                     overwrite=overwrite)
        self._execute(statement)
示例#4
0
    def insert(self, table_name, expr, database=None, overwrite=False):
        """
        Insert into existing table

        Parameters
        ----------
        table_name : string
        expr : TableExpr
        database : string, default None
        overwrite : boolean, default False
          If True, will replace existing contents of table

        Examples
        --------
        con.insert('my_table', table_expr)

        # Completely overwrite contents
        con.insert('my_table', table_expr, overwrite=True)
        """
        ast = sql.build_ast(expr)
        select = ast.queries[0]
        statement = ddl.InsertSelect(table_name, select,
                                     database=database,
                                     overwrite=overwrite)
        self._execute(statement)
示例#5
0
    def test_ast_with_projection_join_filter(self):
        table = self.con.table('test1')
        table2 = self.con.table('test2')

        filter_pred = table['f'] > 0

        table3 = table[filter_pred]

        join_pred = table3['g'] == table2['key']

        joined = table2.inner_join(table3, [join_pred])
        result = joined[[table3, table2['value']]]

        ast = build_ast(result)
        stmt = ast.queries[0]

        def foo():
            table3 = table[filter_pred]
            joined = table2.inner_join(table3, [join_pred])
            result = joined[[table3, table2['value']]]
            return result

        assert len(stmt.select_set) == 2
        assert len(stmt.where) == 1
        assert stmt.where[0] is filter_pred

        # Check that the join has been rebuilt to only include the root tables
        tbl = stmt.table_set
        tbl_node = tbl.op()
        assert isinstance(tbl_node, ops.InnerJoin)
        assert tbl_node.left is table2
        assert tbl_node.right is table

        # table expression substitution has been made in the predicate
        assert tbl_node.predicates[0].equals(table['g'] == table2['key'])
示例#6
0
    def insert(self, table_name, expr, database=None, overwrite=False):
        """
        Insert into existing table

        Parameters
        ----------
        table_name : string
        expr : TableExpr
        database : string, default None
        overwrite : boolean, default False
          If True, will replace existing contents of table

        Examples
        --------
        con.insert('my_table', table_expr)

        # Completely overwrite contents
        con.insert('my_table', table_expr, overwrite=True)
        """
        ast = sql.build_ast(expr)
        select = ast.queries[0]
        statement = ddl.InsertSelect(table_name,
                                     select,
                                     database=database,
                                     overwrite=overwrite)
        self._execute(statement)
示例#7
0
文件: client.py 项目: raderaj/ibis
    def explain(self, expr):
        """
        Query for and return the query plan associated with the indicated
        expression or SQL query.

        Returns
        -------
        plan : string
        """
        if isinstance(expr, ir.Expr):
            ast = sql.build_ast(expr)
            if len(ast.queries) > 1:
                raise Exception('Multi-query expression')

            query = ast.queries[0].compile()
        else:
            query = expr

        statement = 'EXPLAIN {0}'.format(query)

        with self._execute(statement, results=True) as cur:
            result = self._get_list(cur)

        return 'Query:\n{0}\n\n{1}'.format(util.indent(query, 2),
                                           '\n'.join(result))
示例#8
0
    def test_multiple_limits(self):
        t = self.con.table('functional_alltypes')

        expr = t.limit(20).limit(10)
        stmt = build_ast(expr).queries[0]

        assert stmt.limit['n'] == 10
示例#9
0
    def explain(self, expr):
        """
        Query for and return the query plan associated with the indicated
        expression or SQL query.

        Returns
        -------
        plan : string
        """
        if isinstance(expr, ir.Expr):
            ast = sql.build_ast(expr)
            if len(ast.queries) > 1:
                raise Exception('Multi-query expression')

            query = ast.queries[0].compile()
        else:
            query = expr

        statement = 'EXPLAIN {0}'.format(query)

        with self._execute(statement, results=True) as cur:
            result = self._get_list(cur)

        return 'Query:\n{0}\n\n{1}'.format(util.indent(query, 2),
                                           '\n'.join(result))
示例#10
0
def _create_table(table_name, expr, database=None, can_exist=False,
                  format='parquet'):
    ast = build_ast(expr)
    select = ast.queries[0]
    statement = ddl.CTAS(table_name, select,
                         database=database,
                         format=format,
                         can_exist=can_exist)
    return statement
示例#11
0
    def create_table(self,
                     table_name,
                     expr=None,
                     schema=None,
                     database=None,
                     format='parquet',
                     overwrite=False,
                     external=False,
                     path=None):
        """
        Create a new table in Impala using an Ibis table expression

        Parameters
        ----------
        table_name : string
        expr : TableExpr, optional
          If passed, creates table from select statement results
        schema : ibis.Schema, optional
          Mutually exclusive with expr, creates an empty table with a
          particular schema
        database : string, default None (optional)
        format : {'parquet'}
        overwrite : boolean, default False
          Do not create table if table with indicated name already exists
        external : boolean, default False
          Create an external table; Impala will not delete the underlying data
          when the table is dropped
        path : string, default None
          Specify the path where Impala reads and writes files for the table

        Examples
        --------
        con.create_table('new_table_name', table_expr)
        """
        if expr is not None:
            ast = sql.build_ast(expr)
            select = ast.queries[0]
            statement = ddl.CTAS(table_name,
                                 select,
                                 database=database,
                                 overwrite=overwrite,
                                 format=format,
                                 external=external,
                                 path=path)
        elif schema is not None:
            statement = ddl.CreateTableWithSchema(table_name,
                                                  schema,
                                                  ddl.NoFormat(),
                                                  database=database,
                                                  format=format,
                                                  overwrite=overwrite,
                                                  external=external,
                                                  path=path)
        else:
            raise com.IbisError('Must pass expr or schema')

        self._execute(statement)
示例#12
0
    def create_view(self, name, expr, database=None):
        """
        Create an Impala view from a table expression

        Parameters
        ----------
        name : string
        expr : ibis TableExpr
        database : string, default None
        """
        ast = sql.build_ast(expr)
        select = ast.queries[0]
        statement = ddl.CreateView(name, select, database=database)
        self._execute(statement)
示例#13
0
    def create_view(self, name, expr, database=None):
        """
        Create an Impala view from a table expression

        Parameters
        ----------
        name : string
        expr : ibis TableExpr
        database : string, default None
        """
        ast = sql.build_ast(expr)
        select = ast.queries[0]
        statement = ddl.CreateView(name, select, database=database)
        self._execute(statement)
示例#14
0
    def create_table(self, table_name, expr=None, schema=None, database=None,
                     format='parquet', overwrite=False, external=False,
                     path=None):
        """
        Create a new table in Impala using an Ibis table expression

        Parameters
        ----------
        table_name : string
        expr : TableExpr, optional
          If passed, creates table from select statement results
        schema : ibis.Schema, optional
          Mutually exclusive with expr, creates an empty table with a
          particular schema
        database : string, default None (optional)
        format : {'parquet'}
        overwrite : boolean, default False
          Do not create table if table with indicated name already exists
        external : boolean, default False
          Create an external table; Impala will not delete the underlying data
          when the table is dropped
        path : string, default None
          Specify the path where Impala reads and writes files for the table

        Examples
        --------
        con.create_table('new_table_name', table_expr)
        """
        if expr is not None:
            ast = sql.build_ast(expr)
            select = ast.queries[0]
            statement = ddl.CTAS(table_name, select,
                                 database=database,
                                 overwrite=overwrite,
                                 format=format,
                                 external=external,
                                 path=path)
        elif schema is not None:
            statement = ddl.CreateTableWithSchema(
                table_name, schema, ddl.NoFormat(),
                database=database,
                format=format,
                overwrite=overwrite,
                external=external,
                path=path)
        else:
            raise com.IbisError('Must pass expr or schema')

        self._execute(statement)
示例#15
0
 def _build_ast_ensure_limit(self, expr, limit):
     ast = sql.build_ast(expr)
     # note: limit can still be None at this point, if the global
     # default_limit is None
     for query in reversed(ast.queries):
         if (isinstance(query, ddl.Select)
                 and not isinstance(expr, ir.ScalarExpr)
                 and query.table_set is not None):
             if query.limit is None:
                 query_limit = limit or options.sql.default_limit
                 if query_limit:
                     query.limit = {'n': query_limit, 'offset': 0}
             elif limit is not None:
                 query.limit = {'n': limit, 'offset': query.limit['offset']}
     return ast
示例#16
0
    def test_create_external_table_as(self):
        path = "/path/to/table"
        select = build_ast(self.con.table("test1")).queries[0]
        statement = ddl.CTAS("another_table", select, external=True, can_exist=False, path=path, database="foo")
        result = statement.compile()

        expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
STORED AS PARQUET
LOCATION '{0}'
AS
SELECT *
FROM test1""".format(
            path
        )
        assert result == expected
示例#17
0
 def _build_ast_ensure_limit(self, expr, limit):
     ast = sql.build_ast(expr)
     if not limit:
         limit = options.sql.default_limit
     # note: limit can still be None at this point, if the global
     # default_limit is None
     if limit is not None:
         for query in reversed(ast.queries):
             if (isinstance(query, ddl.Select) and
                     not isinstance(expr, ir.ScalarExpr) and
                     query.table_set is not None):
                 if query.limit is None:
                     query.limit = {'n': limit,
                                    'offset': 0}
                     break
     return ast
示例#18
0
    def test_create_external_table_as(self):
        path = '/path/to/table'
        select = build_ast(self.con.table('test1')).queries[0]
        statement = ddl.CTAS('another_table',
                             select,
                             external=True,
                             can_exist=False,
                             path=path,
                             database='foo')
        result = statement.compile()

        expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
STORED AS PARQUET
LOCATION '{0}'
AS
SELECT *
FROM test1""".format(path)
        assert result == expected
示例#19
0
    def insert(self,
               table_name,
               expr,
               database=None,
               overwrite=False,
               validate=True):
        """
        Insert into existing table

        Parameters
        ----------
        table_name : string
        expr : TableExpr
        database : string, default None
        overwrite : boolean, default False
          If True, will replace existing contents of table
        validate : boolean, default True
          If True, do more rigorous validation that schema of table being
          inserted is compatible with the existing table

        Examples
        --------
        con.insert('my_table', table_expr)

        # Completely overwrite contents
        con.insert('my_table', table_expr, overwrite=True)
        """
        if validate:
            existing_schema = self.get_schema(table_name, database=database)
            insert_schema = expr.schema()
            if not insert_schema.equals(existing_schema):
                _validate_compatible(insert_schema, existing_schema)

        ast = sql.build_ast(expr)
        select = ast.queries[0]
        statement = ddl.InsertSelect(table_name,
                                     select,
                                     database=database,
                                     overwrite=overwrite)
        self._execute(statement)
示例#20
0
    def test_simple_scalar_aggregates(self):
        from pandas import DataFrame

        # Things like table.column.{sum, mean, ...}()
        table = self.con.table('alltypes')

        expr = table[table.c > 0].f.sum()

        ast = build_ast(expr)
        query = ast.queries[0]

        sql_query = query.compile()
        expected = """SELECT sum(`f`) AS `tmp`
FROM alltypes
WHERE `c` > 0"""

        assert sql_query == expected

        # Maybe the result handler should act on the cursor. Not sure.
        handler = query.result_handler
        output = DataFrame({'tmp': [5]})
        assert handler(output) == 5
示例#21
0
    def test_simple_scalar_aggregates(self):
        from pandas import DataFrame

        # Things like table.column.{sum, mean, ...}()
        table = self.con.table('alltypes')

        expr = table[table.c > 0].f.sum()

        ast = build_ast(expr)
        query = ast.queries[0]

        sql_query = query.compile()
        expected = """SELECT sum(`f`) AS `tmp`
FROM alltypes
WHERE `c` > 0"""

        assert sql_query == expected

        # Maybe the result handler should act on the cursor. Not sure.
        handler = query.result_handler
        output = DataFrame({'tmp': [5]})
        assert handler(output) == 5
示例#22
0
    def test_table_column_unbox(self):
        from pandas import DataFrame

        table = self.table
        m = table.f.sum().name('total')
        agged = table[table.c > 0].group_by('g').aggregate([m])
        expr = agged.g

        ast = build_ast(expr)
        query = ast.queries[0]

        sql_query = query.compile()
        expected = """SELECT `g`, sum(`f`) AS `total`
FROM alltypes
WHERE `c` > 0
GROUP BY 1"""

        assert sql_query == expected

        # Maybe the result handler should act on the cursor. Not sure.
        handler = query.result_handler
        output = DataFrame({'g': ['foo', 'bar', 'baz']})
        assert (handler(output) == output['g']).all()
示例#23
0
    def test_table_column_unbox(self):
        from pandas import DataFrame

        table = self.table
        m = table.f.sum().name('total')
        agged = table[table.c > 0].group_by('g').aggregate([m])
        expr = agged.g

        ast = build_ast(expr)
        query = ast.queries[0]

        sql_query = query.compile()
        expected = """SELECT `g`, sum(`f`) AS `total`
FROM alltypes
WHERE `c` > 0
GROUP BY 1"""

        assert sql_query == expected

        # Maybe the result handler should act on the cursor. Not sure.
        handler = query.result_handler
        output = DataFrame({'g': ['foo', 'bar', 'baz']})
        assert (handler(output) == output['g']).all()
示例#24
0
    def test_ast_with_aggregation_join_filter(self):
        table = self.con.table('test1')
        table2 = self.con.table('test2')

        filter_pred = table['f'] > 0
        table3 = table[filter_pred]
        join_pred = table3['g'] == table2['key']

        joined = table2.inner_join(table3, [join_pred])

        met1 = (table3['f'] - table2['value']).mean().name('foo')
        result = joined.aggregate([met1, table3['f'].sum().name('bar')],
                                  by=[table3['g'], table2['key']])

        ast = build_ast(result)
        stmt = ast.queries[0]

        # hoisted metrics
        ex_metrics = [(table['f'] - table2['value']).mean().name('foo'),
                      table['f'].sum().name('bar')]
        ex_by = [table['g'], table2['key']]

        # hoisted join and aggregate
        expected_table_set = \
            table2.inner_join(table, [table['g'] == table2['key']])
        assert stmt.table_set.equals(expected_table_set)

        # Check various exprs
        for res, ex in zip(stmt.select_set, ex_by + ex_metrics):
            assert res.equals(ex)

        for res, ex in zip(stmt.group_by, ex_by):
            assert stmt.select_set[res].equals(ex)

        # Check we got the filter
        assert len(stmt.where) == 1
        assert stmt.where[0].equals(filter_pred)
示例#25
0
    def test_ast_with_aggregation_join_filter(self):
        table = self.con.table('test1')
        table2 = self.con.table('test2')

        filter_pred = table['f'] > 0
        table3 = table[filter_pred]
        join_pred = table3['g'] == table2['key']

        joined = table2.inner_join(table3, [join_pred])

        met1 = (table3['f'] - table2['value']).mean().name('foo')
        result = joined.aggregate([met1, table3['f'].sum().name('bar')],
                                  by=[table3['g'], table2['key']])

        ast = build_ast(result)
        stmt = ast.queries[0]

        # hoisted metrics
        ex_metrics = [(table['f'] - table2['value']).mean().name('foo'),
                      table['f'].sum().name('bar')]
        ex_by = [table['g'], table2['key']]

        # hoisted join and aggregate
        expected_table_set = \
            table2.inner_join(table, [table['g'] == table2['key']])
        assert stmt.table_set.equals(expected_table_set)

        # Check various exprs
        for res, ex in zip(stmt.select_set, ex_by + ex_metrics):
            assert res.equals(ex)

        for res, ex in zip(stmt.group_by, ex_by):
            assert stmt.select_set[res].equals(ex)

        # Check we got the filter
        assert len(stmt.where) == 1
        assert stmt.where[0].equals(filter_pred)
示例#26
0
def _get_query(expr):
    ast = build_ast(expr)
    return ast.queries[0]
示例#27
0
def _get_select(expr):
    ast = build_ast(expr)
    select = ast.queries[0]
    context = ast.context

    return select, context
示例#28
0
def _create_table(table_name, expr, database=None, can_exist=False, format="parquet"):
    ast = build_ast(expr)
    select = ast.queries[0]
    statement = ddl.CTAS(table_name, select, database=database, format=format, can_exist=can_exist)
    return statement
示例#29
0
def _get_select(expr):
    ast = build_ast(expr)
    select = ast.queries[0]
    context = ast.context

    return select, context
示例#30
0
    def create_table(self, table_name, expr=None, schema=None, database=None,
                     format='parquet', force=False, external=False,
                     path=None, partition=None, like_parquet=None):
        """
        Create a new table in Impala using an Ibis table expression

        Parameters
        ----------
        table_name : string
        expr : TableExpr, optional
          If passed, creates table from select statement results
        schema : ibis.Schema, optional
          Mutually exclusive with expr, creates an empty table with a
          particular schema
        database : string, default None (optional)
        format : {'parquet'}
        force : boolean, default False
          Do not create table if table with indicated name already exists
        external : boolean, default False
          Create an external table; Impala will not delete the underlying data
          when the table is dropped
        path : string, default None
          Specify the path where Impala reads and writes files for the table
        partition : list of strings
          Must pass a schema to use this. Cannot partition from an expression
          (create-table-as-select)
        like_parquet : string (HDFS path), optional
          Can specify in lieu of a schema

        Examples
        --------
        con.create_table('new_table_name', table_expr)
        """
        if like_parquet is not None:
            raise NotImplementedError

        if expr is not None:
            ast = sql.build_ast(expr)
            select = ast.queries[0]

            if partition is not None:
                # Fairly certain this is currently the case
                raise ValueError('partition not supported with '
                                 'create-table-as-select')

            statement = ddl.CTAS(table_name, select,
                                 database=database,
                                 can_exist=force,
                                 format=format,
                                 external=external,
                                 path=path)
        elif schema is not None:
            statement = ddl.CreateTableWithSchema(
                table_name, schema, ddl.NoFormat(),
                database=database,
                format=format,
                can_exist=force,
                external=external,
                path=path, partition=partition)
        else:
            raise com.IbisError('Must pass expr or schema')

        self._execute(statement)
示例#31
0
    def create_table(self,
                     table_name,
                     expr=None,
                     schema=None,
                     database=None,
                     format='parquet',
                     overwrite=False,
                     external=False,
                     path=None,
                     partition=None,
                     like_parquet=None):
        """
        Create a new table in Impala using an Ibis table expression

        Parameters
        ----------
        table_name : string
        expr : TableExpr, optional
          If passed, creates table from select statement results
        schema : ibis.Schema, optional
          Mutually exclusive with expr, creates an empty table with a
          particular schema
        database : string, default None (optional)
        format : {'parquet'}
        overwrite : boolean, default False
          Do not create table if table with indicated name already exists
        external : boolean, default False
          Create an external table; Impala will not delete the underlying data
          when the table is dropped
        path : string, default None
          Specify the path where Impala reads and writes files for the table
        partition : list of strings
          Must pass a schema to use this. Cannot partition from an expression
          (create-table-as-select)
        like_parquet : string (HDFS path), optional
          Can specify in lieu of a schema

        Examples
        --------
        con.create_table('new_table_name', table_expr)
        """
        if like_parquet is not None:
            raise NotImplementedError

        if expr is not None:
            ast = sql.build_ast(expr)
            select = ast.queries[0]

            if partition is not None:
                # Fairly certain this is currently the case
                raise ValueError('partition not supported with '
                                 'create-table-as-select')

            statement = ddl.CTAS(table_name,
                                 select,
                                 database=database,
                                 overwrite=overwrite,
                                 format=format,
                                 external=external,
                                 path=path)
        elif schema is not None:
            statement = ddl.CreateTableWithSchema(table_name,
                                                  schema,
                                                  ddl.NoFormat(),
                                                  database=database,
                                                  format=format,
                                                  overwrite=overwrite,
                                                  external=external,
                                                  path=path,
                                                  partition=partition)
        else:
            raise com.IbisError('Must pass expr or schema')

        self._execute(statement)
示例#32
0
def _get_query(expr):
    ast = build_ast(expr)
    return ast.queries[0]