Пример #1
0
def test_select_basics(t):
    """Check the SQL text compiled by ``ddl.InsertSelect``.

    The select statement comes from ``t.limit(10)``. The insert statement is
    compiled twice against database ``foo``: once with default arguments and
    once with ``overwrite=True``.
    """
    table_name = 'testing123456'

    limited = t.limit(10)
    query = build_ast(limited, SparkDialect.make_context()).queries[0]

    # Default arguments produce a plain INSERT INTO.
    insert_sql = ddl.InsertSelect(table_name, query, database='foo').compile()
    assert insert_sql == """\
INSERT INTO foo.`testing123456`
SELECT *
FROM functional_alltypes
LIMIT 10"""

    # overwrite=True produces INSERT OVERWRITE TABLE instead.
    overwrite_sql = ddl.InsertSelect(
        table_name, query, database='foo', overwrite=True
    ).compile()
    assert overwrite_sql == """\
INSERT OVERWRITE TABLE foo.`testing123456`
SELECT *
FROM functional_alltypes
LIMIT 10"""
Пример #2
0
    def create_view(
        self,
        name,
        expr,
        database=None,
        can_exist=False,
        temporary=False,
    ):
        """
        Define a Spark view backed by a table expression.

        Parameters
        ----------
        name : string
          Name given to the view
        expr : ibis TableExpr
          Expression whose first compiled query becomes the view's SELECT
        database : string, default None
        can_exist : boolean, default False
          Replace an existing view of the same name if it exists
        temporary : boolean, default False

        Returns
        -------
        The result of ``self._execute`` on the compiled statement
        """
        query = self._build_ast(expr, SparkDialect.make_context()).queries[0]
        # Options are forwarded unchanged to the DDL statement.
        options = {
            'database': database,
            'can_exist': can_exist,
            'temporary': temporary,
        }
        view_ddl = ddl.CreateView(name, query, **options)
        return self._execute(view_ddl.compile())
def _create_table(
    table_name, expr, database=None, can_exist=False, format='parquet'
):
    """Build a ``ddl.CTAS`` statement for ``expr`` without running it.

    The first query of the AST built from ``expr`` is used as the SELECT.
    The statement object is returned as is; compiling and executing it is
    left to the caller.
    """
    query = build_ast(expr, SparkDialect.make_context()).queries[0]
    options = {'database': database, 'format': format, 'can_exist': can_exist}
    return ddl.CTAS(table_name, query, **options)
Пример #4
0
    def insert(
        self,
        obj=None,
        overwrite=False,
        values=None,
        validate=True,
    ):
        """
        Insert into Spark table.

        Parameters
        ----------
        obj : TableExpr or pandas DataFrame
          Data to insert. A pandas DataFrame is converted to a Spark
          DataFrame and written through its writer; a table expression is
          compiled into a ``ddl.InsertSelect`` statement and executed.
        overwrite : boolean, default False
          If True, will replace existing contents of table
        values : optional
          Not supported. A non-None value raises NotImplementedError when
          ``obj`` is not a pandas DataFrame.
        validate : boolean, default True
          If True, do more rigorous validation that schema of table being
          inserted is compatible with the existing table. Only applied to
          table expressions; the pandas DataFrame path returns before this
          check.

        Returns
        -------
        None for a pandas DataFrame, otherwise the result of
        ``self._execute`` on the compiled statement

        Raises
        ------
        NotImplementedError
          If ``values`` is not None and ``obj`` is not a pandas DataFrame

        Examples
        --------
        >>> t.insert(table_expr)  # doctest: +SKIP

        # Completely overwrite contents
        >>> t.insert(table_expr, overwrite=True)  # doctest: +SKIP
        """
        if isinstance(obj, pd.DataFrame):
            spark_df = self._session.createDataFrame(obj)
            # Go through the DataFrameWriter: DataFrame.insertInto was
            # deprecated in Spark 1.4 and is not available on Spark 2+
            # DataFrames, whereas write.insertInto exists on both.
            spark_df.write.insertInto(self.name, overwrite=overwrite)
            return

        expr = obj

        if values is not None:
            raise NotImplementedError

        if validate:
            existing_schema = self.schema()
            insert_schema = expr.schema()
            # Identical schemas need no further compatibility check.
            if not insert_schema.equals(existing_schema):
                _validate_compatible(insert_schema, existing_schema)

        ast = build_ast(expr, SparkDialect.make_context())
        select = ast.queries[0]
        statement = ddl.InsertSelect(
            self._qualified_name,
            select,
            overwrite=overwrite,
        )
        return self._execute(statement.compile())
Пример #5
0
    def create_table(
        self,
        table_name,
        obj=None,
        schema=None,
        database=None,
        force=False,
        # HDFS options
        format='parquet',
    ):
        """
        Create a new table in Spark using an Ibis table expression.

        Parameters
        ----------
        table_name : string
        obj : TableExpr or pandas.DataFrame, optional
          If passed, creates table from select statement results. A pandas
          DataFrame is converted to a Spark DataFrame and saved directly.
        schema : ibis.Schema, optional
          Mutually exclusive with obj, creates an empty table with a
          particular schema. Only consulted when obj is None.
        database : string, default None (optional)
          Database to create the table in. Applied on every path; for a
          pandas DataFrame the saved table name is qualified with it.
        force : boolean, default False
          For a pandas DataFrame, selects save mode 'overwrite' instead of
          'error'. Otherwise it is passed to the DDL statement as
          ``can_exist``.
        format : {'parquet'}

        Returns
        -------
        None for a pandas DataFrame, otherwise the result of
        ``self._execute`` on the compiled statement

        Raises
        ------
        com.IbisError
          If neither obj nor schema is given

        Examples
        --------
        >>> con.create_table('new_table_name', table_expr)  # doctest: +SKIP
        """
        if obj is not None:
            if isinstance(obj, pd.DataFrame):
                spark_df = self._session.createDataFrame(obj)
                mode = 'overwrite' if force else 'error'
                # Qualify the target so that ``database`` is honoured here
                # as well; previously it was silently ignored for
                # DataFrames and the table went to the current database.
                if database is not None:
                    target = '{}.{}'.format(database, table_name)
                else:
                    target = table_name
                spark_df.write.saveAsTable(
                    target,
                    format=format,
                    mode=mode,
                )
                return

            ast = self._build_ast(obj, SparkDialect.make_context())
            select = ast.queries[0]

            statement = ddl.CTAS(
                table_name,
                select,
                database=database,
                can_exist=force,
                format=format,
            )
        elif schema is not None:
            statement = ddl.CreateTableWithSchema(
                table_name,
                schema,
                database=database,
                format=format,
                can_exist=force,
            )
        else:
            raise com.IbisError('Must pass expr or schema')

        return self._execute(statement.compile())