def test_create_table_parquet_with_schema():
    """Compile a Parquet CREATE TABLE statement built from an explicit schema.

    The expected SQL lists every schema column inline, with the ibis
    ``int8`` and ``int16`` types rendered as ``tinyint`` and ``smallint``,
    followed by the storage format and the table location.
    """
    hdfs_location = '/path/to/'
    table_schema = ibis.schema(
        [('foo', 'string'), ('bar', 'int8'), ('baz', 'int16')]
    )

    create_stmt = ddl.CreateTableParquet(
        'new_table',
        hdfs_location,
        schema=table_schema,
        external=True,
        can_exist=True,
        database='foo',
    )
    compiled = create_stmt.compile()

    # Only the location is substituted into the template.
    expected_template = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`foo` string,
 `bar` tinyint,
 `baz` smallint)
STORED AS PARQUET
LOCATION '{0}'"""
    expected_sql = expected_template.format(hdfs_location)

    assert compiled == expected_sql
Пример #2
0
    def test_create_table_like_parquet(self):
        directory = '/path/to/'
        path = '/path/to/parquetfile'
        statement = ddl.CreateTableParquet('new_table',
                                           directory,
                                           example_file=path,
                                           can_exist=True,
                                           database='foo')

        result = statement.compile()
        expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
LIKE PARQUET '{0}'
STORED AS PARQUET
LOCATION '{1}'""".format(path, directory)

        assert result == expected
Пример #3
0
    def test_create_table_parquet_like_other(self):
        # alternative to "LIKE PARQUET"
        directory = '/path/to/'
        example_table = 'db.other'

        statement = ddl.CreateTableParquet('new_table',
                                           directory,
                                           example_table=example_table,
                                           can_exist=True,
                                           database='foo')

        result = statement.compile()
        expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
LIKE {0}
STORED AS PARQUET
LOCATION '{1}'""".format(example_table, directory)

        assert result == expected
Пример #4
0
    def parquet_file(
        self,
        hdfs_dir,
        schema=None,
        name=None,
        database=None,
        external=True,
        like_file=None,
        like_table=None,
        persist=False,
    ):
        """
        Expose a directory of Parquet data in HDFS as an Ibis table.

        The table may be given a name and persisted; when no name is
        supplied a unique one is generated. For the time being, any
        non-persistent external table created by Ibis is dropped on a
        best-effort basis once the underlying object is garbage collected
        (or when the Python interpreter shuts down normally).

        Parameters
        ----------
        hdfs_dir : string
          Path in HDFS
        schema : ibis Schema, optional
          Explicit schema for the table. When this, like_file and like_table
          are all omitted, the schema is inferred from one of the Parquet
          files found in hdfs_dir.
        name : string, optional
          Table name; a random unique name is generated otherwise
        database : string, optional
          Database in which to create the (possibly temporary) table
        external : boolean, default True
          For an external table, the referenced data is not deleted when the
          table is dropped in Impala. With external=False, Impala takes
          ownership of the Parquet file.
        like_file : string, optional
          Absolute path to a Parquet file in HDFS whose schema definitions
          are used, as an alternative to supplying an explicit schema
        like_table : string, optional
          Fully scoped and escaped string naming an Impala table whose
          schema is used for the newly created table
        persist : boolean, default False
          If True, do not drop the table upon Ibis garbage collection /
          interpreter shutdown

        Returns
        -------
        parquet_table : ImpalaTable
        """
        name, database = self._get_concrete_table_path(
            name, database, persist=persist
        )

        # With no schema source of any kind, fall back to some file inside
        # the HDFS directory and let its schema define the table.
        schema_sources = (like_file, like_table, schema)
        if all(source is None for source in schema_sources):
            like_file = self.hdfs.find_any_file(hdfs_dir)

        qualified_name = self._fully_qualified_name(name, database)

        create_options = dict(
            schema=schema,
            database=database,
            example_file=like_file,
            example_table=like_table,
            external=external,
            can_exist=False,
        )
        stmt = ddl.CreateTableParquet(name, hdfs_dir, **create_options)
        self._execute(stmt)

        return self._wrap_new_table(qualified_name, persist)