def test_create_table_parquet_with_schema():
    """Check the DDL compiled for a Parquet table declared with a schema.

    The statement is built as external, ``can_exist=True`` and scoped to
    database ``foo``; the expected text maps ``int8`` to ``tinyint`` and
    ``int16`` to ``smallint``.
    """
    location = '/path/to/'
    columns = [('foo', 'string'), ('bar', 'int8'), ('baz', 'int16')]
    table_schema = ibis.schema(columns)

    create_stmt = ddl.CreateTableParquet(
        'new_table',
        location,
        schema=table_schema,
        external=True,
        can_exist=True,
        database='foo',
    )

    # NOTE(review): the expected DDL is spelled one clause per line; confirm
    # the line breaks against what CreateTableParquet.compile() emits.
    template = '\n'.join(
        [
            'CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`',
            '(`foo` string,',
            ' `bar` tinyint,',
            ' `baz` smallint)',
            'STORED AS PARQUET',
            "LOCATION '{0}'",
        ]
    )

    assert create_stmt.compile() == template.format(location)
def test_create_table_like_parquet(self):
    """Check the DDL compiled when the schema comes from a Parquet file.

    ``example_file`` is expected to surface as a ``LIKE PARQUET '<path>'``
    clause. ``external`` is not passed here, yet the expected statement is
    ``CREATE EXTERNAL TABLE`` (presumably the constructor default).
    """
    location = '/path/to/'
    sample_file = '/path/to/parquetfile'

    create_stmt = ddl.CreateTableParquet(
        'new_table',
        location,
        example_file=sample_file,
        can_exist=True,
        database='foo',
    )

    # NOTE(review): the expected DDL is spelled one clause per line; confirm
    # the line breaks against what CreateTableParquet.compile() emits.
    template = '\n'.join(
        [
            'CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`',
            "LIKE PARQUET '{0}'",
            'STORED AS PARQUET',
            "LOCATION '{1}'",
        ]
    )

    assert create_stmt.compile() == template.format(sample_file, location)
def test_create_table_parquet_like_other(self):
    """Check the DDL compiled when the schema is copied from another table.

    ``example_table`` is expected to surface as a plain ``LIKE <table>``
    clause, with the table reference inserted verbatim (no quoting added).
    """
    # alternative to "LIKE PARQUET"
    location = '/path/to/'
    source_table = 'db.other'

    create_stmt = ddl.CreateTableParquet(
        'new_table',
        location,
        example_table=source_table,
        can_exist=True,
        database='foo',
    )

    # NOTE(review): the expected DDL is spelled one clause per line; confirm
    # the line breaks against what CreateTableParquet.compile() emits.
    template = '\n'.join(
        [
            'CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`',
            'LIKE {0}',
            'STORED AS PARQUET',
            "LOCATION '{1}'",
        ]
    )

    assert create_stmt.compile() == template.format(source_table, location)
def parquet_file(self, hdfs_dir, schema=None, name=None, database=None,
                 external=True, like_file=None, like_table=None,
                 persist=False):
    """
    Expose Parquet data stored in an HDFS directory as an Ibis table.

    The table may be given a name and persisted; otherwise a unique name is
    generated for it. Temporarily, Ibis attempts to drop any non-persistent
    external table it created once the underlying object is garbage
    collected (or the Python interpreter shuts down normally).

    Parameters
    ----------
    hdfs_dir : string
      Path in HDFS
    schema : ibis Schema, optional
      Explicit schema for the table. When neither this nor one of the
      like_* arguments is given, the schema is inferred from one of the
      Parquet files found in hdfs_dir.
    name : string, optional
      Table name; a random unique name is generated otherwise
    database : string, optional
      Database to create the (possibly temporary) table in
    external : boolean, default True
      An external table leaves the referenced data in place when the
      table is dropped in Impala. With external=False, Impala takes
      ownership of the Parquet file.
    like_file : string, optional
      Absolute path to a Parquet file in HDFS whose schema definitions
      are used; an alternative to supplying an explicit schema
    like_table : string, optional
      Fully scoped and escaped name of an Impala table whose schema is
      used for the newly created table
    persist : boolean, default False
      If True, do not drop the table upon Ibis garbage collection /
      interpreter shutdown

    Returns
    -------
    parquet_table : ImpalaTable
    """
    name, database = self._get_concrete_table_path(
        name, database, persist=persist
    )

    # Nothing to derive a schema from: fall back to the absolute path of
    # some file inside the HDFS directory and let it serve as the example.
    if all(hint is None for hint in (like_file, like_table, schema)):
        like_file = self.hdfs.find_any_file(hdfs_dir)

    full_name = self._fully_qualified_name(name, database)

    create_options = dict(
        schema=schema,
        database=database,
        example_file=like_file,
        example_table=like_table,
        external=external,
        can_exist=False,
    )
    create_stmt = ddl.CreateTableParquet(name, hdfs_dir, **create_options)
    self._execute(create_stmt)

    return self._wrap_new_table(full_name, persist)