# Test: compile a ddl.CreateTableDelimited statement and compare the DDL text.
#
# Builds a four-column schema (string, int32, double, decimal(12,2)) and a
# statement for table 'new_table' in database 'foo' located at
# '/path/to/files/', with '|' as delimiter, a backslash as escape character,
# '\0' as line terminator and can_exist=True. The compiled result must equal
# the expected CREATE EXTERNAL TABLE IF NOT EXISTS text, in which the int32
# column is written as `int` and the path is substituted into LOCATION via
# str.format.
#
# NOTE(review): this definition is collapsed onto a single line here; the
# expected triple-quoted literal presumably spanned several lines (it opens
# with a backslash continuation) -- confirm the exact line breaks against the
# upstream file before reformatting, since they are part of the compared text.
def test_create_table_delimited(self): path = '/path/to/files/' schema = ibis.schema([('a', 'string'), ('b', 'int32'), ('c', 'double'), ('d', 'decimal(12,2)')]) stmt = ddl.CreateTableDelimited('new_table', path, schema, delimiter='|', escapechar='\\', lineterminator='\0', database='foo', can_exist=True) result = stmt.compile() expected = """\ CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table` (`a` string, `b` int, `c` double, `d` decimal(12,2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' ESCAPED BY '\\' LINES TERMINATED BY '\0' LOCATION '{0}'""".format(path) assert result == expected
def delimited_file(self, hdfs_dir, schema, name=None, database=None,
                   delimiter=',', escapechar=None, lineterminator=None,
                   external=True, persist=False):
    """
    Expose a directory of delimited text files (CSV / TSV / etc.) as an
    Ibis table.

    A CREATE TABLE statement describing the text format is built, executed,
    and the resulting table is wrapped and returned. See `parquet_file` for
    more exposition on what happens under the hood.

    Parameters
    ----------
    hdfs_dir : string
      HDFS directory name containing the delimited text files
    schema : ibis Schema
      Column names and types of the files
    name : string, default None
      Name for the temporary or persistent table; a random one is generated
      when omitted
    database : string
      Database to create the (possibly temporary) table in
    delimiter : length-1 string, default ','
      Field separator. Pass None if there is no delimiter
    escapechar : length-1 string
      Character used to escape special characters
    lineterminator : length-1 string
      Character used to delimit lines
    external : boolean, default True
      Create the table as EXTERNAL (data will not be deleted on drop). Note
      that if persist=False and external=False, whatever data you reference
      will be deleted
    persist : boolean, default False
      If True, do not delete the table upon garbage collection of the ibis
      table object

    Returns
    -------
    delimited_table : ImpalaTable
    """
    # From here on, name/database are the values returned by the helper,
    # not necessarily the ones the caller passed in.
    name, database = self._get_concrete_table_path(name, database,
                                                   persist=persist)
    full_name = self._fully_qualified_name(name, database)

    # Text-format options are forwarded to the DDL statement unchanged.
    text_format = {
        'delimiter': delimiter,
        'escapechar': escapechar,
        'lineterminator': lineterminator,
    }
    create_stmt = ddl.CreateTableDelimited(name, hdfs_dir, schema,
                                           database=database,
                                           external=external,
                                           **text_format)
    self._execute(create_stmt)

    return self._wrap_new_table(full_name, persist)