Exemplo n.º 1
0
    def test_create_table_delimited(self):
        path = '/path/to/files/'
        schema = ibis.schema([('a', 'string'), ('b', 'int32'), ('c', 'double'),
                              ('d', 'decimal(12,2)')])

        stmt = ddl.CreateTableDelimited('new_table',
                                        path,
                                        schema,
                                        delimiter='|',
                                        escapechar='\\',
                                        lineterminator='\0',
                                        database='foo',
                                        can_exist=True)

        result = stmt.compile()
        expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`a` string,
 `b` int,
 `c` double,
 `d` decimal(12,2))
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
ESCAPED BY '\\'
LINES TERMINATED BY '\0'
LOCATION '{0}'""".format(path)
        assert result == expected
Exemplo n.º 2
0
    def delimited_file(self,
                       hdfs_dir,
                       schema,
                       name=None,
                       database=None,
                       delimiter=',',
                       escapechar=None,
                       lineterminator=None,
                       external=True,
                       persist=False):
        """
        Expose a directory of delimited text files (CSV / TSV / etc.) as an
        Ibis table. See `parquet_file` for more exposition on what happens
        under the hood.

        Parameters
        ----------
        hdfs_dir : string
          HDFS directory name containing delimited text files
        schema : ibis Schema
          Schema of the table to create
        name : string, default None
          Name for the temporary or persistent table; when omitted, a random
          one is generated
        database : string
          Database to create the (possibly temporary) table in
        delimiter : length-1 string, default ','
          Field delimiter; pass None if there is no delimiter
        escapechar : length-1 string
          Character used to escape special characters
        lineterminator : length-1 string
          Character used to delimit lines
        external : boolean, default True
          Create the table as EXTERNAL (data will not be deleted on drop).
          Note that if persist=False and external=False, whatever data you
          reference will be deleted
        persist : boolean, default False
          If True, do not delete the table upon garbage collection of the
          ibis table object

        Returns
        -------
        delimited_table : ImpalaTable
        """
        # Resolve the concrete (name, database) pair before building anything
        # that refers to the table.
        table_name, table_db = self._get_concrete_table_path(
            name, database, persist=persist)
        full_name = self._fully_qualified_name(table_name, table_db)

        # Keyword options forwarded unchanged to the DDL statement.
        ddl_options = {
            'database': table_db,
            'delimiter': delimiter,
            'external': external,
            'lineterminator': lineterminator,
            'escapechar': escapechar,
        }
        create_stmt = ddl.CreateTableDelimited(table_name, hdfs_dir, schema,
                                               **ddl_options)
        self._execute(create_stmt)

        return self._wrap_new_table(full_name, persist)