def test_create_external_table_avro():
    """Compile an Avro-backed CREATE EXTERNAL TABLE and compare the DDL text.

    The statement is built with ``database='foo'`` and ``can_exist=True`` and
    is expected to render ``IF NOT EXISTS``, the ``foo`` qualifier, the
    location, and the schema as an ``avro.schema.literal`` table property.
    """
    location = '/path/to/files/'

    # Decimal column expressed as an Avro logical type on top of bytes.
    decimal_field = {
        "type": "bytes",
        "logicalType": "decimal",
        "precision": 4,
        "scale": 2,
        'name': 'd',
    }
    record_fields = [
        {'name': 'a', 'type': 'string'},
        {'name': 'b', 'type': 'int'},
        {'name': 'c', 'type': 'double'},
        decimal_field,
    ]
    schema = {
        'fields': record_fields,
        'name': 'my_record',
        'type': 'record',
    }

    compiled = ddl.CreateTableAvro(
        'new_table', location, schema, database='foo', can_exist=True
    ).compile()

    # Note: the expected literal lists every object's keys alphabetically,
    # which differs from the key order used in the input dict above.
    template = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
STORED AS AVRO
LOCATION '%s'
TBLPROPERTIES (
  'avro.schema.literal'='{
  "fields": [
    {
      "name": "a",
      "type": "string"
    },
    {
      "name": "b",
      "type": "int"
    },
    {
      "name": "c",
      "type": "double"
    },
    {
      "logicalType": "decimal",
      "name": "d",
      "precision": 4,
      "scale": 2,
      "type": "bytes"
    }
  ],
  "name": "my_record",
  "type": "record"
}'
)"""
    expected_sql = template % location
    assert compiled == expected_sql
Пример #2
0
    def avro_file(
        self,
        hdfs_dir,
        avro_schema,
        name=None,
        database=None,
        external=True,
        persist=False,
    ):
        """
        Expose a directory of Avro files as a (possibly temporary) table.

        Parameters
        ----------
        hdfs_dir : string
          Absolute HDFS path to the directory holding the Avro files
        avro_schema : dict
          Avro schema of the data, given as a Python dict
        name : string, default None
          Requested table name; resolved by ``_get_concrete_table_path``
        database : string, default None
          Requested database; resolved together with ``name``
        external : boolean, default True
          Passed through unchanged to ``ddl.CreateTableAvro``
        persist : boolean, default False
          Passed to the name resolution and to ``_wrap_new_table``

        Returns
        -------
        avro_table : ImpalaTable
        """
        # Resolve the concrete name/database once; everything below uses the
        # resolved values rather than the raw arguments.
        table_name, db_name = self._get_concrete_table_path(
            name, database, persist=persist
        )
        full_name = self._fully_qualified_name(table_name, db_name)

        # Build the CREATE TABLE statement and run it in one step.
        self._execute(
            ddl.CreateTableAvro(
                table_name,
                hdfs_dir,
                avro_schema,
                database=db_name,
                external=external,
            )
        )
        return self._wrap_new_table(full_name, persist)