Exemplo n.º 1
0
 def test_write_in_multiple_blocks(self):
     """Write the fixture records with ``sync_interval=1`` and read them back.

     The records read from ``weather.avro`` must equal ``self.records``.
     """
     path = 'weather.avro'
     # A sync interval of 1 asks the writer to flush a block on every write.
     writer = AvroWriter(
         self.client, path, schema=self.schema, sync_interval=1
     )
     with writer:
         for item in self.records:
             writer.write(item)
     with AvroReader(self.client, path) as avro_file:
         eq_(list(avro_file), self.records)
Exemplo n.º 2
0
 def test_write_codec(self):
   """Run the ``write`` command with ``--codec deflate`` and check the result.

   The local ``weather.jsonl`` file is fed through stdin; the remote
   ``weather.avro`` must then contain ``self.records`` and have a length
   different from the local ``weather.avro`` fixture.
   """
   with open(osp.join(self.dpath, 'weather.jsonl')) as source:
     argv = ['write', 'weather.avro']
     argv += ['--schema', dumps(self.schema)]
     argv += ['--codec', 'deflate']
     main(argv, client=self.client, stdin=source)
   # The content read back must match the fixture records.
   with AvroReader(self.client, 'weather.avro') as avro_file:
     records = list(avro_file)
   eq_(records, self.records)
   # Only inequality is checked: for a very small file the compressed
   # output might not actually be smaller.
   remote_size = self.client.content('weather.avro')['length']
   local_size = osp.getsize(osp.join(self.dpath, 'weather.avro'))
   ok_(remote_size != local_size)
Exemplo n.º 3
0
 def test_read_with_compatible_schema(self):
     """Read the uploaded fixture through a different reader schema.

     The reader schema declares an int ``temp`` field and a string ``tag``
     field defaulting to ``''``; each record read is expected to carry the
     fixture's ``temp`` value and an empty ``tag``.
     """
     self.client.upload('w.avro', osp.join(self.dpath, 'weather.avro'))
     temp_field = {'name': 'temp', 'type': 'int'}
     tag_field = {'name': 'tag', 'type': 'string', 'default': ''}
     schema = {
         'name': 'test.Weather',
         'type': 'record',
         'fields': [temp_field, tag_field],
     }
     with AvroReader(self.client, 'w.avro', reader_schema=schema) as reader:
         actual = list(reader)
         expected = [{'temp': r['temp'], 'tag': ''} for r in self.records]
         eq_(actual, expected)
Exemplo n.º 4
0
from hdfs import Config
from hdfs.ext.avro import AvroReader, AvroWriter

# Obtain the client configured for the default alias.
client = Config().get_client()

# Sample records to round-trip through HDFS.
records = [
    {'name': 'Ann', 'age': 23},
    {'name': 'Bob', 'age': 22},
]

# Write an Avro file to HDFS. The records' schema is simple enough that the
# writer is left to infer it; otherwise it would be passed as an argument.
with AvroWriter(client, 'names.avro', overwrite=True) as writer:
    for record in records:
        writer.write(record)

# Read the file back and check the round trip.
with AvroReader(client, 'names.avro') as reader:
    # The schema inferred at write time.
    schema = reader.schema
    # The remote file's HDFS content object.
    content = reader.content
    # The records read must equal the ones written.
    assert list(reader) == records
Exemplo n.º 5
0
def read():
    """Print each record of the remote Avro file.

    Reads ``/tmp/hdfscli_avro/example.avro`` through the module-level
    ``client`` and prints the records one per ``print`` call.
    """
    avro_path = '/tmp/hdfscli_avro/example.avro'
    with AvroReader(client, avro_path) as avro_file:
        for entry in avro_file:
            print(entry)
Exemplo n.º 6
0
 def test_write(self):
     """Write ``self.df`` with ``write_dataframe`` and read the records back.

     The records read from ``weather.avro`` must equal ``self.records``.
     """
     path = 'weather.avro'
     write_dataframe(self.client, path, self.df)
     with AvroReader(self.client, path) as avro_file:
         stored = list(avro_file)
         eq_(stored, self.records)
Exemplo n.º 7
0
 def test_infer_schema(self):
     """Write the fixture records without passing a schema to the writer.

     The records read back from ``weather.avro`` must equal ``self.records``.
     """
     path = 'weather.avro'
     # No ``schema`` argument is given to the writer here.
     with AvroWriter(self.client, path) as writer:
         for item in self.records:
             writer.write(item)
     with AvroReader(self.client, path) as avro_file:
         stored = list(avro_file)
         eq_(stored, self.records)
Exemplo n.º 8
0
 def test_write_empty(self):
     """Open and close a writer without writing any record.

     Reading ``empty.avro`` back must expose ``self.schema`` as the reader's
     schema and yield no records.
     """
     path = 'empty.avro'
     writer = AvroWriter(self.client, path, schema=self.schema)
     with writer:
         pass  # Intentionally write nothing.
     with AvroReader(self.client, path) as avro_file:
         eq_(avro_file.schema, self.schema)
         eq_(list(avro_file), [])
Exemplo n.º 9
0
 def test_read(self):
     """Upload the local ``weather.avro`` fixture and read it back.

     The records read must equal ``self.records``.
     """
     local_path = osp.join(self.dpath, 'weather.avro')
     self.client.upload('weather.avro', local_path)
     with AvroReader(self.client, 'weather.avro') as avro_file:
         stored = list(avro_file)
         eq_(stored, self.records)
Exemplo n.º 10
0
 def test_read_with_same_schema(self):
     """Read the uploaded fixture with ``self.schema`` as the reader schema.

     The records read from ``w.avro`` must equal ``self.records``.
     """
     local_path = osp.join(self.dpath, 'weather.avro')
     self.client.upload('w.avro', local_path)
     avro_file = AvroReader(self.client, 'w.avro', reader_schema=self.schema)
     with avro_file as reader:
         stored = list(reader)
         eq_(stored, self.records)
Exemplo n.º 11
0
 def testAvroLength(self, status, destFileName, client):
     """Report whether a small remote Avro file contains no records.

     Returns False without opening the file when ``status[u"length"]`` is
     greater than 5000. Otherwise the file ``destFileName`` is read through
     ``client`` and the result is True only if it yields zero records.
     """
     if status[u"length"] > 5000:
         return False
     with AvroReader(client, destFileName) as avro_file:
         # An empty list is falsy, so this is True exactly when nothing is read.
         return not list(avro_file)