Example #1
 def test_write_in_multiple_blocks(self):
     writer = AvroWriter(
         self.client,
         'weather.avro',
         schema=self.schema,
         sync_interval=1  # Flush block on every write.
     )
     with writer:
         for record in self.records:
             writer.write(record)
     with AvroReader(self.client, 'weather.avro') as reader:
         eq_(list(reader), self.records)
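The `self.schema` fixture isn't shown in this listing. For orientation, a minimal sketch of what an Avro schema for such weather records could look like (the field names here are assumptions, not taken from the test suite):

# Hypothetical schema for illustration only; the real test fixture may differ.
schema = {
    'type': 'record',
    'name': 'Weather',
    'fields': [
        {'name': 'station', 'type': 'string'},
        {'name': 'time', 'type': 'long'},
        {'name': 'temp', 'type': 'int'},
    ],
}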
Example #2
 def test_write(self):
     writer = AvroWriter(
         self.client,
         'weather.avro',
         schema=self.schema,
     )
     with writer:
         for record in self.records:
             writer.write(record)
     with temppath() as tpath:
         self.client.download('weather.avro', tpath)
         eq_(self._get_data_bytes(osp.join(self.dpath, 'weather.avro')),
             self._get_data_bytes(tpath))
Example #3
def write():
    with AvroWriter(client,
                    '/tmp/hdfscli_avro/example.avro',
                    overwrite=True,
                    schema=schema) as writer:
        for record in records:
            writer.write(record)
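The example defines only the write side. A possible read counterpart, assuming the same `client` and path (not part of the original snippet):

def read():
    # Sketch: stream the records back from the same path.
    with AvroReader(client, '/tmp/hdfscli_avro/example.avro') as reader:
        for record in reader:
            print(record)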
Example #4
 def test_read_part_file(self):
     data = {
         'part-m-00000.avro': [{
             'name': 'jane'
         }, {
             'name': 'bob'
         }],
         'part-m-00001.avro': [{
             'name': 'john'
         }, {
             'name': 'liz'
         }],
     }
     for fname, records in data.items():
         with AvroWriter(self.client, 'data.avro/%s' % (fname, )) as writer:
             for record in records:
                 writer.write(record)
     with temppath() as tpath:
         with open(tpath, 'w') as writer:
             main(['read', 'data.avro', '--parts', '1,'],
                  client=self.client,
                  stdout=writer)
         with open(tpath) as reader:
             records = [loads(line) for line in reader]
         eq_(records, data['part-m-00001.avro'])
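The `--parts '1,'` flag restricts the CLI to the second part file. A sketch of the same selection done programmatically, assuming `AvroReader` accepts a `parts` argument mirroring the CLI flag:

# Assumption: `parts` selects which part files to read.
with AvroReader(client, 'data.avro', parts=[1]) as reader:
    records = list(reader)  # Expect only the records from part-m-00001.avro.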
Example #5
 def test_write_overwrite_error(self):
     # To check that the background `AsyncWriter` thread doesn't hang.
     self.client.makedirs('weather.avro')
     with AvroWriter(self.client, 'weather.avro',
                     schema=self.schema) as writer:
         for record in self.records:
             writer.write(record)
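The test writes to a path that was just created as a directory, so the write should fail promptly rather than hang the background thread. A hedged sketch of handling that failure in application code, assuming the library surfaces it as an `HdfsError` (with `client`, `schema`, and `records` defined as in the other examples):

from hdfs.util import HdfsError

try:
    with AvroWriter(client, 'weather.avro', schema=schema) as writer:
        for record in records:
            writer.write(record)
except HdfsError:
    pass  # Creation fails because 'weather.avro' is already a directory.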
Example #6
    def saveToHDFS(self):
        # Select all rows for the current day that are newer than the last
        # exported id, in ascending id order.
        qs = (
            "select %s from %s where push_time >= '%s 00:00:00' "
            "and push_time < '%s 00:00:00' and id > %d order by id asc;"
        ) % (
            self.needFields,
            self.prefix,
            self.curID_PT["PT"].strftime('%Y-%m-%d'),
            (self.curID_PT["PT"] + timedelta(1)).strftime('%Y-%m-%d'),
            self.curID_PT["ID"],
        )
        curAvroFileName = self.buildFileName()
        try:
            with self.con, AvroWriter(self.clientHDFS,
                                      curAvroFileName,
                                      schema=self.schema,
                                      overwrite=True) as writer:
                db = self.con.cursor(mdb.cursors.DictCursor)
                db.execute(qs)
                rs = db.fetchall()
                for r in rs:
                    # Avro can't serialize datetime objects directly, so
                    # convert them to Unix timestamps first.
                    for k, v in r.items():
                        if isinstance(v, datetime.datetime):
                            r[k] = time.mktime(v.timetuple())
                    writer.write(r)
                    self.curID_PT["ID"] = r["id"]
        except mdb.Error as e:
            self.slogger.info(e)
        finally:
            self.slogger.info(
                "Now here:%s\t%d" %
                (self.curID_PT["PT"].isoformat(), self.curID_PT["ID"]))
            self.saveWhere()
Example #7
def upload_to_hdfs(hdfs, records):
    print('saving to AVRO ...')

    with AvroWriter(hdfs,
                    f'{BASE_DIR}{FILE_NAME}-{round(time.time())}.avro',
                    schema=parsed_schema) as writer:
        for record in records:
            try:
                writer.write(record)
            except Exception:
                # Skip records that don't match the schema.
                continue
    time.sleep(1)
Example #8
def file_conversion_to_avro(client, fp_to_write, fp_to_read, schema):
    x = 0
    with AvroWriter(client, fp_to_write, schema=schema) as writer:
        with client.read(fp_to_read, encoding='utf-8',
                         delimiter='\n') as reader:
            for i in reader:
                x += 1
                line = i.split(',')
                if len(line) == 1:  # Skip blank lines.
                    continue
                if x == 1:  # The first row holds the column names.
                    keys = line
                    continue
                dict_to_write = MakeStringDict(keys, line)
                print(dict_to_write)
                writer.write(dict_to_write)
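`MakeStringDict` isn't defined in the snippet; a plausible implementation, assuming it simply pairs the header row with a data row:

def MakeStringDict(keys, values):
    # Hypothetical helper: map each CSV column name to its string value.
    return dict(zip((k.strip() for k in keys), (v.strip() for v in values)))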
Example #9
def write_avro(file_name, predictions):
    avro_schema = generate_avro_schema()
    with AvroWriter(hdfs_client, file_name, schema=avro_schema, overwrite=True) as writer:
        for prediction in predictions:
            data = {}
            data['event_id'] = 'pred_' + str(prediction['_id'])
            data['valid_on'] = int(prediction['WCT'].timestamp())
            data['created_on'] = int(prediction['WCT'].timestamp())
            data['input_events'] = str(prediction['_id'])
            data['patient_id'] = prediction['VISIT_NUMBER']
            data['provenance'] = ['psPredsExtract', 'PredictSepsis']
            data['Prediction'] = float(prediction['result']['result']['predict'])
            data['Score'] = float(prediction['result']['result']['score'])
            data['heuristic_rule'] = prediction['result']['result']['heuristic_alert']
            writer.write(data)
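`generate_avro_schema()` is not shown. Judging only from the fields written above, the schema it returns might resemble the following (the record name and field types are guesses):

def generate_avro_schema():
    # Hypothetical reconstruction; the real schema may differ.
    return {
        'type': 'record',
        'name': 'SepsisPrediction',
        'fields': [
            {'name': 'event_id', 'type': 'string'},
            {'name': 'valid_on', 'type': 'long'},
            {'name': 'created_on', 'type': 'long'},
            {'name': 'input_events', 'type': 'string'},
            {'name': 'patient_id', 'type': 'string'},
            {'name': 'provenance',
             'type': {'type': 'array', 'items': 'string'}},
            {'name': 'Prediction', 'type': 'double'},
            {'name': 'Score', 'type': 'double'},
            {'name': 'heuristic_rule', 'type': 'boolean'},
        ],
    }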
Example #10
from hdfs import Config
from hdfs.ext.avro import AvroReader, AvroWriter

# Get the default alias' client.
client = Config().get_client()

# Some sample data.
records = [
    {
        'name': 'Ann',
        'age': 23
    },
    {
        'name': 'Bob',
        'age': 22
    },
]

# Write an Avro file to HDFS (since our records' schema is very simple, we let
# the writer infer it automatically; otherwise we would pass it as an argument).
with AvroWriter(client, 'names.avro', overwrite=True) as writer:
    for record in records:
        writer.write(record)

# Read it back.
with AvroReader(client, 'names.avro') as reader:
    schema = reader.schema  # The inferred schema.
    content = reader.content  # The remote file's HDFS content object.
    assert list(reader) == records  # The records match!
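The comment above notes that a schema can be passed explicitly when inference isn't appropriate. That variant would look like this (the schema simply mirrors the sample records; the record name is arbitrary):

schema = {
    'type': 'record',
    'name': 'Person',
    'fields': [
        {'name': 'name', 'type': 'string'},
        {'name': 'age', 'type': 'int'},
    ],
}

with AvroWriter(client, 'names.avro', schema=schema, overwrite=True) as writer:
    for record in records:
        writer.write(record)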
Example #11
 def test_infer_schema(self):
     with AvroWriter(self.client, 'weather.avro') as writer:
         for record in self.records:
             writer.write(record)
     with AvroReader(self.client, 'weather.avro') as reader:
         eq_(list(reader), self.records)
Example #12
 def test_write_empty(self):
     with AvroWriter(self.client, 'empty.avro', schema=self.schema):
         pass
     with AvroReader(self.client, 'empty.avro') as reader:
         eq_(reader.schema, self.schema)
         eq_(list(reader), [])
Example #13
records = [{
    'name': 'xiaoming',
    'favorite_number': 123,
    'favorite_color': 'red'
}, {
    'name': 'xiaohong',
    'favorite_number': 123,
    'favorite_color': 'yellow'
}, {
    'name': 'xiaoliang',
    'favorite_number': 123,
    'favorite_color': 'black'
}]
# An equivalent approach using avro's DataFileWriter directly:
#
# with client.write(path, overwrite=True) as writer:
#     with DataFileWriter(writer, DatumWriter(), schema) as data_file_writer:
#         for record in records:
#             data_file_writer.append(record)

with AvroWriter(client, path) as writer:
    for record in records:
        writer.write(record)
#
# with AvroReader(client, '/tmp/test_avro/data-1560762234.783534.avro') as reader:
#     schema = reader.schema  # The remote file's Avro schema.
#     print(schema)
#     content = reader.content  # Content metadata (e.g. size).
#     for record in reader:
#         print(record)