def main():
    """Print every record of an Avro data file to stdout.

    Options are parsed from ``sys.argv`` with ``getopt``:

    * ``-h`` / ``--help``: print usage and exit with status 0.
    * ``-i`` / ``--input-file`` (required): path of the Avro data to read.
    * ``-s`` / ``--schema`` (optional): path of an Avro schema file.

    Without ``--schema`` the input is iterated through ``DataFileReader``
    using a schema-less ``DatumReader()`` (presumably an Avro container
    file that carries its own schema). With ``--schema`` the input is
    treated as a sequence of binary-encoded datums that are decoded one
    after another with the parsed schema until the input is exhausted.

    Exits with status 2 on an unrecognized option and with status 1 when
    ``--input-file`` was not supplied.
    """
    try:
        opts, _args = getopt.getopt(
            sys.argv[1:], "hi:s:", ["help", "input-file=", "schema="])
    except getopt.GetoptError as err:
        # str(err) reads like "option -a not recognized".
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(str(err))
        usage(sys.argv[0])
        sys.exit(2)

    avro_file = None
    avro_schema_file = None
    for opt, value in opts:
        if opt in ("-h", "--help"):
            usage(sys.argv[0])
            sys.exit()
        elif opt in ("-i", "--input-file"):
            avro_file = value
        elif opt in ("-s", "--schema"):
            avro_schema_file = value
        else:
            # getopt only yields options named in the spec above, so getting
            # here is a programming error. Raised explicitly (instead of a
            # bare `assert`) so the check survives `python -O`.
            raise AssertionError("unhandled option")

    if avro_file is None:
        print("ERROR: Missing required argument")
        usage(sys.argv[0])
        sys.exit(1)

    if not avro_schema_file:
        # Avro data is binary, so open with "rb"; text mode can corrupt the
        # bytes (newline translation / decoding).
        with open(avro_file, "rb") as data_file:
            reader = DataFileReader(data_file, DatumReader())
            try:
                for datum in reader:
                    print(datum)
            finally:
                # Release the reader even when decoding a record fails.
                reader.close()
        return

    with open(avro_schema_file, "r") as schema_file:
        parsed_avro_schema = avro.schema.parse(schema_file.read())

    with open(avro_file, "rb") as reader_data:
        payload = reader_data.read()

    inputio = io.BytesIO(payload)
    decoder = avro.io.BinaryDecoder(inputio)
    reader = avro.io.DatumReader(parsed_avro_schema)
    # Hoisted: the payload size never changes, and calling getvalue() inside
    # the loop would copy the entire buffer once per datum.
    total = len(payload)
    while inputio.tell() < total:
        print(reader.read(decoder))
def getit(avroType):
    """Open the resource at ``url`` as Avro data and return ``read()``'s result.

    ``avroType`` is accepted but not used by the body. ``url`` is not a
    parameter or local; it is looked up in the enclosing (module) scope at
    call time.
    """
    # NOTE(review): `url` must be defined elsewhere in the file before this
    # function is called -- confirm.
    remote_stream = urllib.urlopen(url)
    container = DataFileReader(remote_stream, DatumReader())
    # NOTE(review): confirm that DataFileReader exposes a no-argument
    # `read()`; elsewhere such readers are consumed by iteration.
    return container.read()
# In[35]:

r["productMap"]


# ## Generate Avro Schemaless

# In[36]:

# Encode one record against `schema` into an in-memory buffer using a
# BinaryEncoder.
writer = avro.io.DatumWriter(schema)
bytes_writer = io.BytesIO()
encoder = avro.io.BinaryEncoder(bytes_writer)

# Write data using DatumWriter
writer.write(
    {
        "modelId": model_id,
        "tensorFlowModel": model_file_binary,
        "productMap": productMapping,
        "customerMap": customerMapping
    },
    encoder)
raw_bytes = bytes_writer.getvalue()

# Use a context manager so the file is flushed and closed deterministically
# instead of whenever the anonymous file object happens to be collected.
with open(model_path + "recommender-no-schema.avro", 'wb') as out_file:
    out_file.write(raw_bytes)

# Decode the bytes just produced with the same schema and show one field of
# the result.
bytes_reader = io.BytesIO(raw_bytes)
decoder = avro.io.BinaryDecoder(bytes_reader)
reader = avro.io.DatumReader(schema)
r = reader.read(decoder)
r["productMap"]