def kafka_dataset(servers, topic, offset, schema, eof=True):
    """Create a tf.data dataset of decoded avro records from a Kafka topic.

    Args:
      servers: Kafka bootstrap servers string.
      topic: Topic name to read from (partition 0 is assumed).
      offset: Starting offset within partition 0.
      schema: Avro schema (JSON string) describing each message.
      eof: If True (default), stop at end of partition instead of blocking.

    Returns:
      A dataset yielding tuples of 19 decoded avro fields
      (9 float64, 4 int32, 4 float64, 1 int32, 1 string).
    """
    # Fixed: the original passed a third, silently-ignored `offset` argument
    # to the two-slot format string; build the subscription once and reuse it.
    subscription = "{}:0:{}".format(topic, offset)
    print("Create: ", subscription)
    # NOTE(review): relies on a module-level `kafka_config` dict being defined
    # before this function is called -- confirm against the rest of the file.
    dataset = kafka_io.KafkaDataset(
        [subscription], servers=servers, group="cardata-autoencoder",
        eof=eof, config_global=kafka_config)
    # The first 5 bytes are Confluent wire-format framing (magic byte +
    # 4-byte schema id); strip them before avro decoding.
    dataset = dataset.map(lambda e: tf.strings.substr(e, 5, -1))
    # Deserialize avro; dtype order must match the schema's field order.
    dtypes = ([tf.float64] * 9 + [tf.int32] * 4 + [tf.float64] * 4
              + [tf.int32, tf.string])
    dataset = dataset.map(
        lambda e: kafka_io.decode_avro(e, schema=schema, dtype=dtypes))
    return dataset
def deserialize(kafkamessage):
    """Decode one avro-encoded Kafka message into an (image, label) pair.

    The avro record holds two fields: a raw-byte image payload (string) and
    an int32 label. The image bytes are interpreted as a 28x28 uint8 grid
    and converted to float32 in [0, 1]; the label is reshaped to a scalar.
    """
    raw_image, raw_label = kafka_io.decode_avro(
        kafkamessage, schema=schemastr, dtype=[tf.string, tf.int32])
    pixels = tf.io.decode_raw(raw_image, out_type=tf.uint8)
    pixels = tf.reshape(pixels, [28, 28])
    image = tf.image.convert_image_dtype(pixels, tf.float32)
    label = tf.reshape(raw_label, [])
    return image, label
def test_avro_encode_decode():
    """Round-trip records through kafka_io.encode_avro / decode_avro."""
    # Avro schema: one string field `f1` and one long field `f2`.
    # Fixed: the original literal had a stray trailing '"' after the closing
    # brace, which made the schema invalid JSON.
    schema = ('{"type":"record","name":"myrecord","fields":'
              '[{"name":"f1","type":"string"},{"name":"f2","type":"long"}]}')
    value = [('value1', 1), ('value2', 2), ('value3', 3)]
    f1 = tf.cast([v[0] for v in value], tf.string)
    f2 = tf.cast([v[1] for v in value], tf.int64)
    message = kafka_io.encode_avro([f1, f2], schema=schema)
    entries = kafka_io.decode_avro(
        message, schema=schema, dtype=[tf.string, tf.int64])
    # Decoded fields must match the encoded inputs element-wise.
    assert np.all(entries[0].numpy() == f1.numpy())
    assert np.all(entries[1].numpy() == f2.numpy())
def test_avro_kafka_dataset():
    """Read and decode avro messages from the `avro-test` Kafka topic."""
    # Fixed: stray trailing '"' removed from the schema literal (it made the
    # schema invalid JSON).
    schema = ('{"type":"record","name":"myrecord","fields":'
              '[{"name":"f1","type":"string"},{"name":"f2","type":"long"}]}')
    dataset = kafka_io.KafkaDataset(
        ["avro-test:0"], group="avro-test", eof=True)
    # Strip the 5-byte Confluent wire-format framing before avro decoding.
    dataset = dataset.map(lambda e: tf.strings.substr(e, 5, -1))
    # deserialize avro
    dataset = dataset.map(lambda e: kafka_io.decode_avro(
        e, schema=schema, dtype=[tf.string, tf.int64]))
    entries = [(f1.numpy(), f2.numpy()) for (f1, f2) in dataset]
    # Fixed: the original discarded the np.all(...) result (the test asserted
    # nothing) and compared str against the bytes that .numpy() yields.
    assert entries == [(b'value1', 1), (b'value2', 2), (b'value3', 3)]
def test_avro_kafka_dataset_with_resource():
    """Decode avro via a pre-initialized schema resource, incl. a nullable field."""
    # Fixed: stray trailing '"' removed from the schema literal (it made the
    # schema invalid JSON).
    schema = ('{"type":"record","name":"myrecord","fields":['
              '{"name":"f1","type":"string"},'
              '{"name":"f2","type":"long"},'
              '{"name":"f3","type":["null","string"],"default":null}'
              ']}')
    schema_resource = kafka_io.decode_avro_init(schema)
    dataset = kafka_io.KafkaDataset(
        ["avro-test:0"], group="avro-test", eof=True)
    # Strip the 5-byte Confluent wire-format framing before avro decoding.
    dataset = dataset.map(lambda e: tf.strings.substr(e, 5, -1))
    # deserialize avro
    dataset = dataset.map(
        lambda e: kafka_io.decode_avro(
            e, schema=schema_resource, dtype=[tf.string, tf.int64, tf.string]))
    entries = [(f1.numpy(), f2.numpy(), f3.numpy())
               for (f1, f2, f3) in dataset]
    # Fixed: the original discarded the np.all(...) result and compared
    # 3-tuples of bytes/int against 2-tuples of str/int. Only f1/f2 have
    # known expected values; f3 is nullable with default null, so it is not
    # checked here.
    assert [(f1, f2) for (f1, f2, _) in entries] == [
        (b'value1', 1), (b'value2', 2), (b'value3', 3)]
# Standalone script: stream avro-encoded car telemetry (cardata-v1) from
# Kafka, decode it, and normalize features for downstream training.
import tensorflow as tf
import tensorflow_io.kafka as kafka_io

# Avro schema for the cardata-v1 messages, read from a sidecar file.
with open('cardata-v1.avsc') as f:
    schema = f.read()

dataset = kafka_io.KafkaDataset(["cardata-v1:0"], group="cardata-v1", eof=True)
# remove kafka framing (first 5 bytes: Confluent wire-format header --
# presumably magic byte + 4-byte schema id; confirm against the producer)
dataset = dataset.map(lambda e: tf.strings.substr(e, 5, -1))
# deserialize avro; dtype order must match the schema's 18 field order:
# 9 float32, 4 int32, 4 float32, 1 int32
dataset = dataset.map(lambda e: kafka_io.decode_avro(
    e, schema=schema, dtype=[
        tf.float32, tf.float32, tf.float32, tf.float32, tf.float32,
        tf.float32, tf.float32, tf.float32, tf.float32, tf.int32,
        tf.int32, tf.int32, tf.int32, tf.float32, tf.float32, tf.float32,
        tf.float32, tf.int32
    ]))


def normalize_fn(coolant_temp, intake_air_temp, intake_air_flow_speed,
                 battery_percentage, battery_voltage, current_draw, speed,
                 engine_vibration_amplitude, throttle_pos, tire_pressure_1_1,
                 tire_pressure_1_2, tire_pressure_2_1, tire_pressure_2_2,
                 accelerometer_1_1_value, accelerometer_1_2_value,
                 accelerometer_2_1_value, accelerometer_2_2_value,
                 control_unit_firmware):
    # Tire pressures arrive as int32 from the avro decode; cast to float32
    # before any normalization arithmetic.
    # NOTE(review): the function body continues beyond this chunk -- it is
    # incomplete here (no return statement visible).
    tire_pressure_1_1 = tf.cast(tire_pressure_1_1, tf.float32)
    tire_pressure_1_2 = tf.cast(tire_pressure_1_2, tf.float32)