def loadavsc(avscid):
    """Fetch the Avro schema with id ``avscid`` and record it in ``avscmap``.

    A ``CachedSchemaRegistryClient`` is built from ``options.urlscreg`` and
    ``options.calocation`` and queried with ``get_by_id``. The schema is
    converted to a dict via ``json.loads(str(avsc))`` and stored under
    ``avscmap[avscid]["avsc"]``.

    Returns:
        The object returned by ``get_by_id``. ``None`` is returned when the
        registry lookup fails. When the schema text cannot be parsed as JSON
        the unparsed value is returned. In both failure cases ``avscmap`` is
        left untouched.
    """
    global avscmap
    global options
    serializelog.debug("In loadavsc with avscid: %s" % avscid)
    avsc = None
    serializelog.debug(
        "options.urlscreg: %s options.calocation: %s"
        % (options.urlscreg, options.calocation)
    )

    try:
        serializelog.debug("querying screg with avscid: %s" % (avscid))
        client = CachedSchemaRegistryClient(
            {'url': options.urlscreg, 'ssl.ca.location': options.calocation}
        )
        avsc = client.get_by_id(avscid)
    except Exception as e:
        serializelog.info(
            "ERROR: load avro schema from schema-registry-server is failed on CachedSchemaRegistryClient on using method get_by_id()"
        )
        serializelog.info("ERROR: %s" % (e))
        # Nothing was fetched, so there is nothing to parse or cache.
        return avsc

    try:
        avsc_dict = json.loads(str(avsc))
    except Exception as e:
        serializelog.info("ERROR: json.loads of the avsc_str is faild to produce a dict")
        serializelog.info("ERROR: %s" % (e))
        # avsc_dict is unbound on this path; returning here avoids the
        # UnboundLocalError the code below would otherwise raise.
        return avsc

    serializelog.info("SCHEMA_OF_ID(%s): %s" % (avscid, avsc_dict["name"]))

    if avscid in avscmap:
        serializelog.debug(
            "Update avscmap the existing record avscid (%s) with avroschema" % avscid
        )
        avscmap[avscid].update({"avsc": avsc_dict})
    else:
        serializelog.debug(
            "Update avscmap with new record avscid (%s) with avroschema" % avscid
        )
        avscmap.update({avscid: {"avsc": avsc_dict}})

    return avsc
def test_value_subject_name_strategies(self):
    # Each configured strategy name must resolve to the matching strategy
    # function on the client.
    cases = (
        ("TopicNameStrategy", topic_name_strategy),
        ("RecordNameStrategy", record_name_strategy),
        ("TopicRecordNameStrategy", topic_record_name_strategy),
    )
    for strategy_name, expected in cases:
        client = CachedSchemaRegistryClient({
            'url': 'https://*****:*****@127.0.0.1:65534',
            'value.subject.name.strategy': strategy_name,
        })
        self.assertEqual(expected, client.value_subject_name_strategy_func)
def loadavsc(self, avscid):
    """Fetch the Avro schema with id ``avscid`` and record it in ``self.avscmap``.

    The registry client is created from ``lib_pmgrpcd.OPTIONS.urlscreg`` and
    ``lib_pmgrpcd.OPTIONS.calocation``. On success the schema, parsed with
    ``json.loads(str(avsc))``, is stored under ``self.avscmap[avscid]["avsc"]``.

    Returns:
        The object returned by ``get_by_id``. ``None`` is returned if the
        client cannot be created or the lookup fails. If the schema text is
        not valid JSON the unparsed value is returned and the map is left
        untouched.
    """
    self.__logger.debug("In loadavsc with avscid: %s" % avscid)
    avsc = None
    self.__logger.debug(
        "lib_pmgrpcd.OPTIONS.urlscreg: %s lib_pmgrpcd.OPTIONS.calocation: %s"
        % (lib_pmgrpcd.OPTIONS.urlscreg, lib_pmgrpcd.OPTIONS.calocation))

    try:
        self.__logger.debug(
            "Instancing client (CachedSchemaRegistryClient) with avscid:%s url:%s ssl.ca.location:%s",
            avscid,
            lib_pmgrpcd.OPTIONS.urlscreg,
            lib_pmgrpcd.OPTIONS.calocation,
        )
        client = CachedSchemaRegistryClient(
            url=lib_pmgrpcd.OPTIONS.urlscreg,
            ca_location=lib_pmgrpcd.OPTIONS.calocation,
        )
    except Exception as e:
        # This step only constructs the client; the message used to blame
        # get_by_id(), which is not called until the next step.
        self.__logger.info(
            "ERROR: instancing CachedSchemaRegistryClient for the schema-registry-server is failed"
        )
        self.__logger.info("ERROR: %s" % (e))
        return avsc

    try:
        avsc = client.get_by_id(avscid)
    except Exception as e:
        self.__logger.info(
            "ERROR: load avro schema from schema-registry-server is failed on CachedSchemaRegistryClient on using method get_by_id()"
        )
        self.__logger.info("ERROR: %s" % (e))
        return avsc

    try:
        avsc_dict = json.loads(str(avsc))
    except Exception as e:
        self.__logger.info(
            "ERROR: json.loads of the avsc_str is faild to produce a dict")
        self.__logger.info("ERROR: %s" % (e))
        return avsc

    self.__logger.info("SCHEMA_OF_ID(%s): %s" % (avscid, avsc_dict["name"]))

    if avscid in self.avscmap:
        self.__logger.debug(
            "Update self.avscmap the existing record avscid (%s) with avroschema"
            % avscid)
        self.avscmap[avscid].update({"avsc": avsc_dict})
    else:
        self.__logger.debug(
            "Update self.avscmap with new record avscid (%s) with avroschema"
            % avscid)
        self.avscmap.update({avscid: {"avsc": avsc_dict}})

    return avsc
def produce(config, topic, input_messages):
    """Serialize ``input_messages`` with the topic's latest value schema and send them to Kafka.

    Parameters
    ----------
    config: dict
        configuration passed to ``producer_config`` to obtain the broker and
        schema-registry settings
    topic: str
        topic the messages are published to; the schema is looked up under
        the subject ``topic + "-value"``
    input_messages: dict
        key/value pairs to publish; entries whose key fails
        ``validate_uuid4`` are logged and skipped

    Raises
    ------
    ValueError
        if ``topic`` is None or ``input_messages`` is empty
    RuntimeError
        if the broker reports no topics

    Exits the process with status 1 when no schema is found for the topic.
    """
    if topic is None:
        logger.debug('Required topic field must be set')
        raise ValueError('Required topic field must be set')

    if not input_messages:
        logger.debug('Required data field must not be empty.')
        raise ValueError('Required data field must not be empty.')

    bootstrap_servers, schema_registry = producer_config(config)

    producer = Producer(bootstrap_servers)
    admin_client = AdminClient(bootstrap_servers)
    topics = admin_client.list_topics().topics
    # Just to show what's available
    print(topics)

    if not topics:
        print('Not Topics')
        raise RuntimeError()

    sr = CachedSchemaRegistryClient(schema_registry)
    ser = MessageSerializer(sr)
    # get schema (only the schema itself is needed here)
    _schema_id, schema, _version = sr.get_latest_schema(topic + "-value")
    if schema:
        print('In If Schema')
        for key, value in input_messages.items():
            if validate_uuid4(key):
                print('In validate in For loop')
                serializedMessage = ser.encode_record_with_schema(
                    topic, schema, value)
                producer.produce(topic=topic, key=key,
                                 value=serializedMessage, callback=acked)
                # Flushing per message would limit throughput to the broker
                # round-trip time, so only serve delivery callbacks here.
                producer.poll(1)
            else:
                print('In Else of For Loop')
                # The key needs a placeholder; without one logging fails to
                # format the record and the key never reaches the log.
                logger.error('Invalid UUID String: %s', key)
        # The producer is local to this call: flush once so queued messages
        # are delivered before it goes out of scope.
        producer.flush()
    else:
        print('Schema not found for topic name: ', topic)
        print('In Else Schema')
        sys.exit(1)
def initSchemaRegistry(self):
    """Create ``self.RegistryClient`` from the ``'schema.registry'`` entry of ``self.Conf``.

    If that entry cannot be read the method returns ``None`` without
    creating a client.
    """
    try:
        RegistryConfig = {'url': self.Conf['schema.registry']}
    except Exception:
        # Best effort: no usable configuration means no registry client.
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        return
    self.RegistryClient = CachedSchemaRegistryClient(**RegistryConfig)
    debug(level=1, RegistryClient=self.RegistryClient)
def init_consumer(self, schema_registry_url, topics):
    """Create the consumer and schema-registry client, then subscribe to ``topics``.

    The consumer is built from ``self.consumer_conf``. The consumer, the
    registry client and the topic list are stored on the instance.
    """
    logger.info("Initializing avro consumer")
    consumer = Consumer(self.consumer_conf)
    self.consumer = consumer

    logger.info(f"Schema registry url: {schema_registry_url}")
    registry = CachedSchemaRegistryClient(url=schema_registry_url)
    self.register_client = registry

    logger.info(f"Subscribing to topics: {topics}")
    self.topics = topics
    self.consumer.subscribe(self.topics)
def __init__(self, schema_registry_url):
    """Private implementation class for Avro IO using the registry.

    Creates the registry client for ``schema_registry_url``, fetches the
    schema with id ``config.SCHEMA_ID`` and builds a ``MessageSerializer``.

    Raises:
        ValueError: if any of those steps fails; the original error is
            attached as ``__cause__``.
    """
    log.info(
        f"Using registry with schema_url/id {schema_registry_url}/{config.SCHEMA_ID}"
    )
    try:
        self.client = CachedSchemaRegistryClient(url=schema_registry_url)
        self.schema = self.client.get_by_id(config.SCHEMA_ID)
        self.serializer = MessageSerializer(self.client)
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit are not converted; chaining keeps the root cause visible.
        raise ValueError("Client id or schema id not found") from err
def __init__(self, app):
    """Register this instance, create its clients and start the reader threads.

    The schema-registry URL comes from ``app.config['KAFKA_SCHEMA_REGISTRY_URL']``.
    One thread is started for each of ``readJobsData``, ``readMappingsData``
    and ``readSubmissionsData``.
    """
    print("2")
    LocationKafkaListerner.__instance = self
    self.app = app
    self.config = Config.getInstance()
    registry_url = app.config['KAFKA_SCHEMA_REGISTRY_URL']
    self.register_client = CachedSchemaRegistryClient(url=registry_url)
    print("3")
    self.client = self.config.getESClient()
    # Look up and start each reader in turn, in the original order.
    for reader_name in ("readJobsData", "readMappingsData", "readSubmissionsData"):
        worker = threading.Thread(target=getattr(self, reader_name))
        worker.start()
def __init__(self, schema_subject, schema_registry_url):
    '''
    Build a serializer for the schema identified by schema_subject.

    Stores both arguments, creates a schema-registry client for
    schema_registry_url, calls self._load_schema() and then creates a
    SchemalessAvroRecordWriter from self.avro_schema (presumably set by
    _load_schema -- confirm).

    Note this constructor is not exception safe
    '''
    self.schema_subject = schema_subject
    self.schema_registry_url = schema_registry_url

    registry_client = CachedSchemaRegistryClient(url=self.schema_registry_url)
    self.schema_registry_client = registry_client

    self._load_schema()
    record_writer = SchemalessAvroRecordWriter(self.avro_schema)
    self.writer = record_writer
def init_consumer(self):
    """Create and return a consumer subscribed to this catalog's topic.

    Broker and schema-registry addresses are read from ``self.config``.
    The registry client is stored on ``self.register_client``, and
    ``self.my_on_assign`` is passed as the ``on_assign`` callback.
    """
    settings = self.config
    bootstrap_server = settings['bootstrap-server']
    schema_url = settings['schema-registery-url']

    # KAFKA BROKER URL
    consumer_conf = {
        'bootstrap.servers': bootstrap_server,
        'group.id': 'catalog-export-%s' % self.catalogId,
        'auto.offset.reset': 'earliest'
    }
    consumer = Consumer(consumer_conf)

    # SCHEMA URL
    self.register_client = CachedSchemaRegistryClient(url=schema_url)

    topic_names = ['catserver-%s-catalog' % self.catalogId]
    consumer.subscribe(topic_names, on_assign=self.my_on_assign)
    return consumer
def __init__(self, config, default_key_schema=None,
             default_value_schema=None, schema_registry=None):
    """Split ``config`` into registry and producer settings and set up serialization.

    Keys starting with ``schema.registry`` (with ``schema.registry.``
    removed) configure the registry client; every other key is passed to
    the parent producer. With ``basic.auth.credentials.source`` set to
    ``SASL_INHERIT`` the ``sasl.*`` settings are copied to the registry config.

    Raises:
        ValueError: if ``schema_registry`` is given together with a
            ``schema.registry.url`` entry.
    """
    sr_conf = {}
    ap_conf = {}
    # Single pass: each key goes to exactly one of the two configs.
    for key, value in config.items():
        if key.startswith("schema.registry"):
            sr_conf[key.replace("schema.registry.", "")] = value
        else:
            ap_conf[key] = value

    if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
        for sasl_key in ('sasl.mechanisms', 'sasl.username', 'sasl.password'):
            sr_conf[sasl_key] = config.get(sasl_key, '')

    if schema_registry is not None:
        if sr_conf.get("url", None) is not None:
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )
    else:
        schema_registry = CachedSchemaRegistryClient(sr_conf)

    super(AvroProducer, self).__init__(ap_conf)
    self._serializer = MessageSerializer(schema_registry)
    self._key_schema = default_key_schema
    self._value_schema = default_value_schema
def __init__(self, config, default_key_schema=None,
             default_value_schema=None, schema_registry=None,
             subject_name_strategy=SubjectNameStrategy.RecordNameStrategy):
    """Split ``config`` into registry and producer settings and set up serialization.

    Keys starting with ``schema.registry`` (with ``schema.registry.``
    removed) configure the registry client; every other key is passed to
    the parent producer. ``subject_name_strategy`` is forwarded to the
    ``MessageSerializer``.

    Raises:
        ValueError: if ``schema_registry`` is given together with a
            ``schema.registry.url`` entry.
    """
    registry_items = [
        (key, value) for key, value in config.items()
        if key.startswith("schema.registry")
    ]
    sr_conf = {
        key.replace("schema.registry.", ""): value
        for key, value in registry_items
    }

    inherits_sasl = sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT'
    if inherits_sasl:
        # Fallback to plural 'mechanisms' for backward compatibility
        plural_mechanisms = config.get('sasl.mechanisms', '')
        sr_conf['sasl.mechanism'] = config.get('sasl.mechanism', plural_mechanisms)
        sr_conf['sasl.username'] = config.get('sasl.username', '')
        sr_conf['sasl.password'] = config.get('sasl.password', '')

    sr_conf['auto.register.schemas'] = config.get('auto.register.schemas', True)

    ap_conf = {}
    for key, value in config.items():
        if not key.startswith("schema.registry"):
            ap_conf[key] = value

    if schema_registry is not None:
        if sr_conf.get("url", None) is not None:
            raise ValueError("Cannot pass schema_registry along with schema.registry.url config")
    else:
        schema_registry = CachedSchemaRegistryClient(sr_conf)

    super(AvroProducer, self).__init__(ap_conf)
    self._serializer = MessageSerializer(schema_registry, subject_name_strategy=subject_name_strategy)
    self._key_schema = default_key_schema
    self._value_schema = default_value_schema
def __init__(self, config, default_key_schema=None,
             default_value_schema=None, schema_registry=None):
    """Create the producer and its Avro serializer.

    The ``schema.registry.url`` and ``schema.registry.ssl.*`` entries are
    taken out of a copy of ``config`` and used to build the registry client;
    the remaining entries are passed to the parent producer.

    Raises:
        ValueError: if neither ``schema_registry`` nor
            ``schema.registry.url`` is given, or if both are given.
    """
    # Work on a copy: popping from the caller's dict would strip its
    # schema.registry.* keys and make it unusable for a second client.
    config = dict(config)

    schema_registry_url = config.pop("schema.registry.url", None)
    schema_registry_ca_location = config.pop(
        "schema.registry.ssl.ca.location", None)
    schema_registry_certificate_location = config.pop(
        "schema.registry.ssl.certificate.location", None)
    schema_registry_key_location = config.pop(
        "schema.registry.ssl.key.location", None)

    if schema_registry is None:
        if schema_registry_url is None:
            raise ValueError("Missing parameter: schema.registry.url")

        schema_registry = CachedSchemaRegistryClient(
            url=schema_registry_url,
            ca_location=schema_registry_ca_location,
            cert_location=schema_registry_certificate_location,
            key_location=schema_registry_key_location)
    elif schema_registry_url is not None:
        raise ValueError(
            "Cannot pass schema_registry along with schema.registry.url config"
        )

    super(AvroProducer, self).__init__(config)
    self._serializer = MessageSerializer(schema_registry)
    self._key_schema = default_key_schema
    self._value_schema = default_value_schema
def test_basic_auth_url(self):
    # A client configured with only a URL must expose the expected
    # (user, password) tuple as its session auth.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
    }
    self.client = CachedSchemaRegistryClient(conf)
    expected_auth = ('user_url', 'secret_url')
    self.assertTupleEqual(expected_auth, self.client._session.auth)
def test_basic_auth_invalid(self):
    # 'VAULT' as credentials source must make the constructor raise ValueError.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
        'basic.auth.credentials.source': 'VAULT',
    }
    with self.assertRaises(ValueError):
        self.client = CachedSchemaRegistryClient(conf)
def test_invalid_value_subject_name_strategy(self):
    # An unrecognised strategy name must make the constructor raise ValueError.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
        'value.subject.name.strategy': "InvalidNameStrategy",
    }
    with self.assertRaises(ValueError):
        CachedSchemaRegistryClient(conf)
def setUp(self):
    # Make RecordSchema and PrimitiveSchema hashable
    for schema_cls in (schema.RecordSchema, schema.PrimitiveSchema):
        schema_cls.__hash__ = self.hash_func

    # Start the mock registry on port 9002 and point a client at it.
    server = mock_registry.ServerThread(9002)
    self.server = server
    self.server.start()
    self.client = CachedSchemaRegistryClient('http://127.0.0.1:9002')
    # Pause for one second before the test body runs.
    time.sleep(1)
def test_default_value_subject_name_strategy(self):
    # With no strategy configured the client must use topic_name_strategy.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
    }
    client = CachedSchemaRegistryClient(conf)
    self.assertEqual(topic_name_strategy, client.value_subject_name_strategy_func)
def __init__(self, config):
    """Create the consumer and its Avro serializer.

    ``schema.registry.url`` is taken out of a copy of ``config`` and used to
    build the registry client; the remaining entries are passed to the
    parent consumer.

    Raises:
        ValueError: if ``schema.registry.url`` is missing from ``config``.
    """
    if 'schema.registry.url' not in config:
        raise ValueError("Missing parameter: schema.registry.url")

    # Copy first: deleting the key from the caller's dict would prevent the
    # same config from being reused for another client.
    consumer_conf = dict(config)
    schema_registry_url = consumer_conf.pop("schema.registry.url")

    super(AvroConsumer, self).__init__(consumer_conf)
    self._serializer = MessageSerializer(
        CachedSchemaRegistryClient(url=schema_registry_url))
def test_init_with_dict(self):
    # A dict config with url and ssl locations must leave the url unchanged
    # on the client.
    registry_url = 'https://127.0.0.1:65534'
    conf = {
        'url': registry_url,
        'ssl.certificate.location': '/path/to/cert',
        'ssl.key.location': '/path/to/key'
    }
    self.client = CachedSchemaRegistryClient(conf)
    self.assertEqual('https://127.0.0.1:65534', self.client.url)
def __init__(self, config, default_key_schema=None, default_value_schema=None):
    """Create the producer and its Avro serializer.

    ``schema.registry.url`` is taken out of a copy of ``config`` and used to
    build the registry client; the remaining entries are passed to the
    parent producer.

    Raises:
        ValueError: if ``schema.registry.url`` is missing from ``config``.
    """
    if 'schema.registry.url' not in config:
        raise ValueError("Missing parameter: schema.registry.url")

    # Copy first: deleting the key from the caller's dict would prevent the
    # same config from being reused for another client.
    producer_conf = dict(config)
    schema_registry_url = producer_conf.pop("schema.registry.url")

    super(AvroProducer, self).__init__(producer_conf)
    self._serializer = MessageSerializer(
        CachedSchemaRegistryClient(url=schema_registry_url))
    self._key_schema = default_key_schema
    self._value_schema = default_value_schema
def read_from_offset(self, offset=0, lang='json', schema=None):
    '''
    Kafka read message

    Read json and avro messages from partition 0 of ``self.topic``, starting
    at ``offset``, and return the last message read (``None`` if there was
    none). With ``lang == 'avro'`` each message is decoded through the
    schema registry and returned as a JSON string; otherwise the raw message
    value is returned. ``schema`` is not used.
    '''
    log.debug("[KafkaDriver][read_from_offset] lang: " + str(lang))
    log.debug("[KafkaDriver][read_from_offset] offset: " + str(offset))

    def outputJSON(obj):
        '''
        Default JSON serializer.
        '''
        if isinstance(obj, datetime.datetime):
            return int(obj.strftime("%s%f")[:-3])
        return obj

    ret = None
    log.debug("[KafkaDriver][read_from_offset] read start: " + str(self.server))
    consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=1000)
    try:
        partition = TopicPartition(self.topic, 0)
        consumer.assign([partition])
        consumer.seek_to_end(partition)
        # Seek with the integer value so an offset passed as a string works.
        start = int(offset)
        consumer.seek(partition, start)

        serializer_ready = False
        for msg in consumer:
            if lang == 'avro':
                if not serializer_ready:
                    # Build the client and serializer once, on the first
                    # avro message, instead of once per message.
                    schema_registry = CachedSchemaRegistryClient(
                        url='http://' + self.schema_registry + ':8081')
                    self._serializer = MessageSerializer(schema_registry)
                    serializer_ready = True
                message = self._serializer.decode_message(msg.value)
                message = json.dumps(message, indent=4, sort_keys=True,
                                     default=outputJSON)
                ret = message
            else:
                message = msg.value
                ret = msg.value
            log.debug("[KafkaDriver][read_from_offset] msg: " + str(message) + " msg.offset: " + str(msg.offset))
    finally:
        # Close the consumer even when decoding or seeking raises.
        consumer.close()
    log.debug("[KafkaDriver][read_from_offset] read end")
    return ret
def test_basic_auth_userinfo(self):
    # With 'user_info' as credentials source, session auth must come from
    # 'basic.auth.user.info'.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
        'basic.auth.credentials.source': 'user_info',
        'basic.auth.user.info': 'user_userinfo:secret_userinfo'
    }
    self.client = CachedSchemaRegistryClient(conf)
    expected_auth = ('user_userinfo', 'secret_userinfo')
    self.assertTupleEqual(expected_auth, self.client._session.auth)
def __init__(self, producer, schema_registry_url,
             default_key_schema=None,
             default_value_schema=None):  # real signature unknown; restored from __doc__
    """Wrap ``producer`` with an Avro serializer backed by the schema registry.

    Stores the producer, a ``MessageSerializer`` built on a registry client
    for ``schema_registry_url``, and the two default schemas.
    """
    self._producer = producer
    registry_client = CachedSchemaRegistryClient(url=schema_registry_url)
    self._serializer = MessageSerializer(registry_client)
    self.key_schema = default_key_schema
    self.value_schema = default_value_schema
def consume(config, topic, handler):
    """
    Starts a consumer and calls the given handler for each consumed message.
    Assumes that keys are serialized as strings and values are serialized as
    Avro objects with their schemas stored in a Confluent Schema Registry.

    Entries of ``config`` whose key starts with ``schema.registry`` configure
    the registry client (with ``schema.registry.`` removed from the key);
    all others configure the consumer. Entries whose value is None are
    ignored and string values are stripped of surrounding whitespace.
    ``auto.offset.reset`` defaults to ``'earliest'`` and ``group.id`` to
    ``'sme_test'``.

    The loop stops at the first decoding or handler failure; the consumer is
    closed on every exit path.
    """
    def _clean(value):
        # Only strings can carry stray whitespace; numbers and booleans
        # pass through unchanged.
        return value.strip() if isinstance(value, str) else value

    c_conf = {}
    for key, value in config.items():
        if not key.startswith("schema.registry"):
            if value is not None:
                c_conf[key] = _clean(value)

    if "auto.offset.reset" in c_conf:
        print("offset provided")
    else:
        c_conf['auto.offset.reset'] = 'earliest'

    if "group.id" in c_conf:
        print("group id provided")
    else:
        c_conf['group.id'] = 'sme_test'

    c = Consumer(c_conf)
    try:
        c.subscribe([topic])

        # Skip None values here too, as the consumer settings above do.
        sr_conf = {
            key.replace("schema.registry.", ""): _clean(value)
            for key, value in config.items()
            if key.startswith("schema.registry") and value is not None
        }

        sr = CachedSchemaRegistryClient(sr_conf)
        ser = MessageSerializer(sr)

        while True:
            try:
                msg = c.poll(10)
                if msg is None:
                    print('No Messages')
                    continue
                if msg.error():
                    log.error("Consumer error: {}".format(msg.error()))
                    continue
                key = msg.key().decode('utf-8')
                value = ser.decode_message(msg.value(), is_key=False)
            except Exception as e:
                log.error("Message consumption failed: {}".format(e))
                break
            try:
                handler(key, value)
            except Exception as e:
                log.error("Message handler failed: {}".format(e))
                break
    finally:
        # Close on every exit, including KeyboardInterrupt and setup failure.
        c.close()
def test_basic_auth_sasl_inherit_invalid(self):
    # SASL_INHERIT with the gssapi mechanism must raise ValueError with a
    # message that names the mechanism in upper case.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
        'basic.auth.credentials.source': 'SASL_INHERIT',
        'sasl.mechanism': 'gssapi'  # also test the .upper()
    }
    with self.assertRaises(ValueError) as e:
        self.client = CachedSchemaRegistryClient(conf)
    expected_message = "SASL_INHERIT does not support SASL mechanism GSSAPI"
    self.assertEqual(str(e.exception), expected_message)
def __init__(self, config, schema_registry=None):
    """Create the consumer and its Avro serializer.

    ``schema.registry.url`` is taken out of a copy of ``config``; the
    remaining entries are passed to the parent consumer.

    Raises:
        ValueError: if neither ``schema_registry`` nor
            ``schema.registry.url`` is given, or if both are given.
    """
    # Work on a copy: popping from the caller's dict would make it unusable
    # for a second client.
    config = dict(config)
    schema_registry_url = config.pop("schema.registry.url", None)

    if schema_registry is None:
        if schema_registry_url is None:
            raise ValueError("Missing parameter: schema.registry.url")
        schema_registry = CachedSchemaRegistryClient(
            url=schema_registry_url)
    elif schema_registry_url is not None:
        raise ValueError(
            "Cannot pass schema_registry along with schema.registry.url config"
        )

    super(AvroConsumer, self).__init__(config)
    self._serializer = MessageSerializer(schema_registry)
def test_basic_auth_sasl_inherit(self):
    # With SASL_INHERIT and the PLAIN mechanism, session auth must match
    # the expected (user, password) tuple.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
        'basic.auth.credentials.source': 'SASL_INHERIT',
        'sasl.mechanism': 'PLAIN',
        'sasl.username': '******',
        'sasl.password': '******'
    }
    self.client = CachedSchemaRegistryClient(conf)
    expected_auth = ('user_sasl', 'secret_sasl')
    self.assertTupleEqual(expected_auth, self.client._session.auth)
def test_invalid_conf(self):
    # The 'invalid.conf' and 'invalid.conf2' keys must make the constructor
    # raise ValueError.
    conf = {
        'url': 'https://*****:*****@127.0.0.1:65534',
        'basic.auth.credentials.source': 'SASL_INHERIT',
        'sasl.username': '******',
        'sasl.password': '******',
        'invalid.conf': 1,
        'invalid.conf2': 2
    }
    with self.assertRaises(ValueError):
        self.client = CachedSchemaRegistryClient(conf)
class _AvroIORegistry:
    """Avro encode/decode helper that uses a schema registry."""

    def __init__(self, schema_registry_url):
        """Private implementation class for Avro IO using the registry.

        Creates the registry client for ``schema_registry_url``, fetches the
        schema with id ``config.SCHEMA_ID`` and builds a ``MessageSerializer``.

        Raises:
            ValueError: if any of those steps fails; the original error is
                attached as ``__cause__``.
        """
        log.info(
            f"Using registry with schema_url/id {schema_registry_url}/{config.SCHEMA_ID}"
        )
        try:
            self.client = CachedSchemaRegistryClient(url=schema_registry_url)
            self.schema = self.client.get_by_id(config.SCHEMA_ID)
            self.serializer = MessageSerializer(self.client)
        except Exception as err:
            # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
            # and SystemExit are not converted; chaining keeps the root cause.
            raise ValueError("Client id or schema id not found") from err

    def decode(self, bytes):
        """Return ``bytes`` decoded by the serializer's ``decode_message``."""
        return self.serializer.decode_message(bytes)

    def encode(self, record):
        """Return ``record`` encoded with the schema id ``config.SCHEMA_ID``."""
        return self.serializer.encode_record_with_schema_id(
            config.SCHEMA_ID, record)