def __init__( self, url: typing.Union[str, dict], ca_location: str = None, cert_location: str = None, key_location: str = None, extra_headers: dict = None, ) -> None: if isinstance(url, str): conf = { "url": url, "ssl.ca.location": ca_location, "ssl.certificate.location": cert_location, "ssl.key.location": key_location, } else: conf = url schema_server_url = conf.get("url", "") self.url_manager = UrlManager(schema_server_url, paths) # type: ignore self.extra_headers = extra_headers self.session = self._create_session(conf) # CACHE: # subj => { schema => id } self.subject_to_schema_ids = defaultdict(dict) # type: dict # id => avro_schema self.id_to_schema = defaultdict(dict) # type: dict # subj => { schema => version } self.subject_to_schema_versions = defaultdict(dict) # type: dict
def __init__( self, url: typing.Union[str, dict], ca_location: str = None, cert_location: str = None, key_location: str = None, key_password: str = None, extra_headers: dict = None, timeout: typing.Optional[httpx.Timeout] = None, pool_limits: typing.Optional[httpx.Limits] = None, ) -> None: if isinstance(url, str): conf = { utils.URL: url, utils.SSL_CA_LOCATION: ca_location, utils.SSL_CERTIFICATE_LOCATION: cert_location, utils.SSL_KEY_LOCATION: key_location, utils.SSL_KEY_PASSWORD: key_password, } else: conf = url self.conf = conf schema_server_url = conf.get(utils.URL, "") self.url_manager = UrlManager(schema_server_url, paths) # type: ignore self.extra_headers = extra_headers self.timeout = timeout self.pool_limits = pool_limits self.session = self._create_session() # Cache Schemas: subj => { schema => id } self.subject_to_schema_ids: dict = defaultdict(dict) # Cache Schemas: id => avro_schema self.id_to_schema: dict = defaultdict(dict) # Cache Schemas: subj => { schema => version } self.subject_to_schema_versions: dict = defaultdict(dict)
def __init__( self, url: typing.Union[str, dict], ca_location: str = None, cert_location: str = None, key_location: str = None, extra_headers: dict = None, ) -> None: super().__init__() if isinstance(url, str): conf = { "url": url, "ssl.ca.location": ca_location, "ssl.certificate.location": cert_location, "ssl.key.location": key_location, } else: conf = url schema_server_url = conf.get("url") self.url_manager = UrlManager(schema_server_url, paths) # type: ignore self.extra_headers = extra_headers self.verify = conf.pop("ssl.ca.location", None) self.cert = self._configure_client_tls(conf) self.auth = self._configure_basic_auth(conf) # CACHE: # subj => { schema => id } self.subject_to_schema_ids = defaultdict(dict) # type: typing.Dict # id => avro_schema self.id_to_schema = defaultdict(dict) # type: typing.Dict # subj => { schema => version } self.subject_to_schema_versions = defaultdict( dict) # type: typing.Dict
def __init__( self, url: typing.Union[str, dict], ca_location: str = None, cert_location: str = None, key_location: str = None, key_password: str = None, extra_headers: dict = None, ): if isinstance(url, str): conf = { utils.URL: url, utils.SSL_CA_LOCATION: ca_location, utils.SSL_CERTIFICATE_LOCATION: cert_location, utils.SSL_KEY_LOCATION: key_location, utils.SSL_KEY_PASSWORD: key_password, } else: conf = url self.conf = conf schema_server_url = conf.get(utils.URL, "") self.url_manager = UrlManager(schema_server_url, paths) # type: ignore self.extra_headers = extra_headers self.session = self._create_session() # CACHE: # subj => { schema => id } self.subject_to_schema_ids = defaultdict(dict) # type: dict # id => avro_schema self.id_to_schema = defaultdict(dict) # type: dict # subj => { schema => version } self.subject_to_schema_versions = defaultdict(dict) # type: dict
class SchemaRegistryClient: """ A client that talks to a Schema Registry over HTTP Args: url (str|dict) url: Url to schema registry or dictionary containing client configuration. ca_location (str): File or path to CA certificate(s) for verifying the Schema Registry key. cert_location (str): Path to public key used for authentication. key_location (str): Path to ./vate key used for authentication. extra_headers (dict): Extra headers to add on every requests. """ def __init__( self, url: typing.Union[str, dict], ca_location: str = None, cert_location: str = None, key_location: str = None, key_password: str = None, extra_headers: dict = None, ): if isinstance(url, str): conf = { utils.URL: url, utils.SSL_CA_LOCATION: ca_location, utils.SSL_CERTIFICATE_LOCATION: cert_location, utils.SSL_KEY_LOCATION: key_location, utils.SSL_KEY_PASSWORD: key_password, } else: conf = url self.conf = conf schema_server_url = conf.get(utils.URL, "") self.url_manager = UrlManager(schema_server_url, paths) # type: ignore self.extra_headers = extra_headers self.session = self._create_session() # CACHE: # subj => { schema => id } self.subject_to_schema_ids = defaultdict(dict) # type: dict # id => avro_schema self.id_to_schema = defaultdict(dict) # type: dict # subj => { schema => version } self.subject_to_schema_versions = defaultdict(dict) # type: dict def _create_session(self) -> httpx.AsyncClient: """ Create a httpx client session Returns: httpx.AsyncClient """ verify = self.conf.get(utils.SSL_CA_LOCATION, False) certificate = self._configure_client_tls(self.conf) auth = self._configure_basic_auth(self.conf) return httpx.AsyncClient(verify=verify, cert=certificate, auth=auth) # type: ignore @staticmethod def _configure_basic_auth(conf: dict) -> typing.Union[None, str, typing.Tuple[str, str]]: url = conf["url"] auth_provider = conf.pop("basic.auth.credentials.source", "URL").upper() if auth_provider not in utils.VALID_AUTH_PROVIDERS: raise ValueError( f"schema.registry.basic.auth.credentials.source " f"must be one of {utils.VALID_AUTH_PROVIDERS}" ) if auth_provider == "SASL_INHERIT": if conf.pop("sasl.mechanism", "").upper() is ["GSSAPI"]: raise ValueError("SASL_INHERIT does not support SASL mechanisms GSSAPI") auth = (conf.pop("sasl.username", ""), conf.pop("sasl.password", "")) elif auth_provider == "USER_INFO": auth = tuple(conf.pop("basic.auth.user.info", "").split(":")) # type: ignore else: auth = requests_utils.get_auth_from_url(url) conf["url"] = requests_utils.urldefragauth(url) return auth @staticmethod def _configure_client_tls( conf: dict, ) -> typing.Optional[typing.Union[str, typing.Tuple[str, str], typing.Tuple[str, str, str]]]: certificate = conf.get(utils.SSL_CERTIFICATE_LOCATION) if certificate: key_path = conf.get(utils.SSL_KEY_LOCATION) key_password = conf.get(utils.SSL_KEY_PASSWORD) if key_path: certificate = (certificate, key_path) if key_password: certificate += (key_password,) return certificate def prepare_headers(self, body: dict = None, headers: dict = None) -> dict: _headers = {"Accept": utils.ACCEPT_HEADERS} if self.extra_headers: _headers.update(self.extra_headers) if body: _headers["Content-Type"] = utils.HEADERS if headers: _headers.update(headers) return _headers async def request(self, url: str, method: str = "GET", body: dict = None, headers: dict = None) -> tuple: if method not in utils.VALID_METHODS: raise ClientError(f"Method {method} is invalid; valid methods include {utils.VALID_METHODS}") _headers = self.prepare_headers(body=body, headers=headers) response = await self.session.request(method, url, headers=_headers, json=body) await self.session.aclose() try: return response.json(), response.status_code except ValueError: return response.content, response.status_code @staticmethod def _add_to_cache(cache: dict, subject: str, schema: AvroSchema, value: typing.Union[str, int]): sub_cache = cache[subject] sub_cache[schema] = value def _cache_schema( self, schema: AvroSchema, schema_id: int, subject: str = None, version: typing.Union[str, int] = None ): if schema_id in self.id_to_schema: schema = self.id_to_schema[schema_id] else: self.id_to_schema[schema_id] = schema if subject: self._add_to_cache(self.subject_to_schema_ids, subject, schema, schema_id) if version: self._add_to_cache(self.subject_to_schema_versions, subject, schema, version) async def register(self, subject: str, avro_schema: AvroSchema, headers: dict = None) -> int: """ POST /subjects/(string: subject)/versions Register a schema with the registry under the given subject and receive a schema id. avro_schema must be a parsed schema from the python avro library Multiple instances of the same schema will result in cache misses. Args: subject (str): subject name avro_schema (avro.schema.RecordSchema): Avro schema to be registered headers (dict): Extra headers to add on the requests Returns: int: schema_id """ schemas_to_id = self.subject_to_schema_ids[subject] schema_id = schemas_to_id.get(avro_schema.name) if schema_id is not None: return schema_id url, method = self.url_manager.url_for("register", subject=subject) body = {"schema": json.dumps(avro_schema.schema)} result, code = await self.request(url, method=method, body=body, headers=headers) msg = None if code in (status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN): msg = "Unauthorized access" elif code == status.HTTP_409_CONFLICT: msg = "Incompatible Avro schema" elif code == status.HTTP_422_UNPROCESSABLE_ENTITY: msg = "Invalid Avro schema" elif not status.is_success(code): msg = "Unable to register schema" if msg is not None: raise ClientError(message=msg, http_code=code, server_traceback=result) schema_id = result["id"] self._cache_schema(avro_schema, schema_id, subject) return schema_id async def get_subjects(self, headers: dict = None) -> list: """ GET /subjects/(string: subject) Get list of all registered subjects in your Schema Registry. Args: subject (str): subject name headers (dict): Extra headers to add on the requests Returns: list [str]: list of registered subjects. """ url, method = self.url_manager.url_for("get_subjects") result, code = await self.request(url, method=method, headers=headers) if status.is_success(code): return result raise ClientError("Unable to get subject", http_code=code, server_traceback=result) async def delete_subject(self, subject: str, headers: dict = None) -> list: """ DELETE /subjects/(string: subject) Deletes the specified subject and its associated compatibility level if registered. It is recommended to use this API only when a topic needs to be recycled or in development environments. Args: subject (str): subject name headers (dict): Extra headers to add on the requests Returns: list (int): version of the schema deleted under this subject """ url, method = self.url_manager.url_for("delete_subject", subject=subject) result, code = await self.request(url, method=method, headers=headers) if status.is_success(code): return result elif code == status.HTTP_404_NOT_FOUND: return [] raise ClientError("Unable to delete subject", http_code=code, server_traceback=result) async def get_by_id(self, schema_id: int, headers: dict = None) -> typing.Optional[AvroSchema]: """ GET /schemas/ids/{int: id} Retrieve a parsed avro schema by id or None if not found Args: schema_id (int): Schema Id headers (dict): Extra headers to add on the requests Returns: client.schema.AvroSchema: Avro Record schema """ if schema_id in self.id_to_schema: return self.id_to_schema[schema_id] url, method = self.url_manager.url_for("get_by_id", schema_id=schema_id) result, code = await self.request(url, method=method, headers=headers) if code == status.HTTP_404_NOT_FOUND: logger.error(f"Schema not found: {code}") return None elif status.is_success(code): schema_str = result.get("schema") result = AvroSchema(schema_str) # cache the result self._cache_schema(result, schema_id) return result raise ClientError(f"Received bad schema (id {schema_id})", http_code=code, server_traceback=result) async def get_schema( self, subject: str, version: typing.Union[int, str] = "latest", headers: dict = None ) -> typing.Optional[utils.SchemaVersion]: """ GET /subjects/(string: subject)/versions/(versionId: version) Get a specific version of the schema registered under this subject Args: subject (str): subject name version (int, optional): version id. If is None, the latest schema is returned headers (dict): Extra headers to add on the requests Returns: SchemaVersion (nametupled): (subject, schema_id, schema, version) None: If server returns a not success response: 404: Schema not found 422: Unprocessable entity ~ (200 - 299): Not success """ url, method = self.url_manager.url_for("get_schema", subject=subject, version=version) result, code = await self.request(url, method=method, headers=headers) if code == status.HTTP_404_NOT_FOUND: logger.error(f"Schema not found: {code}") return None elif code == status.HTTP_422_UNPROCESSABLE_ENTITY: logger.error(f"Invalid version: {code}") return None elif not status.is_success(code): logger.error(f"Not success version: {code}") return None schema_id = result.get("id") if schema_id in self.id_to_schema: schema = self.id_to_schema[schema_id] else: try: schema = AvroSchema(result["schema"]) except ClientError: raise version = result["version"] self._cache_schema(schema, schema_id, subject, version) return utils.SchemaVersion(subject, schema_id, schema, version) async def get_versions(self, subject: str, headers: dict = None) -> list: """ GET subjects/{subject}/versions Get a list of versions registered under the specified subject. Args: subject (str): subject name headers (dict): Extra headers to add on the requests Returns: list (str): version of the schema registered under this subject """ url, method = self.url_manager.url_for("get_versions", subject=subject) result, code = await self.request(url, method=method, headers=headers) if status.is_success(code): return result elif code == status.HTTP_404_NOT_FOUND: logger.error(f"Subject {subject} not found") return [] raise ClientError(f"Unable to get the versions for subject {subject}", http_code=code, server_traceback=result) async def delete_version( self, subject: str, version: typing.Union[int, str] = "latest", headers: dict = None ) -> typing.Optional[int]: """ DELETE /subjects/(string: subject)/versions/(versionId: version) Deletes a specific version of the schema registered under this subject. This only deletes the version and the schema ID remains intact making it still possible to decode data using the schema ID. This API is recommended to be used only in development environments or under extreme circumstances where-in, its required to delete a previously registered schema for compatibility purposes or re-register previously registered schema. Args: subject (str): subject name version (str): Version of the schema to be deleted. Valid values for versionId are between [1,2^31-1] or the string "latest". "latest" deletes the last registered schema under the specified subject. headers (dict): Extra headers to add on the requests Returns: int: version of the schema deleted None: If the subject or version does not exist. """ url, method = self.url_manager.url_for("delete_version", subject=subject, version=version) result, code = await self.request(url, method=method, headers=headers) if status.is_success(code): return result elif status.is_client_error(code): return None raise ClientError("Unable to delete the version", http_code=code, server_traceback=result) async def check_version( self, subject: str, avro_schema: AvroSchema, headers: dict = None ) -> typing.Optional[utils.SchemaVersion]: """ POST /subjects/(string: subject) Check if a schema has already been registered under the specified subject. If so, this returns the schema string along with its globally unique identifier, its version under this subject and the subject name. Args: subject (str): subject name avro_schema (avro.schema.RecordSchema): Avro schema headers (dict): Extra headers to add on the requests Returns: dict: subject (string) -- Name of the subject that this schema is registered under id (int) -- Globally unique identifier of the schema version (int) -- Version of the returned schema schema (dict) -- The Avro schema None: If schema not found. """ schemas_to_version = self.subject_to_schema_versions[subject] version = schemas_to_version.get(avro_schema) schemas_to_id = self.subject_to_schema_ids[subject] schema_id = schemas_to_id.get(avro_schema) if all((version, schema_id)): return utils.SchemaVersion(subject, schema_id, version, avro_schema) url, method = self.url_manager.url_for("check_version", subject=subject) body = {"schema": json.dumps(avro_schema.schema)} result, code = await self.request(url, method=method, body=body, headers=headers) if code == status.HTTP_404_NOT_FOUND: logger.error(f"Not found: {code}") return None elif status.is_success(code): schema_id = result.get("id") version = result.get("version") self._cache_schema(avro_schema, schema_id, subject, version) return utils.SchemaVersion(subject, schema_id, version, result.get("schema")) raise ClientError("Unable to get version of a schema", http_code=code, server_traceback=result) async def test_compatibility( self, subject: str, avro_schema: AvroSchema, version: typing.Union[int, str] = "latest", headers: dict = None ) -> bool: """ POST /compatibility/subjects/(string: subject)/versions/(versionId: version) Test the compatibility of a candidate parsed schema for a given subject. By default the latest version is checked against. Args: subject (str): subject name avro_schema (avro.schema.RecordSchema): Avro schema headers (dict): Extra headers to add on the requests Returns: bool: True if schema given compatible, False otherwise """ url, method = self.url_manager.url_for("test_compatibility", subject=subject, version=version) body = {"schema": json.dumps(avro_schema.schema)} result, code = await self.request(url, method=method, body=body, headers=headers) if code == status.HTTP_404_NOT_FOUND: logger.error(f"Subject or version not found: {code}") return False elif code == status.HTTP_422_UNPROCESSABLE_ENTITY: logger.error(f"Invalid subject or schema: {code}") return False elif status.is_success(code): return result.get("is_compatible") raise ClientError("Unable to check the compatibility", http_code=code, server_traceback=result) async def update_compatibility(self, level: str, subject: str = None, headers: dict = None) -> bool: """ PUT /config/(string: subject) Update the compatibility level. If subject is None, the compatibility level is global. Args: level (str): one of BACKWARD, BACKWARD_TRANSITIVE, FORWARD, FORWARD_TRANSITIVE, FULL, FULL_TRANSITIVE, NONE subject (str): Option subject headers (dict): Extra headers to add on the requests Returns: bool: True if compatibility was updated Raises: ClientError: if the request was unsuccessful or an invalid """ if level not in utils.VALID_LEVELS: raise ClientError(f"Invalid level specified: {level}") url, method = self.url_manager.url_for("update_compatibility", subject=subject) body = {"compatibility": level} result, code = await self.request(url, method=method, body=body, headers=headers) print(result, code) if status.is_success(code): return True raise ClientError(f"Unable to update level: {level}.", http_code=code, server_traceback=result) async def get_compatibility(self, subject: str = None, headers: dict = None) -> str: """ Get the current compatibility level for a subject. Args: subject (str): subject name headers (dict): Extra headers to add on the requests Returns: str: one of BACKWARD, BACKWARD_TRANSITIVE, FORWARD, FORWARD_TRANSITIVE, FULL, FULL_TRANSITIVE, NONE Raises: ClientError: if the request was unsuccessful or an invalid compatibility level was returned """ url, method = self.url_manager.url_for("get_compatibility", subject=subject) result, code = await self.request(url, method=method, headers=headers) if status.is_success(code): compatibility = result.get("compatibilityLevel") if compatibility not in utils.VALID_LEVELS: if compatibility is None: error_msg_suffix = "No compatibility was returned" else: error_msg_suffix = str(compatibility) raise ClientError( f"Invalid compatibility level received: {error_msg_suffix}", http_code=code, server_traceback=result ) return compatibility raise ClientError( f"Unable to fetch compatibility level. Error code: {code}", http_code=code, server_traceback=result )