async def create_consumer(self, group_name: str, request_data: dict, content_type: str):
    group_name = group_name.strip("/")
    self.log.info("Create consumer request for group %s", group_name)
    consumer_name = request_data.get("name") or self.new_name()
    internal_name = self.create_internal_name(group_name, consumer_name)
    async with self.consumer_locks[internal_name]:
        if internal_name in self.consumers:
            self.log.error("Error creating duplicate consumer in group %s with id %s", group_name, consumer_name)
            KarapaceBase.r(
                status=409,
                content_type=content_type,
                body={
                    "error_code": 40902,
                    "message": f"Consumer {consumer_name} already exists",
                },
            )
        self._validate_create_consumer(request_data, content_type)
        self.log.info(
            "Creating new consumer in group %s with id %s and request_info %r", group_name, consumer_name, request_data
        )
        # Request keys use the dotted form ("fetch.min.bytes"), matching the read below;
        # the underscored form is only used for config defaults.
        for k in ["consumer.request.timeout.ms", "fetch.min.bytes"]:
            convert_to_int(request_data, k, content_type)
        try:
            enable_commit = request_data.get("auto.commit.enable", self.config["consumer_enable_auto_commit"])
            if isinstance(enable_commit, str):
                enable_commit = enable_commit.lower() == "true"
            request_data["consumer.request.timeout.ms"] = request_data.get(
                "consumer.request.timeout.ms", self.config["consumer_request_timeout_ms"]
            )
            request_data["auto.commit.enable"] = enable_commit
            request_data["auto.offset.reset"] = request_data.get("auto.offset.reset", "earliest")
            fetch_min_bytes = request_data.get("fetch.min.bytes", self.config["fetch_min_bytes"])
            c = await self.create_kafka_consumer(fetch_min_bytes, group_name, internal_name, request_data)
        except KafkaConfigurationError as e:
            KarapaceBase.internal_error(str(e), content_type)
        self.consumers[internal_name] = TypedConsumer(
            consumer=c, serialization_format=request_data["format"], config=request_data
        )
        base_uri = urljoin(self.hostname, f"consumers/{group_name}/instances/{consumer_name}")
        KarapaceBase.r(content_type=content_type, body={"base_uri": base_uri, "instance_id": consumer_name})
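# A sketch of the accepted request body, following the Confluent REST Proxy v2
# wire format this handler mirrors (the values below are illustrative, not
# taken from this codebase):
#
#   POST /consumers/{group_name}
#   {
#     "name": "my_consumer",
#     "format": "binary",
#     "auto.offset.reset": "earliest",
#     "auto.commit.enable": "false",
#     "fetch.min.bytes": 1
#   }
#
# A successful response carries the instance id and the URI under which the
# instance can be addressed:
#
#   {"instance_id": "my_consumer", "base_uri": ".../consumers/{group_name}/instances/my_consumer"}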
async def publish(self, topic: str, partition_id: Optional[str], content_type: str, formats: dict, data: dict):
    _ = self.get_topic_info(topic, content_type)
    if partition_id is not None:
        _ = self.get_partition_info(topic, partition_id, content_type)
        partition_id = int(partition_id)
    for k in ["key_schema_id", "value_schema_id"]:
        convert_to_int(data, k, content_type)
    await self.validate_publish_request_format(data, formats, content_type, topic)
    status = HTTPStatus.OK
    ser_format = formats["embedded_format"]
    prepared_records = []
    try:
        prepared_records = await self._prepare_records(
            data=data,
            ser_format=ser_format,
            key_schema_id=data.get("key_schema_id"),
            value_schema_id=data.get("value_schema_id"),
            default_partition=partition_id,
        )
    except (FormatError, B64DecodeError):
        self.unprocessable_entity(
            message=f"Request includes data improperly formatted given the format {ser_format}",
            content_type=content_type,
            sub_code=RESTErrorCodes.INVALID_DATA.value,
        )
    except InvalidMessageSchema as e:
        self.r(
            body={"error_code": RESTErrorCodes.INVALID_DATA.value, "message": str(e)},
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    except SchemaRetrievalError as e:
        self.r(
            body={"error_code": RESTErrorCodes.SCHEMA_RETRIEVAL_ERROR.value, "message": str(e)},
            content_type=content_type,
            status=HTTPStatus.REQUEST_TIMEOUT,
        )
    response = {
        "key_schema_id": data.get("key_schema_id"),
        "value_schema_id": data.get("value_schema_id"),
        "offsets": [],
    }
    for key, value, partition in prepared_records:
        publish_result = await self.produce_message(topic=topic, key=key, value=value, partition=partition)
        if "error" in publish_result and status == HTTPStatus.OK:
            status = HTTPStatus.INTERNAL_SERVER_ERROR
        response["offsets"].append(publish_result)
    self.r(body=response, content_type=content_type, status=status)
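# A sketch of a publish request in the binary embedded format (illustrative
# values; keys and values are base64-encoded strings in this format):
#
#   POST /topics/{topic}
#   {
#     "records": [
#       {"key": "a2V5", "value": "dmFsdWU=", "partition": 0}
#     ]
#   }
#
# The response accumulates one "offsets" entry per produced record; the HTTP
# status drops to 500 as soon as any single produce result reports an "error".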
async def set_assignments(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
    self.log.info("Updating assignments for %s to %r", internal_name, request_data)
    self._assert_consumer_exists(internal_name, content_type)
    self._assert_has_key(request_data, "partitions", content_type)
    partitions = []
    for el in request_data["partitions"]:
        convert_to_int(el, "partition", content_type)
        self._has_topic_and_partition_keys(el, content_type)
        partitions.append(TopicPartition(el["topic"], el["partition"]))
    async with self.consumer_locks[internal_name]:
        try:
            consumer = self.consumers[internal_name].consumer
            consumer.assign(partitions)
            self._update_partition_assignments(consumer)
            empty_response()
        except IllegalStateError as e:
            self._illegal_state_fail(message=str(e), content_type=content_type)
        finally:
            self.log.info("Done updating assignment")
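# Example assignment payload (illustrative): every element needs both a
# "topic" and a "partition" key, and "partition" is coerced to an int:
#
#   {"partitions": [{"topic": "events", "partition": 0}, {"topic": "events", "partition": 1}]}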
async def validate_publish_request_format(self, data: dict, formats: dict, content_type: str, topic: str):
    # This method updates binary embedded formats in place, because the
    # validation itself is equivalent to a parse / attempt-to-parse operation.

    # Reject a missing or empty 'records' key, as well as any unknown keys
    if "records" not in data or set(data.keys()).difference(PUBLISH_KEYS) or not data["records"]:
        self.unprocessable_entity(
            message="Invalid request format",
            content_type=content_type,
            sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
        )
    for r in data["records"]:
        convert_to_int(r, "partition", content_type)
        if set(r.keys()).difference(RECORD_KEYS):
            self.unprocessable_entity(
                message="Invalid request format",
                content_type=content_type,
                sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
            )
    # Reject a missing schema id and schema for any key/value list that has
    # at least one populated element
    if formats["embedded_format"] in {"avro", "jsonschema"}:
        for prefix, code in zip(RECORD_KEYS, RECORD_CODES):
            if self.all_empty(data, prefix):
                continue
            if not self.is_valid_schema_request(data, prefix):
                self.unprocessable_entity(
                    message=f"Request includes {prefix}s and uses a format that requires schemas "
                    f"but does not include the {prefix}_schema or {prefix}_schema_id fields",
                    content_type=content_type,
                    sub_code=code,
                )
            try:
                await self.validate_schema_info(data, prefix, content_type, topic, formats["embedded_format"])
            except InvalidMessageSchema as e:
                self.unprocessable_entity(
                    message=str(e),
                    content_type=content_type,
                    sub_code=RESTErrorCodes.INVALID_DATA.value,
                )
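# For the schema-backed formats a request must identify the schema either by
# registry id or inline, e.g. (illustrative):
#
#   {"value_schema_id": 1, "records": [{"value": {"name": "x"}}]}
#
# or:
#
#   {"value_schema": "{\"type\": \"string\"}", "records": [{"value": "x"}]}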
async def seek_to(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
    self.log.info("Resetting offsets for %s to %r", internal_name, request_data)
    self._assert_consumer_exists(internal_name, content_type)
    self._assert_has_key(request_data, "offsets", content_type)
    seeks = []
    for el in request_data["offsets"]:
        self._assert_has_key(el, "topic", content_type)
        for k in ["offset", "partition"]:
            self._assert_has_key(el, k, content_type)
            convert_to_int(el, k, content_type)
        self._assert_positive_number(el, "offset", content_type)
        seeks.append((TopicPartition(topic=el["topic"], partition=el["partition"]), el["offset"]))
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        for part, offset in seeks:
            try:
                consumer.seek(part, offset)
            except AssertionError:
                self._illegal_state_fail(f"Partition {part} is unassigned", content_type)
        empty_response()
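# Example seek payload (illustrative); "offset" and "partition" must be ints
# and "offset" must be non-negative:
#
#   {"offsets": [{"topic": "events", "partition": 0, "offset": 10}]}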
async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
    self.log.info("Retrieving offsets for %s", internal_name)
    self._assert_consumer_exists(internal_name, content_type)
    self._assert_has_key(request_data, "partitions", content_type)
    response = {"offsets": []}
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        for el in request_data["partitions"]:
            convert_to_int(el, "partition", content_type)
            tp = TopicPartition(el["topic"], el["partition"])
            commit_info = consumer.committed(tp, metadata=True)
            if not commit_info:
                continue
            response["offsets"].append(
                {
                    "topic": tp.topic,
                    "partition": tp.partition,
                    "metadata": commit_info.metadata,
                    "offset": commit_info.offset,
                }
            )
    KarapaceBase.r(body=response, content_type=content_type)
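# Example exchange (illustrative); partitions without a committed offset are
# silently omitted from the response:
#
#   request:  {"partitions": [{"topic": "events", "partition": 0}]}
#   response: {"offsets": [{"topic": "events", "partition": 0, "metadata": "", "offset": 10}]}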
async def seek_limit(
    self, internal_name: Tuple[str, str], content_type: str, request_data: dict, beginning: bool = True
):
    direction = "beginning" if beginning else "end"
    self.log.info("Seeking %s offsets", direction)
    self._assert_consumer_exists(internal_name, content_type)
    self._assert_has_key(request_data, "partitions", content_type)
    resets = []
    for el in request_data["partitions"]:
        convert_to_int(el, "partition", content_type)
        for k in ["topic", "partition"]:
            self._assert_has_key(el, k, content_type)
        resets.append(TopicPartition(topic=el["topic"], partition=el["partition"]))
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        try:
            if beginning:
                consumer.seek_to_beginning(*resets)
            else:
                consumer.seek_to_end(*resets)
            empty_response()
        except AssertionError:
            self._illegal_state_fail(f"Trying to reset unassigned partitions to {direction}", content_type)
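# Seeking to the beginning and seeking to the end take the same payload
# (illustrative); only the `beginning` flag differs:
#
#   {"partitions": [{"topic": "events", "partition": 0}]}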
async def commit_offsets(
    self, internal_name: Tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict
):
    self.log.info("Committing offsets for %s", internal_name)
    self._assert_consumer_exists(internal_name, content_type)
    if request_data:
        self._assert_has_key(request_data, "offsets", content_type)
    payload = {}
    for el in request_data.get("offsets", []):
        for k in ["partition", "offset"]:
            convert_to_int(el, k, content_type)
        # If we committed for a partition that does not belong to this consumer, the
        # internal error raised would be marked as retriable, and the commit call
        # would then block in what looks like an infinite loop.
        self._topic_and_partition_valid(cluster_metadata, el, content_type)
        payload[TopicPartition(el["topic"], el["partition"])] = OffsetAndMetadata(el["offset"] + 1, None)
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        payload = payload or None
        try:
            consumer.commit(offsets=payload)
        except KafkaError as e:
            KarapaceBase.internal_error(message=f"error sending commit request: {e}", content_type=content_type)
        empty_response()
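# Example commit payload (illustrative). The handler commits offset + 1 because
# Kafka's committed offset denotes the next record to consume, while the REST
# payload names the last record already consumed:
#
#   {"offsets": [{"topic": "events", "partition": 0, "offset": 10}]}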