Example No. 1
 async def create_consumer(self, group_name: str, request_data: dict,
                           content_type: str):
     group_name = group_name.strip("/")
     self.log.info("Create consumer request for group  %s", group_name)
     consumer_name = request_data.get("name") or self.new_name()
     internal_name = self.create_internal_name(group_name, consumer_name)
     async with self.consumer_locks[internal_name]:
         if internal_name in self.consumers:
             self.log.error(
                 "Error creating duplicate consumer in group %s with id %s",
                 group_name, consumer_name)
             KarapaceBase.r(
                 status=409,
                 content_type=content_type,
                 body={
                     "error_code": 40902,
                     "message": f"Consumer {consumer_name} already exists",
                 },
             )
         self._validate_create_consumer(request_data, content_type)
         self.log.info(
             "Creating new consumer in group %s with id %s and request_info %r",
             group_name, consumer_name, request_data)
         for k in ["consumer.request.timeout.ms", "fetch_min_bytes"]:
             convert_to_int(request_data, k, content_type)
         try:
             enable_commit = request_data.get(
                 "auto.commit.enable",
                 self.config["consumer_enable_auto_commit"])
             if isinstance(enable_commit, str):
                 enable_commit = enable_commit.lower() == "true"
             request_data["consumer.request.timeout.ms"] = request_data.get(
                 "consumer.request.timeout.ms",
                 self.config["consumer_request_timeout_ms"])
             request_data["auto.commit.enable"] = enable_commit
             request_data["auto.offset.reset"] = request_data.get(
                 "auto.offset.reset", "earliest")
             fetch_min_bytes = request_data.get(
                 "fetch.min.bytes", self.config["fetch_min_bytes"])
             c = await self.create_kafka_consumer(fetch_min_bytes,
                                                  group_name, internal_name,
                                                  request_data)
         except KafkaConfigurationError as e:
             KarapaceBase.internal_error(str(e), content_type)
         self.consumers[internal_name] = TypedConsumer(
             consumer=c,
             serialization_format=request_data["format"],
             config=request_data)
         base_uri = urljoin(
             self.hostname,
             f"consumers/{group_name}/instances/{consumer_name}")
         KarapaceBase.r(
             content_type=content_type,
             body={"base_uri": base_uri, "instance_id": consumer_name},
         )
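
For context, this handler backs the REST proxy's consumer-creation endpoint: a client POSTs a small JSON document to /consumers/{group} and receives the instance id plus a base URI for subsequent calls. A minimal client-side sketch using the requests library; the host, port, payload values, and content-type header are illustrative assumptions, not fixed API values:

    # A hedged sketch of calling the consumer-creation endpoint; the host,
    # port, and payload values are assumptions, not fixed API values.
    import requests

    payload = {
        "name": "my-consumer",            # optional; the handler generates a name when omitted
        "format": "binary",               # stored as TypedConsumer.serialization_format
        "auto.offset.reset": "earliest",  # matches the handler's default
    }
    resp = requests.post(
        "http://localhost:8082/consumers/my-group",
        json=payload,
        headers={"Content-Type": "application/vnd.kafka.v2+json"},
    )
    print(resp.json())  # expected shape: {"base_uri": ..., "instance_id": ...}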
Example No. 2
 async def publish(self, topic: str, partition_id: Optional[str], content_type: str, formats: dict, data: dict):
     _ = self.get_topic_info(topic, content_type)
     if partition_id is not None:
         _ = self.get_partition_info(topic, partition_id, content_type)
         partition_id = int(partition_id)
     for k in ["key_schema_id", "value_schema_id"]:
         convert_to_int(data, k, content_type)
     await self.validate_publish_request_format(data, formats, content_type, topic)
     status = HTTPStatus.OK
     ser_format = formats["embedded_format"]
     prepared_records = []
     try:
         prepared_records = await self._prepare_records(
             data=data,
             ser_format=ser_format,
             key_schema_id=data.get("key_schema_id"),
             value_schema_id=data.get("value_schema_id"),
             default_partition=partition_id
         )
     except (FormatError, B64DecodeError):
         self.unprocessable_entity(
             message=f"Request includes data improperly formatted given the format {ser_format}",
             content_type=content_type,
             sub_code=RESTErrorCodes.INVALID_DATA.value,
         )
     except InvalidMessageSchema as e:
         self.r(
             body={
                 "error_code": RESTErrorCodes.INVALID_DATA.value,
                 "message": str(e)
             },
             content_type=content_type,
             status=HTTPStatus.UNPROCESSABLE_ENTITY,
         )
     except SchemaRetrievalError as e:
         self.r(
             body={
                 "error_code": RESTErrorCodes.SCHEMA_RETRIEVAL_ERROR.value,
                 "message": str(e)
             },
             content_type=content_type,
             status=HTTPStatus.REQUEST_TIMEOUT,
         )
     response = {
         "key_schema_id": data.get("key_schema_id"),
         "value_schema_id": data.get("value_schema_id"),
         "offsets": []
     }
     for key, value, partition in prepared_records:
         publish_result = await self.produce_message(topic=topic, key=key, value=value, partition=partition)
         if "error" in publish_result and status == HTTPStatus.OK:
             status = HTTPStatus.INTERNAL_SERVER_ERROR
         response["offsets"].append(publish_result)
     self.r(body=response, content_type=content_type, status=status)
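
The data argument this handler receives mirrors the REST proxy produce API: a records list plus optional schema fields, which validate_publish_request_format (Example No. 5) enforces. A sketch of a plausible payload; the field values and schema id are illustrative assumptions:

    # A plausible publish payload for the handler above; values are illustrative.
    # Schema ids may arrive as strings and are coerced by convert_to_int.
    publish_payload = {
        "value_schema_id": 1,
        "records": [
            {"value": {"name": "a"}},                  # partition chosen automatically
            {"value": {"name": "b"}, "partition": 0},  # explicit partition override
        ],
    }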
Example No. 3
 async def publish(self, topic: str, partition_id: Optional[str],
                   content_type: str, formats: dict, data: dict):
     _ = self.get_topic_info(topic, content_type)
     if partition_id is not None:
         _ = self.get_partition_info(topic, partition_id, content_type)
         partition_id = int(partition_id)
     for k in ["key_schema_id", "value_schema_id"]:
         convert_to_int(data, k, content_type)
     await self.validate_publish_request_format(data, formats, content_type,
                                                topic)
     status = 200
     ser_format = formats["embedded_format"]
     prepared_records = []
     try:
         prepared_records = await self._prepare_records(
             data=data,
             ser_format=ser_format,
             key_schema_id=data.get("key_schema_id"),
             value_schema_id=data.get("value_schema_id"),
             default_partition=partition_id)
     except (FormatError, B64DecodeError):
         self.unprocessable_entity(
             message=f"Request includes data improperly formatted given the format {ser_format}",
             content_type=content_type,
             sub_code=42205,
         )
     except InvalidMessageSchema as e:
         self.r(
             body={"error_code": 42205, "message": str(e)},
             content_type=content_type,
             status=422,
         )
     except SchemaRetrievalError as e:
         self.r(
             body={"error_code": 40801, "message": str(e)},
             content_type=content_type,
             status=408,
         )
     response = {
         "key_schema_id": data.get("key_schema_id"),
         "value_schema_id": data.get("value_schema_id"),
         "offsets": []
     }
     for key, value, partition in prepared_records:
         publish_result = await self.produce_message(topic=topic, key=key, value=value, partition=partition)
         if "error" in publish_result and status == 200:
             status = 500
         response["offsets"].append(publish_result)
     self.r(body=response, content_type=content_type, status=status)
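
Both versions of publish lean on the convert_to_int helper to coerce string fields such as key_schema_id into integers before use. The helper itself is not shown in these examples; a minimal sketch of plausible behavior, inferred from the call sites rather than copied from the actual implementation, which presumably reports failures as an HTTP error using content_type:

    # A minimal sketch of the convert_to_int helper used throughout these
    # examples; the error handling is an assumption, not the real implementation.
    def convert_to_int(data: dict, key: str, content_type: str) -> None:
        if data.get(key) is None:
            return  # nothing to coerce
        try:
            data[key] = int(data[key])  # in-place coercion, e.g. "1" -> 1
        except (TypeError, ValueError):
            # the real helper presumably replies with an HTTP 4xx via content_type
            raise ValueError(f"{key} is not a valid integer: {data[key]!r}")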
Example No. 4
 async def set_assignments(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Updating assignments for %s to %r", internal_name, request_data)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     partitions = []
     for el in request_data["partitions"]:
         convert_to_int(el, "partition", content_type)
         self._has_topic_and_partition_keys(el, content_type)
         partitions.append(TopicPartition(el["topic"], el["partition"]))
     async with self.consumer_locks[internal_name]:
         try:
             consumer = self.consumers[internal_name].consumer
             consumer.assign(partitions)
             self._update_partition_assignments(consumer)
             empty_response()
         except IllegalStateError as e:
             self._illegal_state_fail(message=str(e), content_type=content_type)
         finally:
             self.log.info("Done updating assignment")
Example No. 5
    async def validate_publish_request_format(self, data: dict, formats: dict,
                                              content_type: str, topic: str):
        # This method updates `data` in place for binary embedded formats,
        # because the validation itself amounts to a parse (or attempted parse).

        # Reject a missing or empty "records" key, as well as any unexpected keys.
        if "records" not in data or set(data.keys()).difference(PUBLISH_KEYS) or not data["records"]:
            self.unprocessable_entity(
                message="Invalid request format",
                content_type=content_type,
                sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
            )
        for r in data["records"]:
            convert_to_int(r, "partition", content_type)
            if set(r.keys()).difference(RECORD_KEYS):
                self.unprocessable_entity(
                    message="Invalid request format",
                    content_type=content_type,
                    sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
                )
        # Require a schema or schema id for any key/value list that has at least one populated element.
        if formats["embedded_format"] in {"avro", "jsonschema"}:
            for prefix, code in zip(RECORD_KEYS, RECORD_CODES):
                if self.all_empty(data, prefix):
                    continue
                if not self.is_valid_schema_request(data, prefix):
                    self.unprocessable_entity(
                        message=f"Request includes {prefix}s and uses a format that requires schemas "
                        f"but does not include the {prefix}_schema or {prefix}_schema_id fields",
                        content_type=content_type,
                        sub_code=code,
                    )
                try:
                    await self.validate_schema_info(
                        data, prefix, content_type, topic, formats["embedded_format"]
                    )
                except InvalidMessageSchema as e:
                    self.unprocessable_entity(
                        message=str(e),
                        content_type=content_type,
                        sub_code=RESTErrorCodes.INVALID_DATA.value,
                    )
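
The validator references module-level constants that the snippet does not show. Definitions consistent with how they are used above would plausibly look as follows; these are inferred from usage here (including the specific error codes), not copied from the source module:

    # Plausible definitions, inferred from usage above rather than copied from
    # the source module. zip() truncates to the shorter sequence, so only the
    # "key" and "value" prefixes are paired with an error code.
    PUBLISH_KEYS = {"records", "key_schema", "key_schema_id", "value_schema", "value_schema_id"}
    RECORD_KEYS = ("key", "value", "partition")
    RECORD_CODES = (42201, 42202)  # assumed sub-codes for missing key/value schemas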
Example No. 6
 async def seek_to(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Resetting offsets for %s to %r", internal_name, request_data)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "offsets", content_type)
     seeks = []
     for el in request_data["offsets"]:
         self._assert_has_key(el, "topic", content_type)
         for k in ["offset", "partition"]:
             self._assert_has_key(el, k, content_type)
             convert_to_int(el, k, content_type)
         self._assert_positive_number(el, "offset", content_type)
         seeks.append((TopicPartition(topic=el["topic"], partition=el["partition"]), el["offset"]))
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for part, offset in seeks:
             try:
                 consumer.seek(part, offset)
             except AssertionError:
                 self._illegal_state_fail(f"Partition {part} is unassigned", content_type)
         empty_response()
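
A conforming offsets payload for this handler; each offset must also pass the _assert_positive_number check. The topic and numbers below are illustrative assumptions:

    # The shape of request_data accepted by seek_to; values are illustrative.
    seek_payload = {
        "offsets": [
            {"topic": "my-topic", "partition": 0, "offset": 42},
        ],
    }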
Example No. 7
 async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Retrieving offsets for %s", internal_name)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     response = {"offsets": []}
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for el in request_data["partitions"]:
             convert_to_int(el, "partition", content_type)
             tp = TopicPartition(el["topic"], el["partition"])
             commit_info = consumer.committed(tp, metadata=True)
             if not commit_info:
                 continue
             response["offsets"].append({
                 "topic": tp.topic,
                 "partition": tp.partition,
                 "metadata": commit_info.metadata,
                 "offset": commit_info.offset
             })
     KarapaceBase.r(body=response, content_type=content_type)
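
Request and response shapes for this lookup, with illustrative values; as the continue branch shows, partitions without a committed offset are silently omitted from the response:

    # Illustrative request and response shapes for get_offsets.
    offsets_request = {"partitions": [{"topic": "my-topic", "partition": 0}]}
    # Expected response shape (partitions with no committed offset are omitted):
    # {"offsets": [{"topic": "my-topic", "partition": 0, "metadata": "", "offset": 10}]}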
Example No. 8
    async def seek_limit(
        self, internal_name: Tuple[str, str], content_type: str, request_data: dict, beginning: bool = True
    ):
        direction = "beginning" if beginning else "end"
        self.log.info("Seeking %s offsets", direction)
        self._assert_consumer_exists(internal_name, content_type)
        self._assert_has_key(request_data, "partitions", content_type)
        resets = []
        for el in request_data["partitions"]:
            convert_to_int(el, "partition", content_type)
            for k in ["topic", "partition"]:
                self._assert_has_key(el, k, content_type)
            resets.append(TopicPartition(topic=el["topic"], partition=el["partition"]))

        async with self.consumer_locks[internal_name]:
            consumer = self.consumers[internal_name].consumer
            try:
                if beginning:
                    consumer.seek_to_beginning(*resets)
                else:
                    consumer.seek_to_end(*resets)
                empty_response()
            except AssertionError:
                self._illegal_state_fail(f"Trying to reset unassigned partitions to {direction}", content_type)
Example No. 9
    async def commit_offsets(
        self, internal_name: Tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict
    ):
        self.log.info("Committing offsets for %s", internal_name)
        self._assert_consumer_exists(internal_name, content_type)
        if request_data:
            self._assert_has_key(request_data, "offsets", content_type)
        payload = {}
        for el in request_data.get("offsets", []):
            for k in ["partition", "offset"]:
                convert_to_int(el, k, content_type)
            # If we commit for a partition that does not belong to this consumer, then the internal error raised
            # is marked as retriable, and thus the commit method will remain blocked in what looks like an infinite loop
            self._topic_and_partition_valid(cluster_metadata, el, content_type)
            payload[TopicPartition(el["topic"], el["partition"])] = OffsetAndMetadata(el["offset"] + 1, None)

        async with self.consumer_locks[internal_name]:
            consumer = self.consumers[internal_name].consumer
            payload = payload or None
            try:
                consumer.commit(offsets=payload)
            except KafkaError as e:
                KarapaceBase.internal_error(message=f"error sending commit request: {e}", content_type=content_type)
        empty_response()
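
The el["offset"] + 1 follows Kafka's convention that a committed offset names the next record to consume, not the last one processed. A worked illustration with assumed values:

    # Why the handler commits offset + 1: in Kafka the committed offset is the
    # position of the *next* record to read, so after processing offset 5 the
    # commit stores 6. Values are illustrative.
    commit_payload = {"offsets": [{"topic": "my-topic", "partition": 0, "offset": 5}]}
    # -> consumer.commit(offsets={TopicPartition("my-topic", 0): OffsetAndMetadata(6, None)})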