def test_limited_read_with_relative_offset_from_end(
    dummy_message_writer: DummyMessageWriter,
    binary_messages: List[BinaryMessage],
    prepared_builder: PipelineBuilder,
):
    prepared_builder.with_range(start=-2, limit=1)
    pipeline = prepared_builder.build()
    assert isinstance(pipeline, Pipeline)

    pipeline.run_pipeline()

    assert dummy_message_writer.get_written_messages() == binary_messages[-2:-1]
def test_limited_read_with_absolute_offset(
    dummy_message_writer: DummyMessageWriter,
    binary_messages: List[BinaryMessage],
    prepared_builder: PipelineBuilder,
):
    prepared_builder.with_range(start=1, limit=1)
    pipeline = prepared_builder.build()
    assert isinstance(pipeline, Pipeline)

    pipeline.run_pipeline()

    assert len(dummy_message_writer.get_written_messages()) == 1
    assert dummy_message_writer.get_written_messages()[0] in [msg for msg in binary_messages if msg.offset >= 1]
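# The following test is an illustrative sketch, not part of the original suite. It assumes the
# same fixtures (dummy_message_writer, binary_messages, prepared_builder) and the with_range()
# semantics exercised above: a non-negative start is an absolute offset, a negative start is an
# offset relative to the end of the topic. The test name and the chosen range are hypothetical.
def test_limited_read_with_relative_offset_and_larger_limit(
    dummy_message_writer: DummyMessageWriter,
    binary_messages: List[BinaryMessage],
    prepared_builder: PipelineBuilder,
):
    # Start two messages before the end and allow up to two messages,
    # which should yield exactly the last two messages.
    prepared_builder.with_range(start=-2, limit=2)
    pipeline = prepared_builder.build()
    assert isinstance(pipeline, Pipeline)

    pipeline.run_pipeline()

    assert dummy_message_writer.get_written_messages() == binary_messages[-2:]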
def consume(
    state: State,
    topic: str,
    from_context: str,
    number: Optional[int],
    match: str,
    last: bool,
    avro: bool,
    binary: bool,
    directory: str,
    consumergroup: str,
    preserve_order: bool,
    write_to_stdout: bool,
    pretty_print: bool,
):
    """Consume messages from a topic.

    Read messages from a given topic in a given context. These messages can either be written to
    files in an automatically generated directory (default behavior), or to STDOUT.

    If writing to STDOUT, the data will be represented as a JSON object with the message key and
    the message value always being a string. With the --avro option, those strings are JSON
    serialized objects. With the --binary option, those strings contain the base64 encoded binary
    data. Without either of the two options, the data in the messages is treated as utf-8 encoded
    strings and will be used as-is.

    \b
    EXAMPLES:
    # Consume the first 10 messages from TOPIC in the current context and print them to STDOUT in order.
    esque consume --first -n 10 --preserve-order --pretty-print --stdout TOPIC

    \b
    # Consume <n> messages, starting from the 10th, from TOPIC in the <source_ctx> context and write them to files.
    esque consume --match "message.offset > 9" -n <n> TOPIC -f <source_ctx>

    \b
    # Extract json objects from keys
    esque consume --stdout --avro TOPIC | jq '.key | fromjson'

    \b
    # Extract binary data from keys (depending on the data this could mess up your console)
    esque consume --stdout --binary TOPIC | jq '.key | @base64d'
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if not write_to_stdout and not directory:
        directory = Path() / "messages" / topic / datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, topic)
    builder.with_input_handler(input_handler)

    output_handler = create_output_handler(directory, write_to_stdout, binary, pretty_print)
    builder.with_output_handler(output_handler)

    output_message_serializer = create_output_message_serializer(write_to_stdout, directory, avro, binary)
    builder.with_output_message_serializer(output_message_serializer)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if preserve_order:
        topic_data = Cluster().topic_controller.get_cluster_topic(topic, retrieve_partition_watermarks=False)
        builder.with_stream_decorator(yield_messages_sorted_by_timestamp(len(topic_data.partitions)))

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()
    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    if not write_to_stdout:
        if counter.message_count == number:
            click.echo(blue_bold(str(counter.message_count)) + " messages consumed.")
        else:
            click.echo(
                "Only found "
                + bold(str(counter.message_count))
                + " messages in topic, out of "
                + blue_bold(str(number))
                + " required."
            )
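# Illustrative sketch, not part of the original module: the consume() command above composes its
# pipeline through the create_* helpers. The function below shows the same wiring in a minimal,
# programmatic form for printing the first `limit` plain-text messages of a topic to STDOUT. It
# assumes an already configured esque State and that the helpers accept the same argument values
# consume() would pass when --stdout is used without --avro/--binary; the function name and the
# None placeholders for consumergroup/directory are assumptions.
def consume_first_messages_to_stdout(state: State, topic: str, limit: int = 10) -> None:
    builder = PipelineBuilder()
    # Arguments mirror the positional calls in consume(): (avro, binary, state), (consumergroup,
    # from_context, topic), (directory, write_to_stdout, binary, pretty_print), and
    # (write_to_stdout, directory, avro, binary).
    builder.with_input_message_serializer(create_input_serializer(False, False, state))
    builder.with_input_handler(create_input_handler(None, state.config.current_context, topic))
    builder.with_output_handler(create_output_handler(None, True, False, True))
    builder.with_output_message_serializer(create_output_message_serializer(True, None, False, False))
    builder.with_range(start=KafkaHandler.OFFSET_AT_FIRST_MESSAGE, limit=limit)
    builder.build().run_pipeline()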
def transfer(
    state: State,
    from_topic: str,
    to_topic: str,
    from_context: str,
    to_context: str,
    number: int,
    last: bool,
    avro: bool,
    binary: bool,
    consumergroup: str,
    match: Optional[str] = None,
):
    """Transfer messages between two topics.

    Read messages from the source topic in the source context and write them into the destination
    topic in the destination context. This function is shorthand for using a combination of
    `esque consume` and `esque produce`.

    \b
    EXAMPLES:
    # Transfer the first 10 messages from TOPIC1 in the current context to TOPIC2 in context DSTCTX.
    esque transfer --first -n 10 --from-topic TOPIC1 --to-topic TOPIC2 --to-context DSTCTX

    \b
    # Transfer the first 10 messages from TOPIC1 in the context SRCCTX to TOPIC2 in context DSTCTX, assuming the messages are AVRO.
    esque transfer --first -n 10 --avro --from-topic TOPIC1 --from-context SRCCTX --to-topic TOPIC2 --to-context DSTCTX
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    if not to_context:
        to_context = from_context

    if from_context == to_context and from_topic == to_topic:
        raise ValueError("Cannot transfer data to the same topic.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(to_topic):
        if ensure_approval(f"Topic {to_topic!r} does not exist, do you want to create it?", no_verify=state.no_verify):
            topic_controller.create_topics([Topic(to_topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, from_topic)
    builder.with_input_handler(input_handler)

    output_message_serializer = create_output_serializer(avro, binary, to_topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, to_topic)
    builder.with_output_handler(output_handler)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()
    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    click.echo(
        green_bold(str(counter.message_count))
        + " messages consumed from topic "
        + blue_bold(from_topic)
        + " in context "
        + blue_bold(from_context)
        + " and produced to topic "
        + blue_bold(to_topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )
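# Illustrative sketch, not part of the original module: the destination-topic check from
# transfer() factored out for reuse in scripts. The helper name and return value are
# hypothetical; Cluster, Topic and ensure_approval are the same objects used above.
def ensure_destination_topic_exists(to_topic: str, no_verify: bool = False) -> bool:
    topic_controller = Cluster().topic_controller
    if topic_controller.topic_exists(to_topic):
        return True
    # Ask for confirmation (or skip the prompt with no_verify) before creating the topic.
    if ensure_approval(f"Topic {to_topic!r} does not exist, do you want to create it?", no_verify=no_verify):
        topic_controller.create_topics([Topic(to_topic)])
        return True
    return False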