Exemplo n.º 1
0
def test_build_fails_with_message_writer_and_serializer(
    dummy_message_writer: DummyMessageWriter, string_message_serializer: MessageSerializer
):
    builder = PipelineBuilder()
    builder.with_message_writer(dummy_message_writer)
    builder.with_input_message_serializer(string_message_serializer)

    with pytest.raises(EsqueIOInvalidPipelineBuilderState):
        builder.build()
Exemplo n.º 2
0
def test_create_pipeline_with_handler_and_serializer_input(
    dummy_handler: DummyHandler,
    string_message_serializer: MessageSerializer,
    binary_messages: List[BinaryMessage],
    string_messages: List[Message],
):
    builder = PipelineBuilder()
    builder.with_input_handler(dummy_handler)
    builder.with_input_message_serializer(string_message_serializer)
    builder.with_stream_decorator(skip_stream_events)
    pipeline = builder.build()

    dummy_handler.set_messages(binary_messages)

    assert list(pipeline.decorated_message_stream()) == string_messages
Exemplo n.º 3
0
def produce(
    state: State,
    topic: str,
    to_context: str,
    directory: str,
    avro: bool,
    binary: bool,
    match: str = None,
    read_from_stdin: bool = False,
    ignore_stdin_errors: bool = False,
):
    """Produce messages to a topic.

    Write messages to a given topic in a given context. These messages can come from either a directory <directory>
    that was previously written to with "esque consume" or from JSON objects coming in via STDIN.

    If reading from STDIN, then data will be expected as single-line JSON objects with the message key and the
    message value always being a string.
    The --avro option is currently not supported when reading from STDIN.
    With the --binary option those strings are expected to contain the base64 encoded binary data.
    By default, the data in the messages is treated utf-8 encoded strings and will be used as-is.
    In addition to "key" and "value" one can also define headers as list of objects with a "key" and a "value" attribute
    with the former being a string and the latter being a string, "null" or simply not defined.

    \b
    So valid json objects for reading from stdin would be:
    {"key": "foo", "value": "bar", "headers":[{"key":"h1", "value":"v1"},{"key":"h2"}]}
    {"key": "foo", "value": null, "partition": 1}
    {"key": "foo"}

    \b
    EXAMPLES:
    # Write all messages from the files in <directory> to TOPIC in the <destination_ctx> context.
    esque produce -d <directory> -t <destination_ctx> TOPIC

    \b
    # Start environment in terminal to write messages to TOPIC in the <destination_ctx> context.
    esque produce --stdin -f <destination_ctx> -y TOPIC

    \b
    # Copy source_topic to destination_topic.
    esque consume -f first-context --stdout source_topic | esque produce -t second-context --stdin destination_topic
    """
    if not to_context:
        to_context = state.config.current_context
    state.config.context_switch(to_context)

    if not read_from_stdin:
        if not directory:
            raise ValueError(
                "Need to provide directory if not reading from stdin.")
        else:
            directory = pathlib.Path(directory)
    elif avro:
        raise ValueError(
            "Cannot read avro data from stdin. Use a directory instead.")

    if binary and avro:
        raise ValueError(
            "Cannot set data to be interpreted as binary AND avro.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(topic):
        if ensure_approval(
                f"Topic {topic!r} does not exist, do you want to create it?",
                no_verify=state.no_verify):
            topic_controller.create_topics([Topic(topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_handler = create_input_handler(directory, read_from_stdin)
    builder.with_input_handler(input_handler)

    input_message_serializer = create_input_message_serializer(
        directory, avro, binary)
    builder.with_input_message_serializer(input_message_serializer)

    output_message_serializer = create_output_serializer(
        avro, binary, topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, topic)
    builder.with_output_handler(output_handler)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()

    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    click.echo(
        green_bold(str(counter.message_count)) +
        " messages successfully produced to topic " + blue_bold(topic) +
        " in context " + blue_bold(to_context) + ".")
Exemplo n.º 4
0
def consume(
    state: State,
    topic: str,
    from_context: str,
    number: Optional[int],
    match: str,
    last: bool,
    avro: bool,
    binary: bool,
    directory: str,
    consumergroup: str,
    preserve_order: bool,
    write_to_stdout: bool,
    pretty_print: bool,
):
    """Consume messages from a topic.

    Read messages from a given topic in a given context. These messages can either be written
    to files in an automatically generated directory (default behavior), or to STDOUT.

    If writing to STDOUT, then data will be represented as a JSON object with the message key and the message value
    always being a string.
    With the --avro option, those strings are JSON serialized objects.
    With the --binary option those strings contain the base64 encoded binary data.
    Without any of the two options, the data in the messages is treated utf-8 encoded strings and will be used as-is.

    \b
    EXAMPLES:
    # Consume the first 10 messages from TOPIC in the current context and print them to STDOUT in order.
    esque consume --first -n 10 --preserve-order --pretty-print --stdout TOPIC

    \b
    # Consume <n> messages, starting from the 10th, from TOPIC in the <source_ctx> context and write them to files.
    esque consume --match "message.offset > 9" -n <n> TOPIC -f <source_ctx>

    \b
    # Extract json objects from keys
    esque consume --stdout --avro TOPIC | jq '.key | fromjson'

    \b
    # Extract binary data from keys (depending on the data this could mess up your console)
    esque consume --stdout --binary TOPIC | jq '.key | @base64d'
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if not write_to_stdout and not directory:
        directory = Path() / "messages" / topic / datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, topic)
    builder.with_input_handler(input_handler)

    output_handler = create_output_handler(directory, write_to_stdout, binary, pretty_print)
    builder.with_output_handler(output_handler)

    output_message_serializer = create_output_message_serializer(write_to_stdout, directory, avro, binary)
    builder.with_output_message_serializer(output_message_serializer)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if preserve_order:
        topic_data = Cluster().topic_controller.get_cluster_topic(topic, retrieve_partition_watermarks=False)
        builder.with_stream_decorator(yield_messages_sorted_by_timestamp(len(topic_data.partitions)))

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()

    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    if not write_to_stdout:
        if counter.message_count == number:
            click.echo(blue_bold(str(counter.message_count)) + " messages consumed.")
        else:
            click.echo(
                "Only found "
                + bold(str(counter.message_count))
                + " messages in topic, out of "
                + blue_bold(str(number))
                + " required."
            )
Exemplo n.º 5
0
def transfer(
    state: State,
    from_topic: str,
    to_topic: str,
    from_context: str,
    to_context: str,
    number: int,
    last: bool,
    avro: bool,
    binary: bool,
    consumergroup: str,
    match: str = None,
):
    """Transfer messages between two topics.

    Read messages from the source topic in the source context and write them into the destination topic in the destination context.
    This function is shorthand for using a combination of `esque consume` and `esque produce`

    \b
    EXAMPLES:
    # Transfer the first 10 messages from TOPIC1 in the current context to TOPIC2 in context DSTCTX.
    esque transfer --first -n 10 --from-topic TOPIC1 --to-topic TOPIC2 --to-context DSTCTX

    \b
    # Transfer the first 10 messages from TOPIC1 in the context SRCCTX to TOPIC2 in context DSTCTX, assuming the messages are AVRO.
    esque transfer --first -n 10 --avro --from-topic TOPIC1 --from-context SRCCTX --to-topic TOPIC2 --to-context DSTCTX
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    if not to_context:
        to_context = from_context

    if from_context == to_context and from_topic == to_topic:
        raise ValueError("Cannot transfer data to the same topic.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(to_topic):
        if ensure_approval(f"Topic {to_topic!r} does not exist, do you want to create it?", no_verify=state.no_verify):
            topic_controller.create_topics([Topic(to_topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, from_topic)
    builder.with_input_handler(input_handler)

    output_message_serializer = create_output_serializer(avro, binary, to_topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, to_topic)
    builder.with_output_handler(output_handler)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()

    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    click.echo(
        green_bold(str(counter.message_count))
        + " messages consumed from topic "
        + blue_bold(from_topic)
        + " in context "
        + blue_bold(to_context)
        + " and produced to topic "
        + blue_bold(to_topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )