Example #1
    def test_send_without_response(self):
        """Imitate producer without acknowledge, in this case client produces
        messages and kafka does not send response, and we make sure that
        futures do not stuck in queue forever"""

        host, port = self.kafka_host, self.kafka_port
        conn = yield from create_conn(host, port, loop=self.loop)

        # prepare message
        builder = LegacyRecordBatchBuilder(
            magic=1, compression_type=0, batch_size=99999999)
        builder.append(offset=0, value=b"foo", key=None, timestamp=None)
        request = ProduceRequest(
            required_acks=0, timeout=10 * 1000,
            topics=[(b'foo', [(0, bytes(builder.build()))])])

        # produce messages without acknowledge
        req = []
        for i in range(10):
            req.append(conn.send(request, expect_response=False))
        # make sure futures are not stuck in the queue
        self.assertEqual(len(conn._requests), 0)
        for x in req:
            yield from x
        conn.close()
Example #2
def test_read_write_serde_v0_v1_no_compression(magic, key, value, checksum):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024 * 1024)
    builder.append(0, timestamp=9999999, key=key, value=value)
    buffer = builder.build()

    batch = LegacyRecordBatch(buffer, magic)
    assert batch.validate_crc()

    assert batch.is_control_batch is False
    assert batch.is_transactional is False
    assert batch.producer_id is None
    assert batch.next_offset == 1

    msgs = list(batch)
    assert len(msgs) == 1
    msg = msgs[0]

    assert msg.offset == 0
    assert msg.timestamp == (9999999 if magic else None)
    assert msg.timestamp_type == (0 if magic else None)
    assert msg.key == key
    assert msg.value == value
    assert msg.checksum == checksum[magic] & 0xffffffff
Example #3
def test_read_write_serde_v0_v1_with_compression(compression_type, magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=compression_type, batch_size=1024 * 1024)
    for offset in range(10):
        builder.append(
            offset, timestamp=9999999, key=b"test", value=b"Super")
    buffer = builder.build()

    # Broker will set the offset to a proper last offset value
    struct.pack_into(">q", buffer, 0, 9)

    batch = LegacyRecordBatch(buffer, magic)
    assert batch.validate_crc()

    assert batch.is_control_batch is False
    assert batch.is_transactional is False
    assert batch.producer_id is None
    assert batch.next_offset == 10

    msgs = list(batch)

    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == (9999999 if magic else None)
        assert msg.timestamp_type == (0 if magic else None)
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.checksum == (-2095076219 if magic else 278251978) & \
            0xffffffff
Example #4
    async def test_send_without_response(self):
        """Imitate producer without acknowledge, in this case client produces
        messages and kafka does not send response, and we make sure that
        futures do not stuck in queue forever"""

        host, port = self.kafka_host, self.kafka_port
        conn = await create_conn(host, port)

        # prepare message
        builder = LegacyRecordBatchBuilder(magic=1,
                                           compression_type=0,
                                           batch_size=99999999)
        builder.append(offset=0, value=b"foo", key=None, timestamp=None)
        request = ProduceRequest(
            required_acks=0, timeout=10 * 1000,
            topics=[(b'foo', [(0, bytes(builder.build()))])])

        # produce messages without acknowledge
        req = []
        for i in range(10):
            req.append(conn.send(request, expect_response=False))
        # make sure futures are not stuck in the queue
        self.assertEqual(len(conn._requests), 0)
        for x in req:
            await x
        conn.close()
Example #6
def _make_compressed_batch(magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=LegacyRecordBatch.CODEC_GZIP,
        batch_size=1024 * 1024)
    for offset in range(10):
        builder.append(
            offset, timestamp=9999999, key=b"test", value=b"Super")
    return builder.build()
Example #8
def test_legacy_correct_metadata_response(magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024 * 1024)
    meta = builder.append(0, timestamp=9999999, key=b"test", value=b"Super")

    assert meta.offset == 0
    assert meta.timestamp == (9999999 if magic else -1)
    assert meta.crc == (-2095076219 if magic else 278251978) & 0xffffffff
    assert repr(meta) == ("LegacyRecordMetadata(offset=0, crc={}, size={}, "
                          "timestamp={})".format(meta.crc, meta.size,
                                                 meta.timestamp))
Example #9
def test_legacy_correct_metadata_response(magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024 * 1024)
    meta = builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super")

    assert meta.offset == 0
    assert meta.timestamp == (9999999 if magic else -1)
    assert meta.crc == (-2095076219 if magic else 278251978) & 0xffffffff
    assert repr(meta) == (
        "LegacyRecordMetadata(offset=0, crc={}, size={}, "
        "timestamp={})".format(meta.crc, meta.size, meta.timestamp)
    )
Example #10
def test_record_overhead():
    known = {
        0: 14,
        1: 22,
    }
    for magic, size in known.items():
        assert LegacyRecordBatchBuilder.record_overhead(magic) == size
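
These overhead figures cover the record body only; on the wire, each legacy
message-set entry is additionally prefixed by a 12-byte log overhead (an
8-byte offset plus a 4-byte length). A minimal sketch of how the pieces
compose into total size; the helper name is hypothetical, but the totals
reproduce the 95-byte (magic 0) and 119-byte (magic 1) figures asserted in
the builder validation tests below.

LOG_OVERHEAD = 12  # 8-byte offset + 4-byte length prefix per entry


def legacy_record_size(magic, key, value):
    # record_overhead(magic) plus log framing plus payload lengths
    overhead = {0: 14, 1: 22}[magic]
    key_len = len(key) if key is not None else 0
    value_len = len(value) if value is not None else 0
    return LOG_OVERHEAD + overhead + key_len + value_len


# The three records appended in the validation tests below:
records = [(b"123", None), (None, b"some string"), (b"123", None)]
assert sum(legacy_record_size(0, k, v) for k, v in records) == 95
assert sum(legacy_record_size(1, k, v) for k, v in records) == 119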
Example #12
def prepare(magic: int):
    samples = []
    for _ in range(BATCH_SAMPLES):
        batch = LegacyRecordBatchBuilder(magic,
                                         batch_size=DEFAULT_BATCH_SIZE,
                                         compression_type=0)
        for offset in range(MESSAGES_PER_BATCH):
            size = batch.append(
                offset,
                None,  # random.randint(*TIMESTAMP_RANGE)
                random_bytes(KEY_SIZE),
                random_bytes(VALUE_SIZE))
            assert size
        samples.append(bytes(batch.build()))

    return iter(itertools.cycle(samples))
Example #13
    async def test_compacted_topic_consumption(self):
        # Compacted topics can have offsets skipped
        client = AIOKafkaClient(
            loop=self.loop,
            bootstrap_servers=[])
        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()

        subscriptions = SubscriptionState(loop=self.loop)
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        tp = TopicPartition('test', 0)
        req = FetchRequest(
            -1,  # replica_id
            100, 100, [(tp.topic, [(tp.partition, 155, 100000)])])

        builder = LegacyRecordBatchBuilder(
            magic=1, compression_type=0, batch_size=99999999)
        builder.append(160, value=b"12345", key=b"1", timestamp=None)
        builder.append(162, value=b"23456", key=b"2", timestamp=None)
        builder.append(167, value=b"34567", key=b"3", timestamp=None)
        batch = bytes(builder.build())

        resp = FetchResponse(
            [('test', [(
                0, 0, 3000,  # partition, error_code, highwater_offset
                batch  # Batch raw bytes
            )])])

        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)
        client.send.side_effect = asyncio.coroutine(lambda n, r: resp)

        tp_state.seek(155)
        fetcher._in_flight.add(0)
        needs_wake_up = await fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        # Test successful getone, the closest in batch offset=160
        first = buf.getone()
        self.assertEqual(tp_state.position, 161)
        self.assertEqual(
            (first.value, first.key, first.offset),
            (b"12345", b"1", 160))

        # Test successful getmany
        second, third = buf.getall()
        self.assertEqual(tp_state.position, 168)
        self.assertEqual(
            (second.value, second.key, second.offset),
            (b"23456", b"2", 162))
        self.assertEqual(
            (third.value, third.key, third.offset),
            (b"34567", b"3", 167))
Example #14
def test_read_write_serde_v0_v1_no_compression(magic, key, value, checksum):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024 * 1024)
    builder.append(0, timestamp=9999999, key=key, value=value)
    buffer = builder.build()

    batch = LegacyRecordBatch(buffer, magic)
    msgs = list(batch)
    assert len(msgs) == 1
    msg = msgs[0]

    assert msg.offset == 0
    assert msg.timestamp == (9999999 if magic else None)
    assert msg.timestamp_type == (0 if magic else None)
    assert msg.key == key
    assert msg.value == value
    assert msg.checksum == checksum[magic] & 0xffffffff
Example #15
def test_read_write_serde_v0_v1_with_compression(compression_type, magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=compression_type,
                                       batch_size=1024 * 1024)
    for offset in range(10):
        builder.append(offset, timestamp=9999999, key=b"test", value=b"Super")
    buffer = builder.build()

    batch = LegacyRecordBatch(buffer, magic)
    msgs = list(batch)

    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == (9999999 if magic else None)
        assert msg.timestamp_type == (0 if magic else None)
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.checksum == (-2095076219 if magic else 278251978) & \
            0xffffffff
Example #16
def test_legacy_batch_size_limit(magic):
    # First message can be added even if it's too big
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024)
    meta = builder.append(0, timestamp=None, key=None, value=b"M" * 2000)
    assert meta.size > 0
    assert meta.crc is not None
    assert meta.offset == 0
    assert meta.timestamp is not None
    assert len(builder.build()) > 2000

    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024)
    meta = builder.append(0, timestamp=None, key=None, value=b"M" * 700)
    assert meta is not None
    meta = builder.append(1, timestamp=None, key=None, value=b"M" * 700)
    assert meta is None
    meta = builder.append(2, timestamp=None, key=None, value=b"M" * 700)
    assert meta is None
    assert len(builder.build()) < 1000
Example #17
    def test_compacted_topic_consumption(self):
        # Compacted topics can have offsets skipped
        client = AIOKafkaClient(
            loop=self.loop,
            bootstrap_servers=[])
        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()

        subscriptions = SubscriptionState(loop=self.loop)
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        tp = TopicPartition('test', 0)
        req = FetchRequest(
            -1,  # replica_id
            100, 100, [(tp.topic, [(tp.partition, 155, 100000)])])

        builder = LegacyRecordBatchBuilder(
            magic=1, compression_type=0, batch_size=99999999)
        builder.append(160, value=b"12345", key=b"1", timestamp=None)
        builder.append(162, value=b"23456", key=b"2", timestamp=None)
        builder.append(167, value=b"34567", key=b"3", timestamp=None)
        batch = bytes(builder.build())

        resp = FetchResponse(
            [('test', [(
                0, 0, 3000,  # partition, error_code, highwater_offset
                batch  # Batch raw bytes
            )])])

        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)
        client.send.side_effect = asyncio.coroutine(lambda n, r: resp)

        tp_state.seek(155)
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        # Test successful getone, the closest in batch offset=160
        first = buf.getone()
        self.assertEqual(tp_state.position, 161)
        self.assertEqual(
            (first.value, first.key, first.offset),
            (b"12345", b"1", 160))

        # Test successful getmany
        second, third = buf.getall()
        self.assertEqual(tp_state.position, 168)
        self.assertEqual(
            (second.value, second.key, second.offset),
            (b"23456", b"2", 162))
        self.assertEqual(
            (third.value, third.key, third.offset),
            (b"34567", b"3", 167))
Example #18
def test_unsupported_yet_codec():
    compression_type = LegacyRecordBatch.CODEC_MASK  # It doesn't exist
    builder = LegacyRecordBatchBuilder(
        magic=0, compression_type=compression_type, batch_size=1024)
    with pytest.raises(UnsupportedCodecError):
        builder.append(0, timestamp=None, key=None, value=b"M")
        builder.build()
Example #19
    def __init__(self, magic, batch_size, compression_type,
                 *, is_transactional):
        if magic < 2:
            assert not is_transactional
            self._builder = LegacyRecordBatchBuilder(
                magic, compression_type, batch_size)
        else:
            self._builder = DefaultRecordBatchBuilder(
                magic, compression_type, is_transactional=is_transactional,
                producer_id=-1, producer_epoch=-1, base_sequence=0,
                batch_size=batch_size)
        self._relative_offset = 0
        self._buffer = None
        self._closed = False
Example #20
def test_written_bytes_equals_size_in_bytes(magic):
    key = b"test"
    value = b"Super"
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024 * 1024)

    size_in_bytes = builder.size_in_bytes(
        0, timestamp=9999999, key=key, value=value)

    pos = builder.size()
    builder.append(0, timestamp=9999999, key=key, value=value)

    assert builder.size() - pos == size_in_bytes
Example #21
def test_read_write_serde_v0_v1_no_compression(magic, key, value, checksum):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024 * 1024)
    builder.append(0, timestamp=9999999, key=key, value=value)
    buffer = builder.build()

    batch = LegacyRecordBatch(buffer, magic)
    assert batch.validate_crc()

    assert batch.is_control_batch is False
    assert batch.is_transactional is False
    assert batch.producer_id is None
    assert batch.next_offset == 1

    msgs = list(batch)
    assert len(msgs) == 1
    msg = msgs[0]

    assert msg.offset == 0
    assert msg.timestamp == (9999999 if magic else None)
    assert msg.timestamp_type == (0 if magic else None)
    assert msg.key == key
    assert msg.value == value
    assert msg.checksum == checksum[magic] & 0xffffffff
Example #24
    def _serialize(self, topic, key, value):
        if self._key_serializer:
            serialized_key = self._key_serializer(key)
        else:
            serialized_key = key
        if self._value_serializer:
            serialized_value = self._value_serializer(value)
        else:
            serialized_value = value

        message_size = LegacyRecordBatchBuilder.record_overhead(
            self._producer_magic)
        if serialized_key is not None:
            message_size += len(serialized_key)
        if serialized_value is not None:
            message_size += len(serialized_value)
        if message_size > self._max_request_size:
            raise MessageSizeTooLargeError(
                "The message is %d bytes when serialized which is larger than"
                " the maximum request size you have configured with the"
                " max_request_size configuration" % message_size)

        return serialized_key, serialized_value
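
A worked instance of the size check above, under assumed values: magic 1 (so
record_overhead returns 22, per the tests above) and an illustrative
max_request_size of 1048576; neither value comes from this code.

# Illustrative values only: record_overhead(1) == 22,
# max_request_size assumed to be 1 MiB.
RECORD_OVERHEAD_V1 = 22
max_request_size = 1048576

message_size = RECORD_OVERHEAD_V1 + len(b"test") + len(b"Super")  # 31 bytes
assert message_size <= max_request_size  # small message passes the check

too_big = RECORD_OVERHEAD_V1 + 2 * 1024 * 1024  # 2 MiB value, key is None
assert too_big > max_request_size  # _serialize would raise MessageSizeTooLargeError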
Example #26
    def __init__(self, magic, batch_size, compression_type):
        self._builder = LegacyRecordBatchBuilder(
            magic, compression_type, batch_size)
        self._relative_offset = 0
        self._buffer = None
        self._closed = False
Example #27
    async def test_proc_fetch_request(self):
        client = AIOKafkaClient(bootstrap_servers=[])
        subscriptions = SubscriptionState()
        fetcher = Fetcher(client, subscriptions, auto_offset_reset="latest")

        tp = TopicPartition('test', 0)
        tp_info = (tp.topic, [(tp.partition, 4, 100000)])
        req = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [tp_info])

        async def ready(conn):
            return True

        def force_metadata_update():
            fut = create_future()
            fut.set_result(False)
            return fut

        client.ready = mock.MagicMock()
        client.ready.side_effect = ready
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = force_metadata_update
        client.send = mock.MagicMock()

        builder = LegacyRecordBatchBuilder(magic=1,
                                           compression_type=0,
                                           batch_size=99999999)
        builder.append(offset=4, value=b"test msg", key=None, timestamp=None)
        raw_batch = bytes(builder.build())

        fetch_response = FetchResponse([('test', [(0, 0, 9, raw_batch)])])

        async def send(node, request):
            nonlocal fetch_response
            return fetch_response

        client.send.side_effect = send
        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)

        # The partition has no active position, so will ignore result
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(fetcher._records, {})

        # The partition's position does not match request's fetch offset
        subscriptions.seek(tp, 0)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(fetcher._records, {})

        subscriptions.seek(tp, 4)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone().value, b"test msg")

        # If position changed after fetch request passed
        subscriptions.seek(tp, 4)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        subscriptions.seek(tp, 10)
        self.assertIsNone(buf.getone())

        # If assignment is lost after fetch request passed
        subscriptions.seek(tp, 4)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        subscriptions.unsubscribe()
        self.assertIsNone(buf.getone())

        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)

        # error -> no partition found (UnknownTopicOrPartitionError)
        subscriptions.seek(tp, 4)
        fetcher._records.clear()
        fetch_response = FetchResponse([('test', [(0, 3, 9, raw_batch)])])
        cc = client.force_metadata_update.call_count
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(client.force_metadata_update.call_count, cc + 1)

        # error -> topic auth failed (TopicAuthorizationFailedError)
        fetch_response = FetchResponse([('test', [(0, 29, 9, raw_batch)])])
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(TopicAuthorizationFailedError):
            await fetcher.next_record([])

        # error -> unknown
        fetch_response = FetchResponse([('test', [(0, -1, 9, raw_batch)])])
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> offset out of range with offset strategy
        fetch_response = FetchResponse([('test', [(0, 1, 9, raw_batch)])])
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(tp_state.has_valid_position, False)
        self.assertEqual(tp_state.awaiting_reset, True)
        self.assertEqual(tp_state.reset_strategy, OffsetResetStrategy.LATEST)

        # error -> offset out of range without offset strategy
        subscriptions.seek(tp, 4)
        fetcher._default_reset_strategy = OffsetResetStrategy.NONE
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(OffsetOutOfRangeError):
            await fetcher.next_record([])

        await fetcher.close()
Example #28
def test_legacy_batch_builder_validates_arguments(magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024 * 1024)

    # Key should not be str
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp=9999999, key="some string", value=None)

    # Value should not be str
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp=9999999, key=None, value="some string")

    # Timestamp should be of proper type (timestamp is ignored for magic == 0)
    if magic != 0:
        with pytest.raises(TypeError):
            builder.append(
                0, timestamp="1243812793", key=None, value=b"some string")

    # Offset of invalid type
    with pytest.raises(TypeError):
        builder.append(
            "0", timestamp=9999999, key=None, value=b"some string")

    # Unknown struct errors are passed through. These are theoretical and
    # indicate a bug in the implementation. The C implementation locates
    # _encode_msg elsewhere and is less vulnerable to such bugs since it's
    # statically typed, so we skip the test there.
    if hasattr(builder, "_encode_msg"):
        with mock.patch.object(builder, "_encode_msg") as mocked:
            err = struct.error("test error")
            mocked.side_effect = err
            with pytest.raises(struct.error) as excinfo:
                builder.append(
                    0, timestamp=None, key=None, value=b"some string")
            assert excinfo.value == err

    # Ok to pass value as None
    builder.append(
        0, timestamp=9999999, key=b"123", value=None)

    # Timestamp can be None
    builder.append(
        1, timestamp=None, key=None, value=b"some string")

    # It is OK to pass offsets in non-incremental order, though this
    # should not happen in practice
    builder.append(
        5, timestamp=9999999, key=b"123", value=None)

    # Verify the final buffer length in case the error handling above
    # failed to restore the builder's inner buffer
    assert len(builder.build()) == (119 if magic else 95)
Example #30
def test_unavailable_codec(compression_type, name, checker_name):
    builder = LegacyRecordBatchBuilder(
        magic=0, compression_type=compression_type, batch_size=1024)
    builder.append(0, timestamp=None, key=None, value=b"M")
    correct_buffer = builder.build()

    with mock.patch.object(kafka.codec, checker_name) as mocked:
        mocked.return_value = False
        # Check that builder raises error
        builder = LegacyRecordBatchBuilder(
            magic=0, compression_type=compression_type, batch_size=1024)
        error_msg = "Libraries for {} compression codec not found".format(name)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            builder.append(0, timestamp=None, key=None, value=b"M")
            builder.build()

        # Check that reader raises same error
        batch = LegacyRecordBatch(bytes(correct_buffer), 0)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            list(batch)
Example #31
def test_legacy_batch_builder_validates_arguments(magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024 * 1024)

    # Key should not be str
    with pytest.raises(TypeError):
        builder.append(0, timestamp=9999999, key="some string", value=None)

    # Value should not be str
    with pytest.raises(TypeError):
        builder.append(0, timestamp=9999999, key=None, value="some string")

    # Timestamp should be of proper type
    if magic != 0:
        with pytest.raises(TypeError):
            builder.append(0,
                           timestamp="1243812793",
                           key=None,
                           value=b"some string")

    # Offset of invalid type
    with pytest.raises(TypeError):
        builder.append("0", timestamp=9999999, key=None, value=b"some string")

    # Ok to pass value as None
    builder.append(0, timestamp=9999999, key=b"123", value=None)

    # Timestamp can be None
    builder.append(1, timestamp=None, key=None, value=b"some string")

    # It is OK to pass offsets in non-incremental order, though this
    # should not happen in practice
    builder.append(5, timestamp=9999999, key=b"123", value=None)

    # Verify the final buffer length in case the error handling above
    # failed to restore the builder's inner buffer
    assert len(builder.build()) == (119 if magic else 95)
Example #32
    def test_proc_fetch_request(self):
        client = AIOKafkaClient(
            loop=self.loop,
            bootstrap_servers=[])
        subscriptions = SubscriptionState(loop=self.loop)
        fetcher = Fetcher(
            client, subscriptions, auto_offset_reset="latest", loop=self.loop)

        tp = TopicPartition('test', 0)
        tp_info = (tp.topic, [(tp.partition, 4, 100000)])
        req = FetchRequest(
            -1,  # replica_id
            100, 100, [tp_info])

        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()

        builder = LegacyRecordBatchBuilder(
            magic=1, compression_type=0, batch_size=99999999)
        builder.append(offset=4, value=b"test msg", key=None, timestamp=None)
        raw_batch = bytes(builder.build())

        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 0, 9, raw_batch)])]))
        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)

        # The partition has no active position, so will ignore result
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(fetcher._records, {})

        # The partition's position does not match request's fetch offset
        subscriptions.seek(tp, 0)
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(fetcher._records, {})

        subscriptions.seek(tp, 4)
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone().value, b"test msg")

        # If position changed after fetch request passed
        subscriptions.seek(tp, 4)
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        subscriptions.seek(tp, 10)
        self.assertIsNone(buf.getone())

        # If assignment is lost after fetch request passed
        subscriptions.seek(tp, 4)
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        subscriptions.unsubscribe()
        self.assertIsNone(buf.getone())

        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)

        # error -> no partition found (UnknownTopicOrPartitionError)
        subscriptions.seek(tp, 4)
        fetcher._records.clear()
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 3, 9, raw_batch)])]))
        cc = client.force_metadata_update.call_count
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(client.force_metadata_update.call_count, cc + 1)

        # error -> topic auth failed (TopicAuthorizationFailedError)
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 29, 9, raw_batch)])]))
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(TopicAuthorizationFailedError):
            yield from fetcher.next_record([])

        # error -> unknown
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, -1, 9, raw_batch)])]))
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> offset out of range with offset strategy
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 1, 9, raw_batch)])]))
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(tp_state.has_valid_position, False)
        self.assertEqual(tp_state.awaiting_reset, True)
        self.assertEqual(tp_state.reset_strategy, OffsetResetStrategy.LATEST)

        # error -> offset out of range without offset strategy
        subscriptions.seek(tp, 4)
        fetcher._default_reset_strategy = OffsetResetStrategy.NONE
        needs_wake_up = yield from fetcher._proc_fetch_request(
            assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(OffsetOutOfRangeError):
            yield from fetcher.next_record([])

        yield from fetcher.close()
Example #33
    def test_proc_fetch_request(self):
        client = AIOKafkaClient(
            loop=self.loop,
            bootstrap_servers=[])
        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        tp = TopicPartition('test', 0)
        tp_info = (tp.topic, [(tp.partition, 155, 100000)])
        req = FetchRequest(
            -1,  # replica_id
            100, 100, [tp_info])

        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()

        builder = LegacyRecordBatchBuilder(
            magic=1, compression_type=0, batch_size=99999999)
        builder.append(offset=4, value=b"test msg", key=None, timestamp=None)
        raw_batch = bytes(builder.build())

        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 0, 9, raw_batch)])]))
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        state = TopicPartitionState()
        state.seek(0)
        subscriptions.assignment[tp] = state
        subscriptions.needs_partition_assignment = False
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone(), None)  # invalid offset, msg is ignored

        state.seek(4)
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone().value, b"test msg")

        # error -> no partition found
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 3, 9, raw_batch)])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> topic auth failed
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 29, 9, raw_batch)])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(TopicAuthorizationFailedError):
            yield from fetcher.next_record([])

        # error -> unknown
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, -1, 9, raw_batch)])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> offset out of range with offset strategy
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 1, 9, raw_batch)])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        with mock.patch.object(fetcher, "update_fetch_positions") as mocked:
            mocked.side_effect = asyncio.coroutine(lambda o: None)
            needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
            self.assertEqual(needs_wake_up, False)
            self.assertEqual(state.is_fetchable(), False)
            mocked.assert_called_with([tp])

        # error -> offset out of range with strategy errors out
        state.seek(4)
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 1, 9, [(4, 10, raw_batch)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        with mock.patch.object(fetcher, "update_fetch_positions") as mocked:
            # the exception should not fail execution here
            @asyncio.coroutine
            def mock_async_raises(offsets):
                raise Exception()
            mocked.side_effect = mock_async_raises
            needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
            self.assertEqual(needs_wake_up, False)
            self.assertEqual(state.is_fetchable(), False)
            mocked.assert_called_with([tp])

        # error -> offset out of range without offset strategy
        state.seek(4)
        subscriptions._default_offset_reset_strategy = OffsetResetStrategy.NONE
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, 1, 9, raw_batch)])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(OffsetOutOfRangeError):
            yield from fetcher.next_record([])

        yield from fetcher.close()
Example #34
class BatchBuilder:
    def __init__(self, magic, batch_size, compression_type):
        self._builder = LegacyRecordBatchBuilder(magic, compression_type,
                                                 batch_size)
        self._relative_offset = 0
        self._buffer = None
        self._closed = False

    def append(self, *, timestamp, key, value):
        """Add a message to the batch.

        Arguments:
            timestamp (float or None): epoch timestamp in seconds. If None,
                the timestamp will be set to the current time. If submitting to
                a 0.8.x or 0.9.x broker, the timestamp will be ignored.
            key (bytes or None): the message key. `key` and `value` may not
                both be None.
            value (bytes or None): the message value. `key` and `value` may not
                both be None.

        Returns:
            If the message was successfully added, returns a metadata object
            with crc, offset, size, and timestamp fields. If the batch is full
            or closed, returns None.
        """
        if self._closed:
            return None

        metadata = self._builder.append(self._relative_offset, timestamp, key,
                                        value)

        # Check if we could add the message
        if metadata is None:
            return None

        self._relative_offset += 1
        return metadata

    def close(self):
        """Close the batch to further updates.

        Closing the batch before submitting to the producer ensures that no
        messages are added via the ``producer.send()`` interface. To gracefully
        support both the batch and individual message interfaces, leave the
        batch open. For complete control over the batch's contents, close
        before submission. Closing a batch has no effect on when it's sent to
        the broker.

        A batch may not be reopened after it's closed.
        """
        if self._closed:
            return
        self._closed = True
        data = self._builder.build()
        self._buffer = io.BytesIO(Int32.encode(len(data)) + data)
        del self._builder

    def _build(self):
        if not self._closed:
            self.close()
        return self._buffer

    def size(self):
        """Get the size of batch in bytes."""
        if self._buffer:
            return self._buffer.getbuffer().nbytes
        else:
            return self._builder.size()

    def record_count(self):
        """Get the number of records in the batch."""
        return self._relative_offset
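
To close, a minimal usage sketch of BatchBuilder under stated assumptions:
direct construction and the argument values are for illustration only (in
practice a producer normally creates and submits batches), with magic=1
selecting the legacy v1 format handled by LegacyRecordBatchBuilder above.

# Hypothetical direct usage; normally the producer creates the batch.
batch = BatchBuilder(magic=1, batch_size=16384, compression_type=0)

# append() returns a metadata object (crc, offset, size, timestamp)
# on success, or None once the batch is full or closed.
meta = batch.append(timestamp=None, key=b"key", value=b"value")
assert meta is not None and meta.offset == 0

batch.close()  # freeze the batch; further appends return None
assert batch.append(timestamp=None, key=b"k", value=b"v") is None
assert batch.record_count() == 1
print(batch.size())  # size in bytes, including the 4-byte length prefix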