示例#1
0
def test_init_fetches(fetcher, mocker):
    """Verify when ``init_fetches()`` creates and sends fetch requests.

    While ``fetcher._records`` is non-empty or ``fetcher._iterator`` is set,
    ``_create_fetch_requests`` must not be called and nothing is returned.
    Otherwise every request from the patched ``_create_fetch_requests`` has
    to be passed to ``fetcher._client.send`` together with its node id.
    """
    max_wait_ms = fetcher.config['fetch_max_wait_ms']
    min_bytes = fetcher.config['fetch_min_bytes']
    max_bytes = fetcher.config['max_partition_fetch_bytes']

    # One request covering partitions 0 and 1, another covering partition 2;
    # every partition is fetched from offset 0.
    fetch_requests = [
        FetchRequest(-1, max_wait_ms, min_bytes,
                     [('foobar', [(partition, 0, max_bytes)
                                  for partition in partitions])])
        for partitions in ([0, 1], [2])
    ]
    requests_by_node = dict(enumerate(fetch_requests))

    mocker.patch.object(fetcher, '_create_fetch_requests',
                        return_value=requests_by_node)

    # Case 1: previously fetched records are still buffered.
    fetcher._records.append('foobar')
    result = fetcher.init_fetches()
    assert fetcher._create_fetch_requests.call_count == 0
    assert result == []
    fetcher._records.clear()

    # Case 2: an iterator is currently set on the fetcher.
    fetcher._iterator = 'foo'
    result = fetcher.init_fetches()
    assert fetcher._create_fetch_requests.call_count == 0
    assert result == []
    fetcher._iterator = None

    # Case 3: nothing pending, so each request is sent to its node.
    result = fetcher.init_fetches()
    for node, request in requests_by_node.items():
        fetcher._client.send.assert_any_call(node, request)
    assert len(result) == len(fetch_requests)
示例#2
0
    def _create_fetch_requests(self):
        """Build one FetchRequest per leader node for the fetchable partitions.

        A partition is left out when its leader is unknown (a cluster
        metadata update is requested instead) or when the leader node
        currently has requests in flight.

        Returns:
            dict: {node_id: FetchRequest}
        """
        # node_id -> topic -> [(partition, offset, max_bytes), ...]; the
        # per-node mapping is handed to FetchRequest() through .items()
        by_node = collections.defaultdict(
            lambda: collections.defaultdict(list))

        for tp in self._subscriptions.fetchable_partitions():
            leader = self._client.cluster.leader_for_partition(tp)
            if leader is None or leader == -1:
                log.debug(
                    "No leader found for partition %s."
                    " Requesting metadata update", tp)
                self._client.cluster.request_update()
                continue
            if self._client.in_flight_request_count(leader) != 0:
                # the leader is still busy with earlier requests
                continue

            # known leader with nothing in flight: fetch from the
            # partition's current position
            offset = self._subscriptions.assignment[tp].position
            by_node[leader][tp.topic].append(
                (tp.partition, offset,
                 self.config['max_partition_fetch_bytes']))
            log.debug("Adding fetch request for partition %s at offset %d",
                      tp, offset)

        return {
            leader: FetchRequest(
                -1,  # replica_id
                self.config['fetch_max_wait_ms'],
                self.config['fetch_min_bytes'],
                topics.items())
            for leader, topics in six.iteritems(by_node)
        }
示例#3
0
    def test_proc_fetch_request(self):
        """Drive ``Fetcher._proc_fetch_request`` with mocked broker replies.

        ``client.send`` is replaced by a mock that returns hand-built
        ``FetchResponse`` objects: first a normal record, then several
        error codes. For each case the returned ``needs_wake_up`` flag and
        the resulting fetcher/subscription state are checked.
        """
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        tp = TopicPartition('test', 0)
        # (topic, [(partition, fetch_offset, max_bytes)])
        tp_info = (tp.topic, [(tp.partition, 155, 100000)])
        req = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [tp_info])

        # Stub out every client call the fetcher may make so that no real
        # network traffic is needed.
        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()
        msg = Message(b"test msg")
        msg._encode_self()
        # Response with error code 0 carrying a single message at offset 4.
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 0, 9, [(4, 10, msg)])])]))
        # tp has not been added to subscriptions.assignment yet, so the
        # response is expected not to wake the consumer.
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # Assign tp with position 0 while the returned message sits at
        # offset 4.
        state = TopicPartitionState()
        state.seek(0)
        subscriptions.assignment[tp] = state
        subscriptions.needs_partition_assignment = False
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone(), None)  # invalid offset, msg is ignored

        # Position now matches the message offset, so the message is
        # returned from the buffer.
        state.seek(4)
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone().value, b"test msg")

        # error -> no partition found
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 3, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> topic auth failed
        # The error is surfaced to the caller of next_record().
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 29, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(TopicAuthorizationFailedError):
            yield from fetcher.next_record([])

        # error -> unknown
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, -1, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> offset out of range
        # SubscriptionState was created with 'latest' above; afterwards the
        # partition is no longer fetchable (presumably it is awaiting an
        # offset reset -- TODO confirm).
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 1, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(state.is_fetchable(), False)

        # Same out-of-range error, but with the reset strategy switched to
        # NONE: the error is raised from next_record() instead.
        state.seek(4)
        subscriptions._default_offset_reset_strategy = OffsetResetStrategy.NONE
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 1, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(OffsetOutOfRangeError):
            yield from fetcher.next_record([])

        yield from fetcher.close()
示例#4
0
    def _create_fetch_requests(self):
        """Create fetch requests for all assigned partitions, grouped by node.

        A partition is skipped when:
        * records for it are still buffered in ``self._records``
        * its leader node already has a fetch in flight
        * it has no known leader

        A node is skipped entirely when at least one of its partitions still
        has buffered records that report a backoff.

        Returns:
            tuple: ``(requests, backoff)``. ``requests`` is a list of
            ``(node_id, FetchRequest)`` pairs; it is empty while a partition
            assignment is still needed. ``backoff`` is
            ``self._fetcher_timeout`` unless some node is backing off, in
            which case it is the smallest per-node backoff, where a node's
            backoff is the largest one reported by its partitions.
        """
        if self._subscriptions.needs_partition_assignment:
            # Same shape as the regular return value: an (empty) list of
            # requests rather than a dict.
            return [], self._fetcher_timeout

        # create the fetch info as a dict of lists of partition info tuples
        # which can be passed to FetchRequest() via .items()
        fetchable = collections.defaultdict(
            lambda: collections.defaultdict(list))
        backoff_by_nodes = collections.defaultdict(list)

        fetchable_partitions = self._subscriptions.fetchable_partitions()
        for tp in fetchable_partitions:
            node_id = self._client.cluster.leader_for_partition(tp)
            if tp in self._records:
                record = self._records[tp]
                # Calculate backoff for this node if data is only recently
                # fetched. If data is consumed before backoff we will
                # include this partition in this fetch request
                backoff = record.calculate_backoff()
                if backoff:
                    backoff_by_nodes[node_id].append(backoff)
                # We have some prefetched data for this partition already
                continue
            if node_id in self._in_flight:
                # We have in-flight fetches to this node
                continue
            if node_id is None or node_id == -1:
                log.debug(
                    "No leader found for partition %s."
                    " Waiting metadata update", tp)
            else:
                # fetch if there is a leader and no in-flight requests
                position = self._subscriptions.assignment[tp].position
                partition_info = (tp.partition, position,
                                  self._max_partition_fetch_bytes)
                fetchable[node_id][tp.topic].append(partition_info)
                log.debug("Adding fetch request for partition %s at offset %d",
                          tp, position)

        requests = []
        for node_id, partition_data in fetchable.items():
            if node_id in backoff_by_nodes:
                # At least one partition is still waiting to be consumed
                continue
            req = FetchRequest(
                -1,  # replica_id
                self._fetch_max_wait_ms,
                self._fetch_min_bytes,
                partition_data.items())
            requests.append((node_id, req))
        if backoff_by_nodes:
            # Return the minimum time until any node will be ready to send
            # again (a node is ready after the max of its backoffs)
            backoff = min(map(max, backoff_by_nodes.values()))
        else:
            backoff = self._fetcher_timeout
        return requests, backoff
示例#5
0
    def test_compacted_topic_consumption(self):
        """Records of a compacted topic may arrive with gaps between offsets.

        A response holding offsets 160, 162 and 167 is fed to a partition
        positioned at 155; the returned records and the positions reached
        after ``getone()`` and ``getall()`` are checked.
        """
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        client.ready = mock.MagicMock(
            side_effect=asyncio.coroutine(lambda a: True))
        client.force_metadata_update = mock.MagicMock(
            side_effect=asyncio.coroutine(lambda: False))
        client.send = mock.MagicMock()

        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        topic_partition = TopicPartition('test', 0)
        request = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [(topic_partition.topic,
              [(topic_partition.partition, 155, 100000)])])

        # Build and encode the three keyed messages served by the mock.
        encoded = []
        payloads = ((b"12345", b"1"), (b"23456", b"2"), (b"34567", b"3"))
        for value, key in payloads:
            message = Message(value, key=key)
            message._encode_self()
            encoded.append(message)
        msg_a, msg_b, msg_c = encoded

        # entries are (offset, len_bytes, bytes); offsets have gaps
        message_set = [(160, 5, msg_a), (162, 5, msg_b), (167, 5, msg_c)]
        # partition, error_code, highwater_offset, messages
        partition_payload = (0, 0, 3000, message_set)
        response = FetchResponse([('test', [partition_payload])])
        client.send.side_effect = asyncio.coroutine(lambda n, r: response)

        partition_state = TopicPartitionState()
        partition_state.seek(155)
        partition_state.drop_pending_message_set = False
        subscriptions.assignment[topic_partition] = partition_state
        subscriptions.needs_partition_assignment = False
        fetcher._in_flight.add(0)

        woke_up = yield from fetcher._proc_fetch_request(0, request)
        self.assertEqual(woke_up, True)
        buffered = fetcher._records[topic_partition]

        def summary(record):
            return (record.value, record.key, record.offset)

        # getone() hands out the first record and moves the position past it
        first = buffered.getone()
        self.assertEqual(partition_state.position, 161)
        self.assertEqual(summary(first), (msg_a.value, msg_a.key, 160))

        # getall() hands out the remaining two records
        second, third = buffered.getall()
        self.assertEqual(partition_state.position, 168)
        self.assertEqual(summary(second), (msg_b.value, msg_b.key, 162))
        self.assertEqual(summary(third), (msg_c.value, msg_c.key, 167))
示例#6
0
    async def test_compacted_topic_consumption(self):
        """Records of a compacted topic may arrive with gaps between offsets.

        A raw batch holding offsets 160, 162 and 167 is fed to a partition
        positioned at 155; the returned records and the positions reached
        after ``getone()`` and ``getall()`` are checked.
        """
        async def ready(conn):
            return True

        def metadata_already_updated():
            done = create_future()
            done.set_result(True)
            return done

        client = AIOKafkaClient(bootstrap_servers=[])
        client.ready = mock.MagicMock(side_effect=ready)
        client.force_metadata_update = mock.MagicMock(
            side_effect=metadata_already_updated)
        client.send = mock.MagicMock()

        subscriptions = SubscriptionState()
        fetcher = Fetcher(client, subscriptions)

        topic_partition = TopicPartition('test', 0)
        request = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [(topic_partition.topic,
              [(topic_partition.partition, 155, 100000)])])

        # Serialise three keyed records whose offsets have gaps.
        builder = LegacyRecordBatchBuilder(magic=1,
                                           compression_type=0,
                                           batch_size=99999999)
        entries = (
            (160, b"12345", b"1"),
            (162, b"23456", b"2"),
            (167, b"34567", b"3"),
        )
        for offset, value, key in entries:
            builder.append(offset, value=value, key=key, timestamp=None)
        raw_batch = bytes(builder.build())

        # partition, error_code, highwater_offset, raw batch bytes
        partition_payload = (0, 0, 3000, raw_batch)
        response = FetchResponse([('test', [partition_payload])])

        async def send(node, request):
            return response

        subscriptions.assign_from_user({topic_partition})
        assignment = subscriptions.subscription.assignment
        partition_state = assignment.state_value(topic_partition)
        client.send.side_effect = send

        partition_state.seek(155)
        fetcher._in_flight.add(0)
        woke_up = await fetcher._proc_fetch_request(assignment, 0, request)
        self.assertEqual(woke_up, True)
        buffered = fetcher._records[topic_partition]

        def summary(record):
            return (record.value, record.key, record.offset)

        # getone() returns the closest record in the batch, offset=160
        first = buffered.getone()
        self.assertEqual(partition_state.position, 161)
        self.assertEqual(summary(first), (b"12345", b"1", 160))

        # getall() hands out the remaining two records
        second, third = buffered.getall()
        self.assertEqual(partition_state.position, 168)
        self.assertEqual(summary(second), (b"23456", b"2", 162))
        self.assertEqual(summary(third), (b"34567", b"3", 167))
示例#7
0
    async def test_proc_fetch_request(self):
        """Drive ``Fetcher._proc_fetch_request`` with mocked broker replies.

        ``client.send`` is replaced by a mock that returns whatever
        ``fetch_response`` is currently bound to, so each stage rebinds it
        to simulate a successful fetch or a specific error code. The
        returned ``needs_wake_up`` flag and the resulting state are then
        checked.
        """
        client = AIOKafkaClient(bootstrap_servers=[])
        subscriptions = SubscriptionState()
        fetcher = Fetcher(client, subscriptions, auto_offset_reset="latest")

        tp = TopicPartition('test', 0)
        # (topic, [(partition, fetch_offset, max_bytes)])
        tp_info = (tp.topic, [(tp.partition, 4, 100000)])
        req = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [tp_info])

        async def ready(conn):
            return True

        def force_metadata_update():
            # Return an already-resolved future with result False.
            fut = create_future()
            fut.set_result(False)
            return fut

        # Stub out every client call the fetcher may make so that no real
        # network traffic is needed.
        client.ready = mock.MagicMock()
        client.ready.side_effect = ready
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = force_metadata_update
        client.send = mock.MagicMock()

        # A raw batch holding a single record at offset 4.
        builder = LegacyRecordBatchBuilder(magic=1,
                                           compression_type=0,
                                           batch_size=99999999)
        builder.append(offset=4, value=b"test msg", key=None, timestamp=None)
        raw_batch = bytes(builder.build())

        # Partition 0, error code 0, followed by the raw batch.
        fetch_response = FetchResponse([('test', [(0, 0, 9, raw_batch)])])

        async def send(node, request):
            # Reads the enclosing variable at call time, so rebinding
            # fetch_response below changes what the mocked broker returns.
            nonlocal fetch_response
            return fetch_response

        client.send.side_effect = send
        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)

        # The partition has no active position, so will ignore result
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(fetcher._records, {})

        # The partition's position does not match request's fetch offset
        subscriptions.seek(tp, 0)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(fetcher._records, {})

        # Position matches the request's fetch offset: the record is
        # buffered and can be read back.
        subscriptions.seek(tp, 4)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone().value, b"test msg")

        # If position changed after fetch request passed
        # NOTE(review): buf still refers to the buffer object obtained
        # above, not to fetcher._records[tp] after this request -- confirm
        # that this is the object the check is meant to exercise.
        subscriptions.seek(tp, 4)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        subscriptions.seek(tp, 10)
        self.assertIsNone(buf.getone())

        # If assignment is lost after fetch request passed
        subscriptions.seek(tp, 4)
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        subscriptions.unsubscribe()
        self.assertIsNone(buf.getone())

        # Re-assign the partition and pick up the new assignment/state
        # objects for the error-code stages below.
        subscriptions.assign_from_user({tp})
        assignment = subscriptions.subscription.assignment
        tp_state = assignment.state_value(tp)

        # error -> no partition found (UnknownTopicOrPartitionError)
        # Expected to trigger exactly one extra force_metadata_update call.
        subscriptions.seek(tp, 4)
        fetcher._records.clear()
        fetch_response = FetchResponse([('test', [(0, 3, 9, raw_batch)])])
        cc = client.force_metadata_update.call_count
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(client.force_metadata_update.call_count, cc + 1)

        # error -> topic auth failed (TopicAuthorizationFailedError)
        # The error is surfaced to the caller of next_record().
        fetch_response = FetchResponse([('test', [(0, 29, 9, raw_batch)])])
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(TopicAuthorizationFailedError):
            await fetcher.next_record([])

        # error -> unknown
        fetch_response = FetchResponse([('test', [(0, -1, 9, raw_batch)])])
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> offset out of range with offset strategy
        # The fetcher was created with auto_offset_reset="latest", so the
        # partition is left awaiting a reset with the LATEST strategy.
        fetch_response = FetchResponse([('test', [(0, 1, 9, raw_batch)])])
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(tp_state.has_valid_position, False)
        self.assertEqual(tp_state.awaiting_reset, True)
        self.assertEqual(tp_state.reset_strategy, OffsetResetStrategy.LATEST)

        # error -> offset out of range without offset strategy
        # fetch_response is still the out-of-range response from above; the
        # error is now raised from next_record() instead.
        subscriptions.seek(tp, 4)
        fetcher._default_reset_strategy = OffsetResetStrategy.NONE
        needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(OffsetOutOfRangeError):
            await fetcher.next_record([])

        await fetcher.close()