Пример #1
0
    def iter_changes(self, since, forever):
        """
        Generate a change for every message read from the Kafka consumer.

        ``since`` is either None, meaning start from the end of the change
        stream, or a non-empty dictionary of topic partition offsets whose
        keys hold the topic at index 0 and the partition at index 1.
        ``forever`` selects an infinite consumer timeout instead of MIN_TIMEOUT.

        Raises ValueError when the filtered ``since`` is neither None nor a
        non-empty dict, or when it names topics outside ``self._topics``.
        """
        start_from_latest = since is None
        if forever:
            timeout = float('inf')
        else:
            timeout = MIN_TIMEOUT
        # without starting offsets the consumer resets to 'largest', otherwise to 'smallest'
        self._init_consumer(
            timeout,
            auto_offset_reset='largest' if start_from_latest else 'smallest',
        )

        since = self._filter_offsets(since)
        # None is the special "start from the end of the change stream" value
        if since is not None:
            if not (isinstance(since, dict) and since):
                raise ValueError("'since' must be None or a topic offset dictionary")

        if not start_from_latest:
            if self.strict:
                validate_offsets(since)

            unknown_topics = {key[0] for key in since} - set(self._topics)
            if unknown_topics:
                raise ValueError("'since' contains extra topics: {}".format(list(unknown_topics)))

            self._processed_topic_offsets = copy(since)

            # Position the consumer at each offset that was passed in
            for key, offset in since.items():
                partition = TopicPartition(key[0], key[1])
                self.consumer.seek(partition, int(offset))

        try:
            for message in self.consumer:
                position = (message.topic, message.partition)
                self._processed_topic_offsets[position] = message.offset
                yield change_from_kafka_message(message)
        except StopIteration:
            # a StopIteration reaching this handler is only acceptable when not waiting forever
            assert not forever, 'Kafka pillow should not timeout when waiting forever!'
def validate_checkpoints(print_only):
    """
    Validate the checkpoint offsets of every pillow backed by a KafkaChangeFeed.

    :param print_only: when truthy, print a message for each problem
        checkpoint and keep going; otherwise raise on the first problem.
    :raises Exception: if ``print_only`` is falsy and ``validate_offsets``
        raises ``UnavailableKafkaOffset`` for a pillow's checkpoint.
    """
    for pillow in get_all_pillow_instances():
        if isinstance(pillow.get_change_feed(), KafkaChangeFeed):
            checkpoint_dict = _get_checkpoint_dict(pillow)
            try:
                validate_offsets(checkpoint_dict)
            except UnavailableKafkaOffset as e:
                message = u'Problem with checkpoint for {}: {}'.format(
                    pillow.pillow_id, e)
                if print_only:
                    # The call form is valid on Python 3 (the bare print
                    # statement is a SyntaxError there) and prints the same
                    # single value on Python 2.
                    print(message)
                else:
                    raise Exception(message)
Пример #3
0
def validate_checkpoints(print_only):
    """
    Validate the checkpoint offsets of every pillow backed by a KafkaChangeFeed.

    :param print_only: when truthy, print a message for each problem
        checkpoint and keep going; otherwise raise on the first problem.
    :raises Exception: if ``print_only`` is falsy and ``validate_offsets``
        raises ``UnavailableKafkaOffset`` for a pillow's checkpoint.
    """
    for pillow in get_all_pillow_instances():
        if isinstance(pillow.get_change_feed(), KafkaChangeFeed):
            checkpoint_dict = _get_checkpoint_dict(pillow)
            try:
                validate_offsets(checkpoint_dict)
            except UnavailableKafkaOffset as e:
                message = u'Problem with checkpoint for {}: {}'.format(
                    pillow.pillow_id, e
                )
                if print_only:
                    # The call form is valid on Python 3 (the bare print
                    # statement is a SyntaxError there) and prints the same
                    # single value on Python 2.
                    print(message)
                else:
                    raise Exception(message)
def validate_checkpoints(print_only):
    """
    Validate the checkpoint offsets of each active pillow that uses a KafkaChangeFeed.

    A pillow is considered when its id is listed in
    ``settings.ACTIVE_PILLOW_NAMES``; if that setting is absent every pillow
    is considered.

    :param print_only: when truthy, print each problem and continue;
        otherwise raise ``Exception`` on the first problem checkpoint.
    """
    for pillow in get_all_pillow_instances():
        # a missing setting falls back to a list holding this pillow's own id
        active_names = getattr(settings, 'ACTIVE_PILLOW_NAMES', [pillow.pillow_id])
        if pillow.pillow_id not in active_names:
            continue
        if not isinstance(pillow.get_change_feed(), KafkaChangeFeed):
            continue

        checkpoint_dict = _get_checkpoint_dict(pillow)
        try:
            validate_offsets(checkpoint_dict)
        except UnavailableKafkaOffset as err:
            problem = 'Problem with checkpoint for {}: {}'.format(pillow.pillow_id, err)
            if not print_only:
                raise Exception(problem)
            print(problem)
Пример #5
0
def validate_checkpoints(print_only):
    """
    Check checkpoint offsets for every active, Kafka-fed pillow.

    Pillows whose id is not in ``settings.ACTIVE_PILLOW_NAMES`` are skipped
    (all pillows count as active when the setting is not defined), as are
    pillows whose change feed is not a ``KafkaChangeFeed``.

    :param print_only: truthy to print problems and carry on; falsy to raise
        ``Exception`` as soon as a checkpoint fails validation.
    """
    for pillow in get_all_pillow_instances():
        pillow_id = pillow.pillow_id
        # when the setting is absent the default makes this pillow count as active
        if pillow_id not in getattr(settings, 'ACTIVE_PILLOW_NAMES', [pillow.pillow_id]):
            continue
        if not isinstance(pillow.get_change_feed(), KafkaChangeFeed):
            continue

        checkpoint_dict = _get_checkpoint_dict(pillow)
        try:
            validate_offsets(checkpoint_dict)
        except UnavailableKafkaOffset as error:
            text = 'Problem with checkpoint for {}: {}'.format(pillow.pillow_id, error)
            if not print_only:
                raise Exception(text)
            print(text)
Пример #6
0
    def iter_changes(
        self,
        since: Optional[Dict[TopicPartition, int]],
        forever: bool,
    ) -> Iterator[Change]:
        """
        Yield a change for each message read from the Kafka consumer.

        ``since`` is None to start from the end of the change stream, or a
        dictionary of topic partition offsets to resume from. ``forever``
        selects MAX_TIMEOUT for the consumer instead of MIN_TIMEOUT.

        Raises ValueError if the filtered ``since`` is neither None nor a
        dict, is an empty dict, or contains topics outside ``self._topics``.
        """
        start_from_latest = since is None
        if forever:
            timeout = MAX_TIMEOUT
        else:
            timeout = MIN_TIMEOUT
        # without starting offsets the consumer resets to 'largest', otherwise to 'smallest'
        self._init_consumer(
            timeout,
            auto_offset_reset='largest' if start_from_latest else 'smallest',
        )

        since = self._filter_offsets(since)
        # None is the special "start from the end of the change stream" value
        if since is not None and not isinstance(since, dict):
            raise ValueError(
                f"Expected None or a topic offset dictionary. Got {since!r}")
        if since == {}:
            raise ValueError('Topic partition offsets not found')

        if not start_from_latest:
            if self.strict:
                validate_offsets(since)

            unknown_topics = {key[0] for key in since} - set(self._topics)
            if unknown_topics:
                raise ValueError("'since' contains extra topics: {}".format(
                    list(unknown_topics)))

            self._processed_topic_offsets = copy(since)

            # Position the consumer at each offset that was passed in
            for key, offset in since.items():
                partition = TopicPartition(key[0], key[1])
                self.consumer.seek(partition, int(offset))

        try:
            for message in self.consumer:
                position = (message.topic, message.partition)
                self._processed_topic_offsets[position] = message.offset
                yield change_from_kafka_message(message)
        except StopIteration:
            # nothing to do: this only signals that the end of the feed was reached
            pass
Пример #7
0
    def iter_changes(self, since, forever):
        """
        Generate a change for every message read from a Kafka consumer.

        ``since`` is either None, meaning start from the end of the change
        stream, or a non-empty dictionary of topic partition offsets whose
        keys hold the topic at index 0. ``forever`` selects a timeout of -1
        (wait forever) instead of MIN_TIMEOUT.

        Raises ValueError when ``since`` is neither None nor a non-empty dict,
        or when it names topics outside ``self._topics``.
        """
        # None is the special "start from the end of the change stream" value
        if since is not None:
            if not (isinstance(since, dict) and since):
                raise ValueError(
                    "'since' must be None or a topic offset dictionary")

        start_from_latest = since is None

        # timeout is in milliseconds; -1 means wait forever for changes
        if forever:
            timeout = -1
        else:
            timeout = MIN_TIMEOUT

        consumer = self._get_consumer(
            timeout,
            auto_offset_reset='largest' if start_from_latest else 'smallest',
        )
        if not start_from_latest:
            if self.strict:
                validate_offsets(since)

            checkpoint_topics = {key[0] for key in since}
            unknown_topics = checkpoint_topics - set(self._topics)
            if unknown_topics:
                raise ValueError("'since' contains extra topics: {}".format(
                    list(unknown_topics)))

            self._processed_topic_offsets = copy(since)

            # The offsets dict comes first; each topic without a checkpoint is
            # then added by name so all of its available partitions are consumed.
            start_points = [copy(self._processed_topic_offsets)]
            start_points.extend(set(self._topics) - checkpoint_topics)

            # this is how the consumer is told where in the sequence to start
            consumer.set_topic_partitions(*start_points)

        try:
            for message in consumer:
                position = (message.topic, message.partition)
                self._processed_topic_offsets[position] = message.offset
                yield change_from_kafka_message(message)
        except ConsumerTimeout:
            # a consumer timeout is only acceptable when not waiting forever
            assert not forever, 'Kafka pillow should not timeout when waiting forever!'
Пример #8
0
    def iter_changes(self, since, forever):
        """
        Yield changes built from the messages of the Kafka consumer.

        ``since`` may be None (begin at the end of the change stream) or a
        non-empty dictionary of topic partition offsets; each key carries the
        topic at index 0 and the partition at index 1. When ``forever`` is
        truthy the consumer timeout is infinite, otherwise it is MIN_TIMEOUT.

        Raises ValueError if the filtered ``since`` is neither None nor a
        non-empty dict, or if it contains topics not in ``self._topics``.
        """
        start_from_latest = since is None
        reset = 'smallest'
        if start_from_latest:
            reset = 'largest'
        if forever:
            timeout = float('inf')
        else:
            timeout = MIN_TIMEOUT
        self._init_consumer(timeout, auto_offset_reset=reset)

        since = self._filter_offsets(since)
        # None is the special "start from the end of the change stream" value
        if since is not None:
            if not (isinstance(since, dict) and since):
                raise ValueError(
                    "'since' must be None or a topic offset dictionary")

        if not start_from_latest:
            if self.strict:
                validate_offsets(since)

            topics_in_since = {entry[0] for entry in since}
            unexpected = topics_in_since - set(self._topics)
            if unexpected:
                raise ValueError("'since' contains extra topics: {}".format(
                    list(unexpected)))

            self._processed_topic_offsets = copy(since)

            # Move the consumer to each of the offsets that were passed in
            for entry, offset in since.items():
                target = TopicPartition(entry[0], entry[1])
                self.consumer.seek(target, int(offset))

        try:
            for message in self.consumer:
                seen_key = (message.topic, message.partition)
                self._processed_topic_offsets[seen_key] = message.offset
                yield change_from_kafka_message(message)
        except StopIteration:
            # a StopIteration reaching this handler is only acceptable when not waiting forever
            assert not forever, 'Kafka pillow should not timeout when waiting forever!'
Пример #9
0
 def _validate_offsets(self, offsets):
     """
     Validate the offsets carried by ``offsets``.

     Each entry with more than two items contributes a mapping from its first
     item to its third item; shorter entries are ignored. The resulting dict
     is passed to ``validate_offsets``.
     """
     expected_values = {}
     for entry in offsets:
         if len(entry) > 2:
             expected_values[entry[0]] = entry[2]
     validate_offsets(expected_values)
Пример #10
0
 def _validate_offsets(self, offsets):
     """
     Build an expected-offsets dict from ``offsets`` and validate it.

     Entries with two or fewer items are skipped; for the rest, the first
     item becomes the key and the third item the value handed to
     ``validate_offsets``.
     """
     expected_values = {}
     for item in offsets:
         if len(item) <= 2:
             continue
         expected_values[item[0]] = item[2]
     validate_offsets(expected_values)