Example #1
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic: first_available for topic, first_available in first_available_offsets.items()
     }
     next(feed.iter_changes(since=since, forever=False))
Example #2
 def __init__(self, *topics):
     self.topics = topics
     self.change_feed = KafkaChangeFeed(
         topics=topics,
         group_id='test-{}'.format(uuid.uuid4().hex),
     )
     self.changes = None
Example #3
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic: first_available for topic, first_available in first_available_offsets.items()
     }
     next(feed.iter_changes(since=since, forever=False))
Example #4
    def test_checkpoint_with_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        pillow_name = 'test-multi-topic-checkpoints'
        checkpoint = PillowCheckpoint(pillow_name)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            document_store=None,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=MultiTopicCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )
        offsets = feed.get_current_offsets()
        self.assertEqual(set([topics.FORM, topics.CASE]), set(offsets.keys()))

        # send a few changes to kafka so they should be picked up by the pillow
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(since=offsets, forever=False)
        self.assertEqual(4, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(8, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
Example #5
    def test_checkpoint_with_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        pillow_name = 'test-multi-topic-checkpoints'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )
        offsets = feed.get_latest_offsets()
        self.assertEqual(set([(topics.FORM, 0), (topics.CASE, 0)]), set(offsets.keys()))

        # send a few changes to kafka so they should be picked up by the pillow
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(since=offsets, forever=False)
        self.assertEqual(4, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(8, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
Example #6
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE],
                            client_id='test-kafka-feed',
                            strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets(
         [topics.FORM, topics.CASE])
     next(feed.iter_changes(since=first_available_offsets, forever=False))
Example #7
    def test_dont_create_checkpoint_past_current(self):
        pillow_name = 'test-checkpoint-reset'

        # initialize change feed and pillow
        feed = KafkaChangeFeed(topics=topics.USER_TOPICS, group_id='test-kafka-feed')
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )

        original_kafka_offsets = feed.get_latest_offsets()
        current_kafka_offsets = deepcopy(original_kafka_offsets)
        self.assertEqual(feed.get_current_checkpoint_offsets(), {})
        self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

        publish_stub_change(topics.COMMCARE_USER)
        # the following line causes tests to fail if you have multiple partitions
        current_kafka_offsets[(topics.COMMCARE_USER, 0)] += 1
        pillow.process_changes(since=original_kafka_offsets, forever=False)
        self.assertEqual(1, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), current_kafka_offsets)
Example #8
    def test_dont_create_checkpoint_past_current(self):
        pillow_name = 'test-checkpoint-reset'

        # initialize change feed and pillow
        feed = KafkaChangeFeed(topics=topics.USER_TOPICS, client_id='test-kafka-feed')
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )

        original_kafka_offsets = feed.get_latest_offsets()
        current_kafka_offsets = deepcopy(original_kafka_offsets)
        self.assertEqual(feed.get_current_checkpoint_offsets(), {})
        self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

        publish_stub_change(topics.COMMCARE_USER)
        # the following line causes tests to fail if you have multiple partitions
        current_kafka_offsets[(topics.COMMCARE_USER, 0)] += 1
        pillow.process_changes(since=original_kafka_offsets, forever=False)
        self.assertEqual(1, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), current_kafka_offsets)
Example #9
    def handle(self, **options):
        if options['print_kafka_offsets']:
            start, end = self.get_min_max_offsets()
            print("\n\nKakfa topic offset range: {} - {}".format(start, end))
            return

        start_offset = options['offset_start']
        end_offset = options['offset_end']

        start, end = self.get_min_max_offsets()
        if start_offset < start:
            start_offset = start
        if end_offset < 0 or end_offset > end:
            end_offset = end

        if start_offset > end_offset:
            raise CommandError("Start greater than end: {} > {}".format(start_offset, end_offset))

        print('Using kafka offset range: {} - {}'.format(start_offset, end_offset))

        if options['find_start_offset']:
            find_first_match = FindFirstMatch(start_offset, end_offset, check_user_at_offset)
            first_matching_offset = find_first_match.search()
            if first_matching_offset is None:
                raise CommandError("Unable to find first matching offset. "
                                   "Try a different search range.")
            else:
                print("\nFirst matching offset = {}".format(first_matching_offset))
            return

        check = options['check']

        seen_ids = set()
        change_feed = KafkaChangeFeed(topics=[COMMCARE_USER], group_id='user-repair')
        for change in change_feed.iter_changes(since=start_offset, forever=False):
            if change.sequence_id > end_offset:
                return

            if change.id in seen_ids:
                continue

            seen_ids.add(change.id)

            if change.deleted:
                continue

            try:
                user = change.get_document()
            except ResourceNotFound:
                continue

            user = CommCareUser.wrap(user)

            if user_looks_ok(user):
                continue

            restore_domain_membership(user, check=check)

            if change.sequence_id % 100 == 0:
                print("Processed up to offset: {}".format(change.sequence_id))
Example #10
    def test_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id="test-kafka-feed")
        self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
        producer = KeyedProducer(get_kafka_client_or_none())
        offsets = feed.get_current_offsets()
        send_to_kafka(
            producer, topics.FORM, ChangeMeta(document_id="1", data_source_type="form", data_source_name="form")
        )
        send_to_kafka(
            producer, topics.CASE, ChangeMeta(document_id="2", data_source_type="case", data_source_name="case")
        )
        send_to_kafka(
            producer,
            topics.FORM_SQL,
            ChangeMeta(document_id="3", data_source_type="form-sql", data_source_name="form-sql"),
        )
        send_to_kafka(
            producer,
            topics.CASE_SQL,
            ChangeMeta(document_id="4", data_source_type="case-sql", data_source_name="case-sql"),
        )

        changes = list(feed.iter_changes(since=offsets, forever=False))
        self.assertEqual(2, len(changes))
        self.assertEqual(set(["1", "2"]), set([change.id for change in changes]))
Example #11
 def test_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic_partition: offset - 1
         for topic_partition, offset in first_available_offsets.items()
     }
     with self.assertRaises(UnavailableKafkaOffset):
         next(feed.iter_changes(since=since, forever=False))
Example #12
 def test_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic_partition: offset - 1
         for topic_partition, offset in first_available_offsets.items()
     }
     with self.assertRaises(UnavailableKafkaOffset):
         next(feed.iter_changes(since=since, forever=False))
Example #13
 def test_multiple_topics(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = feed.get_current_offsets()
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     self.assertEqual(2, len(changes))
     found_change_ids = set([change.id for change in changes])
     self.assertEqual(set([meta.document_id for meta in expected_metas]), found_change_ids)
     for unexpected in unexpected_metas:
         self.assertTrue(unexpected.document_id not in found_change_ids)
Example #14
 def test_multiple_topics_with_partial_checkpoint(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = {'form': feed.get_latest_offsets()['form']}
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     # should include at least the form and the case (may have more than one case since not
     # specifying a checkpoint rewinds it to the beginning of the feed)
     self.assertTrue(len(changes) > 1)
     found_change_ids = set([change.id for change in changes])
     for expected_id in set([meta.document_id for meta in expected_metas]):
         self.assertTrue(expected_id in found_change_ids)
Example #15
 def test_multiple_topics_with_partial_checkpoint(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = {'form': feed.get_current_offsets()['form']}
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     # should include at least the form and the case (may have more than one case since not
     # specifying a checkpoint rewinds it to the beginning of the feed)
     self.assertTrue(len(changes) > 1)
     found_change_ids = set([change.id for change in changes])
     for expected_id in set([meta.document_id for meta in expected_metas]):
         self.assertTrue(expected_id in found_change_ids)
Example #16
 def test_multiple_topics(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = feed.get_latest_offsets()
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     self.assertEqual(2, len(changes))
     found_change_ids = set([change.id for change in changes])
     self.assertEqual(set([meta.document_id for meta in expected_metas]), found_change_ids)
     for unexpected in unexpected_metas:
         self.assertTrue(unexpected.document_id not in found_change_ids)
Example #17
def get_unknown_users_pillow(pillow_id='unknown-users-pillow',
                             num_processes=1,
                             process_num=0,
                             **kwargs):
    """This pillow adds users from xform submissions that come in to the User Index if they don't exist in HQ

        Processors:
          - :py:class:`corehq.pillows.user.UnknownUsersProcessor`
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.FORM_TOPICS)
    processor = UnknownUsersProcessor()
    change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                  client_id='unknown-users',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #18
def get_case_search_to_elasticsearch_pillow(
        pillow_id='CaseSearchToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    assert pillow_id == 'CaseSearchToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_SEARCH_INDEX_INFO, topics.CASE_TOPICS)
    case_processor = CaseSearchPillowProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_SEARCH_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch)
    change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS,
                                  group_id='cases-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #19
def get_location_pillow(pillow_id='location-ucr-pillow', include_ucrs=None,
                        num_processes=1, process_num=0, ucr_configs=None, **kwargs):
    """Processes updates to locations for UCR

    Note this is only applicable if a domain on the environment has `LOCATIONS_IN_UCR` flag enabled.

    Processors:
      - :py:func:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    change_feed = KafkaChangeFeed(
        [LOCATION_TOPIC], client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('Location'), StaticDataSourceProvider('Location')],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=[ucr_processor]
    )
Example #20
def get_user_pillow_old(pillow_id='UserPillow',
                        num_processes=1,
                        process_num=0,
                        **kwargs):
    """Processes users and sends them to ES.

    Processors:
      - :py:func:`pillowtop.processors.elastic.ElasticProcessor`
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'UserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=USER_INDEX_INFO,
        doc_prep_fn=transform_user_for_elasticsearch,
    )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=user_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #21
 def __init__(self, *topics):
     self.topics = topics
     self.change_feed = KafkaChangeFeed(
         topics=topics,
         client_id='test-{}'.format(uuid.uuid4().hex),
     )
     self.changes = None
Example #22
def get_user_pillow(pillow_id='user-pillow',
                    num_processes=1,
                    process_num=0,
                    skip_ucr=False,
                    **kwargs):
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(data_source_providers=[
        DynamicDataSourceProvider(),
        StaticDataSourceProvider()
    ], )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor]
        if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #23
def get_case_search_to_elasticsearch_pillow(
        pillow_id='CaseSearchToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'CaseSearchToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_SEARCH_INDEX_INFO, topics.CASE_TOPICS)
    case_processor = CaseSearchPillowProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_SEARCH_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch)
    change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS,
                                  client_id='cases-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #24
def get_group_pillow_old(pillow_id='GroupPillow',
                         num_processes=1,
                         process_num=0,
                         **kwargs):
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    This pillow adds users from xform submissions that come in to the User Index if they don't exist in HQ
    """
    assert pillow_id == 'GroupPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, GROUP_INDEX_INFO, [topics.GROUP])
    processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  client_id='groups-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=10,
            change_feed=change_feed),
    )
Example #25
def get_app_to_elasticsearch_pillow(
        pillow_id='ApplicationToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    """App pillow

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
    """
    assert pillow_id == 'ApplicationToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, APP_INDEX_INFO, [topics.APP])
    app_processor = ElasticProcessor(elasticsearch=get_es_new(),
                                     index_info=APP_INDEX_INFO,
                                     doc_prep_fn=transform_app_for_es)
    change_feed = KafkaChangeFeed(topics=[topics.APP],
                                  client_id='apps-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=app_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #26
    def __init__(self,
                 indicator_name,
                 kafka_topic,
                 processor,
                 domains=None,
                 doc_type=None):
        self.kafka_topic = kafka_topic
        self.domains = domains or processor.domains
        self.doc_type = doc_type or processor.doc_type

        change_feed = KafkaChangeFeed(topics=[self.kafka_topic],
                                      group_id=indicator_name)

        name = '{}Pillow'.format(indicator_name)
        checkpoint = PillowCheckpoint(
            'fluff.{}.{}'.format(name, get_machine_id()),
            change_feed.sequence_format)

        super(FluffPillow, self).__init__(
            name=name,
            checkpoint=checkpoint,
            change_feed=change_feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint,
                checkpoint_frequency=1000,
                change_feed=change_feed))
Example #27
def get_case_messaging_sync_pillow(
        pillow_id='case_messaging_sync_pillow',
        topics=None,
        num_processes=1,
        process_num=0,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
        **kwargs):
    if topics:
        assert set(topics).issubset(
            CASE_TOPICS), set(topics) - set(CASE_TOPICS)
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint = KafkaPillowCheckpoint(pillow_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
    )
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[CaseMessagingSyncProcessor()],
                             processor_chunk_size=processor_chunk_size)
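
For context, a hypothetical way to drive this pillow once and then stop, mirroring the process_changes(since=..., forever=False) pattern used in the test examples above (a real deployment would run the pillow continuously instead):

    pillow = get_case_messaging_sync_pillow()
    # replay from the stored checkpoint, then stop at the end of the topics
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)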
Example #28
def get_user_pillow(pillow_id='UserPillow',
                    num_processes=1,
                    process_num=0,
                    **kwargs):
    assert pillow_id == 'UserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=USER_INDEX_INFO,
        doc_prep_fn=transform_user_for_elasticsearch,
    )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  group_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=user_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #29
def get_sql_sms_pillow(pillow_id='SqlSMSPillow',
                       num_processes=1,
                       process_num=0,
                       processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                       **kwargs):
    """SMS Pillow

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
    """
    assert pillow_id == 'SqlSMSPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, SMS_INDEX_INFO, [topics.SMS])
    processor = BulkElasticProcessor(elasticsearch=get_es_new(),
                                     index_info=SMS_INDEX_INFO,
                                     doc_prep_fn=lambda x: x)
    change_feed = KafkaChangeFeed(topics=[topics.SMS],
                                  client_id='sql-sms-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
        processor_chunk_size=processor_chunk_size)
Example #30
def get_case_to_elasticsearch_pillow(pillow_id='CaseToElasticsearchPillow',
                                     num_processes=1,
                                     process_num=0,
                                     **kwargs):
    """Return a pillow that processes cases to Elasticsearch.

    Processors:
      - :py:class:`pillowtop.processors.elastic.ElasticProcessor`
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'CaseToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_INDEX_INFO, CASE_TOPICS)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
        change_filter_fn=is_couch_change_for_sql_domain)
    kafka_change_feed = KafkaChangeFeed(topics=CASE_TOPICS,
                                        client_id='cases-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #31
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1,
        process_num=0,
        **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions. This could be expanded to be more generic and include
    other processing that needs to happen on each form
    """
    change_feed = KafkaChangeFeed(topics=[topics.FORM, topics.FORM_SQL],
                                  group_id='form-processsor',
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint = PillowCheckpoint('form-submission-metadata-tracker',
                                  change_feed.sequence_format)
    form_processor = FormSubmissionMetadataTrackerProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #32
def get_report_case_to_elasticsearch_pillow(
        pillow_id='ReportCaseToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    assert pillow_id == 'ReportCaseToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, REPORT_CASE_INDEX_INFO, topics.CASE_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=REPORT_CASE_INDEX_INFO,
        doc_prep_fn=transform_case_to_report_es,
        doc_filter_fn=report_case_filter,
    )
    kafka_change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS,
                                        group_id='report-cases-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #33
def get_group_pillow(pillow_id='group-pillow',
                     num_processes=1,
                     process_num=0,
                     **kwargs):
    """Group pillow

    Processors:
      - :py:class:`corehq.pillows.groups_to_user.GroupsToUsersProcessor`
      - :py:func:`corehq.pillows.group.get_group_to_elasticsearch_processor`
    """
    assert pillow_id == 'group-pillow', 'Pillow ID is not allowed to change'
    to_user_es_processor = GroupsToUsersProcessor()
    to_group_es_processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  client_id='groups-to-users',
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint_id = "{}-{}-{}".format(pillow_id, USER_INDEX,
                                      to_group_es_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, [topics.GROUP])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[to_user_es_processor, to_group_es_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=10,
            change_feed=change_feed),
    )
Example #34
    def __init__(self, processor, pillow_name, topics, num_processes, process_num, retry_errors=False,
            processor_chunk_size=0):
        change_feed = KafkaChangeFeed(
            topics, client_id=pillow_name, num_processes=num_processes, process_num=process_num
        )
        checkpoint = KafkaPillowCheckpoint(pillow_name, topics)
        event_handler = KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
            checkpoint_callback=processor
        )
        super(ConfigurableReportKafkaPillow, self).__init__(
            name=pillow_name,
            change_feed=change_feed,
            processor=processor,
            checkpoint=checkpoint,
            change_processed_event_handler=event_handler,
            processor_chunk_size=processor_chunk_size
        )
        # set by the superclass constructor
        assert self.processors is not None
        assert len(self.processors) == 1
        self._processor = self.processors[0]
        assert self._processor.bootstrapped is not None

        # retry errors defaults to False because there is not a solution to
        # distinguish between doc save errors and data source config errors
        self.retry_errors = retry_errors
Example #35
def get_domain_kafka_to_elasticsearch_pillow(pillow_id='KafkaDomainPillow',
                                             num_processes=1,
                                             process_num=0,
                                             **kwargs):
    """Domain pillow to replicate documents to ES

    Processors:
      - :py:class:`pillowtop.processors.elastic.ElasticProcessor`
    """
    assert pillow_id == 'KafkaDomainPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, DOMAIN_INDEX_INFO, [topics.DOMAIN])
    domain_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=DOMAIN_INDEX_INFO,
        doc_prep_fn=transform_domain_for_elasticsearch,
    )
    change_feed = KafkaChangeFeed(topics=[topics.DOMAIN],
                                  client_id='domains-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=domain_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #36
def get_report_xform_to_elasticsearch_pillow(
        pillow_id='ReportXFormToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'ReportXFormToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, REPORT_XFORM_INDEX_INFO, topics.FORM_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=REPORT_XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_report_forms_index,
        doc_filter_fn=report_xform_filter)
    kafka_change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                        client_id='report-forms-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #37
def get_location_pillow(pillow_id='location-ucr-pillow',
                        include_ucrs=None,
                        num_processes=1,
                        process_num=0,
                        ucr_configs=None,
                        **kwargs):
    # Todo; is ucr_division needed?
    change_feed = KafkaChangeFeed([LOCATION_TOPIC],
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('Location'),
            StaticDataSourceProvider('Location')
        ],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[ucr_processor])
Example #38
def get_group_pillow(pillow_id='GroupPillow',
                     num_processes=1,
                     process_num=0,
                     **kwargs):
    """
    This pillow adds users from xform submissions that come in to the User Index if they don't exist in HQ
    """
    assert pillow_id == 'GroupPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, GROUP_INDEX_INFO, [topics.GROUP])
    processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=GROUP_INDEX_INFO,
    )
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  group_id='groups-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #39
    def handle(self, *args, **options):
        since = options['from']
        sleep = float(options['sleep'] or '.01')
        last_domain = None
        change_feed = KafkaChangeFeed(topic=topics.FORM, group_id='form-feed')
        for change in change_feed.iter_changes(since=since, forever=True):
            if not change.deleted:
                # this is just helpful for demos to find domain transitions
                if change.metadata.domain != last_domain:
                    last_domain = change.metadata.domain
                    print(change.sequence_id, last_domain)

                metadata = change.metadata.to_json()
                if not options['compact']:
                    metadata['country'] = _get_country(change.metadata.domain)
                message = RedisMessage(json.dumps(metadata))
                RedisPublisher(facility='form-feed', broadcast=True).publish_message(message)
                time.sleep(sleep)
Example #40
    def test_basic(self):
        # setup
        feed = KafkaChangeFeed(topics=[topics.CASE], client_id='test-kafka-feed')
        pillow_name = 'test-chunked-processing'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = ChunkedCountProcessor()
        original_process_change = processor.process_change
        original_process_changes_chunk = processor.process_changes_chunk

        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            ),
            processor_chunk_size=2
        )

        since = feed.get_latest_offsets()
        self._produce_changes(2)
        # pillow should use process_changes_chunk (make process_change raise an exception for test)
        processor.process_change = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=since, forever=False)
        self.assertEqual(processor.count, 2)

        self._produce_changes(2)
        # if process_changes_chunk raises exception, pillow should use process_change
        processor.process_change = original_process_change
        processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(processor.count, 4)

        self._produce_changes(1)
        # offsets after full chunk should still be processed
        processor.process_change = MagicMock(side_effect=Exception('_'))
        processor.process_changes_chunk = original_process_changes_chunk
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(processor.count, 5)
Example #41
class capture_kafka_changes_context(object):
    def __init__(self, *topics):
        self.topics = topics
        self.change_feed = KafkaChangeFeed(
            topics=topics,
            client_id='test-{}'.format(uuid.uuid4().hex),
        )
        self.changes = None

    def __enter__(self):
        self.kafka_seq = get_multi_topic_offset(self.topics)
        self.changes = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for change in self.change_feed.iter_changes(since=self.kafka_seq, forever=False):
            if change:
                self.changes.append(change)
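
A hypothetical usage sketch of this context manager; publish_stub_change is borrowed from the test examples above, and any other Kafka publisher would work the same way:

    with capture_kafka_changes_context(topics.FORM, topics.CASE) as context:
        publish_stub_change(topics.FORM)  # published inside the block
    # __exit__ has collected everything published since __enter__
    print([change.id for change in context.changes])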
Example #42
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     next(feed.iter_changes(since=first_available_offsets, forever=False))