Example #1
 def test_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
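     # one less than the first available offset simulates a checkpoint that has
     # already expired out of Kafka's retention window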
     since = {
         topic_partition: offset - 1
         for topic_partition, offset in first_available_offsets.items()
     }
     with self.assertRaises(UnavailableKafkaOffset):
         next(feed.iter_changes(since=since, forever=False))
Example #2
 def setUp(self):
     self.processor = CountingProcessor()
     self.pillow = ConstructedPillow(
         name='test-kafka-case-feed',
         checkpoint=None,
         change_feed=KafkaChangeFeed(topics=[topics.CASE, topics.CASE_SQL],
                                     group_id='test-kafka-case-feed'),
         processor=self.processor)
     self.original_process_change = self.pillow.process_change
Example #3
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include UCR and Elasticsearch processors.
        Args:
            skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareCase'), StaticDataSourceProvider('CommCareCase')],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
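
A pillow built by this factory is normally run by the pillowtop machinery, but for a quick smoke test it can also be driven by hand. A minimal sketch, assuming the same imports as the factory above and using only calls shown elsewhere on this page (get_latest_offsets in Example #6, process_changes in Example #29); treat it as illustrative, not as the project's actual entry point:

pillow = get_case_pillow(skip_ucr=True)  # skip UCR setup, as the docstring suggests for tests
feed = KafkaChangeFeed(CASE_TOPICS, client_id='case-pillow-smoke-test')
since = feed.get_latest_offsets()        # checkpoint at the current head of the feed
pillow.process_changes(since=since, forever=False)  # drain anything published after `since`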
Example #4
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE],
                            group_id='test-kafka-feed',
                            strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets(
         [topics.FORM, topics.CASE])
     since = {
         topic: first_available
         for topic, first_available in first_available_offsets.items()
     }
     next(feed.iter_changes(since=since, forever=False))
Example #5
def get_main_blob_deletion_pillow(pillow_id):
    """Get blob deletion pillow for the main couch database

    Using the KafkaChangeFeed ties this to the main couch database.
    """
    return _get_blob_deletion_pillow(
        pillow_id,
        get_db(None),
        PillowCheckpoint('kafka-blob-deletion-pillow-checkpoint'),
        KafkaChangeFeed(topics=[topics.META], group_id='blob-deletion-group'),
    )
Example #6
 def test_multiple_topics(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = feed.get_latest_offsets()
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     self.assertEqual(2, len(changes))
     found_change_ids = set([change.id for change in changes])
     self.assertEqual(set([meta.document_id for meta in expected_metas]), found_change_ids)
     for unexpected in unexpected_metas:
         self.assertTrue(unexpected.document_id not in found_change_ids)
Example #7
 def test_multiple_topics_with_partial_checkpoint(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = {'form': feed.get_latest_offsets()['form']}
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     # should include at least the form and the case (may have more than one
     # case, since omitting the case topic from the checkpoint rewinds that
     # topic to the beginning of the feed)
     self.assertTrue(len(changes) > 1)
     found_change_ids = set([change.id for change in changes])
     for expected_id in set([meta.document_id for meta in expected_metas]):
         self.assertTrue(expected_id in found_change_ids)
Example #8
    def setUpClass(cls):
        super(KafkaPublishingTest, cls).setUpClass()
        cls.processor = TestProcessor()
        cls.form_pillow = ConstructedPillow(
            name='test-kafka-form-feed',
            checkpoint=None,
            change_feed=KafkaChangeFeed(topics=[topics.FORM_SQL],
                                        client_id='test-kafka-form-feed'),
            processor=cls.processor)
        cls.case_pillow = ConstructedPillow(
            name='test-kafka-case-feed',
            checkpoint=None,
            change_feed=KafkaChangeFeed(topics=[topics.CASE_SQL],
                                        client_id='test-kafka-case-feed'),
            processor=cls.processor)
        cls.process_form_changes = process_pillow_changes(
            'DefaultChangeFeedPillow')
        cls.process_form_changes.add_pillow(cls.form_pillow)

        cls.process_case_changes = process_pillow_changes(
            'DefaultChangeFeedPillow')
        cls.process_case_changes.add_pillow(cls.case_pillow)
Example #9
 def setUp(self):
     super(KafkaPublishingTest, self).setUp()
     FormProcessorTestUtils.delete_all_cases_forms_ledgers()
     self.form_accessors = FormAccessors(domain=self.domain)
     self.processor = TestProcessor()
     self.form_pillow = ConstructedPillow(
         name='test-kafka-form-feed',
         checkpoint=None,
         change_feed=KafkaChangeFeed(topics=[topics.FORM, topics.FORM_SQL],
                                     group_id='test-kafka-form-feed'),
         processor=self.processor)
     self.case_pillow = ConstructedPillow(
         name='test-kafka-case-feed',
         checkpoint=None,
         change_feed=KafkaChangeFeed(topics=[topics.CASE, topics.CASE_SQL],
                                     group_id='test-kafka-case-feed'),
         processor=self.processor)
     self.ledger_pillow = ConstructedPillow(
         name='test-kafka-ledger-feed',
         checkpoint=None,
         change_feed=KafkaChangeFeed(topics=[topics.LEDGER],
                                     group_id='test-kafka-ledger-feed'),
         processor=self.processor)
Example #10
def get_group_to_user_pillow(pillow_id='GroupToUserPillow', num_processes=1, process_num=0, **kwargs):
    assert pillow_id == 'GroupToUserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, [topics.GROUP])
    processor = GroupsToUsersProcessor()
    change_feed = KafkaChangeFeed(
        topics=[topics.GROUP], group_id='groups-to-users', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
Example #11
def get_group_to_user_pillow(pillow_id='GroupToUserPillow'):
    assert pillow_id == 'GroupToUserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO)
    processor = GroupsToUsersProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[GROUP],
                                    group_id='groups-to-users'),
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
Example #12
 def __init__(self, processor, pillow_name):
     change_feed = KafkaChangeFeed(topics.ALL, group_id=pillow_name)
     checkpoint = PillowCheckpoint(pillow_name)
     event_handler = MultiTopicCheckpointEventHandler(
         checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed
     )
     super(ConfigurableReportKafkaPillow, self).__init__(
         name=pillow_name,
         change_feed=change_feed,
         processor=processor,
         checkpoint=checkpoint,
         change_processed_event_handler=event_handler
     )
     # set by the superclass constructor
     assert self._processor is not None
     assert self._processor.bootstrapped is not None
Example #13
def get_group_to_user_pillow(pillow_id='GroupToUserPillow', num_processes=1, process_num=0, **kwargs):
    # TODO: remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'GroupToUserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, [topics.GROUP])
    processor = GroupsToUsersProcessor()
    change_feed = KafkaChangeFeed(
        topics=[topics.GROUP], client_id='groups-to-users', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=10, change_feed=change_feed
        ),
    )
Example #14
def get_user_pillow(pillow_id='user-pillow',
                    num_processes=1,
                    dedicated_migration_process=False,
                    process_num=0,
                    skip_ucr=False,
                    processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                    **kwargs):
    """Processes users and sends them to ES and UCRs.

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = get_ucr_processor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareUser'),
            StaticDataSourceProvider('CommCareUser')
        ],
        run_migrations=(process_num == 0),  # only the first process runs migrations
    )
    change_feed = KafkaChangeFeed(
        topics=topics.USER_TOPICS,
        client_id='users-to-es',
        num_processes=num_processes,
        process_num=process_num,
        dedicated_migration_process=dedicated_migration_process)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor]
        if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
        processor_chunk_size=processor_chunk_size,
        process_num=process_num,
        is_dedicated_migration_process=dedicated_migration_process
        and (process_num == 0))
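
The num_processes / process_num / dedicated_migration_process parameters imply one pillow instance per worker process. A hypothetical fan-out, assuming only the factory above: process 0 becomes the dedicated migration process, while the others do the actual change processing.

pillows = [
    get_user_pillow(num_processes=3, process_num=n,
                    dedicated_migration_process=True)
    for n in range(3)
]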
Example #15
def get_ledger_to_elasticsearch_pillow(
        pillow_id='LedgerToElasticsearchPillow'):
    assert pillow_id == 'LedgerToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, LEDGER_INDEX_INFO)
    processor = ElasticProcessor(elasticsearch=get_es_new(),
                                 index_info=LEDGER_INDEX_INFO,
                                 doc_prep_fn=_prepare_ledger_for_es)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[topics.LEDGER],
                                    group_id='ledgers-to-es'),
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100),
    )
Example #16
def get_app_to_elasticsearch_pillow(pillow_id='ApplicationToElasticsearchPillow'):
    assert pillow_id == 'ApplicationToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, APP_INDEX_INFO)
    app_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=APP_INDEX_INFO,
        doc_prep_fn=transform_app_for_es
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[topics.APP], group_id='apps-to-es'),
        processor=app_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100,
        ),
    )
Example #17
def get_sql_sms_pillow(pillow_id='SqlSMSPillow'):
    assert pillow_id == 'SqlSMSPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, SMS_INDEX_INFO)
    processor = ElasticProcessor(elasticsearch=get_es_new(),
                                 index_info=SMS_INDEX_INFO,
                                 doc_prep_fn=lambda x: x)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(
            topics=[topics.SMS], group_id=SMS_PILLOW_KAFKA_CONSUMER_GROUP_ID),
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
Example #18
    def __init__(self, indicator_class, processor):
        self.indicator_class = indicator_class
        self.kafka_topic = indicator_class().kafka_topic
        self.domains = processor.domains
        self.doc_type = processor.doc_type

        name = '{}Pillow'.format(indicator_class.__name__)
        checkpoint = PillowCheckpoint('fluff.{}.{}'.format(name, get_machine_id()))

        super(FluffPillow, self).__init__(
            name=name,
            checkpoint=checkpoint,
            change_feed=KafkaChangeFeed(topics=[self.kafka_topic], group_id=indicator_class.__name__),
            processor=processor,
            change_processed_event_handler=PillowCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1000,
            )
        )
Example #19
def get_case_to_elasticsearch_pillow(pillow_id='CaseToElasticsearchPillow'):
    assert pillow_id == 'CaseToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, CASE_INDEX_INFO)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    kafka_change_feed = KafkaChangeFeed(topics=[topics.CASE, topics.CASE_SQL], group_id='cases-to-es')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=MultiTopicCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=kafka_change_feed
        ),
    )
Example #20
    def handle(self, **options):
        since = options['from']
        sleep = float(options['sleep'] or '.01')
        last_domain = None
        change_feed = KafkaChangeFeed(topics=[topics.FORM], group_id='form-feed')
        for change in change_feed.iter_changes(since=since, forever=True):
            if not change.deleted:
                # this is just helpful for demos to find domain transitions
                if change.metadata.domain != last_domain:
                    last_domain = change.metadata.domain
                    print(change.sequence_id, last_domain)

                metadata = change.metadata.to_json()
                if not options['compact']:
                    metadata['country'] = _get_country(change.metadata.domain)
                message = RedisMessage(json.dumps(metadata))
                RedisPublisher(facility='form-feed', broadcast=True).publish_message(message)
                time.sleep(sleep)
Example #21
def get_user_sync_history_pillow(
        pillow_id='UpdateUserSyncHistoryPillow', num_processes=1, process_num=0, **kwargs):
    """
    This gets a pillow which iterates through all synclogs
    """
    change_feed = KafkaChangeFeed(
        topics=[topics.SYNCLOG_SQL], client_id=SYNCLOG_SQL_USER_SYNC_GROUP_ID,
        num_processes=num_processes, process_num=process_num)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [topics.SYNCLOG_SQL])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=UserSyncHistoryProcessor(),
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
Example #22
def get_domain_kafka_to_elasticsearch_pillow(pillow_id='KafkaDomainPillow'):
    assert pillow_id == 'KafkaDomainPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, DOMAIN_INDEX_INFO)
    domain_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=DOMAIN_INDEX_INFO,
        doc_prep_fn=transform_domain_for_elasticsearch)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[DOMAIN], group_id='domains-to-es'),
        processor=domain_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
Example #23
def get_unknown_users_pillow(pillow_id='unknown-users-pillow', num_processes=1, process_num=0, **kwargs):
    """
    This pillow adds users from incoming xform submissions to the User Index if they don't already exist in HQ
    """
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, topics.FORM_TOPICS)
    processor = UnknownUsersProcessor()
    change_feed = KafkaChangeFeed(
        topics=topics.FORM_TOPICS, group_id='unknown-users', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
Example #24
def get_unknown_users_pillow(pillow_id='unknown-users-pillow', num_processes=1, process_num=0, **kwargs):
    """
    TODO: remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    This pillow adds users from incoming xform submissions to the User Index if they don't already exist in HQ
    """
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, topics.FORM_TOPICS)
    processor = UnknownUsersProcessor()
    change_feed = KafkaChangeFeed(
        topics=topics.FORM_TOPICS, client_id='unknown-users', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
Example #25
    def __init__(self, indicator_name, kafka_topic, processor, domains=None, doc_type=None):
        self.kafka_topic = kafka_topic
        self.domains = domains or processor.domains
        self.doc_type = doc_type or processor.doc_type

        change_feed = KafkaChangeFeed(topics=[self.kafka_topic], client_id=indicator_name)

        name = '{}Pillow'.format(indicator_name)
        checkpoint = PillowCheckpoint('fluff.{}.{}'.format(name, get_machine_id()), change_feed.sequence_format)

        super(FluffPillow, self).__init__(
            name=name,
            checkpoint=checkpoint,
            change_feed=change_feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed
            )
        )
Example #26
def get_report_case_to_elasticsearch_pillow(pillow_id='ReportCaseToElasticsearchPillow', **kwargs):
    assert pillow_id == 'ReportCaseToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, REPORT_CASE_INDEX_INFO)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=REPORT_CASE_INDEX_INFO,
        doc_prep_fn=transform_case_to_report_es,
        doc_filter_fn=report_case_filter,
    )
    kafka_change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS, group_id='report-cases-to-es')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=kafka_change_feed
        ),
    )
Example #27
def get_unknown_users_pillow(pillow_id='unknown-users-pillow'):
    """
    This pillow adds users from incoming xform submissions to the User Index if they don't already exist in HQ
    """
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO)
    processor = UnknownUsersProcessor()
    change_feed = KafkaChangeFeed(topics=[FORM, FORM_SQL],
                                  group_id='unknown-users')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=MultiTopicCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #28
def get_group_pillow(pillow_id='group-pillow', num_processes=1, process_num=0, **kwargs):
    assert pillow_id == 'group-pillow', 'Pillow ID is not allowed to change'
    to_user_es_processor = GroupsToUsersProcessor()
    to_group_es_processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(
        topics=[topics.GROUP], client_id='groups-to-users', num_processes=num_processes, process_num=process_num
    )
    checkpoint_id = "{}-{}-{}".format(
        pillow_id, USER_INDEX, to_group_es_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, [topics.GROUP])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[to_user_es_processor, to_group_es_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=10, change_feed=change_feed
        ),
    )
Example #29
    def test_basic(self):
        # setup
        feed = KafkaChangeFeed(topics=[topics.CASE],
                               client_id='test-kafka-feed')
        pillow_name = 'test-chunked-processing'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = ChunkedCountProcessor()
        original_process_change = processor.process_change
        original_process_changes_chunk = processor.process_changes_chunk

        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint,
                checkpoint_frequency=1,
                change_feed=feed),
            processor_chunk_size=2)

        since = feed.get_latest_offsets()
        self._produce_changes(2)
        # pillow should use process_changes_chunk (make process_change raise an exception for test)
        processor.process_change = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=since, forever=False)
        self.assertEqual(processor.count, 2)

        self._produce_changes(2)
        # if process_changes_chunk raises exception, pillow should use process_change
        processor.process_change = original_process_change
        processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(),
                               forever=False)
        self.assertEqual(processor.count, 4)

        self._produce_changes(1)
        # offsets after full chunk should still be processed
        processor.process_change = MagicMock(side_effect=Exception('_'))
        processor.process_changes_chunk = original_process_changes_chunk
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(),
                               forever=False)
        self.assertEqual(processor.count, 5)
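
The test above exercises the chunked-processing contract: a pillow with processor_chunk_size > 0 tries process_changes_chunk first and falls back to per-change process_change when the bulk call raises. A hedged sketch of what a processor like ChunkedCountProcessor might look like; the real class lives in the test suite, and the return value shown is an assumed (retry_changes, change_exceptions) contract:

class ChunkedCountProcessor(object):

    def __init__(self):
        self.count = 0

    def process_change(self, change):
        # per-change fallback, used when process_changes_chunk raises
        self.count += 1

    def process_changes_chunk(self, changes_chunk):
        # bulk path, tried first when processor_chunk_size > 0
        self.count += len(changes_chunk)
        return [], []  # assumed (retry_changes, change_exceptions) contract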
Example #30
def get_xform_to_elasticsearch_pillow(pillow_id='XFormToElasticsearchPillow'):
    assert pillow_id == 'XFormToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, XFORM_INDEX_INFO)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    kafka_change_feed = KafkaChangeFeed(topics=[topics.FORM, topics.FORM_SQL], group_id='forms-to-es')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=MultiTopicCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=kafka_change_feed
        ),
    )