Пример #1
0
def get_location_pillow(pillow_id='location-ucr-pillow',
                        include_ucrs=None,
                        num_processes=1,
                        process_num=0,
                        ucr_configs=None,
                        **kwargs):
    # Todo; is ucr_division needed?
    change_feed = KafkaChangeFeed([LOCATION_TOPIC],
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('Location'),
            StaticDataSourceProvider('Location')
        ],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[ucr_processor])
Пример #2
0
def get_location_pillow(pillow_id='location-ucr-pillow', include_ucrs=None,
                        num_processes=1, process_num=0, ucr_configs=None, **kwargs):
    """Processes updates to locations for UCR

    Note this is only applicable if a domain on the environment has `LOCATIONS_IN_UCR` flag enabled.

    Processors:
      - :py:func:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    change_feed = KafkaChangeFeed(
        [LOCATION_TOPIC], client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('Location'), StaticDataSourceProvider('Location')],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=[ucr_processor]
    )
Пример #3
0
def get_kafka_ucr_static_pillow(
        pillow_id='kafka-ucr-static',
        ucr_division=None,
        include_ucrs=None,
        exclude_ucrs=None,
        topics=None,
        num_processes=1,
        process_num=0,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
        **kwargs):
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    topics = topics or KAFKA_TOPICS
    topics = [t for t in topics]
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_providers=[StaticDataSourceProvider()],
            ucr_division=ucr_division,
            include_ucrs=include_ucrs,
            exclude_ucrs=exclude_ucrs,
            bootstrap_interval=7 * 24 * 60 * 60,  # 1 week
            run_migrations=(process_num == 0
                            )  # only first process runs migrations
        ),
        pillow_name=pillow_id,
        topics=topics,
        num_processes=num_processes,
        process_num=process_num,
        retry_errors=True,
        processor_chunk_size=processor_chunk_size,
    )
Пример #4
0
def get_user_pillow(pillow_id='user-pillow',
                    num_processes=1,
                    process_num=0,
                    skip_ucr=False,
                    **kwargs):
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(data_source_providers=[
        DynamicDataSourceProvider(),
        StaticDataSourceProvider()
    ], )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor]
        if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Пример #5
0
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static', ucr_division=None,
                                include_ucrs=None, exclude_ucrs=None, topics=None,
                                num_processes=1, process_num=0, dedicated_migration_process=False,
                                processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """UCR pillow that reads from all Kafka topics and writes data into the UCR database tables.

    Only processes `static` UCR datasources (configuration lives in the codebase instead of the database).

        Processors:
          - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    topics = topics or KAFKA_TOPICS
    topics = [t for t in topics]
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_providers=[StaticDataSourceProvider()],
            ucr_division=ucr_division,
            include_ucrs=include_ucrs,
            exclude_ucrs=exclude_ucrs,
            bootstrap_interval=7 * 24 * 60 * 60,  # 1 week
            run_migrations=(process_num == 0)  # only first process runs migrations
        ),
        pillow_name=pillow_id,
        topics=topics,
        num_processes=num_processes,
        process_num=process_num,
        retry_errors=True,
        is_dedicated_migration_process=dedicated_migration_process and (process_num == 0),
        processor_chunk_size=processor_chunk_size,
    )
Пример #6
0
def get_user_pillow(pillow_id='user-pillow', num_processes=1, process_num=0,
        skip_ucr=False, processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """Processes users and sends them to ES and UCRs.

    Processors:
      - :py:func:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareUser'), StaticDataSourceProvider('CommCareUser')],
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    change_feed = KafkaChangeFeed(
        topics=topics.USER_TOPICS, client_id='users-to-es', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor] if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
        processor_chunk_size=processor_chunk_size
    )
Пример #7
0
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static'):
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_provider=StaticDataSourceProvider(),
            auto_repopulate_tables=True,
        ),
        pillow_name=pillow_id,
    )
Пример #8
0
def get_all_es_data_sources():
    from corehq.apps.userreports.data_source_providers import DynamicDataSourceProvider, StaticDataSourceProvider
    data_sources = DynamicDataSourceProvider().get_data_sources()
    data_sources.extend(StaticDataSourceProvider().get_data_sources())
    return [
        s for s in data_sources if s.backend_id in
        [UCR_ES_BACKEND, UCR_LABORATORY_BACKEND, UCR_ES_PRIMARY]
    ]
Пример #9
0
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases. The processors include, UCR and elastic processors

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:function:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareCase'), StaticDataSourceProvider('CommCareCase')],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unneccessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Пример #10
0
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=UCR_PROCESSING_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include, UCR and elastic processors
        Args:
            skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unneccessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Пример #11
0
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static', ucr_division=None,
                                include_ucrs=None, exclude_ucrs=None, topics=None):
    topics = topics or KAFKA_TOPICS
    topics = [str(t) for t in topics]
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_provider=StaticDataSourceProvider(),
            auto_repopulate_tables=True,
            ucr_division=ucr_division,
            include_ucrs=include_ucrs,
            exclude_ucrs=exclude_ucrs,
            filter_missing_domains=True,
        ),
        pillow_name=pillow_id,
        topics=topics
    )
Пример #12
0
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static', ucr_division=None,
                                include_ucrs=None, exclude_ucrs=None, topics=None,
                                num_processes=1, process_num=0, **kwargs):
    topics = topics or KAFKA_TOPICS
    topics = [kafka_bytestring(t) for t in topics]
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_provider=StaticDataSourceProvider(),
            auto_repopulate_tables=True,
            ucr_division=ucr_division,
            include_ucrs=include_ucrs,
            exclude_ucrs=exclude_ucrs,
            bootstrap_interval=7 * 24 * 60 * 60  # 1 week
        ),
        pillow_name=pillow_id,
        topics=topics,
        num_processes=num_processes,
        process_num=process_num,
        retry_errors=True
    )
Пример #13
0
def get_xform_pillow(pillow_id='xform-pillow',
                     ucr_division=None,
                     include_ucrs=None,
                     exclude_ucrs=None,
                     num_processes=1,
                     process_num=0,
                     ucr_configs=None,
                     skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None,
                     **kwargs):
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process cases only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(
            process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=processors,
                             processor_chunk_size=processor_chunk_size)
Пример #14
0
            uploaded_filename=uploaded_filename
        )
        raise e
    else:
        notify_success(
            subject=f"[{settings.SERVER_ENVIRONMENT}] - Other Cases Reassignment Completed",
            body=f"The request has been successfully completed for file {uploaded_filename}.",
            email=user_email
        )


@task(queue=settings.CELERY_LOCATION_REASSIGNMENT_QUEUE)
def process_ucr_changes(domain, case_ids):
    cases = CaseAccessorSQL.get_cases(case_ids)
    docs = [case.to_json() for case in cases]
    data_source_providers = [DynamicDataSourceProvider(), StaticDataSourceProvider()]

    all_configs = [
        source
        for provider in data_source_providers
        for source in provider.by_domain(domain)
    ]

    adapters = [
        get_indicator_adapter(config, raise_errors=True, load_source='location_reassignment')
        for config in all_configs
    ]

    for doc in docs:
        eval_context = EvaluationContext(doc)
        for adapter in adapters:
Пример #15
0
            subject=
            f"[{settings.SERVER_ENVIRONMENT}] - Other Cases Reassignment Completed",
            body=
            f"The request has been successfully completed for file {uploaded_filename}.",
            to=[user_email],
            from_email=settings.DEFAULT_FROM_EMAIL)
        email.send()


@task(queue=settings.CELERY_LOCATION_REASSIGNMENT_QUEUE)
def process_ucr_changes(domain, case_ids):
    cases = CaseAccessorSQL.get_cases(case_ids)
    docs = [case.to_json() for case in cases]
    data_source_providers = [
        DynamicDataSourceProvider(),
        StaticDataSourceProvider()
    ]

    all_configs = [
        source for provider in data_source_providers
        for source in provider.by_domain(domain)
    ]

    adapters = [
        get_indicator_adapter(config,
                              raise_errors=True,
                              load_source='location_reassignment')
        for config in all_configs
    ]

    for doc in docs:
Пример #16
0
def get_xform_pillow(pillow_id='xform-pillow',
                     ucr_division=None,
                     include_ucrs=None,
                     exclude_ucrs=None,
                     num_processes=1,
                     process_num=0,
                     ucr_configs=None,
                     skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None,
                     dedicated_migration_process=False,
                     **kwargs):
    """Generic XForm change processor

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.pillows.user.UnknownUsersProcessor` (disabled when RUN_UNKNOWN_USER_PILLOW=False)
      - :py:class:`pillowtop.form.FormSubmissionMetadataTrackerProcessor` (disabled when RUN_FORM_META_PILLOW=False)
      - :py:class:`corehq.apps.data_interfaces.pillow.CaseDeduplicationPillow``
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process cases only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
        dedicated_migration_process=dedicated_migration_process)

    ucr_processor = get_ucr_processor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(
            process_num == 0),  # only first process runs migrations
        ucr_configs=ucr_configs)

    xform_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
        change_filter_fn=is_couch_change_for_sql_domain)
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if settings.RUN_DEDUPLICATION_PILLOW:
        processors.append(CaseDeduplicationProcessor())

    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = BulkElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
            change_filter_fn=is_couch_change_for_sql_domain)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
        process_num=process_num,
        is_dedicated_migration_process=dedicated_migration_process
        and (process_num == 0))