def get_location_pillow(pillow_id='location-ucr-pillow', include_ucrs=None,
                        num_processes=1, process_num=0, ucr_configs=None, **kwargs):
    """Build a pillow that feeds location changes from Kafka into UCR datasources."""
    # Todo; is ucr_division needed?
    feed = KafkaChangeFeed(
        [LOCATION_TOPIC],
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )
    ucr_proc = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('Location'),
            StaticDataSourceProvider('Location'),
        ],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        # Pre-load specific UCR configs (used by tests / targeted reprocessing).
        ucr_proc.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=feed,
        checkpoint_callback=ucr_proc,
    )
    return ConstructedPillow(
        name=pillow_id,
        change_feed=feed,
        checkpoint=checkpoint,
        change_processed_event_handler=handler,
        processor=[ucr_proc],
    )
def get_location_pillow(pillow_id='location-ucr-pillow', include_ucrs=None,
                        num_processes=1, process_num=0, ucr_configs=None, **kwargs):
    """Processes updates to locations for UCR

    Note this is only applicable if a domain on the environment has
    `LOCATIONS_IN_UCR` flag enabled.

    Processors:
      - :py:func:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    feed = KafkaChangeFeed(
        [LOCATION_TOPIC],
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )
    ucr_proc = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('Location'),
            StaticDataSourceProvider('Location'),
        ],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        # Pre-load specific UCR configs instead of discovering them lazily.
        ucr_proc.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=feed,
        checkpoint_callback=ucr_proc,
    )
    return ConstructedPillow(
        name=pillow_id,
        change_feed=feed,
        checkpoint=checkpoint,
        change_processed_event_handler=handler,
        processor=[ucr_proc],
    )
def get_kafka_ucr_static_pillow(
        pillow_id='kafka-ucr-static', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None, topics=None,
        num_processes=1, process_num=0,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """UCR pillow over Kafka topics that processes only *static* datasources
    (configuration lives in the codebase rather than the database).

    :param ucr_division: optionally restrict which slice of UCRs this process handles
    :param topics: Kafka topics to consume; defaults to all known topics
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    # Copy so we never hand the caller's list to the pillow (list() instead of
    # a manual identity comprehension).
    topics = list(topics or KAFKA_TOPICS)
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_providers=[StaticDataSourceProvider()],
            ucr_division=ucr_division,
            include_ucrs=include_ucrs,
            exclude_ucrs=exclude_ucrs,
            bootstrap_interval=7 * 24 * 60 * 60,  # 1 week
            run_migrations=(process_num == 0),  # only first process runs migrations
        ),
        pillow_name=pillow_id,
        topics=topics,
        num_processes=num_processes,
        process_num=process_num,
        retry_errors=True,
        processor_chunk_size=processor_chunk_size,
    )
def get_user_pillow(pillow_id='user-pillow', num_processes=1, process_num=0,
                    skip_ucr=False, **kwargs):
    """Pillow that sends users to ES and UCR."""
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    es_proc = get_user_es_processor()
    ucr_proc = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider(),
            StaticDataSourceProvider(),
        ],
    )
    feed = KafkaChangeFeed(
        topics=topics.USER_TOPICS,
        client_id='users-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    if skip_ucr:
        # skip_ucr avoids extra UCR setup (e.g. in tests)
        pillow_processors = [es_proc]
    else:
        pillow_processors = [ucr_proc, es_proc]
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=pillow_processors,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=feed),
    )
def get_kafka_ucr_static_pillow(
        pillow_id='kafka-ucr-static', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None, topics=None,
        num_processes=1, process_num=0, dedicated_migration_process=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """UCR pillow that reads from all Kafka topics and writes data into the UCR
    database tables.

    Only processes `static` UCR datasources (configuration lives in the
    codebase instead of the database).

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    # Copy so we never hand the caller's list to the pillow (list() instead of
    # a manual identity comprehension).
    topics = list(topics or KAFKA_TOPICS)
    return ConfigurableReportKafkaPillow(
        processor=ConfigurableReportPillowProcessor(
            data_source_providers=[StaticDataSourceProvider()],
            ucr_division=ucr_division,
            include_ucrs=include_ucrs,
            exclude_ucrs=exclude_ucrs,
            bootstrap_interval=7 * 24 * 60 * 60,  # 1 week
            run_migrations=(process_num == 0),  # only first process runs migrations
        ),
        pillow_name=pillow_id,
        topics=topics,
        num_processes=num_processes,
        process_num=process_num,
        retry_errors=True,
        is_dedicated_migration_process=dedicated_migration_process and (process_num == 0),
        processor_chunk_size=processor_chunk_size,
    )
def get_user_pillow(pillow_id='user-pillow', num_processes=1, process_num=0,
                    skip_ucr=False, processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                    **kwargs):
    """Processes users and sends them to ES and UCRs.

    Processors:
      - :py:func:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    es_proc = get_user_es_processor()
    ucr_proc = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareUser'),
            StaticDataSourceProvider('CommCareUser'),
        ],
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    feed = KafkaChangeFeed(
        topics=topics.USER_TOPICS,
        client_id='users-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    if skip_ucr:
        # skip_ucr avoids extra UCR setup (e.g. in tests)
        pillow_processors = [es_proc]
    else:
        pillow_processors = [ucr_proc, es_proc]
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=pillow_processors,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=feed),
        processor_chunk_size=processor_chunk_size,
    )
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static'):
    """Build the UCR Kafka pillow for static datasources only."""
    static_processor = ConfigurableReportPillowProcessor(
        data_source_provider=StaticDataSourceProvider(),
        auto_repopulate_tables=True,
    )
    return ConfigurableReportKafkaPillow(
        processor=static_processor,
        pillow_name=pillow_id,
    )
def get_all_es_data_sources():
    """Return all UCR datasource configs (dynamic and static) whose backend is
    Elasticsearch-based."""
    # local import to avoid a circular dependency at module load time
    from corehq.apps.userreports.data_source_providers import (
        DynamicDataSourceProvider,
        StaticDataSourceProvider,
    )
    data_sources = DynamicDataSourceProvider().get_data_sources()
    data_sources.extend(StaticDataSourceProvider().get_data_sources())
    # Build the membership set once instead of a list literal per iteration.
    es_backend_ids = {UCR_ES_BACKEND, UCR_LABORATORY_BACKEND, UCR_ES_PRIMARY}
    return [s for s in data_sources if s.backend_id in es_backend_ids]
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases. The processors include, UCR and elastic processors

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:function:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )
    ucr_proc = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareCase'),
            StaticDataSourceProvider('CommCareCase'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_proc.bootstrap(ucr_configs)
    es_proc = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    search_proc = get_case_search_processor()
    # Checkpoint id encodes every index the pillow writes to so that adding an
    # index forces a distinct checkpoint.
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, search_proc.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=feed,
        checkpoint_callback=ucr_proc,
    )
    processors = [es_proc, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(search_proc)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unneccessary
        processors.insert(0, ucr_proc)
    return ConstructedPillow(
        name=pillow_id,
        change_feed=feed,
        checkpoint=checkpoint,
        change_processed_event_handler=handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=UCR_PROCESSING_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include, UCR and elastic processors

    Args:
        skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )
    ucr_proc = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider(),
            StaticDataSourceProvider(),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_proc.bootstrap(ucr_configs)
    es_proc = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    search_proc = get_case_search_processor()
    # Checkpoint id encodes the indices the pillow writes to.
    checkpoint_id = "{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, search_proc.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=feed,
        checkpoint_callback=ucr_proc,
    )
    processors = [es_proc, search_proc]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unneccessary
        processors.insert(0, ucr_proc)
    return ConstructedPillow(
        name=pillow_id,
        change_feed=feed,
        checkpoint=checkpoint,
        change_processed_event_handler=handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static', ucr_division=None,
                                include_ucrs=None, exclude_ucrs=None, topics=None):
    """Static-datasource UCR pillow over the given (or all) Kafka topics."""
    kafka_topics = [str(t) for t in (topics or KAFKA_TOPICS)]
    static_processor = ConfigurableReportPillowProcessor(
        data_source_provider=StaticDataSourceProvider(),
        auto_repopulate_tables=True,
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        filter_missing_domains=True,
    )
    return ConfigurableReportKafkaPillow(
        processor=static_processor,
        pillow_name=pillow_id,
        topics=kafka_topics,
    )
def get_kafka_ucr_static_pillow(pillow_id='kafka-ucr-static', ucr_division=None,
                                include_ucrs=None, exclude_ucrs=None, topics=None,
                                num_processes=1, process_num=0, **kwargs):
    """Static-datasource UCR pillow over the given (or all) Kafka topics."""
    kafka_topics = [kafka_bytestring(t) for t in (topics or KAFKA_TOPICS)]
    static_processor = ConfigurableReportPillowProcessor(
        data_source_provider=StaticDataSourceProvider(),
        auto_repopulate_tables=True,
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        bootstrap_interval=7 * 24 * 60 * 60  # 1 week
    )
    return ConfigurableReportKafkaPillow(
        processor=static_processor,
        pillow_name=pillow_id,
        topics=kafka_topics,
        num_processes=num_processes,
        process_num=process_num,
        retry_errors=True,
    )
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None, **kwargs):
    """Return a pillow that processes XForm submissions into ES and UCRs.

    :param skip_ucr: set True (e.g. in tests) to leave out the UCR processor
    :param topics: must be a subset of FORM_TOPICS; defaults to all form topics
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        # Fixed copy-paste: this is the *form* pillow (the case pillow carries
        # the "cases only" message).
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    # Checkpoint id encodes every index the pillow writes to.
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
        )
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
uploaded_filename=uploaded_filename ) raise e else: notify_success( subject=f"[{settings.SERVER_ENVIRONMENT}] - Other Cases Reassignment Completed", body=f"The request has been successfully completed for file {uploaded_filename}.", email=user_email ) @task(queue=settings.CELERY_LOCATION_REASSIGNMENT_QUEUE) def process_ucr_changes(domain, case_ids): cases = CaseAccessorSQL.get_cases(case_ids) docs = [case.to_json() for case in cases] data_source_providers = [DynamicDataSourceProvider(), StaticDataSourceProvider()] all_configs = [ source for provider in data_source_providers for source in provider.by_domain(domain) ] adapters = [ get_indicator_adapter(config, raise_errors=True, load_source='location_reassignment') for config in all_configs ] for doc in docs: eval_context = EvaluationContext(doc) for adapter in adapters:
subject= f"[{settings.SERVER_ENVIRONMENT}] - Other Cases Reassignment Completed", body= f"The request has been successfully completed for file {uploaded_filename}.", to=[user_email], from_email=settings.DEFAULT_FROM_EMAIL) email.send() @task(queue=settings.CELERY_LOCATION_REASSIGNMENT_QUEUE) def process_ucr_changes(domain, case_ids): cases = CaseAccessorSQL.get_cases(case_ids) docs = [case.to_json() for case in cases] data_source_providers = [ DynamicDataSourceProvider(), StaticDataSourceProvider() ] all_configs = [ source for provider in data_source_providers for source in provider.by_domain(domain) ] adapters = [ get_indicator_adapter(config, raise_errors=True, load_source='location_reassignment') for config in all_configs ] for doc in docs:
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None, dedicated_migration_process=False, **kwargs):
    """Generic XForm change processor

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.pillows.user.UnknownUsersProcessor` (disabled when RUN_UNKNOWN_USER_PILLOW=False)
      - :py:class:`pillowtop.form.FormSubmissionMetadataTrackerProcessor` (disabled when RUN_FORM_META_PILLOW=False)
      - :py:class:`corehq.apps.data_interfaces.pillow.CaseDeduplicationPillow``
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        # Fixed copy-paste: this is the *form* pillow (the case pillow carries
        # the "cases only" message).
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes,
        process_num=process_num,
        dedicated_migration_process=dedicated_migration_process)
    ucr_processor = get_ucr_processor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
        ucr_configs=ucr_configs,
    )
    xform_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
        change_filter_fn=is_couch_change_for_sql_domain,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    # Checkpoint id encodes every index the pillow writes to.
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if settings.RUN_DEDUPLICATION_PILLOW:
        processors.append(CaseDeduplicationProcessor())
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = BulkElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
            change_filter_fn=is_couch_change_for_sql_domain,
        )
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
        process_num=process_num,
        is_dedicated_migration_process=dedicated_migration_process and (process_num == 0),
    )