def _get_pillow(configs, processor_chunk_size=0):
    """Build a case pillow whose only processor is a UCR processor
    bootstrapped with *configs*.

    The standard case pillow's processors are discarded because, per the
    original intent, only UCR processing matters for callers of this helper.
    """
    case_pillow = get_case_pillow(processor_chunk_size=processor_chunk_size)
    # Swap out the default processors: only UCR is of interest here.
    ucr_only_processor = ConfigurableReportPillowProcessor(data_source_providers=[])
    ucr_only_processor.bootstrap(configs)
    case_pillow.processors = [ucr_only_processor]
    return case_pillow
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases with UCR and elastic processors.

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
        (omitted when ``skip_ucr=True``)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS

    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareCase'),
            StaticDataSourceProvider('CommCareCase'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        # only the first worker process runs data source migrations
        run_migrations=(process_num == 0),
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id,
        CASE_INDEX_INFO.index,
        case_search_processor.index_info.index,
        'messaging-sync',
    )
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )

    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # skipping UCR is useful in tests to avoid extra UCR setup where unnecessary
        processors.insert(0, ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes forms.

    Processors include the UCR processor (omitted when ``skip_ucr=True``),
    elastic processors for the form and report-form indices, an
    unknown-users processor and a form submission metadata tracker.

    Args:
        skip_ucr: set to True to omit the UCR processor, useful in tests
            to avoid extra UCR setup.
        topics: optional subset of FORM_TOPICS to consume from.
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        # fixed message: this pillow processes forms, not cases
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor, form_meta_processor, unknown_user_form_processor]
    if not settings.ENTERPRISE_MODE:
        # non-enterprise deployments also index into the report-forms index
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter
        )
        processors = [xform_to_report_es_processor] + processors
    if not skip_ucr:
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases.

    The processors include UCR and elastic processors.

    Args:
        skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS

    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareCase'),
            StaticDataSourceProvider('CommCareCase'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id,
        CASE_INDEX_INFO.index,
        case_search_processor.index_info.index,
    )
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )

    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # skipping UCR is useful in tests to avoid extra UCR setup where unnecessary
        processors.insert(0, ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=UCR_PROCESSING_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases.

    The processors include UCR and elastic processors.

    Args:
        skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS

    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider(),
            StaticDataSourceProvider(),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id,
        CASE_INDEX_INFO.index,
        case_search_processor.index_info.index,
    )
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )

    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # skipping UCR is useful in tests to avoid extra UCR setup where unnecessary
        processors.insert(0, ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes forms.

    Processors include the UCR processor (omitted when ``skip_ucr=True``),
    an elastic processor for the form index, and — gated by settings —
    an unknown-users processor, a form submission metadata tracker, and a
    report-forms elastic processor.

    Args:
        skip_ucr: set to True to omit the UCR processor, useful in tests
            to avoid extra UCR setup.
        topics: optional subset of FORM_TOPICS to consume from.
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        # fixed message: this pillow processes forms, not cases
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(
            process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        # non-enterprise deployments also index into the report-forms index
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=processors,
                             processor_chunk_size=processor_chunk_size)