def get_xform_to_elasticsearch_pillow(pillow_id='XFormToElasticsearchPillow', num_processes=1,
                                      process_num=0, **kwargs):
    """Return a pillow wiring a Kafka change feed for forms to an Elasticsearch processor.

    The change feed is created over ``topics.FORM_TOPICS`` and the single
    ``ElasticProcessor`` is configured with ``XFORM_INDEX_INFO``,
    ``transform_xform_for_elasticsearch`` and ``xform_pillow_filter``.

    Args:
        pillow_id: Must equal ``'XFormToElasticsearchPillow'``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    if pillow_id != 'XFormToElasticsearchPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, XFORM_INDEX_INFO, topics.FORM_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    kafka_change_feed = KafkaChangeFeed(
        topics=topics.FORM_TOPICS,
        group_id='forms-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    # The event handler receives the same checkpoint and change feed objects
    # that are handed to the pillow itself.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=event_handler,
    )
def get_xform_to_elasticsearch_pillow(pillow_id='XFormToElasticsearchPillow', num_processes=1,
                                      process_num=0, **kwargs):
    """Return a pillow wiring a Kafka change feed for forms to an Elasticsearch processor.

    The change feed is created over ``FORM_TOPICS`` with client id
    ``'forms-to-es'``; the single ``ElasticProcessor`` is configured with
    ``XFORM_INDEX_INFO``, ``transform_xform_for_elasticsearch`` and
    ``xform_pillow_filter``.

    Args:
        pillow_id: Must equal ``'XFormToElasticsearchPillow'``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    # TODO: remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    if pillow_id != 'XFormToElasticsearchPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, XFORM_INDEX_INFO, FORM_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    kafka_change_feed = KafkaChangeFeed(
        topics=FORM_TOPICS,
        client_id='forms-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    # The event handler receives the same checkpoint and change feed objects
    # that are handed to the pillow itself.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=event_handler,
    )
def get_case_to_elasticsearch_pillow(pillow_id='CaseToElasticsearchPillow', num_processes=1,
                                     process_num=0, **kwargs):
    """Return a pillow that processes cases to Elasticsearch.

    Processors:
      - :py:class:`pillowtop.processors.elastic.ElasticProcessor`

    The change feed is created over ``CASE_TOPICS`` with client id
    ``'cases-to-es'``; the processor is configured with ``CASE_INDEX_INFO``,
    ``transform_case_for_elasticsearch`` and the change filter
    ``is_couch_change_for_sql_domain``.

    Args:
        pillow_id: Must equal ``'CaseToElasticsearchPillow'``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    # TODO: remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    if pillow_id != 'CaseToElasticsearchPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_INDEX_INFO, CASE_TOPICS)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
        change_filter_fn=is_couch_change_for_sql_domain,
    )
    kafka_change_feed = KafkaChangeFeed(
        topics=CASE_TOPICS,
        client_id='cases-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    # The event handler receives the same checkpoint and change feed objects
    # that are handed to the pillow itself.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=event_handler,
    )
def get_case_to_elasticsearch_pillow(pillow_id='CaseToElasticsearchPillow', num_processes=1,
                                     process_num=0, **kwargs):
    """Return a pillow wiring a Kafka change feed for cases to an Elasticsearch processor.

    The change feed is created over ``topics.CASE_TOPICS`` with client id
    ``'cases-to-es'``; the single ``ElasticProcessor`` is configured with
    ``CASE_INDEX_INFO`` and ``transform_case_for_elasticsearch``.

    Args:
        pillow_id: Must equal ``'CaseToElasticsearchPillow'``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    if pillow_id != 'CaseToElasticsearchPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_INDEX_INFO, topics.CASE_TOPICS)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    kafka_change_feed = KafkaChangeFeed(
        topics=topics.CASE_TOPICS,
        client_id='cases-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    # The event handler receives the same checkpoint and change feed objects
    # that are handed to the pillow itself.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=event_handler,
    )
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases.

    The processors include UCR and elastic processors. The processor list is
    built as: optionally the UCR processor first, then the case-to-ES
    processor and ``CaseMessagingSyncProcessor``, then the case search
    processor when ``settings.RUN_CASE_SEARCH_PILLOW`` is set, then the
    report-ES processor when ``settings.ENTERPRISE_MODE`` is not set.

    Args:
        pillow_id: Name of the pillow; also used as the Kafka client id and as
            the prefix of the checkpoint id.
        ucr_division: Passed to ``ConfigurableReportPillowProcessor``.
        include_ucrs: Passed to ``ConfigurableReportPillowProcessor``.
        exclude_ucrs: Passed to ``ConfigurableReportPillowProcessor``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``; migrations are
            enabled on the UCR processor only when this is 0.
        ucr_configs: When truthy, used to bootstrap the UCR processor.
        skip_ucr: Can be set to True to avoid passing the UCR processor,
            useful for tests. The UCR processor is still created and used as
            the checkpoint callback.
        processor_chunk_size: Passed through to ``ConstructedPillow``.
        topics: Optional subset of ``CASE_TOPICS``; a falsy value means all of
            ``CASE_TOPICS``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the pieces above.

    Raises:
        AssertionError: If ``topics`` contains anything outside ``CASE_TOPICS``.
    """
    if topics and not set(topics).issubset(CASE_TOPICS):
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError("This is a pillow to process cases only")
    topics = topics or CASE_TOPICS

    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareCase'),
            StaticDataSourceProvider('CommCareCase'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    case_search_processor = get_case_search_processor()

    # The checkpoint id combines the pillow id, both index names and a fixed
    # 'messaging-sync' suffix.
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id,
        CASE_INDEX_INFO.index,
        case_search_processor.index_info.index,
        'messaging-sync',
    )
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )

    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_ledger_to_elasticsearch_pillow(pillow_id='LedgerToElasticsearchPillow'):
    """Return a pillow wiring a Kafka change feed for ledgers to an Elasticsearch processor.

    The change feed is created over ``[topics.LEDGER]`` with group id
    ``'ledgers-to-es'``; the single ``ElasticProcessor`` is configured with
    ``LEDGER_INDEX_INFO`` and ``_prepare_ledger_for_es``.

    Args:
        pillow_id: Must equal ``'LedgerToElasticsearchPillow'``.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    if pillow_id != 'LedgerToElasticsearchPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, LEDGER_INDEX_INFO)
    processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=LEDGER_INDEX_INFO,
        doc_prep_fn=_prepare_ledger_for_es,
    )
    change_feed = KafkaChangeFeed(topics=[topics.LEDGER], group_id='ledgers-to-es')
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=event_handler,
    )
def get_case_to_elasticsearch_pillow(pillow_id='CaseToElasticsearchPillow'):
    """Return a pillow wiring a Kafka change feed for cases to an Elasticsearch processor.

    The change feed is created over ``[topics.CASE, topics.CASE_SQL]`` with
    group id ``'cases-to-es'``; the single ``ElasticProcessor`` is configured
    with ``CASE_INDEX_INFO`` and ``transform_case_for_elasticsearch``.

    Args:
        pillow_id: Must equal ``'CaseToElasticsearchPillow'``.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    if pillow_id != 'CaseToElasticsearchPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, CASE_INDEX_INFO)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    kafka_change_feed = KafkaChangeFeed(
        topics=[topics.CASE, topics.CASE_SQL],
        group_id='cases-to-es',
    )
    # The event handler receives the same checkpoint and change feed objects
    # that are handed to the pillow itself.
    event_handler = MultiTopicCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=event_handler,
    )
def get_sql_sms_pillow(pillow_id='SqlSMSPillow'):
    """Return a pillow wiring a Kafka change feed for SMS to an Elasticsearch processor.

    The change feed is created over ``[topics.SMS]`` with group id
    ``SMS_PILLOW_KAFKA_CONSUMER_GROUP_ID``; the single ``ElasticProcessor`` is
    configured with ``SMS_INDEX_INFO`` and an identity ``doc_prep_fn``.

    Args:
        pillow_id: Must equal ``'SqlSMSPillow'``.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    if pillow_id != 'SqlSMSPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, SMS_INDEX_INFO)
    processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=SMS_INDEX_INFO,
        doc_prep_fn=lambda x: x,  # documents are passed on unchanged
    )
    change_feed = KafkaChangeFeed(
        topics=[topics.SMS],
        group_id=SMS_PILLOW_KAFKA_CONSUMER_GROUP_ID,
    )
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=event_handler,
    )
def get_sql_sms_pillow(pillow_id='SqlSMSPillow', num_processes=1, process_num=0, **kwargs):
    """Return a pillow wiring a Kafka change feed for SMS to an Elasticsearch processor.

    The change feed is created over ``[topics.SMS]`` with client id
    ``'sql-sms-to-es'``; the single ``ElasticProcessor`` is configured with
    ``SMS_INDEX_INFO`` and an identity ``doc_prep_fn``.

    Args:
        pillow_id: Must equal ``'SqlSMSPillow'``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the checkpoint, change feed,
        processor and checkpoint event handler created here.

    Raises:
        AssertionError: If ``pillow_id`` differs from the expected name.
    """
    if pillow_id != 'SqlSMSPillow':
        # Raised explicitly instead of via ``assert`` so the guard still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before.
        raise AssertionError('Pillow ID is not allowed to change')

    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, SMS_INDEX_INFO, [topics.SMS])
    processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=SMS_INDEX_INFO,
        doc_prep_fn=lambda x: x,  # documents are passed on unchanged
    )
    change_feed = KafkaChangeFeed(
        topics=[topics.SMS],
        client_id='sql-sms-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    # The event handler receives the same checkpoint and change feed objects
    # that are handed to the pillow itself.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=event_handler,
    )
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes forms.

    The processor list is built as: the form-to-ES processor, then
    ``UnknownUsersProcessor`` when ``settings.RUN_UNKNOWN_USER_PILLOW`` is
    set, then ``FormSubmissionMetadataTrackerProcessor`` when
    ``settings.RUN_FORM_META_PILLOW`` is set, then the report-forms ES
    processor when ``settings.ENTERPRISE_MODE`` is not set, and finally the
    UCR processor unless ``skip_ucr`` is true.

    Args:
        pillow_id: Name of the pillow; also used as the Kafka client id and as
            the prefix of the checkpoint id.
        ucr_division: Passed to ``ConfigurableReportPillowProcessor``.
        include_ucrs: Passed to ``ConfigurableReportPillowProcessor``.
        exclude_ucrs: Passed to ``ConfigurableReportPillowProcessor``.
        num_processes: Passed through to ``KafkaChangeFeed``.
        process_num: Passed through to ``KafkaChangeFeed``; migrations are
            enabled on the UCR processor only when this is 0.
        ucr_configs: When truthy, used to bootstrap the UCR processor.
        skip_ucr: When true the UCR processor is left out of the processor
            list. It is still created and used as the checkpoint callback.
        processor_chunk_size: Passed through to ``ConstructedPillow``.
        topics: Optional subset of ``FORM_TOPICS``; a falsy value means all of
            ``FORM_TOPICS``.
        **kwargs: Accepted and ignored.

    Returns:
        The ``ConstructedPillow`` assembled from the pieces above.

    Raises:
        AssertionError: If ``topics`` contains anything outside ``FORM_TOPICS``.
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX

    if topics and not set(topics).issubset(FORM_TOPICS):
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; AssertionError is kept so callers see the same
        # exception type as before. The message previously said "cases",
        # copied from the case pillow.
        raise AssertionError("This is a pillow to process forms only")
    topics = topics or FORM_TOPICS

    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    # Both optional processors are instantiated up front and only added to the
    # processor list below when their setting is enabled.
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()

    # The checkpoint id combines the pillow id with the three index names.
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id,
        XFORM_INDEX_INFO.index,
        REPORT_XFORM_INDEX_INFO.index,
        USER_INDEX,
    )
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
        )
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )