def get_change_feed_pillow_for_db(pillow_id, couch_db, default_topic=None):
    """Generic pillow for inserting Couch documents into Kafka.

    Reads from:
      - CouchDB

    Writes to:
      - Kafka (topic chosen by the processor; ``default_topic`` is the fallback)
    """
    feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, feed.sequence_format)
    kafka_processor = KafkaProcessor(
        data_source_type=data_sources.SOURCE_COUCH,
        data_source_name=couch_db.dbname,
        default_topic=default_topic,
    )
    # Checkpoint every 100 processed changes.
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=kafka_processor,
        change_processed_event_handler=event_handler,
    )
def get_change_feed_pillow_for_db(pillow_id, couch_db):
    """Build a pillow that forwards changes from one Couch DB into Kafka.

    The Couch change feed supplies the sequence format for the checkpoint;
    a checkpoint event is recorded every 100 changes.
    """
    feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, feed.sequence_format)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=KafkaProcessor(
            data_source_type=data_sources.SOURCE_COUCH,
            data_source_name=couch_db.dbname,
        ),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def get_group_to_user_pillow(pillow_id='GroupToUserPillow'):
    """Consume group changes from Kafka and apply them via
    ``GroupsToUsersProcessor`` against the user ES index checkpoint.

    The pillow id is fixed because the checkpoint document is keyed on it.
    """
    assert pillow_id == 'GroupToUserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO)
    feed = KafkaChangeFeed(topics=[GROUP], group_id='groups-to-users')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=GroupsToUsersProcessor(),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def get_change_feed_pillow_for_db(pillow_id, couch_db):
    """Build a Couch-to-Kafka pillow for the given database.

    Changes are read with full documents included and handed to a
    ``KafkaProcessor`` bound to an explicit Kafka client.
    """
    kafka_client = get_kafka_client_or_none()
    # NOTE(review): sibling definitions pass data_sources.SOURCE_COUCH here —
    # confirm data_sources.COUCH is intended for this variant.
    kafka_processor = KafkaProcessor(
        kafka_client,
        data_source_type=data_sources.COUCH,
        data_source_name=couch_db.dbname,
    )
    feed = CouchChangeFeed(couch_db, include_docs=True)
    checkpoint = PillowCheckpoint(pillow_id, feed.sequence_format)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=kafka_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def get_user_sync_history_pillow(pillow_id='UpdateUserSyncHistoryPillow', **kwargs):
    """
    This gets a pillow which iterates through all synclogs
    """
    # kwargs are accepted for call-site compatibility but ignored here.
    feed = CouchChangeFeed(SyncLog.get_db(), include_docs=True)
    # Checkpoint id is fixed to 'synclog' rather than derived from pillow_id.
    checkpoint = PillowCheckpoint('synclog', feed.sequence_format)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=UserSyncHistoryProcessor(),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100),
    )
def get_ledger_to_elasticsearch_pillow(pillow_id='LedgerToElasticsearchPillow'):
    """Index ledger changes from Kafka into the ledger ES index.

    Documents are prepared with ``_prepare_ledger_for_es`` before indexing.
    The pillow id is fixed because the checkpoint is keyed on it.
    """
    assert pillow_id == 'LedgerToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, LEDGER_INDEX_INFO)
    es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=LEDGER_INDEX_INFO,
        doc_prep_fn=_prepare_ledger_for_es,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[topics.LEDGER], group_id='ledgers-to-es'),
        processor=es_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100),
    )
def get_app_to_elasticsearch_pillow(pillow_id='ApplicationToElasticsearchPillow'):
    """Index application changes from Kafka into the app ES index.

    Documents pass through ``transform_app_for_es`` before indexing.
    The pillow id is fixed because the checkpoint is keyed on it.
    """
    assert pillow_id == 'ApplicationToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, APP_INDEX_INFO)
    feed = KafkaChangeFeed(topics=[topics.APP], group_id='apps-to-es')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=APP_INDEX_INFO,
            doc_prep_fn=transform_app_for_es,
        ),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def get_domain_kafka_to_elasticsearch_pillow(pillow_id='KafkaDomainPillow'):
    """Index domain changes from Kafka into the domain ES index.

    Documents pass through ``transform_domain_for_elasticsearch`` first.
    The pillow id is fixed because the checkpoint is keyed on it.
    """
    assert pillow_id == 'KafkaDomainPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, DOMAIN_INDEX_INFO)
    es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=DOMAIN_INDEX_INFO,
        doc_prep_fn=transform_domain_for_elasticsearch,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[DOMAIN], group_id='domains-to-es'),
        processor=es_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def __init__(self, indicator_class, processor):
    """Wire a fluff indicator class into a Kafka-backed pillow.

    The Kafka topic comes from an instance of ``indicator_class``; the
    pillow/checkpoint names are derived from the class name and machine id.
    """
    self.indicator_class = indicator_class
    # Instantiating the indicator is how its kafka_topic is exposed.
    self.kafka_topic = indicator_class().kafka_topic
    self.domains = processor.domains
    self.doc_type = processor.doc_type
    pillow_name = '{}Pillow'.format(indicator_class.__name__)
    checkpoint_id = 'fluff.{}.{}'.format(pillow_name, get_machine_id())
    checkpoint = PillowCheckpoint(checkpoint_id)
    feed = KafkaChangeFeed(
        topics=[self.kafka_topic],
        group_id=indicator_class.__name__,
    )
    super(FluffPillow, self).__init__(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=1000,
        )
    )
def _get_mvp_indicator_pillow(pillow_id, processor):
    """Build an MVP indicator pillow over the XFormInstance Couch DB.

    The Couch feed is filtered server-side by the processor's domains and
    doc types; the checkpoint is keyed per pillow and machine.
    """
    checkpoint_id = 'mvp_docs.pillows.{}.{}'.format(pillow_id, get_machine_id())
    checkpoint = PillowCheckpoint(checkpoint_id)
    view_params = {
        'domains': ' '.join(processor.domains),
        'doc_types': ' '.join(processor.doc_types),
    }
    change_feed = CouchChangeFeed(
        XFormInstance.get_db(),
        include_docs=True,
        couch_filter='hqadmin/domains_and_doc_types',
        extra_couch_view_params=view_params,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100),
    )
def get_sql_sms_pillow(pillow_id='SqlSMSPillow'):
    """Index SMS changes from Kafka into the SMS ES index, unmodified.

    The identity ``doc_prep_fn`` means documents are indexed as-is.
    The pillow id is fixed because the checkpoint is keyed on it.
    """
    assert pillow_id == 'SqlSMSPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, SMS_INDEX_INFO)
    feed = KafkaChangeFeed(
        topics=[topics.SMS],
        group_id=SMS_PILLOW_KAFKA_CONSUMER_GROUP_ID,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=SMS_INDEX_INFO,
            doc_prep_fn=lambda x: x,
        ),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def _get_blob_deletion_pillow(pillow_id, couch_db, checkpoint=None, change_feed=None):
    """Build a pillow that deletes blobs for documents removed from Couch.

    ``checkpoint`` and ``change_feed`` may be injected (e.g. for tests);
    defaults are constructed from ``pillow_id`` and ``couch_db``.
    """
    if checkpoint is None:
        checkpoint = PillowCheckpoint(pillow_id)
    if change_feed is None:
        # Document bodies are not needed for deletion, only ids.
        change_feed = CouchChangeFeed(couch_db, include_docs=False)
    deletion_processor = BlobDeletionProcessor(get_blob_db(), couch_db.dbname)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=deletion_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=KAFKA_CHECKPOINT_FREQUENCY,
        ),
    )
def get_group_pillow(pillow_id='GroupPillow'):
    """Index group changes from Kafka into the group ES index.

    (The previous docstring described a user/xform pillow; this pillow
    consumes the GROUP topic and writes to GROUP_INDEX_INFO.)
    The pillow id is fixed because the checkpoint is keyed on it.
    """
    assert pillow_id == 'GroupPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, GROUP_INDEX_INFO)
    es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=GROUP_INDEX_INFO,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=KafkaChangeFeed(topics=[GROUP], group_id='groups-to-es'),
        processor=es_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )