def test_checkpoint_with_multiple_topics(self):
        """A single checkpoint should track per-(topic, partition) offsets
        across multiple Kafka topics, and stay in sync with the feed after
        each processing pass.
        """
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        pillow_name = 'test-multi-topic-checkpoints'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )
        offsets = feed.get_latest_offsets()
        # set literal instead of set([...]) — same value, idiomatic (flake8 C405)
        self.assertEqual({(topics.FORM, 0), (topics.CASE, 0)}, set(offsets.keys()))

        # send a few changes to kafka so they should be picked up by the pillow
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        # CASE_SQL is not subscribed — must NOT be counted below
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(since=offsets, forever=False)
        self.assertEqual(4, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
        # a second round, resuming from the stored checkpoint
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(8, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
    def test_dont_create_checkpoint_past_current(self):
        """The stored checkpoint must never get ahead of Kafka's actual offsets."""
        pillow_name = 'test-checkpoint-reset'

        # wire up the change feed and a counting pillow
        change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS, group_id='test-kafka-feed')
        checkpoint = PillowCheckpoint(pillow_name, change_feed.sequence_format)
        counting_processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=change_feed,
            processor=counting_processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=change_feed
            )
        )

        starting_offsets = change_feed.get_latest_offsets()
        expected_offsets = deepcopy(starting_offsets)
        # nothing processed yet — both views of the checkpoint are empty
        self.assertEqual(change_feed.get_current_checkpoint_offsets(), {})
        self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

        publish_stub_change(topics.COMMCARE_USER)
        # the following line causes tests to fail if you have multiple partitions
        expected_offsets[(topics.COMMCARE_USER, 0)] += 1
        pillow.process_changes(since=starting_offsets, forever=False)
        self.assertEqual(1, counting_processor.count)
        self.assertEqual(change_feed.get_current_checkpoint_offsets(), expected_offsets)
Пример #3
0
def get_change_feed_pillow_for_db(pillow_id, couch_db, default_topic=None):
    """Generic pillow for inserting Couch documents into Kafka.

    Reads from:
      - CouchDB

    Writes to:
      - Kafka
    """
    change_feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, change_feed.sequence_format)
    kafka_processor = KafkaProcessor(
        data_source_type=data_sources.SOURCE_COUCH,
        data_source_name=couch_db.dbname,
        default_topic=default_topic,
    )
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=kafka_processor,
        change_processed_event_handler=event_handler,
    )
Пример #4
0
def GetDocPillow():
    """Build a fake pillow that feeds random changes into GetDocProcessor."""
    checkpoint = PillowCheckpoint('get_doc_processor', 'text')
    return FakeConstructedPillow(
        name='GetDocPillow',
        checkpoint=checkpoint,
        change_feed=RandomChangeFeed(10),
        processor=GetDocProcessor(),
    )
Пример #5
0
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1,
        process_num=0,
        **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions. This could be expanded to be more generic and include
    other processing that needs to happen on each form
    """
    # NOTE: 'form-processsor' typo is historical; changing it would reset the
    # Kafka consumer group, so it is kept as-is.
    change_feed = KafkaChangeFeed(
        topics=[topics.FORM, topics.FORM_SQL],
        group_id='form-processsor',
        num_processes=num_processes,
        process_num=process_num,
    )
    checkpoint = PillowCheckpoint(
        'form-submission-metadata-tracker', change_feed.sequence_format)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=FormSubmissionMetadataTrackerProcessor(),
        change_processed_event_handler=event_handler,
    )
Пример #6
0
    def __init__(self,
                 indicator_name,
                 kafka_topic,
                 processor,
                 domains=None,
                 doc_type=None):
        """Wire up a fluff pillow for one indicator's Kafka topic.

        domains/doc_type default to the processor's own values when not given.
        """
        self.kafka_topic = kafka_topic
        self.domains = domains or processor.domains
        self.doc_type = doc_type or processor.doc_type

        pillow_name = '{}Pillow'.format(indicator_name)
        change_feed = KafkaChangeFeed(topics=[self.kafka_topic],
                                      group_id=indicator_name)
        # checkpoint id includes the machine id so each host tracks its own progress
        checkpoint = PillowCheckpoint(
            'fluff.{}.{}'.format(pillow_name, get_machine_id()),
            change_feed.sequence_format)
        event_handler = KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=1000,
            change_feed=change_feed)

        super(FluffPillow, self).__init__(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=change_feed,
            processor=processor,
            change_processed_event_handler=event_handler)
Пример #7
0
def get_main_blob_deletion_pillow(pillow_id):
    """Get blob deletion pillow for the main couch database

    Using the KafkaChangeFeed ties this to the main couch database.
    """
    checkpoint = PillowCheckpoint('kafka-blob-deletion-pillow-checkpoint')
    change_feed = KafkaChangeFeed(topics=[topics.META], group_id='blob-deletion-group')
    return _get_blob_deletion_pillow(pillow_id, get_db(None), checkpoint, change_feed)
Пример #8
0
def make_fake_constructed_pillow(pillow_id, checkpoint_id):
    """Build a fake pillow that logs random changes, for tests."""
    from pillowtop.feed.mock import RandomChangeFeed
    from pillowtop.processors import LoggingProcessor

    return FakeConstructedPillow(
        name=pillow_id,
        checkpoint=PillowCheckpoint(checkpoint_id, 'text'),
        change_feed=RandomChangeFeed(10),
        processor=LoggingProcessor(),
    )
Пример #9
0
def _make_couch_pillow(couch_db):
    """Build a fake couch-backed pillow whose process_change is stubbed out."""
    from pillowtop.checkpoints.manager import PillowCheckpoint
    from pillowtop.feed.couch import CouchChangeFeed
    from pillowtop.processors import LoggingProcessor

    fake_pillow = FakeConstructedPillow(
        name='fake-couch-pillow',
        checkpoint=PillowCheckpoint('fake-feed-test-checkpoint', 'text'),
        change_feed=CouchChangeFeed(couch_db=couch_db),
        processor=LoggingProcessor(),
    )
    # report every change as successfully processed without doing any work
    fake_pillow.process_change = MagicMock(return_value=True)
    return fake_pillow
Пример #10
0
def get_change_feed_pillow_for_db(pillow_id, couch_db):
    """Build a pillow that forwards Couch changes for *couch_db* into Kafka."""
    change_feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, change_feed.sequence_format)
    kafka_processor = KafkaProcessor(
        data_source_type=data_sources.SOURCE_COUCH, data_source_name=couch_db.dbname
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=kafka_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100,
        ),
    )
Пример #11
0
def get_change_feed_pillow_for_db(pillow_id, couch_db):
    """Build a pillow that pushes Couch documents for *couch_db* into Kafka.

    The Kafka client may be None when Kafka is unavailable.
    """
    change_feed = CouchChangeFeed(couch_db, include_docs=True)
    checkpoint = PillowCheckpoint(pillow_id, change_feed.sequence_format)
    kafka_client = get_kafka_client_or_none()
    kafka_processor = KafkaProcessor(
        kafka_client, data_source_type=data_sources.COUCH, data_source_name=couch_db.dbname
    )
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=kafka_processor,
        change_processed_event_handler=event_handler,
    )
Пример #12
0
 def __init__(self, processor, pillow_name):
     """Build a configurable-report pillow listening on every Kafka topic."""
     change_feed = KafkaChangeFeed(topics.ALL, group_id=pillow_name)
     checkpoint = PillowCheckpoint(pillow_name)
     super(ConfigurableReportKafkaPillow, self).__init__(
         name=pillow_name,
         change_feed=change_feed,
         processor=processor,
         checkpoint=checkpoint,
         change_processed_event_handler=MultiTopicCheckpointEventHandler(
             checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed
         )
     )
     # set by the superclass constructor
     assert self._processor is not None
     assert self._processor.bootstrapped is not None
Пример #13
0
def get_user_sync_history_pillow(pillow_id='UpdateUserSyncHistoryPillow',
                                 **kwargs):
    """
    This gets a pillow which iterates through all synclogs
    """
    change_feed = CouchChangeFeed(SyncLog.get_db(), include_docs=True)
    checkpoint = PillowCheckpoint('synclog', change_feed.sequence_format)
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=100)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=UserSyncHistoryProcessor(),
        change_processed_event_handler=event_handler,
    )
Пример #14
0
def _get_mvp_indicator_pillow(pillow_id, processor):
    """Build an MVP indicator pillow filtered to the processor's domains/doc types."""
    # checkpoint id includes the machine id so each host tracks its own progress
    checkpoint_id = 'mvp_docs.pillows.{}.{}'.format(pillow_id, get_machine_id())
    checkpoint = PillowCheckpoint(checkpoint_id)
    change_feed = CouchChangeFeed(
        XFormInstance.get_db(),
        include_docs=True,
        couch_filter='hqadmin/domains_and_doc_types',
        extra_couch_view_params={
            'domains': ' '.join(processor.domains),
            'doc_types': ' '.join(processor.doc_types),
        })
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100),
    )
Пример #15
0
def _get_blob_deletion_pillow(pillow_id,
                              couch_db,
                              checkpoint=None,
                              change_feed=None):
    """Build a pillow that deletes blobs for documents removed from *couch_db*.

    checkpoint and change_feed may be supplied by the caller; defaults are
    created from pillow_id / couch_db when they are None.
    """
    if checkpoint is None:
        checkpoint = PillowCheckpoint(pillow_id)
    if change_feed is None:
        change_feed = CouchChangeFeed(couch_db, include_docs=False)
    blob_processor = BlobDeletionProcessor(get_blob_db(), couch_db.dbname)
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=KAFKA_CHECKPOINT_FREQUENCY,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=blob_processor,
        change_processed_event_handler=event_handler,
    )
Пример #16
0
    def __init__(self, indicator_class, processor):
        """Wire up a fluff pillow for *indicator_class*'s Kafka topic."""
        self.indicator_class = indicator_class
        self.kafka_topic = indicator_class().kafka_topic
        self.domains = processor.domains
        self.doc_type = processor.doc_type

        pillow_name = '{}Pillow'.format(indicator_class.__name__)
        # checkpoint id includes the machine id so each host tracks its own progress
        checkpoint = PillowCheckpoint('fluff.{}.{}'.format(pillow_name, get_machine_id()))
        change_feed = KafkaChangeFeed(topics=[self.kafka_topic], group_id=indicator_class.__name__)

        super(FluffPillow, self).__init__(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=change_feed,
            processor=processor,
            change_processed_event_handler=PillowCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1000,
            )
        )
Пример #17
0
    def test_basic(self):
        """Chunked processing is preferred, with per-change fallback on errors."""
        change_feed = KafkaChangeFeed(topics=[topics.CASE],
                                      client_id='test-kafka-feed')
        pillow_name = 'test-chunked-processing'
        checkpoint = PillowCheckpoint(pillow_name, change_feed.sequence_format)
        processor = ChunkedCountProcessor()
        # keep references so the real implementations can be restored later
        real_process_change = processor.process_change
        real_process_changes_chunk = processor.process_changes_chunk

        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=change_feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint,
                checkpoint_frequency=1,
                change_feed=change_feed),
            processor_chunk_size=2)

        start_offsets = change_feed.get_latest_offsets()
        self._produce_changes(2)
        # the chunked path must be taken: break process_change so any call fails
        processor.process_change = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=start_offsets, forever=False)
        self.assertEqual(processor.count, 2)

        self._produce_changes(2)
        # when the chunked path raises, each change is retried individually
        processor.process_change = real_process_change
        processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(),
                               forever=False)
        self.assertEqual(processor.count, 4)

        self._produce_changes(1)
        # a partial (smaller-than-chunk-size) tail is still processed
        processor.process_change = MagicMock(side_effect=Exception('_'))
        processor.process_changes_chunk = real_process_changes_chunk
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(),
                               forever=False)
        self.assertEqual(processor.count, 5)
Пример #18
0
def get_app_form_submission_tracker_pillow(
        pillow_id='AppFormSubmissionTrackerPillow'):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions. This could be expanded to be more generic and include
    other processing that needs to happen on each form
    """
    # NOTE: 'form-processsor' typo is historical; changing it would reset the
    # Kafka consumer group, so it is kept as-is.
    change_feed = KafkaChangeFeed(topics=[topics.FORM, topics.FORM_SQL],
                                  group_id='form-processsor')
    checkpoint = PillowCheckpoint('app-form-submission-tracker')
    event_handler = MultiTopicCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
        change_feed=change_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=AppFormSubmissionTrackerProcessor(),
        change_processed_event_handler=event_handler,
    )
 def checkpoint(self):
     """Return a text-format PillowCheckpoint for this object's checkpoint id."""
     checkpoint_id = self._checkpoint_id
     return PillowCheckpoint(checkpoint_id, 'text')
 def test_checkpoint_id(self):
     """The checkpoint exposes the id it was constructed with."""
     expected_id = 'test-checkpoint-id'
     checkpoint = PillowCheckpoint(expected_id, 'text')
     self.assertEqual(expected_id, checkpoint.checkpoint_id)
Пример #21
0
 def __init__(self):
     """Construct the fake pillow with a canned checkpoint, feed and processor."""
     super(FakePillow, self).__init__(
         'fake pillow',
         PillowCheckpoint('test_pillow_import', 'text'),
         RandomChangeFeed(10),
         LoggingProcessor())