def create_unit_of_work(process_name, first_object_id, last_object_id, timestamp='UNIT_TEST'):
    """Insert a new unit_of_work record for the given process and id range.

    :param process_name: registered process name; used to resolve source/target
        collections and the logger via ProcessContext
    :param first_object_id: id of the first object in the processing slice
    :param last_object_id: id of the last object in the processing slice
    :param timestamp: timeperiod stamp for the unit of work; defaults to
        'UNIT_TEST' to preserve the original test-helper behavior
    :return: id of the inserted unit_of_work document
    """
    source_collection = ProcessContext.get_source_collection(process_name)
    target_collection = ProcessContext.get_target_collection(process_name)
    logger = ProcessContext.get_logger(process_name)

    unit_of_work = UnitOfWorkEntry()
    unit_of_work.set_timestamp(timestamp)
    unit_of_work.set_start_id(first_object_id)
    unit_of_work.set_end_id(last_object_id)
    unit_of_work.set_source_collection(source_collection)
    unit_of_work.set_target_collection(target_collection)
    unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
    unit_of_work.set_process_name(process_name)
    unit_of_work.set_number_of_retries(0)
    # record creation time, consistent with compute_scope_of_processing
    unit_of_work.set_created_at(datetime.utcnow())

    uow_id = unit_of_work_helper.insert(logger, unit_of_work)
    return uow_id
def compute_scope_of_processing(self, process_name, start_time, end_time, time_record):
    """Identify the slice of source data for a timeperiod and publish a unit of work.

    Queries the source collection for documents whose timestamp falls in
    [start_time, end_time), builds a unit_of_work spanning the first and last
    matching _id, inserts it, and publishes its id to the process queue.

    :raises LookupError: when no documents exist in the timeperiod
    :raises DuplicateKeyError: when an identical unit_of_work already exists;
        the exception is annotated with slice details before re-raising
    :return: the inserted UnitOfWorkEntry
    """
    source_collection_name = ProcessContext.get_source_collection(process_name)
    target_collection_name = ProcessContext.get_target_collection(process_name)
    source_collection = CollectionContext.get_collection(self.logger, source_collection_name)

    query = {AbstractModel.TIMESTAMP: {'$gte': start_time, '$lt': end_time}}

    # boundary lookups: cheapest way to get first/last _id in the timeperiod
    asc_cursor = source_collection.find(spec=query, fields='_id').sort('_id', ASCENDING).limit(1)
    if asc_cursor.count() == 0:
        raise LookupError('No messages in timeperiod: %s:%s in collection %s'
                          % (start_time, end_time, source_collection_name))
    first_object_id = asc_cursor[0]['_id']

    desc_cursor = source_collection.find(spec=query, fields='_id').sort('_id', DESCENDING).limit(1)
    last_object_id = desc_cursor[0]['_id']

    unit_of_work = UnitOfWorkEntry()
    unit_of_work.set_timestamp(start_time)
    unit_of_work.set_start_id(str(first_object_id))
    unit_of_work.set_end_id(str(last_object_id))
    unit_of_work.set_start_timestamp(start_time)
    unit_of_work.set_end_timestamp(end_time)
    unit_of_work.set_created_at(datetime.utcnow())
    unit_of_work.set_source_collection(source_collection_name)
    unit_of_work.set_target_collection(target_collection_name)
    unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
    unit_of_work.set_process_name(process_name)
    unit_of_work.set_number_of_retries(0)

    try:
        uow_id = unit_of_work_helper.insert(self.logger, unit_of_work)
    except DuplicateKeyError as e:
        # annotate the exception so the caller can identify the colliding slice
        e.first_object_id = str(first_object_id)
        e.last_object_id = str(last_object_id)
        e.process_name = process_name
        e.timestamp = start_time
        raise e

    self.publishers.get_publisher(process_name).publish(str(uow_id))
    msg = 'Published: UOW %r for %r in timeperiod %r.' % (uow_id, process_name, start_time)
    self._log_message(INFO, process_name, time_record, msg)
    return unit_of_work
def _get_source_collection(self):
    """Return the collection holding the data this process consumes."""
    collection_name = ProcessContext.get_source_collection(self.process_name)
    return CollectionContext.get_collection(self.logger, collection_name)