def _process_state_final_run(self, process_name, time_record):
    """Handle a timetable record that is in STATE_FINAL_RUN.

    Depending on the state of the related unit_of_work the record is
    promoted to STATE_PROCESSED, moved to STATE_SKIPPED, or left untouched;
    the outcome is always logged via _log_message.
    """
    uow_id = time_record.get_related_unit_of_work()
    uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
    uow_state = uow_obj.get_state()

    if uow_state == UnitOfWorkEntry.STATE_PROCESSED:
        self.timetable.update_timetable_record(process_name,
                                               time_record,
                                               uow_obj,
                                               TimeTableEntry.STATE_PROCESSED)
        # rebuild the tree so the processed timeperiod is reflected in it
        timetable_tree = self.timetable.get_tree(process_name)
        timetable_tree.build_tree()
        msg = 'Transferred time-record %s in timeperiod %s to STATE_PROCESSED for %s'\
              % (time_record.get_document()['_id'], time_record.get_timestamp(), process_name)
    elif uow_state == UnitOfWorkEntry.STATE_CANCELED:
        self.timetable.update_timetable_record(process_name,
                                               time_record,
                                               uow_obj,
                                               TimeTableEntry.STATE_SKIPPED)
        msg = 'Transferred time-record %s in timeperiod %s to STATE_SKIPPED for %s'\
              % (time_record.get_document()['_id'], time_record.get_timestamp(), process_name)
    else:
        # unit_of_work is still pending - leave the record as-is
        msg = 'Suppressed creating uow for %s in timeperiod %s; time_record is in %s; uow is in %s'\
              % (process_name, time_record.get_timestamp(), time_record.get_state(), uow_state)

    self._log_message(INFO, process_name, time_record, msg)
def _callback_reprocess(self, process_name, timestamp, tree_node):
    """Answer a reprocessing request coming from the tree.

    It is possible that the timetable record will be transferred to
    STATE_IN_PROGRESS with no related unit_of_work; in that case the
    record is reset to STATE_EMBRYO instead.
    """
    time_record = tree_node.time_record
    uow_id = time_record.get_related_unit_of_work()

    if uow_id is None:
        # nothing to invalidate - restart the record from scratch
        time_record.set_state(TimeTableEntry.STATE_EMBRYO)
        msg = 'Transferred time-record %s in timeperiod %s to %s;'\
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state())
    else:
        # invalidate the existing unit_of_work so it gets re-run
        time_record.set_state(TimeTableEntry.STATE_IN_PROGRESS)
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
        uow_obj.set_state(uow_obj.STATE_INVALID)
        uow_obj.set_number_of_retries(0)
        uow_obj.set_created_at(datetime.utcnow())
        unit_of_work_helper.update(self.logger, uow_obj)
        msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s'\
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state(),
                 uow_obj.get_state())

    time_record.set_number_of_failures(0)
    self._save_time_record(process_name, time_record)
    self.logger.warning(msg)
    tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

    # register the node so the scheduler picks it up for reprocessing
    self.reprocess.setdefault(process_name, dict())[timestamp] = tree_node
def _process_state_in_progress(self, process_name, time_record, start_time):
    """Handle a timetable record that is in STATE_IN_PROGRESS.

    :param process_name: name of the process the timetable record belongs to
    :param time_record: the timetable record being processed
    :param start_time: timeperiod boundary the record covers

    When the timeperiod is still current (or the record cannot be finalized
    yet), the existing unit_of_work scope is extended or a new one created;
    when the timeperiod is in the past and finalizable, the record is
    transferred to FINAL RUN; a future timestamp is logged as an error.
    """
    end_time = time_helper.increment_time(process_name, start_time)
    actual_time = time_helper.actual_time(process_name)
    can_finalize_timerecord = self.timetable.can_finalize_timetable_record(process_name, time_record)
    uow_id = time_record.get_related_unit_of_work()
    uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))

    # idiom fix: use truthiness instead of "== False"/"== True" comparisons,
    # and a membership test instead of chained "get_state() ==" comparisons
    if start_time == actual_time or not can_finalize_timerecord:
        if uow_obj.get_state() in (UnitOfWorkEntry.STATE_INVALID,
                                   UnitOfWorkEntry.STATE_REQUESTED):
            # current uow has not been processed yet. update it
            self.update_scope_of_processing(process_name, uow_obj, start_time, end_time, time_record)
        else:
            # cls.STATE_IN_PROGRESS, cls.STATE_PROCESSED, cls.STATE_CANCELED
            # create new uow to cover new inserts
            self._compute_and_transfer_to_progress(process_name, start_time, end_time, time_record)
    elif start_time < actual_time and can_finalize_timerecord:
        # create new uow for FINAL RUN
        self._compute_and_transfer_to_final_run(process_name, start_time, end_time, time_record)
    else:
        msg = 'Time-record %s has timestamp from future %s vs current time %s'\
              % (time_record.get_document()['_id'], start_time, actual_time)
        self._log_message(ERROR, process_name, time_record, msg)
def _mq_callback(self, message):
    """ try/except wrapper
    in case exception breaks the abstract method, this method:
    - catches the exception
    - logs the exception
    - marks unit of work as INVALID"""
    unit_of_work = None
    try:
        # @param object_id: ObjectId of the unit_of_work from mq
        object_id = ObjectId(message.body)
        unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
        if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
            # garbage collector might have reposted this UOW
            # ack and bail out - the work was already done (or deliberately canceled)
            self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                % (str(message.body), unit_of_work.get_state()), exc_info=False)
            self.consumer.acknowledge(message.delivery_tag)
            return
    except Exception:
        # message body could not be resolved to a unit_of_work; ack so the
        # broker does not redeliver a poison message
        self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
        self.consumer.acknowledge(message.delivery_tag)
        return

    try:
        start_timestamp = unit_of_work.get_start_timestamp()
        end_timestamp = unit_of_work.get_end_timestamp()

        # mark the uow as started before launching the external process
        unit_of_work.set_state(UnitOfWorkEntry.STATE_IN_PROGRESS)
        unit_of_work.set_started_at(datetime.utcnow())
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.start_uow(unit_of_work)

        self._start_process(start_timestamp, end_timestamp)
        code = None
        alive = True
        # block until the spawned process terminates; _poll_process yields
        # (alive, exit_code) pairs
        while alive:
            alive, code = self._poll_process()

        if code == 0:
            unit_of_work.set_number_of_processed_documents(self.performance_ticker.posts_per_job)
            unit_of_work.set_finished_at(datetime.utcnow())
            unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
            self.performance_ticker.finish_uow()
        else:
            # non-zero exit code: leave the uow INVALID so it can be retried
            unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
            self.performance_ticker.cancel_uow()
        self.logger.info('Hadoop Map/Reduce return code is %r' % code)
        unit_of_work_helper.update(self.logger, unit_of_work)
    except Exception as e:
        # safety fuse: any failure marks the uow INVALID and cancels the ticker
        unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.cancel_uow()
        self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'\
                          % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
    finally:
        # NOTE(review): consumer is closed after every message - presumably this
        # worker handles one uow per consumer session; confirm against caller
        self.consumer.acknowledge(message.delivery_tag)
        self.consumer.close()
def _process_state_in_progress(self, process_name, time_record, start_time):
    """Handle a timetable record in STATE_IN_PROGRESS for Hadoop-backed processing.

    :param process_name: name of the process the timetable record belongs to
    :param time_record: the timetable record being processed
    :param start_time: timeperiod boundary the record covers

    While the related unit_of_work is still pending/running, no scheduler
    records are touched. Once it completes (PROCESSED or CANCELED), a new
    unit_of_work is created - either to cover new inserts (current
    timeperiod) or as the FINAL RUN (past, finalizable timeperiod).
    DuplicateKeyError on insert is recovered by locating the pre-existing
    unit_of_work.
    """
    end_time = time_helper.increment_time(process_name, start_time)
    actual_time = time_helper.actual_time(process_name)
    can_finalize_timerecord = self.timetable.can_finalize_timetable_record(process_name, time_record)
    uow_id = time_record.get_related_unit_of_work()
    uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
    # end_id of the uow doubles as the iteration counter for this process family
    iteration = int(uow_obj.get_end_id())

    # idiom fix: membership tests replace long "get_state() == ... or ..." chains,
    # and "== True"/"== False" comparisons are replaced by truthiness
    pending_states = (UnitOfWorkEntry.STATE_REQUESTED,
                      UnitOfWorkEntry.STATE_IN_PROGRESS,
                      UnitOfWorkEntry.STATE_INVALID)
    completed_states = (UnitOfWorkEntry.STATE_PROCESSED,
                        UnitOfWorkEntry.STATE_CANCELED)

    try:
        if start_time == actual_time or not can_finalize_timerecord:
            if uow_obj.get_state() in pending_states:
                # Hadoop processing takes more than 1 tick of Scheduler
                # Let the Hadoop processing complete - do no updates to Scheduler records
                pass
            elif uow_obj.get_state() in completed_states:
                # create new uow to cover new inserts
                uow_obj = self.insert_uow(process_name, start_time, end_time, iteration + 1, time_record)
                self.timetable.update_timetable_record(process_name,
                                                       time_record,
                                                       uow_obj,
                                                       TimeTableEntry.STATE_IN_PROGRESS)
        elif start_time < actual_time and can_finalize_timerecord:
            if uow_obj.get_state() in pending_states:
                # Hadoop processing has not started yet
                # Let the Hadoop processing complete - do no updates to Scheduler records
                pass
            elif uow_obj.get_state() in completed_states:
                # create new uow for FINAL RUN
                uow_obj = self.insert_uow(process_name, start_time, end_time, iteration + 1, time_record)
                self.timetable.update_timetable_record(process_name,
                                                       time_record,
                                                       uow_obj,
                                                       TimeTableEntry.STATE_FINAL_RUN)
        else:
            msg = 'Time-record %s has timestamp from future %s vs current time %s'\
                  % (time_record.get_document()['_id'], start_time, actual_time)
            self._log_message(ERROR, process_name, time_record, msg)
    except DuplicateKeyError as e:
        # another worker inserted the same uow first - adopt it if we can find it
        uow_obj = self.recover_from_duplicatekeyerror(e)
        if uow_obj is not None:
            self.timetable.update_timetable_record(process_name,
                                                   time_record,
                                                   uow_obj,
                                                   time_record.get_state())
        else:
            msg = 'MANUAL INTERVENTION REQUIRED! Unable to identify unit_of_work for %s in %s'\
                  % (process_name, time_record.get_timestamp())
            self._log_message(ERROR, process_name, time_record, msg)
def action_get_uow(self):
    """Return the unit_of_work document related to the requested timetable node.

    :return: dict mapping field names to their string representations, or a
        ``{'response': ...}`` explanation when no tree/unit_of_work exists
    """
    resp = dict()
    timetable = self.mbean.timetable
    tree = timetable.get_tree(self.process_name)
    if tree is not None:
        self.timestamp = time_helper.cast_to_time_qualifier(self.process_name, self.timestamp)
        node = tree.get_node_by_process(self.process_name, self.timestamp)
        uow_id = node.time_record.get_related_unit_of_work()
        if uow_id is None:
            resp = {'response' : 'no related unit_of_work'}
        else:
            # bug fix: wrap uow_id in ObjectId - every other retrieve_by_id
            # call site in this module passes ObjectId(uow_id), not the raw id
            resp = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id)).get_document()
        # stringify values so the response is JSON-friendly
        for key in resp:
            resp[key] = str(resp[key])
    return resp
def _callback_skip(self, process_name, timestamp, tree_node):
    """Answer a skip request coming from the tree.

    Marks the timetable record as STATE_SKIPPED and, when a related
    unit_of_work exists, cancels it as well; any pending reprocessing
    request for this timeperiod is dropped.
    """
    time_record = tree_node.time_record
    time_record.set_state(TimeTableEntry.STATE_SKIPPED)
    uow_id = time_record.get_related_unit_of_work()

    if uow_id is None:
        msg = 'Transferred time-record %s in timeperiod %s to %s;'\
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state())
    else:
        # cancel the attached unit_of_work so workers do not pick it up
        uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
        uow_obj.set_state(uow_obj.STATE_CANCELED)
        unit_of_work_helper.update(self.logger, uow_obj)
        msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s'\
              % (time_record.get_document()['_id'],
                 time_record.get_timestamp(),
                 time_record.get_state(),
                 uow_obj.get_state())

    self._save_time_record(process_name, time_record)
    self.logger.warning(msg)
    tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

    # a skipped timeperiod must not stay queued for reprocessing
    if timestamp in self.reprocess.get(process_name, {}):
        del self.reprocess[process_name][timestamp]
def _mq_callback(self, message):
    """ try/except wrapper
    in case exception breaks the abstract method, this method:
    - catches the exception
    - logs the exception
    - marks unit of work as INVALID"""
    unit_of_work = None
    try:
        # @param object_id: ObjectId of the unit_of_work from mq
        object_id = ObjectId(message.body)
        unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
        if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
            # garbage collector might have reposted this UOW
            # ack and bail out - the work was already done (or deliberately canceled)
            self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                % (str(message.body), unit_of_work.get_state()), exc_info=False)
            self.consumer.acknowledge(message.delivery_tag)
            return
    except Exception:
        # message body could not be resolved to a unit_of_work; ack so the
        # broker does not redeliver a poison message
        self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
        self.consumer.acknowledge(message.delivery_tag)
        return

    try:
        # boundaries of the _id range this uow covers in the source collection
        start_id_obj = ObjectId(unit_of_work.get_start_id())
        end_id_obj = ObjectId(unit_of_work.get_end_id())
        start_timestamp = unit_of_work.get_start_timestamp()
        end_timestamp = unit_of_work.get_end_timestamp()

        # mark the uow as started before iterating the source collection
        unit_of_work.set_state(UnitOfWorkEntry.STATE_IN_PROGRESS)
        unit_of_work.set_started_at(datetime.utcnow())
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.start_uow(unit_of_work)

        bulk_threshold = settings['bulk_threshold']
        iteration = 0
        # page through [start_id_obj : end_id_obj] in chunks of bulk_threshold;
        # after the first page the lower bound becomes exclusive ($gt) so the
        # last-seen _id is not processed twice
        while True:
            source_collection = self._get_source_collection()
            if iteration == 0:
                queue = { '_id' : { '$gte' : start_id_obj, '$lte' : end_id_obj } }
            else:
                queue = { '_id' : { '$gt' : start_id_obj, '$lte' : end_id_obj } }

            if start_timestamp is not None and end_timestamp is not None:
                # remove all accident objects that may be in [start_id_obj : end_id_obj] range
                queue[AbstractModel.TIMESTAMP] = { '$gte' : start_timestamp, '$lt' : end_timestamp }

            cursor = source_collection.find(queue).sort('_id', ASCENDING).limit(bulk_threshold)
            # NOTE(review): Cursor.count(with_limit_and_skip=True) is removed in
            # modern pymongo (4.x) - confirm the driver version in use
            count = cursor.count(with_limit_and_skip=True)
            if count == 0 and iteration == 0:
                msg = 'No entries in %s at range [%s : %s]'\
                      % (str(source_collection.name), unit_of_work.get_start_id(), unit_of_work.get_end_id())
                self.logger.warning(msg)
                break
            else:
                # _process_not_empty_cursor consumes the page and reports
                # whether to continue and from which _id
                shall_continue, new_start_id = self._process_not_empty_cursor(cursor)
                if shall_continue:
                    start_id_obj = new_start_id
                    iteration += 1
                else:
                    break

        msg = 'Cursor exploited after %s iterations' % str(iteration)
        self.logger.info(msg)

        self.perform_post_processing(unit_of_work.get_timestamp())
        number_of_aggregated_objects = self._flush_aggregated_objects()
        unit_of_work.set_number_of_aggregated_documents(number_of_aggregated_objects)
        unit_of_work.set_number_of_processed_documents(self.performance_ticker.posts_per_job)
        unit_of_work.set_finished_at(datetime.utcnow())
        unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.finish_uow()
    except Exception as e:
        # safety fuse: mark the uow INVALID, cancel the ticker and drop the
        # partially-built aggregation state to reclaim memory
        unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
        unit_of_work_helper.update(self.logger, unit_of_work)
        self.performance_ticker.cancel_uow()
        del self.aggregated_objects
        self.aggregated_objects = dict()
        gc.collect()
        self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'\
                          % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
    finally:
        # NOTE(review): consumer is closed after every message - presumably this
        # worker handles one uow per consumer session; confirm against caller
        self.consumer.acknowledge(message.delivery_tag)
        self.consumer.close()