Example #1
    def _callback_reprocess(self, process_name, timestamp, tree_node):
        """ is called from tree to answer reprocessing request.
        It is possible that timetable record will be transfered to STATE_IN_PROGRESS with no related unit_of_work"""
        uow_id = tree_node.time_record.get_related_unit_of_work()
        if uow_id is not None:
            tree_node.time_record.set_state(TimeTableEntry.STATE_IN_PROGRESS)
            uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
            uow_obj.set_state(uow_obj.STATE_INVALID)
            uow_obj.set_number_of_retries(0)
            uow_obj.set_created_at(datetime.utcnow())
            unit_of_work_helper.update(self.logger, uow_obj)
            msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s'\
                    % (tree_node.time_record.get_document()['_id'], tree_node.time_record.get_timestamp(),
                       tree_node.time_record.get_state(), uow_obj.get_state())
        else:
            tree_node.time_record.set_state(TimeTableEntry.STATE_EMBRYO)
            msg = 'Transferred time-record %s in timeperiod %s to %s;'\
                    % (tree_node.time_record.get_document()['_id'], tree_node.time_record.get_timestamp(),
                       tree_node.time_record.get_state())

        tree_node.time_record.set_number_of_failures(0)
        self._save_time_record(process_name, tree_node.time_record)
        self.logger.warning(msg)
        tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

        if process_name not in self.reprocess:
            self.reprocess[process_name] = dict()
        self.reprocess[process_name][timestamp] = tree_node
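
# Illustrative sketch, not part of the original module: the two branches of _callback_reprocess
# above reduce to a small state-transition rule. The hypothetical helper below restates that rule
# with plain string values so it can be checked in isolation; the names mirror the constants used
# above (TimeTableEntry / UnitOfWorkEntry states).
def reprocess_transition(has_related_unit_of_work):
    """ return (time_record_state, unit_of_work_state or None) for a reprocessing request"""
    if has_related_unit_of_work:
        # the time record goes back in progress and its unit_of_work is invalidated,
        # so the work will be recomputed and reposted
        return 'STATE_IN_PROGRESS', 'STATE_INVALID'
    # with no unit_of_work to recycle, the time record is reset to its initial state
    return 'STATE_EMBRYO', None

assert reprocess_transition(True) == ('STATE_IN_PROGRESS', 'STATE_INVALID')
assert reprocess_transition(False) == ('STATE_EMBRYO', None)
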
    def update_scope_of_processing(self, process_name, unit_of_work, start_time, end_time, time_record):
        """method reads collection and refine slice upper bound for processing"""
        source_collection_name = unit_of_work.get_source_collection()
        source_collection = CollectionContext.get_collection(self.logger, source_collection_name)

        query = { AbstractModel.TIMESTAMP : { '$gte' : start_time, '$lt' : end_time } }
        # pick the highest _id in the timestamp range; the range is assumed to be non-empty
        dec_search = source_collection.find(spec=query, fields=['_id']).sort('_id', DESCENDING).limit(1)
        last_object_id = dec_search[0]['_id']
        unit_of_work.set_end_id(str(last_object_id))
        unit_of_work_helper.update(self.logger, unit_of_work)

        msg = 'Updated range to process for %s in timeperiod %s for collection %s: [%s : %s]'\
                % (process_name, time_record.get_timestamp(), source_collection_name,
                   unit_of_work.get_start_id(), str(last_object_id))
        self._log_message(INFO, process_name, time_record, msg)
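
# Illustrative sketch, not part of the original module: update_scope_of_processing above asks the
# source collection for the single highest _id whose timestamp falls in [start_time, end_time) and
# stores it as the new end_id of the unit_of_work. Below is a pymongo-style restatement with
# hypothetical names; unlike the method above, it returns None instead of failing when the range
# is empty.
from pymongo import DESCENDING

def last_object_id_in_range(collection, start_time, end_time, timestamp_field='timestamp'):
    """ return the largest _id among documents whose timestamp is in [start_time, end_time)"""
    document = collection.find_one({timestamp_field: {'$gte': start_time, '$lt': end_time}},
                                   projection={'_id': 1},
                                   sort=[('_id', DESCENDING)])
    return None if document is None else document['_id']
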
    def _mq_callback(self, message):
        """ try/except wrapper
        in case exception breaks the abstract method, this method:
        - catches the exception
        - logs the exception
        - marks unit of work as INVALID"""
        unit_of_work = None
        try:
            # message.body carries the ObjectId of the unit_of_work posted to the MQ
            object_id = ObjectId(message.body)
            unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
            if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
                # garbage collector might have reposted this UOW
                self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                    % (str(message.body), unit_of_work.get_state()), exc_info=False)
                self.consumer.acknowledge(message.delivery_tag)
                return
        except Exception:
            self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
            self.consumer.acknowledge(message.delivery_tag)
            return

        try:
            self.performance_ticker.start_uow(unit_of_work)
            unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
            unit_of_work.set_number_of_processed_documents(0)
            unit_of_work.set_started_at(datetime.utcnow())
            unit_of_work.set_finished_at(datetime.utcnow())
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.finish_uow()
        except Exception as e:
            unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.cancel_uow()
            self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'\
                              % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
        finally:
            self.consumer.acknowledge(message.delivery_tag)
            self.consumer.close()
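
# Illustrative sketch, not part of the original module: the "safety fuse" shape of _mq_callback
# above, distilled. The message is acknowledged no matter what happens, units that are already
# finalized (for instance, reposted by the garbage collector) are skipped, and any failure marks
# the unit_of_work as invalid instead of crashing the consumer. The worker object and its
# attributes here are hypothetical placeholders, not the real API.
def safety_fuse(worker, message):
    unit_of_work = None
    try:
        unit_of_work = worker.load_unit_of_work(message.body)    # may raise on an unknown id
        if unit_of_work.state in ('STATE_CANCELED', 'STATE_PROCESSED'):
            return                                               # already finalized - skip it
        worker.process(unit_of_work)                             # the actual domain work; may raise
        unit_of_work.state = 'STATE_PROCESSED'
    except Exception:
        if unit_of_work is not None:
            unit_of_work.state = 'STATE_INVALID'                 # leave a trace for the garbage collector
        worker.logger.error('safety fuse triggered for %s' % message.body, exc_info=True)
    finally:
        worker.consumer.acknowledge(message.delivery_tag)        # the message is never left unacknowledged
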
    def _process_single_document(self, document):
        """ actually inspects UOW retrieved from the database"""
        repost = False
        unit_of_work = UnitOfWorkEntry(document)
        process_name = unit_of_work.get_process_name()

        if unit_of_work.get_state() == UnitOfWorkEntry.STATE_INVALID:
            repost = True
        elif unit_of_work.get_state() == UnitOfWorkEntry.STATE_IN_PROGRESS \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_REQUESTED:
            last_activity = unit_of_work.get_started_at()
            if last_activity is None:
                last_activity = unit_of_work.get_created_at()

            if datetime.utcnow() - last_activity > timedelta(hours=REPOST_AFTER_HOURS):
                repost = True

        if repost:
            creation_time = unit_of_work.get_created_at()
            if datetime.utcnow() - creation_time < timedelta(hours=LIFE_SUPPORT_HOURS):
                unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
                unit_of_work.set_number_of_retries(unit_of_work.get_number_of_retries() + 1)
                unit_of_work_helper.update(self.logger, unit_of_work)
                self.publishers.get_publisher(process_name).publish(str(document["_id"]))

                self.logger.info(
                    "UOW marked for re-processing: process %s; id %s; attempt %d"
                    % (process_name, str(document["_id"]), unit_of_work.get_number_of_retries())
                )
                self.performance_ticker.increment()
            else:
                unit_of_work.set_state(UnitOfWorkEntry.STATE_CANCELED)
                unit_of_work_helper.update(self.logger, unit_of_work)
                self.logger.info(
                    "UOW transfered to STATE_CANCELED: process %s; id %s; attempt %d"
                    % (process_name, str(document["_id"]), unit_of_work.get_number_of_retries())
                )
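
# Illustrative sketch, not part of the original module: the repost decision made by
# _process_single_document above, expressed as a pure function. The hour thresholds are assumed
# values for the sketch only; the state strings stand in for the UnitOfWorkEntry constants.
from datetime import datetime, timedelta

REPOST_AFTER_HOURS = 6
LIFE_SUPPORT_HOURS = 48

def repost_decision(state, created_at, started_at, now=None):
    """ return 'repost', 'cancel' or 'leave' for a snapshot of a unit_of_work"""
    now = now or datetime.utcnow()
    stale = state == 'STATE_INVALID'
    if state in ('STATE_IN_PROGRESS', 'STATE_REQUESTED'):
        last_activity = started_at or created_at
        stale = now - last_activity > timedelta(hours=REPOST_AFTER_HOURS)
    if not stale:
        return 'leave'
    # a stale unit_of_work is reposted while it is still young enough, and canceled afterwards
    if now - created_at < timedelta(hours=LIFE_SUPPORT_HOURS):
        return 'repost'
    return 'cancel'
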
Example #5
    def _callback_skip(self, process_name, timestamp, tree_node):
        """ is called from tree to answer skip request"""
        tree_node.time_record.set_state(TimeTableEntry.STATE_SKIPPED)
        uow_id = tree_node.time_record.get_related_unit_of_work()
        if uow_id is not None:
            uow_obj = unit_of_work_helper.retrieve_by_id(self.logger, ObjectId(uow_id))
            uow_obj.set_state(uow_obj.STATE_CANCELED)
            unit_of_work_helper.update(self.logger, uow_obj)
            msg = 'Transferred time-record %s in timeperiod %s to %s; Transferred unit_of_work to %s'\
                    % (tree_node.time_record.get_document()['_id'], tree_node.time_record.get_timestamp(),
                       tree_node.time_record.get_state(), uow_obj.get_state())
        else:
            msg = 'Transferred time-record %s in timeperiod %s to %s;'\
                    % (tree_node.time_record.get_document()['_id'], tree_node.time_record.get_timestamp(),
                       tree_node.time_record.get_state())

        self._save_time_record(process_name, tree_node.time_record)
        self.logger.warning(msg)
        tree_node.add_log_entry([datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'), msg])

        if process_name in self.reprocess and timestamp in self.reprocess[process_name]:
            del self.reprocess[process_name][timestamp]

    def _mq_callback(self, message):
        """ try/except wrapper:
        in case an exception breaks the abstract method, this method:
        - catches the exception
        - logs the exception
        - marks the unit_of_work as STATE_INVALID"""
        unit_of_work = None
        try:
            # message.body carries the ObjectId of the unit_of_work posted to the MQ
            object_id = ObjectId(message.body)
            unit_of_work = unit_of_work_helper.retrieve_by_id(self.logger, object_id)
            if unit_of_work.get_state() == UnitOfWorkEntry.STATE_CANCELED \
                or unit_of_work.get_state() == UnitOfWorkEntry.STATE_PROCESSED:
                # garbage collector might have reposted this UOW
                self.logger.warning('Skipping unit_of_work: id %s; state %s;' \
                                    % (str(message.body), unit_of_work.get_state()), exc_info=False)
                self.consumer.acknowledge(message.delivery_tag)
                return
        except Exception:
            self.logger.error('Safety fuse. Can not identify unit_of_work %s' % str(message.body), exc_info=True)
            self.consumer.acknowledge(message.delivery_tag)
            return

        try:
            start_id_obj = ObjectId(unit_of_work.get_start_id())
            end_id_obj = ObjectId(unit_of_work.get_end_id())
            start_timestamp = unit_of_work.get_start_timestamp()
            end_timestamp = unit_of_work.get_end_timestamp()

            unit_of_work.set_state(UnitOfWorkEntry.STATE_IN_PROGRESS)
            unit_of_work.set_started_at(datetime.utcnow())
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.start_uow(unit_of_work)

            bulk_threshold = settings['bulk_threshold']
            iteration = 0
            while True:
                source_collection = self._get_source_collection()
                if iteration == 0:
                    query = { '_id' : { '$gte' : start_id_obj, '$lte' : end_id_obj } }
                else:
                    query = { '_id' : { '$gt' : start_id_obj, '$lte' : end_id_obj } }

                if start_timestamp is not None and end_timestamp is not None:
                    # filter out accidental objects that may fall into the [start_id_obj : end_id_obj] range
                    query[AbstractModel.TIMESTAMP] = { '$gte' : start_timestamp, '$lt' : end_timestamp }

                cursor = source_collection.find(query).sort('_id', ASCENDING).limit(bulk_threshold)
                count = cursor.count(with_limit_and_skip=True)
                if count == 0 and iteration == 0:
                    msg = 'No entries in %s in range [%s : %s]'\
                            % (str(source_collection.name), unit_of_work.get_start_id(), unit_of_work.get_end_id())
                    self.logger.warning(msg)
                    break
                else:
                    shall_continue, new_start_id = self._process_not_empty_cursor(cursor)
                    if shall_continue:
                        start_id_obj = new_start_id
                        iteration += 1
                    else:
                        break

            msg = 'Cursor exhausted after %s iterations' % str(iteration)
            self.logger.info(msg)

            self.perform_post_processing(unit_of_work.get_timestamp())
            number_of_aggregated_objects = self._flush_aggregated_objects()
            unit_of_work.set_number_of_aggregated_documents(number_of_aggregated_objects)
            unit_of_work.set_number_of_processed_documents(self.performance_ticker.posts_per_job)
            unit_of_work.set_finished_at(datetime.utcnow())
            unit_of_work.set_state(UnitOfWorkEntry.STATE_PROCESSED)
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.finish_uow()
        except Exception as e:
            unit_of_work.set_state(UnitOfWorkEntry.STATE_INVALID)
            unit_of_work_helper.update(self.logger, unit_of_work)
            self.performance_ticker.cancel_uow()

            del self.aggregated_objects
            self.aggregated_objects = dict()
            gc.collect()

            self.logger.error('Safety fuse while processing unit_of_work %s in timeperiod %s : %r'\
                              % (message.body, unit_of_work.get_timestamp(), e), exc_info=True)
        finally:
            self.consumer.acknowledge(message.delivery_tag)
            self.consumer.close()
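
# Illustrative sketch, not part of the original module: the while-loop in the method above pages
# through the source collection in bulk_threshold-sized slices ordered by _id, using an inclusive
# lower bound ($gte) on the first pass and an exclusive one ($gt) afterwards so no document is
# read twice. A standalone pymongo-style restatement with hypothetical names:
from pymongo import ASCENDING

def iterate_in_slices(collection, start_id, end_id, bulk_threshold):
    """ yield documents with start_id <= _id <= end_id in _id order, one slice at a time"""
    lower_bound_operator = '$gte'                # the first slice includes start_id itself
    while True:
        query = {'_id': {lower_bound_operator: start_id, '$lte': end_id}}
        batch = list(collection.find(query).sort('_id', ASCENDING).limit(bulk_threshold))
        if not batch:
            break
        for document in batch:
            yield document
        start_id = batch[-1]['_id']              # the next slice starts strictly after the last _id seen
        lower_bound_operator = '$gt'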