Exemplo n.º 1
0
class WorkQueueReqMgrInterface(object):
    """Helper class for ReqMgr interaction"""

    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        # this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")

        # set the thread name before creat the log db.
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)

        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:  # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try:  # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:  # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join([x['RequestName'] for x in uptodate_elements])
        except Exception:
            self.logger.exception("Error caught during RequestManager update")
        else:
            try:  # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except Exception:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl, request=reqName, team=team)
                self.logdb.delete(reqName, "error", this_thread=True)
            except TERMINAL_EXCEPTIONS as ex:
                # fatal error - report back to ReqMgr
                self.logger.error('Permanent failure processing request "%s": %s' % (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" % reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                self.logger.warning("Unable to update ReqMgr state: %s" % str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" % work)
        return work

    def report(self, queue):
        """Report queue status to ReqMgr."""
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey="RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0]  # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])
                if not request:
                    msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele['RequestName']
                    self.logger.warning(msg)
                    continue
                request = request[0][ele['RequestName']]
                if request['RequestStatus'] in ('failed', 'completed', 'announced',
                                                'epic-FAILED', 'closed-out', 'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(WorkflowName=request['RequestName'])
                            if not any(
                                    [x for x in request_elements if x['Status'] != 'Running' and not x.inEndState()]):
                                last_update = max([float(x.updatetime) for x in request_elements])
                                if (now - last_update) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info(
                                        "Finishing request %s as it is done in reqmgr" % request['RequestName'])
                                    queue.doneWork(WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass  # assume workqueue status will catch up later
                elif request['RequestStatus'] in ['aborted', 'force-complete']:
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get('OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'], 'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get('OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # we do not want to move the request to 'failed' status
                elif ele['Status'] == 'Failed':
                    continue
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(request['RequestStatus']):
                    self.reportElement(ele)

                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s' % (ele['RequestName'], str(ex))
                self.logger.exception(msg)

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr"""
        finished = []
        for element in elements:
            if element.inEndState() and self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed',
                                                                                             'announced', 'epic-FAILED',
                                                                                             'closed-out', 'rejected'):
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)

    def getAvailableRequests(self):
        """
        Get available requests and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """

        tempResults = self.reqMgr2.getRequestByStatus("assigned")
        filteredResults = []
        for requests in tempResults:
            for request in requests.values():
                filteredResults.append(request)
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Teams"][0])

        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults]

        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)
        if reqmgrStatus:  # only send known states
            try:
                self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
            except Exception as ex:
                msg = "%s : fail to update status will try later: %s" % (request, str(ex))
                msg += traceback.format_exc()
                self.logdb.post(request, msg, 'warning')
                raise ex
        return

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {'Acquired': 'acquired',
                         'Running': 'running-open',
                         'Failed': 'failed',
                         'Canceled': 'aborted',
                         'CancelRequested': 'aborted',
                         'Done': 'completed'
                        }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {'acquired': ['Acquired'],
                         'running': ['Running'],
                         'running-open': ['Running'],
                         'running-closed': ['Running'],
                         'failed': ['Failed'],
                         'aborted': ['Canceled', 'CancelRequested'],
                         'force-complete': ['Canceled', 'CancelRequested'],
                         'completed': ['Done']}
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset"""
        self.logger.info("Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self.reqMgr2.getRequestByStatus("running-open", detail=False)
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
                self.logdb.delete(reqName, 'error', True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                traceMsg = traceback.format_exc()
                msg = "%s\n%s" % (msg, traceMsg)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
Exemplo n.º 2
0
class WorkQueueReqMgrInterface(object):
    """Helper class for ReqMgr interaction"""
    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        # this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")

        # set the thread name before creat the log db.
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)

        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:  # pull in new work
            self.logger.info("queueing new work")
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception as ex:
            errorMsg = "Error caught during RequestManager pull"
            self.logger.exception("%s: %s", errorMsg, str(ex))

        try:  # get additional open-running work
            self.logger.info("adding new element to open requests")
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception as ex:
            errorMsg = "Error caught during RequestManager split"
            self.logger.exception("%s: %s", errorMsg, str(ex))

        try:  # report back to ReqMgr
            self.logger.info("cancel aborted requests")
            count = self.cancelWork(queue)
            self.logger.info("finised canceling requests")
            msg += "Work canceled: %s " % count
        except Exception as ex:
            errorMsg = "Error caught during canceling the request"
            self.logger.exception("%s: %s", errorMsg, str(ex))

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        try:
            workLoads = self.getAvailableRequests()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(
                            None,
                            "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" %
                                 (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl,
                                        request=reqName,
                                        team=team)
                self.logdb.delete(reqName,
                                  "error",
                                  this_thread=True,
                                  agent=False)
            except TERMINAL_EXCEPTIONS as ex:
                # fatal error - report back to ReqMgr
                self.logger.error(
                    'Permanent failure processing request "%s": %s' %
                    (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" %
                                 reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" %
                             work)
        return work

    def cancelWork(self, queue):
        requests = self.reqMgr2.getRequestByStatus(
            ['aborted', 'force-complete'], detail=False)
        count = 0
        for req in requests:
            try:
                queue.cancelWork(req)
                count += 1
            except Exception as ex:
                msg = 'Error to cancel the request "%s": %s' % (req, str(ex))
                self.logger.exception(msg)
        return count

    def report(self, queue):
        """Report queue status to ReqMgr."""
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey="RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0]  # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])
                if not request:
                    msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele[
                        'RequestName']
                    self.logger.warning(msg)
                    continue
                request = request[0][ele['RequestName']]
                if request['RequestStatus'] in ('failed', 'completed',
                                                'announced', 'closed-out',
                                                'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)
                            ) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(
                                WorkflowName=request['RequestName'])
                            if not any([
                                    x for x in request_elements
                                    if x['Status'] != 'Running'
                                    and not x.inEndState()
                            ]):
                                last_update = max([
                                    float(x.updatetime)
                                    for x in request_elements
                                ])
                                if (
                                        now - last_update
                                ) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info(
                                        "Finishing request %s as it is done in reqmgr"
                                        % request['RequestName'])
                                    queue.doneWork(
                                        WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass  # assume workqueue status will catch up later
                elif request['RequestStatus'] in ['aborted', 'force-complete']:
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get(
                        'OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'],
                                             'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get(
                        'OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # we do not want to move the request to 'failed' status
                elif ele['Status'] == 'Failed':
                    continue
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(
                        request['RequestStatus']):
                    self.reportElement(ele)

                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s' % (
                    ele['RequestName'], str(ex))
                self.logger.exception(msg)

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr"""
        finished = []
        for element in elements:
            if element.inEndState():
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)

    def getAvailableRequests(self):
        """
        Get available requests and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """
        tempResults = self.reqMgr2.getRequestByStatus("staged")
        filteredResults = []
        for requests in tempResults:
            for request in requests.values():
                filteredResults.append(request)
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Team"])

        results = [(x["Team"], x["RequestName"], x["RequestWorkflow"])
                   for x in filteredResults]

        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            logType = "error" if status == "Failed" else "info"
            self.logdb.post(request, str(message), logType)
        reqmgrStatus = self._workQueueToReqMgrStatus(status)

        if reqmgrStatus:  # only send known states
            try:
                self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
            except Exception as ex:
                msg = "%s : fail to update status will try later: %s" % (
                    request, str(ex))
                msg += traceback.format_exc()
                self.logdb.post(request, msg, 'warning')
                raise ex
        return

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {
            'Acquired': 'acquired',
            'Running': 'running-open',
            'Failed': 'failed',
            'Canceled': 'aborted',
            'CancelRequested': 'aborted',
            'Done': 'completed'
        }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {
            'acquired': ['Acquired'],
            'running': ['Running'],
            'running-open': ['Running'],
            'running-closed': ['Running'],
            'failed': ['Failed'],
            'aborted': ['Canceled', 'CancelRequested'],
            'force-complete': ['Canceled', 'CancelRequested'],
            'completed': ['Done']
        }
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset"""
        self.logger.info(
            "Checking Request Manager for open requests and closing old ones")

        work = 0
        requests = []

        try:
            requests = self.reqMgr2.getRequestByStatus("running-open",
                                                       detail=False)
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
                self.logdb.delete(reqName, 'error', True, agent=False)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                traceMsg = traceback.format_exc()
                msg = "%s\n%s" % (msg, traceMsg)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
Exemplo n.º 3
0
class LogDBTest(unittest.TestCase):
    """
    _LogDBTest_

    """
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection. 
        """
        logging.basicConfig()
        self.logger = logging.getLogger('LogDBTest')
#        self.logger.setLevel(logging.DEBUG)
        url = 'http://localhost:5984/logdb_t'
        identifier = 'agentname'
        self.agent_inst = LogDB(url, identifier, logger=self.logger, create=True)
        self.agent_inst2 = LogDB(url, identifier, logger=self.logger, create=True, thread_name="Test")
        identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
        self.user_inst = LogDB(url, identifier, logger=self.logger, create=True)
        self.report = LogDBReport(self.agent_inst)

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.agent_inst.backend.deleteDatabase()

    def test_docid(self):
        "Test docid API"
        request = 'abs'
        mtype = self.agent_inst.backend.prefix('info')
        res = self.agent_inst.backend.docid(request, mtype)
        key = '--'.join([request, self.agent_inst.identifier, self.agent_inst.thread_name, mtype])
        khash = gen_hash(key)
        self.assertEqual(res, khash)

    def test_prefix(self):
        "Test prefix LogDBBackend API"
        request = 'abc'

        self.assertEqual(self.agent_inst.agent, 1)
        mtype = self.agent_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'agent-msg')

        self.assertEqual(self.user_inst.agent, 0)
        mtype = self.user_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'msg')

    def test_apis(self):
        "Test LogDB APIs"
        request = 'abc'

        # if we post messages for the same request only last one should survive
        self.agent_inst.post(request, 'msg1')
#        time.sleep(1)
        self.agent_inst.post(request, 'msg2')
#        time.sleep(1)
        self.agent_inst.post(request, 'msg3')
#        time.sleep(1)
        self.agent_inst.post(request, 'msg4')
#        time.sleep(1)
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]['msg'], 'msg4')

        # add message as user, user based messages will grow
        self.user_inst.post(request, 'doc1')
#        time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 2) # we have msg4 and doc1
        self.user_inst.post(request, 'doc1') # we have msg2 and two doc1 messages
#        time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 3)

    def test_cleanup(self):
        "Test clean-up LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'comment')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        all_docs = self.agent_inst.get(request)
        self.agent_inst.backend.cleanup(thr=10) # look into past
        past_docs = self.agent_inst.get(request)
        self.assertEqual(len(all_docs), len(past_docs))
        self.agent_inst.backend.cleanup(thr=-10) # look into future
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 0)

    def test_get_all_requests(self):
        "Test get_all_requests LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        sum_docs = self.agent_inst.get(request)
        self.assertEqual(len(sum_docs), 3) # since we have two info records
        request = 'abc2'
        self.agent_inst.post(request, 'msg1', 'info')
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 2) # we only have two distinct request names

    def test_delete(self):
        "Test delete LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.delete(request)
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 0)

    def test_get(self):
        "Test get LogDB API"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info') # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 4) # there should be 4 docs one for Test thread and 3 for MainThread
        docs = self.agent_inst2.get(request)
        self.assertEqual(len(docs), 4) # this should result the same as above.

        docs1 = self.agent_inst.get(request, 'info')
        docs2 = self.agent_inst.get(request, 'info')
        req1 = set([r['request'] for r in docs1])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in docs2])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)

    def test_thread_name(self):
        "Test thread name support by LogDB APIs"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info') # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        maindocs = self.agent_inst.get(request)
        self.assertEqual(len(maindocs), 4) # since we have two info records in agent_inst
        thr_docs = self.agent_inst2.get(request)
        self.assertEqual(len(thr_docs), 4) # number of docs in different thread
        req1 = set([r['request'] for r in maindocs])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in thr_docs])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)
        worker1 = set([r['thr'] for r in maindocs])
        self.assertEqual(worker1, set(["Test", "MainThread"]))
        worker2 = set([r['thr'] for r in thr_docs])
        self.assertEqual(worker2, set(["Test", "MainThread"]))

        docs = set([r['identifier'] for r in maindocs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))
        docs = set([r['identifier'] for r in thr_docs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))

    def test_checks(self):
        "Tests LogDB check/mtype functionality"
        request = 'abc'
        res = self.agent_inst.post(request, 'msg', 'ingo') # should be silent, i.o. no Exceptions
        self.assertEqual('post-error', res)
        args = (request, 'msg', 'ingo') # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = (request, 'ingo') # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)
        args = ('', 'msg', 'info') # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = ('', 'info') # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)

    def test_report(self):
        "Test LogDBReport APIs"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        time.sleep(1)
        self.agent_inst.post(request, 'msg2', 'comment')
        time.sleep(1)
        self.agent_inst.post(request, 'msg3', 'warning')
        time.sleep(1)
        self.agent_inst.post(request, 'msg4', 'error')
        time.sleep(1)
        self.report.to_stdout(request)
        out = self.report.to_txt(request)
        print "\n### txt report\n", out
        out = self.report.to_html(request)
        print "\n### html report\n", out

        docs = self.agent_inst.get(request)
        tdocs = self.report.orderby(docs, order='ts')
        messages = [d['msg'] for d in tdocs]
        self.assertEqual(messages, ['msg4', 'msg3', 'msg2', 'msg1'])
Exemplo n.º 4
0
class WorkQueueReqMgrInterface():
    """Helper class for ReqMgr interaction"""
    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        #TODO: (reqmgr2Only - remove this line when reqmgr is replaced)
        self.reqMgr = RequestManager(kwargs)
        #this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")

        # set the thread name before creat the log db.
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)

        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:  # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try:  # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:  # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join(
                [x['RequestName'] for x in uptodate_elements])
        except Exception:
            self.logger.exception("Error caught during RequestManager update")
        else:
            try:  # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except Exception:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info(
                'Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            #            try:
            #                self.reportRequestStatus(reqName, "negotiating")
            #            except Exception, ex:
            #                self.logger.error("""
            #                    Unable to update ReqMgr state to negotiating: %s
            #                    Ignoring this request: %s""" % (str(ex), reqName))
            #                continue

            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(
                            None,
                            "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" %
                                 (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl,
                                        request=reqName,
                                        team=team)
                self.logdb.delete(reqName, "error", this_thread=True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info(
                    'Permanent failure processing request "%s": %s' %
                    (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" %
                                 reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                self.logger.warning("Unable to update ReqMgr state: %s" %
                                    str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" %
                             work)
        return work

    def report(self, queue):
        """Report queue status to ReqMgr."""
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey="RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0]  # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])
                if not request:
                    msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele[
                        'RequestName']
                    self.logger.warning(msg)
                    continue
                request = request[ele['RequestName']]
                if request['RequestStatus'] in ('failed', 'completed',
                                                'announced', 'epic-FAILED',
                                                'closed-out', 'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)
                            ) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(
                                WorkflowName=request['RequestName'])
                            if not any([
                                    x for x in request_elements
                                    if x['Status'] != 'Running'
                                    and not x.inEndState()
                            ]):
                                last_update = max([
                                    float(x.updatetime)
                                    for x in request_elements
                                ])
                                if (
                                        now - last_update
                                ) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info(
                                        "Finishing request %s as it is done in reqmgr"
                                        % request['RequestName'])
                                    queue.doneWork(
                                        WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass  # assume workqueue status will catch up later
                elif request['RequestStatus'] == 'aborted' or request[
                        'RequestStatus'] == 'force-complete':
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get(
                        'OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'],
                                             'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get(
                        'OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # update request status if necessary
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(
                        request['RequestStatus']):
                    self.reportElement(ele)

                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s'
                traceMsg = traceback.format_exc()
                self.logger.error(msg % (ele['RequestName'], traceMsg))

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr"""
        finished = []
        for element in elements:
            if self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced',
                                                         'epic-FAILED', 'closed-out', 'rejected') \
                                                         and element.inEndState():
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)

    def _getRequestsByTeamsAndStatus(self, status, teams=[]):
        """
        TODO: now it assumes one team per requests - check whether this assumption is correct
        Check whether we actually use the team for this.
        Also switch to byteamandstatus couch call instead of 
        """
        requests = self.reqMgr2.getRequestByStatus(status)
        #Then sort by Team name then sort by Priority
        #https://docs.python.org/2/howto/sorting.html
        if teams and len(teams) > 0:
            results = {}
            for reqName, value in requests.items():
                if value["Teams"][0] in teams:
                    results[reqName] = value
            return results
        else:
            return requests

    def getAvailableRequests(self, teams):
        """
        Get available requests for the given teams and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """

        tempResults = self._getRequestsByTeamsAndStatus("assigned",
                                                        teams).values()
        filteredResults = []
        for request in tempResults:
            if "Teams" in request and len(request["Teams"]) == 1:
                filteredResults.append(request)
                self.logdb.delete(request["RequestName"],
                                  "error",
                                  this_thread=True)
            else:
                msg = "no team or more than one team (%s) are assigined: %s" % (
                    request.get("Teams", None), request["RequestName"])
                self.logger.error(msg)
                self.logdb.post(request["RequestName"], msg, 'error')
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Teams"][0])

        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"])
                   for x in filteredResults]

        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)
        if reqmgrStatus:  # only send known states
            try:
                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                self.reqMgr.reportRequestStatus(request, reqmgrStatus)
                # And replace with this (remove all Exceptins)
                #self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
            except HTTPException as ex:
                # If we get an HTTPException of 404 means reqmgr2 request
                if ex.status == 404:
                    # try reqmgr2 call
                    msg = "%s : reqmgr2 request: %s" % (request, str(ex))
                    self.logdb.post(request, msg, 'info')
                    self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
                else:
                    msg = "%s : fail to update status with HTTP error: %s" % (
                        request, str(ex))
                    self.logdb.post(request, msg, 'warning')
                    raise ex
            except Exception as ex:
                msg = "%s : fail to update status will try later: %s" % (
                    request, str(ex))
                self.logdb.post(request, msg, 'warning')
                raise ex

    def markAcquired(self, request, url=None):
        """Mark request acquired"""
        self.reqMgr.putWorkQueue(request, url)

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {
            'Acquired': 'acquired',
            'Running': 'running-open',
            'Failed': 'failed',
            'Canceled': 'aborted',
            'CancelRequested': 'aborted',
            'Done': 'completed'
        }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {
            'acquired': ['Acquired'],
            'running': ['Running'],
            'running-open': ['Running'],
            'running-closed': ['Running'],
            'failed': ['Failed'],
            'aborted': ['Canceled', 'CancelRequested'],
            'force-complete': ['Canceled', 'CancelRequested'],
            'completed': ['Done']
        }
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset"""
        self.logger.info(
            "Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info(
                'Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self._getRequestsByTeamsAndStatus(
                "running-open", queue.params['Teams']).keys()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
                self.logdb.delete(request["RequestName"], 'error', True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
Exemplo n.º 5
0
class MSRuleCleaner(MSCore):
    """
    MSRuleCleaner.py class provides the logic used to clean the Rucio
    block level data placement rules created by WMAgent.
    """

    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSRuleCleaner module
        :param msConfig: micro service configuration
        """
        super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        self.msConfig.setdefault("services", ['ruleCleaner'])
        self.msConfig.setdefault("rucioWmaAccount", "wma_test")
        self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor")
        self.msConfig.setdefault('enableRealMode', False)

        self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode"
        self.curlMgr = RequestHandler()
        self.targetStatusRegex = re.compile(r'.*archived')
        self.logDB = LogDB(self.msConfig["logDBUrl"],
                           self.msConfig["logDBReporter"],
                           logger=self.logger)
        self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger)

        # Building all the Pipelines:
        pName = 'plineMSTrCont'
        self.plineMSTrCont = Pipeline(name=pName,
                                      funcLine=[Functor(self.setPlineMarker, pName),
                                                Functor(self.setParentDatasets),
                                                Functor(self.getRucioRules, 'container', self.msConfig['rucioMStrAccount']),
                                                Functor(self.cleanRucioRules)])
        pName = 'plineMSTrBlock'
        self.plineMSTrBlock = Pipeline(name=pName,
                                       funcLine=[Functor(self.setPlineMarker, pName),
                                                 Functor(self.setParentDatasets),
                                                 Functor(self.getRucioRules, 'block', self.msConfig['rucioMStrAccount']),
                                                 Functor(self.cleanRucioRules)])
        pName = 'plineAgentCont'
        self.plineAgentCont = Pipeline(name=pName,
                                       funcLine=[Functor(self.setPlineMarker, pName),
                                                 Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']),
                                                 Functor(self.cleanRucioRules)])
        pName = 'plineAgentBlock'
        self.plineAgentBlock = Pipeline(name=pName,
                                        funcLine=[Functor(self.setPlineMarker, pName),
                                                  Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']),
                                                  Functor(self.cleanRucioRules)])
        pName = 'plineArchive'
        self.plineArchive = Pipeline(name=pName,
                                     funcLine=[Functor(self.setPlineMarker, pName),
                                               Functor(self.findTargetStatus),
                                               Functor(self.setClean),
                                               Functor(self.setArchivalDelayExpired),
                                               Functor(self.setLogDBClean),
                                               Functor(self.archive)])

        # Building the different set of plines we will need later:
        # NOTE: The following are all the functional pipelines which are supposed to include
        #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
        self.cleanuplines = [self.plineMSTrCont,
                             self.plineMSTrBlock,
                             self.plineAgentCont,
                             self.plineAgentBlock]
        # Building an auxiliary list of cleanup pipeline names only:
        self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

        # Building lists of pipelines related only to Agents or MStransferror
        self.agentlines = [self.plineAgentCont,
                           self.plineAgentBlock]
        self.mstrlines = [self.plineMSTrCont,
                          self.plineMSTrBlock]

        # Initialization of the 'cleaned' and 'archived' counters:
        self.wfCounters = {'cleaned': {},
                           'archived': {'normalArchived': 0,
                                        'forceArchived': 0}}
        self.globalLocks = set()

    def getGlobalLocks(self):
        """
        Fetches the list of 'globalLocks' from wmstats server and the list of
        'parentLocks' from request manager. Stores/updates the unified set in
        the 'globalLocks' instance variable. Returns the resultant unified set.
        :return: A union set of the 'globalLocks' and the 'parentLocks' lists
        """
        self.logger.info("Fetching globalLocks list from wmstats server.")
        try:
            globalLocks = set(self.wmstatsSvc.getGlobalLocks())
        except Exception as ex:
            msg = "Failed to refresh global locks list for the current polling cycle. Error: %s "
            msg += "Skipping this polling cycle."
            self.logger.error(msg, str(ex))
            raise ex
        self.logger.info("Fetching parentLocks list from reqmgr2 server.")
        try:
            parentLocks = set(self.reqmgr2.getParentLocks())
        except Exception as ex:
            msg = "Failed to refresh parent locks list for the current poling cycle. Error: %s "
            msg += "Skipping this polling cycle."
            self.logger.error(msg, str(ex))
            raise ex
        self.globalLocks = globalLocks | parentLocks

    def resetCounters(self):
        """
        A simple function for zeroing the cleaned and archived counters.
        """
        for pline in self.cleanuplines:
            self.wfCounters['cleaned'][pline.name] = 0
        self.wfCounters['archived']['normalArchived'] = 0
        self.wfCounters['archived']['forceArchived'] = 0

    def execute(self, reqStatus):
        """
        Executes the whole ruleCleaner logic
        :return: summary
        """
        # start threads in MSManager which should call this method
        summary = dict(RULECLEANER_REPORT)

        self.currThread = current_thread()
        self.currThreadIdent = self.currThread.name
        self.updateReportDict(summary, "thread_id", self.currThreadIdent)
        self.resetCounters()
        self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode)

        # Build the list of workflows to work on:
        try:
            requestRecords = {}
            for status in reqStatus:
                requestRecords.update(self.getRequestRecords(status))
        except Exception as err:  # general error
            msg = "Unknown exception while fetching requests from ReqMgr2. Error: %s", str(err)
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)

        # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords
        try:
            self.getGlobalLocks()
            totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests = self._execute(requestRecords)
            msg = "\nNumber of processed workflows: %s."
            msg += "\nNumber of properly cleaned workflows: %s."
            msg += "\nNumber of normally archived workflows: %s."
            msg += "\nNumber of force archived workflows: %s."
            self.logger.info(msg,
                             totalNumRequests,
                             cleanNumRequests,
                             normalArchivedNumRequests,
                             forceArchivedNumRequests)
            self.updateReportDict(summary, "total_num_requests", totalNumRequests)
            self.updateReportDict(summary, "clean_num_requests", cleanNumRequests)
            self.updateReportDict(summary, "normal_archived_num_requests", normalArchivedNumRequests)
            self.updateReportDict(summary, "force_archived_num_requests", forceArchivedNumRequests)
        except Exception as ex:
            msg = "Unknown exception while running MSRuleCleaner thread Error: %s"
            self.logger.exception(msg, str(ex))
            self.updateReportDict(summary, "error", msg)

        return summary

    def _execute(self, reqRecords):
        """
        Executes the MSRuleCleaner pipelines based on the workflow status
        :param reqList: A list of RequestRecords to work on
        :return:        a tuple with:
                            number of properly cleaned requests
                            number of processed workflows
                            number of archived workflows
        """
        # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival
        #       Pipelines are executed sequentially in the above order.
        #       This way we assure ourselves that we archive only workflows
        #       that have accomplished the needed cleanup

        cleanNumRequests = 0
        totalNumRequests = 0

        # Call the workflow dispatcher:
        for req in viewvalues(reqRecords):
            wflow = MSRuleCleanerWflow(req)
            self._dispatchWflow(wflow)
            msg = "\n----------------------------------------------------------"
            msg += "\nMSRuleCleanerWflow: %s"
            msg += "\n----------------------------------------------------------"
            self.logger.debug(msg, pformat(wflow))
            totalNumRequests += 1
            if self._checkClean(wflow):
                cleanNumRequests += 1

        # Report the counters:
        for pline in self.cleanuplines:
            msg = "Workflows cleaned by pipeline: %s: %d"
            self.logger.info(msg, pline.name, self.wfCounters['cleaned'][pline.name])
        normalArchivedNumRequests = self.wfCounters['archived']['normalArchived']
        forceArchivedNumRequests = self.wfCounters['archived']['forceArchived']
        self.logger.info("Workflows normally archived: %d", self.wfCounters['archived']['normalArchived'])
        self.logger.info("Workflows force archived: %d", self.wfCounters['archived']['forceArchived'])
        return totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests

    def _dispatchWflow(self, wflow):
        """
        A function intended to dispatch a workflow (e.g based on its status)
        through one or more functional pipelines in case there is some more
        complicated logic involved in the order we execute them but not just
        a sequentially
        """
        self.logger.debug("Dispatching workflow: %s", wflow['RequestName'])
        # NOTE: The following dispatch logic is a subject to be changed at any time

        # Resolve:
        # NOTE: First resolve any preliminary flags that will be needed further
        #       in the logic of the _dispatcher() itself
        if wflow['RequestStatus'] == 'announced':
            self.getMSOutputTransferInfo(wflow)

        # Clean:
        # Do not clean any Resubmission, but still let them be archived
        if wflow['RequestType'] == 'Resubmission':
            wflow['ForceArchive'] = True
            msg = "Skipping cleanup step for workflow: %s - RequestType is %s."
            msg += " Will try to archive it directly."
            self.logger.info(msg, wflow['RequestName'], wflow['RequestType'])
        elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']:
            # NOTE: We do not check the ParentageResolved flag for these
            #       workflows, but we do need to clean output data placement
            #       rules from the agents for them
            for pline in self.agentlines:
                try:
                    pline.run(wflow)
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        elif wflow['RequestStatus'] == 'announced' and not wflow['ParentageResolved']:
            # NOTE: We skip workflows which are not having 'ParentageResolved'
            #       flag, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferDone']:
            # NOTE: We skip workflows which have not yet finalised their TransferStatus
            #       in MSOutput, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferTape']:
            # NOTE: We skip workflows which have not yet finalised their tape transfers.
            #       (i.e. even if a single output which is supposed to be covered
            #       by a tape rule is in any of the following transient states:
            #       {REPLICATING, STUCK, SUSPENDED, WAITING_APPROVAL}.)
            #       We still need some proper logging for them.
            msg = "Skipping workflow: %s - tape transfers are not yet completed."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced':
            for pline in self.cleanuplines:
                try:
                    pline.run(wflow)
                except MSRuleCleanerResolveParentError as ex:
                    msg = "%s: Parentage Resolve Error: %s. "
                    msg += "Will retry again in the next cycle."
                    self.logger.error(msg, pline.name, str(ex))
                    continue
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error:  \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        else:
            # We shouldn't be here:
            msg = "Skipping workflow: %s - "
            msg += "Does not fall under any of the defined categories."
            self.logger.error(msg, wflow['RequestName'])

        # Archive:
        try:
            self.plineArchive.run(wflow)
            if wflow['ForceArchive']:
                self.wfCounters['archived']['forceArchived'] += 1
            else:
                self.wfCounters['archived']['normalArchived'] += 1
        except MSRuleCleanerArchivalSkip as ex:
            msg = "%s: Proper conditions not met: %s. "
            msg += "Skipping archival in the current cycle."
            self.logger.info(msg, wflow['PlineMarkers'][-1], str(ex))
        except MSRuleCleanerArchivalError as ex:
            msg = "%s: Archival Error: %s. "
            msg += "Will retry again in the next cycle."
            self.logger.error(msg, wflow['PlineMarkers'][-1], str(ex))
        except Exception as ex:
            msg = "%s General error from pipeline. Workflow: %s. Error: \n%s. "
            msg += "\nWill retry again in the next cycle."
            self.logger.exception(msg, wflow['PlineMarkers'][-1], wflow['RequestName'], str(ex))

    def setPlineMarker(self, wflow, pName):
        """
        A function intended to mark which is the pipeline currently working
        on the workflow. It is supposed to be called always as a first function
        in the pipeline.
        :param  wflow:   A MSRuleCleaner workflow representation
        :param  pName:   The name of the functional pipeline
        :return:         The workflow object
        """
        # NOTE: The current functional pipeline MUST always be appended at the
        #       end of the 'PlineMarkers' list

        # First get rid of the default:
        if not wflow['PlineMarkers']:
            wflow['PlineMarkers'] = []

        # Then push our current value into the markers list:
        wflow['PlineMarkers'].append(pName)

        # Populate the list of flags to be used later:
        if pName not in wflow['RulesToClean']:
            if pName in self.cleanupPipeNames:
                wflow['RulesToClean'][pName] = []
        if pName not in wflow['CleanupStatus']:
            if pName in self.cleanupPipeNames:
                wflow['CleanupStatus'][pName] = False
        return wflow

    def _checkClean(self, wflow):
        """
        An auxiliary function used to only check the temporary cleanup status.
        It basically takes the pipelines registered in 'PlineMarkers' that have
        already worked on the workflow as a mask and applies this mask over
        the set of flags in the 'CleanupStatus' field and then reduces the
        result to a single bool value
        """
        # NOTE: This is one of the few functions taking a workflow as an argument
        #       but returning a bool, since it is an auxiliary function and is not
        #       supposed to be called as a standalone function in a pipeline.
        # NOTE: `all([]) == True`, ergo all the 'rejected' && 'aborted-completed' workflows
        #       are also counted as properly cleaned and can trigger archival later

        # Build a list of bool flags based on the mask of PlineMarkers
        cleanFlagsList = [wflow['CleanupStatus'][key]
                          for key in wflow['PlineMarkers']
                          if key in wflow['CleanupStatus']]

        # If no one have worked on the workflow set the clean status to false
        if not wflow['PlineMarkers']:
            cleanStatus = False
        # If we have a mask longer than the list of flags avoid false positives
        # because of the behavior explained above - `all([]) == True`
        elif not cleanFlagsList:
            cleanStatus = False
        # Figure out the final value
        else:
            cleanStatus = all(cleanFlagsList)
        return cleanStatus

    def setClean(self, wflow):
        """
        A function to set the 'IsClean' flag based on the status from all the
        pipelines which have worked on the workflow (and have put their markers
        in the 'PlineMarkers' list)
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        wflow['IsClean'] = self._checkClean(wflow)
        return wflow

    def _checkLogDBClean(self, wflow):
        """
        An auxiliary function used to only check the LogDB cleanup status.
        It makes a query to LogDB in order to verify there are no any records for
        the current workflow
        :param wflow:       A MSRuleCleaner workflow representation
        :return:            True if no records were found in LogDB about wflow
        """
        cleanStatus = False
        logDBRecords = self.logDB.get(wflow['RequestName'])
        self.logger.debug("logDBRecords: %s", pformat(logDBRecords))
        if not logDBRecords:
            cleanStatus = True
        return cleanStatus

    def setLogDBClean(self, wflow):
        """
        A function to set the 'IsLogDBClean' flag based on the presence of any
        records in LogDB for the current workflow.
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        wflow['IsLogDBClean'] = self._checkLogDBClean(wflow)
        if not wflow['IsLogDBClean'] and wflow['IsArchivalDelayExpired']:
            wflow['IsLogDBClean'] = self._cleanLogDB(wflow)
        return wflow

    def _cleanLogDB(self, wflow):
        """
        A function to be used for cleaning all the records related to a workflow in logDB.
        :param wflow:       A MSRuleCleaner workflow representation
        :return:            True if NO errors were encountered while deleting
                            records from LogDB
        """
        cleanStatus = False
        try:
            if self.msConfig['enableRealMode']:
                self.logger.info("Deleting %s records from LogDB WMStats...", wflow['RequestName'])
                res = self.logDB.delete(wflow['RequestName'], agent=False)
                if res == 'delete-error':
                    msg = "Failed to delete logDB docs for wflow: %s" % wflow['RequestName']
                    raise MSRuleCleanerArchivalError(msg)
                cleanStatus = True
            else:
                self.logger.info("DRY-RUN: NOT Deleting %s records from LogDB WMStats...", wflow['RequestName'])
        except Exception as ex:
            msg = "General Exception while cleaning LogDB records for wflow: %s : %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))
        return cleanStatus

    def findTargetStatus(self, wflow):
        """
        Find the proper targeted archival status
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        # Check the available status transitions before we decide the final status
        targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get(wflow['RequestStatus'], [])
        for status in targetStatusList:
            if self.targetStatusRegex.match(status):
                wflow['TargetStatus'] = status
        self.logger.debug("TargetStatus: %s", wflow['TargetStatus'])
        return wflow

    def _checkArchDelayExpired(self, wflow):
        """
        A function to check Archival Expiration Delay based on the information
        returned by WMStatsServer regarding the time of the last request status transition
        :param wflow:      MSRuleCleaner workflow representation
        :return:           True if the archival delay have been expired
        """
        archDelayExpired = False
        currentTime = int(time.time())
        threshold = self.msConfig['archiveDelayHours'] * 3600
        try:
            lastTransitionTime = wflow['RequestTransition'][-1]['UpdateTime']
            if lastTransitionTime and (currentTime - lastTransitionTime) > threshold:
                archDelayExpired = True
        except KeyError:
            self.logger.debug("Could not find status transition history for %s", wflow['RequestName'])
        return archDelayExpired

    def setArchivalDelayExpired(self, wflow):
        """
        A function to set the 'IsArchivalDelayExpired' flag
        """
        wflow['IsArchivalDelayExpired'] = self._checkArchDelayExpired(wflow)
        return wflow

    def archive(self, wflow):
        """
        Move the workflow to the proper archived status after checking
        the full cleanup status
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        # Make all the needed checks before trying to archive
        if not (wflow['IsClean'] or wflow['ForceArchive']):
            msg = "Not properly cleaned workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not wflow['TargetStatus']:
            msg = "Could not determine which archival status to target for workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalError(msg)
        if not wflow['IsLogDBClean']:
            msg = "LogDB records have not been cleaned for workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not wflow['IsArchivalDelayExpired']:
            msg = "Archival delay period has not yet expired for workflow: %s." % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not self.msConfig['enableRealMode']:
            msg = "Real Run Mode not enabled."
            raise MSRuleCleanerArchivalSkip(msg)

        # Proceed with the actual archival:
        try:
            self.reqmgr2.updateRequestStatus(wflow['RequestName'], wflow['TargetStatus'])
            msg = "Successful status transition to: %s for workflow: %s"
            self.logger.info(msg, wflow['TargetStatus'], wflow['RequestName'])
        except Exception as ex:
            msg = "General Exception while trying status transition to: %s " % wflow['TargetStatus']
            msg += "for workflow: %s : %s" % (wflow['RequestName'], str(ex))
            raise MSRuleCleanerArchivalError(msg)
        return wflow

    def getMSOutputTransferInfo(self, wflow):
        """
        Fetches the transfer information from the MSOutput REST interface for
        the given workflow.
        :param  wflow:   A MSRuleCleaner workflow representation
        :return:         The workflow object
        """
        headers = {'Accept': 'application/json'}
        params = {}
        url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'],
                                           wflow['RequestName'])
        try:
            res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
            data = json.loads(res)['result'][0]
            transferInfo = data['transferDoc']
        except Exception as ex:
            msg = "General exception while fetching TransferInfo from MSOutput for %s. "
            msg += "Error: %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))

        # Set Transfer status - information fetched from MSOutput only
        if transferInfo is not None and transferInfo['TransferStatus'] == 'done':
            wflow['TransferDone'] = True

        # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput)
        if transferInfo is not None and transferInfo['OutputMap']:
            tapeRulesStatusList = []
            # For setting 'TransferTape' = True we require either no tape rules for the
            # workflow have been created or all existing tape rules to be in status 'OK',
            # so every empty TapeRuleID we consider as completed.
            for mapRecord in transferInfo['OutputMap']:
                if not mapRecord['TapeRuleID']:
                    continue
                rucioRule = self.rucio.getRule(mapRecord['TapeRuleID'])
                if not rucioRule:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s not found for workflow: %s "
                    msg += "Possible server side error."
                    self.logger.error(msg, mapRecord['TapeRuleID'], wflow['RequestName'])
                    continue
                if rucioRule['state'] == 'OK':
                    tapeRulesStatusList.append(True)
                    msg = "Tape rule: %s in final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
                else:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s in non final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
            if all(tapeRulesStatusList):
                wflow['TransferTape'] = True

        return wflow

    def setParentDatasets(self, wflow):
        """
        Used to resolve parent datasets for a workflow.
        :param  wflow:   A MSRuleCleaner workflow representation
        :return:         The workflow object
        """
        if wflow['InputDataset'] and wflow['IncludeParents']:
            childDataset = wflow['InputDataset']
            parentDataset = findParent([childDataset], self.msConfig['dbsUrl'])
            # NOTE: If findParent() returned None then the DBS service failed to
            #       resolve the request (it is considered an ERROR outside WMCore)
            if parentDataset.get(childDataset, None) is None:
                msg = "Failed to resolve parent dataset for: %s in workflow: %s" % (childDataset, wflow['RequestName'])
                raise MSRuleCleanerResolveParentError(msg)
            elif parentDataset:
                wflow['ParentDataset'] = [parentDataset[childDataset]]
                msg = "Found parent %s for input dataset %s in workflow: %s "
                self.logger.info(msg, parentDataset, wflow['InputDataset'], wflow['RequestName'])
            else:
                msg = "Could not find parent for input dataset: %s in workflows: %s"
                self.logger.error(msg, wflow['InputDataset'], wflow['RequestName'])
        return wflow

    def getRucioRules(self, wflow, gran, rucioAcct):
        """
        Queries Rucio and builds the relevant list of blocklevel rules for
        the given workflow
        :param  wflow:   A MSRuleCleaner workflow representation
        :param  gran:    Data granularity to search for Rucio rules. Possible values:
                         'block' or 'container'
        :return:         The workflow object
        """
        currPline = wflow['PlineMarkers'][-1]

        # Create the container list to the rucio account map and set the checkGlobalLocks flag.
        mapRuleType = {self.msConfig['rucioWmaAccount']: ["OutputDatasets"],
                       self.msConfig['rucioMStrAccount']: ["InputDataset", "MCPileup",
                                                           "DataPileup", "ParentDataset"]}
        if rucioAcct == self.msConfig['rucioMStrAccount']:
            checkGlobalLocks = True
        else:
            checkGlobalLocks = False

        # Find all the data placement rules created by the components:
        for dataType in mapRuleType[rucioAcct]:
            dataList = wflow[dataType] if isinstance(wflow[dataType], list) else [wflow[dataType]]
            for dataCont in dataList:
                if dataCont is None:
                    continue
                self.logger.debug("getRucioRules: dataCont: %s", pformat(dataCont))
                if checkGlobalLocks and dataCont in self.globalLocks:
                    msg = "Found dataset: %s in GlobalLocks. NOT considering it for filling the "
                    msg += "RulesToClean list for both container and block level Rules for workflow: %s!"
                    self.logger.info(msg, dataCont, wflow['RequestName'])
                    continue
                if gran == 'container':
                    for rule in self.rucio.listDataRules(dataCont, account=rucioAcct):
                        wflow['RulesToClean'][currPline].append(rule['id'])
                        msg = "Found %s container-level rule to be deleted for container %s"
                        self.logger.info(msg, rule['id'], dataCont)
                elif gran == 'block':
                    try:
                        blocks = self.rucio.getBlocksInContainer(dataCont)
                        for block in blocks:
                            for rule in self.rucio.listDataRules(block, account=rucioAcct):
                                wflow['RulesToClean'][currPline].append(rule['id'])
                                msg = "Found %s block-level rule to be deleted for container %s"
                                self.logger.info(msg, rule['id'], dataCont)
                    except WMRucioDIDNotFoundException:
                        msg = "Container: %s not found in Rucio for workflow: %s."
                        self.logger.info(msg, dataCont, wflow['RequestName'])
        return wflow

    def cleanRucioRules(self, wflow):
        """
        Cleans all the Rules present in the field 'RulesToClean' in the MSRuleCleaner
        workflow representation. And fills the relevant Cleanup Status.
        :param wflow:   A MSRuleCleaner workflow representation
        :return:        The workflow object
        """
        # NOTE: The function should be called independently and sequentially from
        #       The Input and the respective BlockLevel pipelines.

        # NOTE: The current functional pipeline is always the last one in the PlineMarkers list
        currPline = wflow['PlineMarkers'][-1]
        delResults = []
        if self.msConfig['enableRealMode']:
            for rule in wflow['RulesToClean'][currPline]:
                self.logger.info("%s: Deleting ruleId: %s ", currPline, rule)
                delResult = self.rucio.deleteRule(rule)
                delResults.append(delResult)
                if not delResult:
                    self.logger.warning("%s: Failed to delete ruleId: %s ", currPline, rule)
        else:
            for rule in wflow['RulesToClean'][currPline]:
                delResults.append(True)
                self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ", currPline, rule)

        # Set the cleanup flag:
        wflow['CleanupStatus'][currPline] = all(delResults)
        return wflow

    def getRequestRecords(self, reqStatus):
        """
        Queries ReqMgr2 for requests in a given status.
        :param reqStatus: The status for the requests to be fetched from ReqMgr2
        :return requests: A dictionary with all the workflows in the given status
        """
        self.logger.info("Fetching requests in status: %s", reqStatus)
        result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True)
        if not result:
            requests = {}
        else:
            requests = result[0]
        self.logger.info('  retrieved %s requests in status: %s', len(requests), reqStatus)
        return requests
Exemplo n.º 6
0
class LogDBTest(unittest.TestCase):
    """
    _LogDBTest_

    """
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.
        """
        logging.basicConfig()
        self.logger = logging.getLogger('LogDBTest')
        #        self.logger.setLevel(logging.DEBUG)

        self.schema = []
        self.couchApps = ["LogDB"]
        self.testInit = TestInitCouchApp('LogDBTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'logdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        url = "%s/%s" % (self.testInit.couchUrl, dbName)

        identifier = 'agentname'
        self.agent_inst = LogDB(url,
                                identifier,
                                logger=self.logger,
                                thread_name="MainThread")
        self.agent_inst2 = LogDB(url,
                                 identifier,
                                 logger=self.logger,
                                 thread_name="Test")
        identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
        self.user_inst = LogDB(url,
                               identifier,
                               logger=self.logger,
                               thread_name="MainThread")
        self.report = LogDBReport(self.agent_inst)

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def test_docid(self):
        "Test docid API"
        request = 'abs'
        mtype = self.agent_inst.backend.prefix('info')
        res = self.agent_inst.backend.docid(request, mtype)
        key = '--'.join([
            request, self.agent_inst.identifier, self.agent_inst.thread_name,
            mtype
        ])
        khash = gen_hash(key)
        self.assertEqual(res, khash)

    def test_prefix(self):
        "Test prefix LogDBBackend API"

        self.assertEqual(self.agent_inst.agent, 1)
        mtype = self.agent_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'agent-msg')

        self.assertEqual(self.user_inst.agent, 0)
        mtype = self.user_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'msg')

    def test_apis(self):
        "Test LogDB APIs"
        request = 'abc'

        # if we post messages for the same request only last one should survive
        self.agent_inst.post(request, 'msg1')
        #        time.sleep(1)
        self.agent_inst.post(request, 'msg2')
        #        time.sleep(1)
        self.agent_inst.post(request, 'msg3')
        #        time.sleep(1)
        self.agent_inst.post(request, 'msg4')
        #        time.sleep(1)
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]['msg'], 'msg4')

        # add message as user, user based messages will grow
        self.user_inst.post(request, 'doc1')
        #        time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 2)  # we have msg4 and doc1
        self.user_inst.post(request,
                            'doc1')  # we have msg2 and two doc1 messages
        #        time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 3)

    def test_cleanup(self):
        "Test clean-up LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'comment')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        all_docs = self.agent_inst.get(request)
        self.agent_inst.backend.cleanup(thr=10)  # look into past
        past_docs = self.agent_inst.get(request)
        self.assertEqual(len(all_docs), len(past_docs))
        self.agent_inst.backend.cleanup(thr=-10)  # look into future
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 0)

    def test_get_all_requests(self):
        "Test get_all_requests LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        sum_docs = self.agent_inst.get(request)
        self.assertEqual(len(sum_docs), 3)  # since we have two info records
        request = 'abc2'
        self.agent_inst.post(request, 'msg1', 'info')
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs),
                         2)  # we only have two distinct request names

    def test_delete(self):
        "Test delete LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.delete(request)
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 0)

    def test_get(self):
        "Test get LogDB API"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1',
                              'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        docs = self.agent_inst.get(request)
        self.assertEqual(
            len(docs), 4
        )  # there should be 4 docs one for Test thread and 3 for MainThread
        docs = self.agent_inst2.get(request)
        self.assertEqual(len(docs), 4)  # this should result the same as above.

        docs1 = self.agent_inst.get(request, 'info')
        docs2 = self.agent_inst.get(request, 'info')
        req1 = set([r['request'] for r in docs1])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in docs2])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)

    def test_thread_name(self):
        "Test thread name support by LogDB APIs"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1',
                              'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        maindocs = self.agent_inst.get(request)
        self.assertEqual(len(maindocs),
                         4)  # since we have two info records in agent_inst
        thr_docs = self.agent_inst2.get(request)
        self.assertEqual(len(thr_docs),
                         4)  # number of docs in different thread
        req1 = set([r['request'] for r in maindocs])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in thr_docs])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)
        worker1 = set([r['thr'] for r in maindocs])
        self.assertEqual(worker1, set(["Test", "MainThread"]))
        worker2 = set([r['thr'] for r in thr_docs])
        self.assertEqual(worker2, set(["Test", "MainThread"]))

        docs = set([r['identifier'] for r in maindocs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))
        docs = set([r['identifier'] for r in thr_docs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))

    def test_checks(self):
        "Tests LogDB check/mtype functionality"
        request = 'abc'
        res = self.agent_inst.post(
            request, 'msg', 'ingo')  # should be silent, i.o. no Exceptions
        self.assertEqual('post-error', res)
        args = (request, 'msg', 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update,
                          *args)
        args = (request, 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)
        args = ('', 'msg', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update,
                          *args)
        args = ('', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)

    def test_report(self):
        "Test LogDBReport APIs"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        time.sleep(1)
        self.agent_inst.post(request, 'msg2', 'comment')
        time.sleep(1)
        self.agent_inst.post(request, 'msg3', 'warning')
        time.sleep(1)
        self.agent_inst.post(request, 'msg4', 'error')
        time.sleep(1)
        self.report.to_stdout(request)
        out = self.report.to_txt(request)
        print("\n### txt report\n", out)
        out = self.report.to_html(request)
        print("\n### html report\n", out)

        docs = self.agent_inst.get(request)
        tdocs = self.report.orderby(docs, order='ts')
        messages = [d['msg'] for d in tdocs]
        self.assertEqual(messages, ['msg4', 'msg3', 'msg2', 'msg1'])

    def test_heartbeat(self):
        "Test heartbeat_report function"
        threadName = 'MainThread'
        failThread = 'FailThread'
        self.agent_inst.post(msg="Test heartbeat", mtype="info")
        time.sleep(1)
        self.agent_inst.post(msg="Test heartbeatError", mtype="error")
        report = self.agent_inst.heartbeat_report()
        self.assertEqual(report[threadName]['type'], 'agent-error')

        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [threadName])

        self.agent_inst.post(msg="Test heartbeat", mtype="info")
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [])

        report = self.agent_inst.wmstats_down_components_report(
            [threadName, failThread])
        self.assertEqual(report['down_components'], [failThread])

        self.agent_inst.post(msg="Test heartbeatError2", mtype="error")
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [threadName])

        self.agent_inst.delete(mtype="error", this_thread=True)
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [])