示例#1
0
    def msOutputProducer(self, requestRecords):
        """
        A top level function to fetch requests from ReqMgr2, and produce the correspondent
        records for MSOutput in MongoDB.
        :param requestRecords: list of request dictionaries retrieved from ReqMgr2

        It's implemented as a pipeline, performing the following sequential actions:
           1) document transformer - creates a MSOutputTemplate object from the request dict
           2) document info updater - parses the MSOutputTemplate object and updates the
              necessary data structure mapping output/locations/campaign/etc
           3) document uploader - inserts the MSOutputTemplate object into the correct
              MongoDB collection (ReVal is separated from standard workflows)
           4) document cleaner - releases memory reference to the MSOutputTemplate object
        """
        # DONE:
        #    to set a destructive function at the end of the pipeline
        # NOTE:
        #    To discuss the collection names
        # NOTE:
        #    Here we should never use docUploader with `update=True`, because
        #    this will erase the latest state of already existing and fully or
        #    partially processed documents by the Consumer pipeline
        self.logger.info("Running the msOutputProducer ...")
        msPipeline = Pipeline(name="MSOutputProducer Pipeline",
                              funcLine=[
                                  Functor(self.docTransformer),
                                  Functor(self.docInfoUpdate),
                                  Functor(self.docUploader),
                                  Functor(self.docCleaner)
                              ])
        # TODO:
        #    To generate the object from within the Function scope see above.
        counter = 0
        for request in viewvalues(requestRecords):
            if request['RequestName'] in self.requestNamesCached:
                # if it's cached, then it's already in MongoDB, no need to redo this thing!
                continue
            counter += 1
            try:
                pipeLineName = msPipeline.getPipelineName()
                msPipeline.run(request)
            except (KeyError, TypeError) as ex:
                msg = "%s Possibly broken read from ReqMgr2 API or other. Err: %s." % (
                    pipeLineName, str(ex))
                msg += " Continue to the next document."
                self.logger.exception(msg)
                continue
            except Exception as ex:
                msg = "%s General Error from pipeline. Err: %s. " % (
                    pipeLineName, str(ex))
                msg += "Giving up Now."
                self.logger.exception(str(ex))
                break
        return counter
示例#2
0
    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSUnmerged module
        :param msConfig: micro service configuration
        """
        super(MSUnmerged, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        self.msConfig.setdefault("limitFilesPerRSE", 200)
        self.msConfig.setdefault("skipRSEs", [])
        self.msConfig.setdefault("rseExpr", "*")
        self.msConfig.setdefault("enableRealMode", False)
        self.msConfig.setdefault("dumpRSE", False)
        self.msConfig.setdefault("gfalLogLevel", 'normal')
        self.msConfig.setdefault("dirFilterIncl", [])
        self.msConfig.setdefault("dirFilterExcl", [])
        self.msConfig.setdefault("emulateGfal2", False)
        self.msConfig.setdefault("filesToDeleteSliceSize", 100)
        if self.msConfig['emulateGfal2'] is False and gfal2 is None:
            msg = "Failed to import gfal2 library while it's not "
            msg += "set to emulate it. Crashing the service!"
            raise ImportError(msg)

        # TODO: Add 'alertManagerUrl' to msConfig'
        # self.alertServiceName = "ms-unmerged"
        # self.alertManagerAPI = AlertManagerAPI(self.msConfig.get("alertManagerUrl", None), logger=logger)

        # Instantiating the Rucio Consistency Monitor Client
        self.rucioConMon = RucioConMon(self.msConfig['rucioConMon'], logger=self.logger)

        self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger)

        # Building all the Pipelines:
        pName = 'plineUnmerged'
        self.plineUnmerged = Pipeline(name=pName,
                                      funcLine=[Functor(self.updateRSETimestamps, start=True, end=False),
                                                Functor(self.consRecordAge),
                                                Functor(self.getUnmergedFiles),
                                                Functor(self.filterUnmergedFiles),
                                                Functor(self.getPfn),
                                                Functor(self.cleanRSE),
                                                Functor(self.updateRSECounters, pName),
                                                Functor(self.updateRSETimestamps, start=False, end=True),
                                                Functor(self.purgeRseObj, dumpRSE=self.msConfig['dumpRSE'])])
        # Initialization of the deleted files counters:
        self.rseCounters = {}
        self.plineCounters = {}
        self.rseTimestamps = {}
        self.rseConsStats = {}
        self.protectedLFNs = []

        # The basic /store/unmerged regular expression:
        self.regStoreUnmergedLfn = re.compile("^/store/unmerged/.*$")
        self.regStoreUnmergedPfn = re.compile("^.+/store/unmerged/.*$")
示例#3
0
    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSUnmerged module
        :param msConfig: micro service configuration
        """
        super(MSUnmerged, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        # self.msConfig.setdefault('limitRSEsPerInstance', 100)
        # self.msConfig.setdefault('limitTiersPerInstance', ['T1', 'T2', 'T3'])
        # self.msConfig.setdefault("rucioAccount", "FIXME_RUCIO_ACCT")
        self.msConfig.setdefault("rseExpr", "*")
        # TODO: Add 'alertManagerUrl' to msConfig'
        # self.alertServiceName = "ms-unmerged"
        # self.alertManagerAPI = AlertManagerAPI(self.msConfig.get("alertManagerUrl", None), logger=logger)

        # Building all the Pipelines:
        pName = 'plineUnmerged'
        self.plineUnmerged = Pipeline(name=pName,
                                      funcLine=[Functor(self.cleanFiles)])
        # Initialization of the deleted files counters:
        self.rseCounters = {}
        self.plineCounters = {}
示例#4
0
    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSRuleCleaner module
        :param msConfig: micro service configuration
        """
        super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        self.msConfig.setdefault("services", ['ruleCleaner'])
        self.msConfig.setdefault("rucioWmaAccount", "wma_test")
        self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor")
        self.msConfig.setdefault('enableRealMode', False)
        self.mode = "RealMode" if self.msConfig[
            'enableRealMode'] else "DryRunMode"
        self.emailAlert = EmailAlert(self.msConfig)
        self.curlMgr = RequestHandler()

        # Building all the Pipelines:
        pName = 'plineMSTrCont'
        self.plineMSTrCont = Pipeline(name=pName,
                                      funcLine=[
                                          Functor(self.setPlineMarker, pName),
                                          Functor(self.cleanRucioRules)
                                      ])
        pName = 'plineMSTrBlock'
        self.plineMSTrBlock = Pipeline(name=pName,
                                       funcLine=[
                                           Functor(self.setPlineMarker, pName),
                                           Functor(self.cleanRucioRules)
                                       ])
        pName = 'plineAgentCont'
        self.plineAgentCont = Pipeline(
            name=pName,
            funcLine=[
                Functor(self.setPlineMarker, pName),
                Functor(self.getRucioRules, 'container',
                        self.msConfig['rucioWmaAccount']),
                Functor(self.cleanRucioRules)
            ])
        pName = 'plineAgentBlock'
        self.plineAgentBlock = Pipeline(
            name=pName,
            funcLine=[
                Functor(self.setPlineMarker, pName),
                Functor(self.getRucioRules, 'block',
                        self.msConfig['rucioWmaAccount']),
                Functor(self.cleanRucioRules)
            ])
        pName = 'plineArchive'
        self.plineArchive = Pipeline(name=pName,
                                     funcLine=[
                                         Functor(self.setPlineMarker, pName),
                                         Functor(self.setClean),
                                         Functor(self.archive)
                                     ])

        # Building the different set of plines we will need later:
        # NOTE: The following are all the functional pipelines which are supposed to include
        #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
        self.cleanuplines = [
            self.plineMSTrCont, self.plineMSTrBlock, self.plineAgentCont,
            self.plineAgentBlock
        ]
        # Building an auxiliary list of cleanup pipeline names only:
        self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

        # Building lists of pipelines related only to Agents or MStransferror
        self.agentlines = [self.plineAgentCont, self.plineAgentBlock]
        self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock]

        # Initialization of the 'cleaned' and 'archived' counters:
        self.wfCounters = {'cleaned': {}, 'archived': 0}
示例#5
0
    def msOutputProducer(self, requestRecords):
        """
        A top level function to drive the upload of all the documents to MongoDB
        """

        # DONE:
        #    To implement this as a functional pipeline in the following sequence:
        #    1) document streamer - to generate all the records coming from Reqmgr2
        #    2) document stripper - to cut all the cut all the kews we do not need
        #       Mongodb document creator - to pass it through the MongoDBTemplate
        #    3) document updater - fetch & update all the needed info like campaign config etc.
        #    4) MongoDB upload/update - to upload/update the document in Mongodb

        # DONE:
        #    to have the requestRecords generated through a call to docStreamer
        #    and the call should happen from inside this function so that all
        #    the Objects generated do not leave the scope of this function and
        #    with that  to reduce big memory footprint

        # DONE:
        #    to set a destructive function at the end of the pipeline
        # NOTE:
        #    To discuss the collection names
        # NOTE:
        #    Here we should never use docUploader with `update=True`, because
        #    this will erase the latest state of already existing and fully or
        #    partially processed documents by the Consumer pipeline
        self.logger.info("Running the msOutputProducer ...")
        msPipelineRelVal = Pipeline(name="MSOutputProducer PipelineRelVal",
                                    funcLine=[
                                        Functor(self.docTransformer),
                                        Functor(self.docKeyUpdate,
                                                isRelVal=True),
                                        Functor(self.docInfoUpdate,
                                                pipeLine='PipelineRelVal'),
                                        Functor(self.docUploader,
                                                self.msOutRelValColl),
                                        Functor(self.docCleaner)
                                    ])
        msPipelineNonRelVal = Pipeline(
            name="MSOutputProducer PipelineNonRelVal",
            funcLine=[
                Functor(self.docTransformer),
                Functor(self.docKeyUpdate, isRelVal=False),
                Functor(self.docInfoUpdate, pipeLine='PipelineNonRelVal'),
                Functor(self.docUploader, self.msOutNonRelValColl),
                Functor(self.docCleaner)
            ])
        # TODO:
        #    To generate the object from within the Function scope see above.
        counter = 0
        for _, request in requestRecords:
            counter += 1
            try:
                if request.get('SubRequestType') == 'RelVal':
                    pipeLine = msPipelineRelVal
                    pipeLineName = pipeLine.getPipelineName()
                    pipeLine.run(request)
                else:
                    pipeLine = msPipelineNonRelVal
                    pipeLineName = pipeLine.getPipelineName()
                    pipeLine.run(request)
            except KeyError as ex:
                msg = "%s Possibly broken read from Reqmgr2 API or other Err: %s. " % (
                    pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except TypeError as ex:
                msg = "%s Possibly broken read from Reqmgr2 API or other Err: %s. " % (
                    pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except Exception as ex:
                msg = "%s General Error from pipeline. Err: %s. " % (
                    pipeLineName, str(ex))
                msg += "Giving up Now."
                self.logger.error(msg)
                self.logger.exception(ex)
                break
        return counter
示例#6
0
    def msOutputConsumer(self):
        """
        A top level function to drive the creation and book keeping of all the
        subscriptions to the Data Management System
        """
        # DONE:
        #    Done: To check if the 'enableDataPlacement' flag is really taken into account
        #    Done: To make this for both relvals and non relvals
        #    Done: To return the result
        #    Done: To make report document
        #    Done: To build it through a pipe
        #    Done: To write back the updated document to MonogoDB
        msPipelineRelVal = Pipeline(name="MSOutputConsumer PipelineRelVal",
                                    funcLine=[
                                        Functor(self.docReadfromMongo,
                                                self.msOutRelValColl,
                                                setTaken=False),
                                        Functor(self.makeSubscriptions),
                                        Functor(self.docKeyUpdate,
                                                isTaken=False,
                                                isTakenBy=None,
                                                lastUpdate=int(time())),
                                        Functor(self.docUploader,
                                                self.msOutRelValColl,
                                                update=True,
                                                keys=[
                                                    'isTaken', 'lastUpdate',
                                                    'transferStatus',
                                                    'transferIDs'
                                                ]),
                                        Functor(self.docDump,
                                                pipeLine='PipelineRelVal'),
                                        Functor(self.docCleaner)
                                    ])
        msPipelineNonRelVal = Pipeline(
            name="MSOutputConsumer PipelineNonRelVal",
            funcLine=[
                Functor(self.docReadfromMongo,
                        self.msOutNonRelValColl,
                        setTaken=False),
                Functor(self.makeSubscriptions),
                Functor(self.docKeyUpdate,
                        isTaken=False,
                        isTakenBy=None,
                        lastUpdate=int(time())),
                Functor(self.docUploader,
                        self.msOutNonRelValColl,
                        update=True,
                        keys=[
                            'isTaken', 'lastUpdate', 'transferStatus',
                            'transferIDs'
                        ]),
                Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                Functor(self.docCleaner)
            ])

        # NOTE:
        #    If we actually have any exception that has reached to the top level
        #    exception handlers (eg. here - outside the pipeLine), this means
        #    some function from within the pipeLine has not caught it and the msOutDoc
        #    has left the pipe and died before the relevant document in MongoDB
        #    has been released (its flag 'isTaken' to be set back to False)
        wfCounters = {}
        for pipeLine in [msPipelineRelVal, msPipelineNonRelVal]:
            pipeLineName = pipeLine.getPipelineName()
            wfCounters[pipeLineName] = 0
            while wfCounters[pipeLineName] < self.msConfig[
                    'limitRequestsPerCycle']:
                # take only workflows:
                # - which are not already taken or
                # - a transfer subscription have never been done for them and
                # - avoid retrying workflows in the same cycle
                # NOTE:
                #    Once we are running the service not in a dry run mode we may
                #    consider adding and $or condition in mQueryDict for transferStatus:
                #    '$or': [{'transferStatus': None},
                #            {'transferStatus': 'incomplete'}]
                #    So that we can collect also workflows with partially or fully
                #    unsuccessful transfers
                currTime = int(time())
                treshTime = currTime - self.msConfig['interval']
                mQueryDict = {
                    '$and': [{
                        'isTaken': False
                    }, {
                        '$or': [{
                            'transferStatus': None
                        }, {
                            'transferStatus': 'incomplete'
                        }]
                    }, {
                        '$or': [{
                            'lastUpdate': None
                        }, {
                            'lastUpdate': {
                                '$lt': treshTime
                            }
                        }]
                    }]
                }

                # FIXME:
                #    To redefine those exceptions as MSoutputExceptions and
                #    start using those here so we do not mix with general errors
                try:
                    pipeLine.run(mQueryDict)
                except KeyError as ex:
                    msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Continue to the next document."
                    self.logger.exception(msg)
                    continue
                except TypeError as ex:
                    msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Continue to the next document."
                    self.logger.exception(msg)
                    continue
                except EmptyResultError as ex:
                    msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                    msg += "We are done for the current cycle."
                    self.logger.info(msg)
                    break
                except Exception as ex:
                    msg = "%s General Error from pipeline. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Giving up Now."
                    self.logger.error(msg)
                    self.logger.exception(ex)
                    break
                wfCounters[pipeLineName] += 1

        wfCounterTotal = sum(wfCounters.values())
        return wfCounterTotal
示例#7
0
    def msOutputConsumer(self):
        """
        A top level function to drive the creation and book keeping of all the
        subscriptions to the Data Management System
        """
        # DONE:
        #    Done: To check if the 'enableDataPlacement' flag is really taken into account
        #    Done: To make this for both relvals and non relvals
        #    Done: To return the result
        #    Done: To make report document
        #    Done: To build it through a pipe
        #    Done: To write back the updated document to MonogoDB
        msPipelineRelVal = Pipeline(
            name="MSOutputConsumer PipelineRelVal",
            funcLine=[
                Functor(self.makeSubscriptions),
                Functor(self.makeTapeSubscriptions),
                Functor(self.docUploader,
                        update=True,
                        keys=['LastUpdate', 'TransferStatus', 'OutputMap']),
                Functor(self.docDump, pipeLine='PipelineRelVal'),
                Functor(self.docCleaner)
            ])
        msPipelineNonRelVal = Pipeline(
            name="MSOutputConsumer PipelineNonRelVal",
            funcLine=[
                Functor(self.makeSubscriptions),
                Functor(self.makeTapeSubscriptions),
                Functor(self.docUploader,
                        update=True,
                        keys=['LastUpdate', 'TransferStatus', 'OutputMap']),
                Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                Functor(self.docCleaner)
            ])

        wfCounterTotal = 0
        mQueryDict = {'TransferStatus': 'pending'}
        pipeCollections = [(msPipelineRelVal, self.msOutRelValColl),
                           (msPipelineNonRelVal, self.msOutNonRelValColl)]
        for pipeColl in pipeCollections:
            wfCounters = 0
            pipeLine = pipeColl[0]
            dbColl = pipeColl[1]
            pipeLineName = pipeLine.getPipelineName()
            for docOut in self.getDocsFromMongo(
                    mQueryDict, dbColl,
                    self.msConfig['limitRequestsPerCycle']):
                # FIXME:
                #    To redefine those exceptions as MSoutputExceptions and
                #    start using those here so we do not mix with general errors
                try:
                    # If it's in MongoDB, it can get into our in-memory cache
                    self.requestNamesCached.append(docOut['RequestName'])
                    pipeLine.run(docOut)
                except (KeyError, TypeError) as ex:
                    msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Continue to the next document."
                    self.logger.exception(msg)
                    continue
                except EmptyResultError as ex:
                    msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                    msg += "We are done for the current cycle."
                    self.logger.info(msg)
                    break
                except Exception as ex:
                    msg = "%s General error from pipeline. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Will retry again in the next cycle."
                    self.logger.exception(msg)
                    break
                wfCounters += 1
            self.logger.info("Processed %d workflows from pipeline: %s",
                             wfCounters, pipeLineName)
            wfCounterTotal += wfCounters

        return wfCounterTotal