Example #1
    def unified(self, workflows):
        """
        Unified Transferor black box
        :param workflows: input workflow objects
        """
        # get aux info for dataset/blocks from inputs/parents/pileups
        # make subscriptions based on site white/black lists
        self.logger.info("Unified method processing %d requests",
                         len(workflows))

        orig = time.time()
        # start by finding the parent datasets for requests that require them
        time0 = time.time()
        parentMap = self.getParentDatasets(workflows)
        self.setParentDatasets(workflows, parentMap)
        self.logger.debug(elapsedTime(time0, "### getParentDatasets"))

        # then check the secondary dataset sizes and locations
        time0 = time.time()
        sizeByDset, locationByDset = self.getSecondaryDatasets(workflows)
        self.setSecondaryDatasets(workflows, sizeByDset, locationByDset)
        self.logger.debug(elapsedTime(time0, "### getSecondaryDatasets"))

        # get the final lists of valid primary and parent blocks,
        # considering run, block and lumi lists
        time0 = time.time()
        blocksByDset = self.getInputDataBlocks(workflows)
        self.setInputDataBlocks(workflows, blocksByDset)
        self.logger.debug(elapsedTime(time0, "### getInputDataBlocks"))

        # get a final list of parent blocks
        time0 = time.time()
        parentageMap = self.getParentChildBlocks(workflows)
        self.setParentChildBlocks(workflows, parentageMap)
        self.logger.debug(elapsedTime(time0, "### getParentChildBlocks"))
        self.logger.info(elapsedTime(orig,
                                     '### total time for unified method'))
        self.logger.info("Unified method successfully processed %d requests",
                         len(workflows))

        return workflows
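
The timing instrumentation above relies on an elapsedTime helper that turns a time.time() stamp into a human-readable message for the logger. The minimal sketch below is only an assumption about the interface implied by these calls, not the codebase's actual implementation.

import time

def elapsedTime(since, msg='elapsed time', ndigits=2):
    """
    Format the wall-clock time elapsed since `since` (a time.time() timestamp).
    The message is returned so it can be handed to a logger call; the
    standalone scripts in the later examples may instead rely on a variant
    that prints the message directly.
    """
    return "%s: %.*f sec" % (msg, ndigits, time.time() - since)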
Example #2
    def unifiedUnused(self):
        """
        FIXME FIXME TODO
        Leave this code in a different method until we evaluate what
        is needed and what is not, and refactor this thing...
        """
        # FIXME: placeholder assignments to keep pylint happy; remove them later
        requestNames = []
        uConfig = {}

        # requestNames = [r.getName() for r in workflows]
        # TODO: the logic below shows original unified port and it should be
        #       revisited wrt new proposal specs and unified codebase

        # get workflows from list of requests
        orig = time.time()
        time0 = time.time()
        requestWorkflows = self._getRequestWorkflows(requestNames)
        requestWorkflows = requestWorkflows.values()
        self.logger.debug(elapsedTime(time0, "### getWorkflows"))

        # get workflows info summaries and collect datasets we need to process
        winfo = workflowsInfo(requestWorkflows)
        datasets = [d for row in winfo.values() for d in row['datasets']]

        # find dataset info
        time0 = time.time()
        datasetBlocks, datasetSizes, _datasetTransfers = dbsInfo(
            datasets, self.msConfig['dbsUrl'])
        self.logger.debug(elapsedTime(time0, "### dbsInfo"))

        # find block nodes information for our datasets
        time0 = time.time()
        blockNodes = phedexInfo(datasets, self.msConfig['phedexUrl'])
        self.logger.debug(elapsedTime(time0, "### phedexInfo"))

        # find events-lumis info for our datasets
        time0 = time.time()
        eventsLumis = eventsLumisInfo(datasets, self.msConfig['dbsUrl'])
        self.logger.debug(elapsedTime(time0, "### eventsLumisInfo"))

        # get specs for all requests and re-use them later as a cache in getSiteWhiteList
        reqSpecs = self._getRequestSpecs(requestNames)

        # get a siteInfo instance once and re-use it later; it is time-consuming to construct
        siteInfo = SiteInfo(uConfig)

        requestsToProcess = []
        tst0 = time.time()
        totBlocks = totEvents = totSize = totCpuT = 0
        for wflow in requestWorkflows:
            for wname, wspec in wflow.items():
                time0 = time.time()
                cput = getComputingTime(wspec,
                                        eventsLumis=eventsLumis,
                                        dbsUrl=self.msConfig['dbsUrl'],
                                        logger=self.logger)
                ncopies = getNCopies(cput)

                attrs = winfo[wname]
                ndatasets = len(attrs['datasets'])
                npileups = len(attrs['pileups'])
                nblocks = nevts = nlumis = size = 0
                nodes = set()
                for dataset in attrs['datasets']:
                    blocks = datasetBlocks[dataset]
                    for blk in blocks:
                        for node in blockNodes.get(blk, []):
                            nodes.add(node)
                    nblocks += len(blocks)
                    size += datasetSizes[dataset]
                    edata = eventsLumis.get(dataset, {
                        'num_event': 0,
                        'num_lumi': 0
                    })
                    nevts += edata['num_event']
                    nlumis += edata['num_lumi']
                totBlocks += nblocks
                totEvents += nevts
                totSize += size
                totCpuT += cput
                sites = json.dumps(sorted(list(nodes)))
                self.logger.debug("### %s", wname)
                self.logger.debug(
                    "%s datasets, %s blocks, %s bytes (%s TB), %s nevts, %s nlumis, cput %s, copies %s, %s",
                    ndatasets, nblocks, size, teraBytes(size), nevts, nlumis,
                    cput, ncopies, sites)
                # find out which sites can serve the given workflow request
                t0 = time.time()
                lheInput, primary, parent, secondary, allowedSites \
                    = self._getSiteWhiteList(uConfig, wspec, siteInfo, reqSpecs)
                if not isinstance(primary, list):
                    primary = [primary]
                if not isinstance(secondary, list):
                    secondary = [secondary]
                wflowDatasets = primary + secondary
                wflowDatasetsBlocks = []
                for dset in wflowDatasets:
                    for item in datasetBlocks.get(dset, []):
                        wflowDatasetsBlocks.append(item)
                rdict = dict(name=wname,
                             datasets=wflowDatasets,
                             blocks=wflowDatasetsBlocks,
                             npileups=npileups,
                             size=size,
                             nevents=nevts,
                             nlumis=nlumis,
                             cput=cput,
                             ncopies=ncopies,
                             sites=sites,
                             allowedSites=allowedSites,
                             parent=parent,
                             lheInput=lheInput,
                             primary=primary,
                             secondary=secondary)
                requestsToProcess.append(rdict)
                self.logger.debug(elapsedTime(t0, "### getSiteWhiteList"))
        self.logger.debug(
            "total # of workflows %s, datasets %s, blocks %s, evts %s, size %s (%s TB), cput %s (hours)",
            len(winfo.keys()), len(datasets), totBlocks, totEvents, totSize,
            teraBytes(totSize), totCpuT)
        self.logger.debug(elapsedTime(tst0, '### workflows info'))
        self.logger.debug(elapsedTime(orig, '### total time'))
        return requestsToProcess
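
The accounting loop above assumes specific shapes for the lookup tables returned by the data services: dbsInfo maps each dataset to its blocks and total size, phedexInfo maps each block to the nodes hosting it, and eventsLumisInfo maps each dataset to its event and lumi counts. The hypothetical stand-ins below (dataset, block and site names are made up) only illustrate those shapes, e.g. for exercising the loop without live DBS/PhEDEx services.

# hypothetical stand-ins mirroring the structures consumed by the loop above
datasetBlocks = {'/Prim/Proc/TIER': ['/Prim/Proc/TIER#blk1', '/Prim/Proc/TIER#blk2']}
datasetSizes = {'/Prim/Proc/TIER': 2 * 10**12}  # total size in bytes per dataset
blockNodes = {'/Prim/Proc/TIER#blk1': ['T1_US_FNAL_Disk'],
              '/Prim/Proc/TIER#blk2': ['T2_CH_CERN']}
eventsLumis = {'/Prim/Proc/TIER': {'num_event': 1000000, 'num_lumi': 500}}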
Example #3
def requestsInfo(reqmgrAuxSvc, state='assignment-approved'):
    """
    Helper function to get information about all requests
    in assignment-approved state in ReqMgr
    """
    # get list of known requests in workqueue
    requestJobs = workqueueRequests(state)
    requests = requestJobs.keys()

    # get workflows from list of requests 
    time0 = orig = time.time()
    requestWorkflows = getRequestWorkflows(requests)
    workflows = requestWorkflows.values()
    elapsedTime(time0, "### getWorkflows")

#     time0 = orig = time.time()
#     workflows = getWorkflows(state)
#     elapsedTime(time0, "### getWorkflows")

    # get workflows info summaries and collect datasets we need to process
    winfo = workflowsInfo(workflows)
    datasets = [d for row in winfo.values() for d in row['datasets']]

    # find dataset info
    time0 = time.time()
    datasetBlocks, datasetSizes = dbsInfo(datasets)
    elapsedTime(time0, "### dbsInfo")

    # find block nodes information for our datasets
    time0 = time.time()
    blockNodes = phedexInfo(datasets)
    elapsedTime(time0, "### phedexInfo")

    # find events-lumis info for our datasets
    time0 = time.time()
    eventsLumis = eventsLumisInfo(datasets)
    elapsedTime(time0, "### eventsLumisInfo")

    # get specs for all requests and re-use them later as a cache in getSiteWhiteList
    requests = [v['RequestName'] for w in workflows for v in w.values()]
    reqSpecs = getRequestSpecs(requests)

    # get a siteInfo instance once and re-use it later; it is time-consuming to construct
    siteInfo = SiteInfo()

    requests = {}
    totBlocks = totEvents = totSize = totCpuT = 0
    tst0 = time.time()
    for wflow in workflows:
        for wname, wspec in wflow.items():
            time0 = time.time()
            cput = getComputingTime(wspec, eventsLumis=eventsLumis)
            ncopies = getNCopies(cput)

            attrs = winfo[wname]
            ndatasets = len(attrs['datasets'])
            npileups = len(attrs['pileups'])
            nblocks = nevts = nlumis = size = 0
            nodes = set()
            for dataset in attrs['datasets']:
                blocks = datasetBlocks[dataset]
                for blk in blocks:
                    for node in blockNodes.get(blk, []):
                        nodes.add(node)
                nblocks += len(blocks)
                size += datasetSizes[dataset]
                edata = eventsLumis.get(dataset, {'num_event':0, 'num_lumi':0})
                nevts += edata['num_event']
                nlumis += edata['num_lumi']
            totBlocks += nblocks
            totEvents += nevts
            totSize += size
            totCpuT += cput
            sites = json.dumps(sorted(list(nodes)))
            njobs = requestJobs[wname]
            print("\n### %s" % wname)
            print("%s datasets, %s blocks, %s bytes (%s TB), %s nevts, %s nlumis, cput %s, copies %s, %s" \
                    % (ndatasets, nblocks, size, teraBytes(size), nevts, nlumis, cput, ncopies, sites))
            # find out which sites can serve the given workflow request
            t0 = time.time()
            lheInput, primary, parent, secondary, allowedSites \
                    = getSiteWhiteList(wspec, siteInfo, reqmgrAuxSvc, reqSpecs)
            rdict = dict(name=wname, datasets=datasets, blocks=datasetBlocks,\
                    npileups=npileups, size=size, njobs=njobs,\
                    nevents=nevts, nlumis=nlumis, cput=cput, ncopies=ncopies,\
                    sites=sites, allowedSites=allowedSites, parent=parent,\
                    lheInput=lheInput, primary=primary, secondary=secondary)
            requests[wname] = rdict
            print("sites", allowedSites)
            elapsedTime(t0, "getSiteWhiteList")
    print("\ntotal # of workflows %s, datasets %s, blocks %s, evts %s, size %s (%s TB), cput %s (hours)" \
            % (len(winfo.keys()), len(datasets), totBlocks, totEvents, totSize, teraBytes(totSize), totCpuT))
    elapsedTime(tst0, 'workflows info')
    elapsedTime(orig)
    return requests
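
requestsInfo returns a dictionary keyed by workflow name, each value being the summary rdict assembled above. A hedged usage sketch, assuming a valid reqmgrAuxSvc handle and the teraBytes helper are available:

# hypothetical usage: summarize what requestsInfo collected
info = requestsInfo(reqmgrAuxSvc, state='assignment-approved')
for wname, rdict in info.items():
    print("%s: %s TB, %s copies, njobs=%s, allowed sites: %s"
          % (wname, teraBytes(rdict['size']), rdict['ncopies'],
             rdict['njobs'], rdict['allowedSites']))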
Example #4
def unified(svc, requestRecords, logger):
    """
    Unified Transferor box

    Input parameters:
    :param svc: service instance passed through to getSiteWhiteList
    :param requestRecords: list of request records, see definition in requestRecord
    :param logger: logger
    """
    # get aux info for dataset/blocks from inputs/parents/pileups
    # make subscriptions based on site white/black lists
    logger.debug("### unified transferor")

    requests = [r['name'] for r in requestRecords]

    ### TODO: the logic below shows original unified port and it should be
    ###       revisited wrt new proposal specs and unified codebase

    # get workflows from list of requests 
    orig = time.time()
    time0 = time.time()
    requestWorkflows = getRequestWorkflows(requests)
    workflows = requestWorkflows.values()
    logger.debug(elapsedTime(time0, "### getWorkflows"))

    # get workflows info summaries and collect datasets we need to process
    winfo = workflowsInfo(workflows)
    datasets = [d for row in winfo.values() for d in row['datasets']]

    # find dataset info
    time0 = time.time()
    datasetBlocks, datasetSizes = dbsInfo(datasets)
    logger.debug(elapsedTime(time0, "### dbsInfo"))

    # find block nodes information for our datasets
    time0 = time.time()
    blockNodes = phedexInfo(datasets)
    logger.debug(elapsedTime(time0, "### phedexInfo"))

    # find events-lumis info for our datasets
    time0 = time.time()
    eventsLumis = eventsLumisInfo(datasets)
    logger.debug(elapsedTime(time0, "### eventsLumisInfo"))

    # get specs for all requests and re-use them later as a cache in getSiteWhiteList
    requests = [v['RequestName'] for w in workflows for v in w.values()]
    reqSpecs = getRequestSpecs(requests)

    # get a siteInfo instance once and re-use it later; it is time-consuming to construct
    siteInfo = SiteInfo()

    requestsToProcess = []
    totBlocks = totEvents = totSize = totCpuT = 0
    tst0 = time.time()
    for wflow in workflows:
        for wname, wspec in wflow.items():
            time0 = time.time()
            cput = getComputingTime(wspec, eventsLumis=eventsLumis)
            ncopies = getNCopies(cput)

            attrs = winfo[wname]
            ndatasets = len(attrs['datasets'])
            npileups = len(attrs['pileups'])
            nblocks = nevts = nlumis = size = 0
            nodes = set()
            for dataset in attrs['datasets']:
                blocks = datasetBlocks[dataset]
                for blk in blocks:
                    for node in blockNodes.get(blk, []):
                        nodes.add(node)
                nblocks += len(blocks)
                size += datasetSizes[dataset]
                edata = eventsLumis.get(dataset, {'num_event': 0, 'num_lumi': 0})
                nevts += edata['num_event']
                nlumis += edata['num_lumi']
            totBlocks += nblocks
            totEvents += nevts
            totSize += size
            totCpuT += cput
            sites = json.dumps(sorted(list(nodes)))
            logger.debug("### %s", wname)
            logger.debug("%s datasets, %s blocks, %s bytes (%s TB), %s nevts, %s nlumis, cput %s, copies %s, %s", ndatasets, nblocks, size, teraBytes(size), nevts, nlumis, cput, ncopies, sites)
            # find out which sites can serve the given workflow request
            t0 = time.time()
            lheInput, primary, parent, secondary, allowedSites \
                = getSiteWhiteList(svc, wspec, siteInfo, reqSpecs)
            rdict = dict(name=wname, datasets=datasets, blocks=datasetBlocks, \
                         npileups=npileups, size=size,
                         nevents=nevts, nlumis=nlumis, cput=cput, ncopies=ncopies, \
                         sites=sites, allowedSites=allowedSites, parent=parent, \
                         lheInput=lheInput, primary=primary, secondary=secondary)
            requestsToProcess.append(rdict)
            logger.debug(elapsedTime(t0, "### getSiteWhiteList"))
    logger.debug("total # of workflows %s, datasets %s, blocks %s, evts %s, size %s (%s TB), cput %s (hours)", len(winfo.keys()), len(datasets), totBlocks, totEvents, totSize, teraBytes(totSize), totCpuT)
    logger.debug(elapsedTime(tst0, '### workflows info'))
    logger.debug(elapsedTime(orig, '### total time'))

    return requestsToProcess
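
A usage sketch for this standalone unified function. The svc handle (whatever getSiteWhiteList expects) is assumed to exist, and the request names below are placeholders; only the 'name' key of each request record is read at the top of the function.

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("unified-transferor")

# hypothetical request records; unified() only reads the 'name' field here
requestRecords = [{'name': 'pdmvserv_task_EXAMPLE-Workflow-00001'},
                  {'name': 'pdmvserv_task_EXAMPLE-Workflow-00002'}]

# svc: the service instance expected by getSiteWhiteList (assumed available)
requestsToProcess = unified(svc, requestRecords, logger)
logger.info("collected %d requests to process", len(requestsToProcess))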
Example #5
def requestsInfo(reqmgrAuxSvc, state='assignment-approved'):
    """
    Helper function to get information about all requests
    in assignment-approved state in ReqMgr
    """
    # get list of known requests in workqueue
    requestJobs = workqueueRequests(state)
    requests = requestJobs.keys()

    # get workflows from list of requests
    time0 = orig = time.time()
    requestWorkflows = getRequestWorkflows(requests)
    workflows = requestWorkflows.values()
    elapsedTime(time0, "### getWorkflows")

    #     time0 = orig = time.time()
    #     workflows = getWorkflows(state)
    #     elapsedTime(time0, "### getWorkflows")

    # get workflows info summaries and collect datasets we need to process
    winfo = workflowsInfo(workflows)
    datasets = [d for row in winfo.values() for d in row['datasets']]

    # find dataset info
    time0 = time.time()
    datasetBlocks, datasetSizes = dbsInfo(datasets)
    elapsedTime(time0, "### dbsInfo")

    # find block nodes information for our datasets
    time0 = time.time()
    blockNodes = phedexInfo(datasets)
    elapsedTime(time0, "### phedexInfo")

    # find events-lumis info for our datasets
    time0 = time.time()
    eventsLumis = eventsLumisInfo(datasets)
    elapsedTime(time0, "### eventsLumisInfo")

    # get specs for all requests and re-use them later as a cache in getSiteWhiteList
    requests = [v['RequestName'] for w in workflows for v in w.values()]
    reqSpecs = getRequestSpecs(requests)

    # get a siteInfo instance once and re-use it later; it is time-consuming to construct
    siteInfo = SiteInfo()

    requests = {}
    totBlocks = totEvents = totSize = totCpuT = 0
    tst0 = time.time()
    for wflow in workflows:
        for wname, wspec in wflow.items():
            time0 = time.time()
            cput = getComputingTime(wspec, eventsLumis=eventsLumis)
            ncopies = getNCopies(cput)

            attrs = winfo[wname]
            ndatasets = len(attrs['datasets'])
            npileups = len(attrs['pileups'])
            nblocks = nevts = nlumis = size = 0
            nodes = set()
            for dataset in attrs['datasets']:
                blocks = datasetBlocks[dataset]
                for blk in blocks:
                    for node in blockNodes.get(blk, []):
                        nodes.add(node)
                nblocks += len(blocks)
                size += datasetSizes[dataset]
                edata = eventsLumis.get(dataset, {
                    'num_event': 0,
                    'num_lumi': 0
                })
                nevts += edata['num_event']
                nlumis += edata['num_lumi']
            totBlocks += nblocks
            totEvents += nevts
            totSize += size
            totCpuT += cput
            sites = json.dumps(sorted(list(nodes)))
            njobs = requestJobs[wname]
            print("\n### %s" % wname)
            print("%s datasets, %s blocks, %s bytes (%s TB), %s nevts, %s nlumis, cput %s, copies %s, %s" \
                  % (ndatasets, nblocks, size, teraBytes(size), nevts, nlumis, cput, ncopies, sites))
            # find out which sites can serve the given workflow request
            t0 = time.time()
            lheInput, primary, parent, secondary, allowedSites \
                = getSiteWhiteList(wspec, siteInfo, reqmgrAuxSvc, reqSpecs)
            rdict = dict(name=wname, datasets=datasets, blocks=datasetBlocks, \
                         npileups=npileups, size=size, njobs=njobs, \
                         nevents=nevts, nlumis=nlumis, cput=cput, ncopies=ncopies, \
                         sites=sites, allowedSites=allowedSites, parent=parent, \
                         lheInput=lheInput, primary=primary, secondary=secondary)
            requests[wname] = rdict
            print("sites", allowedSites)
            elapsedTime(t0, "getSiteWhiteList")
    print("\ntotal # of workflows %s, datasets %s, blocks %s, evts %s, size %s (%s TB), cput %s (hours)" \
          % (len(winfo.keys()), len(datasets), totBlocks, totEvents, totSize, teraBytes(totSize), totCpuT))
    elapsedTime(tst0, 'workflows info')
    elapsedTime(orig)
    return requests
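
The size figures printed above are converted with a teraBytes helper; a minimal sketch, assuming the decimal (10**12 bytes) convention rather than the binary one, could be:

def teraBytes(size):
    """Convert a size in bytes to terabytes (decimal convention assumed)."""
    return float(size) / 10**12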