Пример #1
0
 def wrapped_func(*args, **kwargs):
     if 'sitedb' in services and (
             not args[0].allCMSNames.sites or
         (args[0].allCMSNames.cachetime + 1800 < mktime(gmtime()))):
         args[0].allCMSNames = CMSSitesCache(
             sites=SiteDBJSON(config={
                 'cert': serverCert,
                 'key': serverKey
             }).getAllCMSNames(),
             cachetime=mktime(gmtime()))
         args[0].allPNNNames = CMSSitesCache(
             sites=SiteDBJSON(config={
                 'cert': serverCert,
                 'key': serverKey
             }).getAllPhEDExNodeNames(),
             cachetime=mktime(gmtime()))
     if 'phedex' in services and not args[0].phedex:
         phdict = args[0].phedexargs
         phdict.update({'cert': serverCert, 'key': serverKey})
         args[0].phedex = PhEDEx(responseType='xml', dict=phdict)
     if 'centralconfig' in services and (
             not args[0].centralcfg.centralconfig or
         (args[0].centralcfg.cachetime + 1800 < mktime(gmtime()))):
         args[0].centralcfg = ConfigCache(
             centralconfig=getCentralConfig(
                 extconfigurl=args[0].config.extconfigurl,
                 mode=args[0].config.mode),
             cachetime=mktime(gmtime()))
     if 'servercert' in services:
         args[0].serverCert = serverCert
         args[0].serverKey = serverKey
     return func(*args, **kwargs)
Пример #2
0
    def testEmulator(self):

        EmulatorHelper.setEmulators(True, True, True, True)
        self.assertEqual(PhEDEx().wrapped.__module__,
                         'WMQuality.Emulators.PhEDExClient.PhEDEx')
        self.assertEqual(DBSReader(self.globalDBS).wrapped.__module__,
                         'WMQuality.Emulators.DBSClient.DBSReader')
        self.assertEqual(SiteDBJSON().wrapped.__module__,
                         'WMQuality.Emulators.SiteDBClient.SiteDB')
        self.assertEqual(RequestManager().wrapped.__module__,
                         'WMQuality.Emulators.RequestManagerClient.RequestManager')

        self.assertEqual(PhEDEx().__class__.__name__, 'PhEDEx')
        self.assertEqual(DBSReader(self.globalDBS).__class__.__name__, 'DBSReader')
        self.assertEqual(SiteDBJSON().__class__.__name__, 'SiteDBJSON')
        self.assertEqual(RequestManager().__class__.__name__, 'RequestManager')

        EmulatorHelper.resetEmulators()
        self.assertEqual(PhEDEx().wrapped.__module__,
                         'WMCore.Services.PhEDEx.PhEDEx')
        self.assertEqual(DBSReader(self.globalDBS).wrapped.__module__,
                         'WMCore.Services.DBS.DBS2Reader')
        self.assertEqual(SiteDBJSON().wrapped.__module__,
                         'WMCore.Services.SiteDB.SiteDB')
        self.assertEqual(RequestManager().wrapped.__module__,
                         'WMCore.Services.RequestManager.RequestManager')

        self.assertEqual(PhEDEx().__class__.__name__, 'PhEDEx')
        self.assertEqual(DBSReader(self.globalDBS).__class__.__name__, 'DBS2Reader')
        self.assertEqual(SiteDBJSON().__class__.__name__, 'SiteDBJSON')
        self.assertEqual(RequestManager().__class__.__name__, 'RequestManager')
Пример #3
0
    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        if hasattr(self.config.Sites, 'available'):
            newFile.setLocation(self.config.Sites.available)
        else:
            sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                              "cert":self.config.TaskWorker.cmscert})
            newFile.setLocation(sbj.getAllCMSNames())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFackBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
Пример #4
0
    def __init__(self, config, noSiteDB=False):
        """
        _init_

        Note, noSiteDB added for TESTING PURPOSED ONLY!
        """
        WebAPI.__init__(self, config)
        ReqMgrAuth.assign_roles = config.security_roles
        # Take a guess
        self.templatedir = config.templates
        self.couchUrl = config.couchUrl
        self.configDBName = config.configDBName
        self.workloadDBName = config.workloadDBName
        self.configDBName = config.configDBName
        self.wmstatWriteURL = "%s/%s" % (self.couchUrl.rstrip('/'),
                                         config.wmstatDBName)
        if not noSiteDB:
            try:
                # Download a list of all the sites from SiteDB, uses v2 API.
                sitedb = SiteDBJSON()
                self.sites = sitedb.getAllCMSNames()
                self.sites.sort()
            except Exception, ex:
                msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % ex
                cherrypy.log(msg)
                raise
Пример #5
0
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode",
                                False)
        self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available

        self.initAlerts(compName="PhEDExInjector")
Пример #6
0
 def setUp(self):
     """
     Setup for unit tests
     """
     super(SiteDBTest, self).setUp()
     EmulatorHelper.setEmulators(phedex=False, dbs=False, siteDB=False, requestMgr=True)
     self.mySiteDB = SiteDBJSON()
 def getUsernameFromSiteDB(self):
     """
     Retrieve the user's username as it appears in SiteDB.
     """
     proxy = self.proxy()
     userdn = proxy.getSubjectFromCert(self.certLocation)
     sitedb = SiteDBJSON({"key": proxy.getProxyFilename(), "cert": proxy.getProxyFilename()})
     username = sitedb.dnUserName(userdn)
     return username
Пример #8
0
    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
Пример #9
0
def sites():
    "Return known CMS site list from SiteDB"
    try:
        # Download a list of all the sites from SiteDB, uses v2 API.
        sitedb = SiteDBJSON()
        sites = sorted(sitedb.getAllCMSNames())
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(
            exc)
        raise Exception(msg)
    return sites
Пример #10
0
    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.phedex = PhEDEx({"endpoint":"https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
Пример #11
0
 def gethnName_urlenc(self,dn):
     from WMCore.Services.SiteDB.SiteDB import SiteDBJSON
     hnUserName = None
     userdn = dn
     mySiteDB = SiteDBJSON()
     status = 0 
     try:
         hnUserName = mySiteDB.dnUserName(dn=userdn)
     except:
         status = 1 
     return status,hnUserName
Пример #12
0
    def execute(self, *args, **kwargs):
        self.logger.info(
            "Data discovery and splitting for %s using user-provided files" %
            kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_arguments'].get('userfiles')
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs[
                    'task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            self.logger.error("Setting %s as failed: %s" %
                              (kwargs['task']['tm_taskname'], msg))
            configreq = {
                'workflow': kwargs['task']['tm_taskname'],
                'status': "FAILED",
                'subresource': 'failure',
                'failure': b64encode(msg)
            }
            self.server.post(self.resturi, data=urllib.urlencode(configreq))
            raise StopHandler(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            sbj = SiteDBJSON({
                "key": self.config.TaskWorker.cmskey,
                "cert": self.config.TaskWorker.cmscert
            })
            locations = sbj.getAllCMSNames()

        userFileset = Fileset(name=kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." %
                         len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size=1000, events=1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task=kwargs['task'], result=userFileset)
Пример #13
0
 def getUsernameFromSiteDB(self):
     """
     Return a the client hypernews name
     """
     proxy = self.proxy()
     userdn = proxy.getSubjectFromCert(self.certLocation)
     sitedb = SiteDBJSON({
         "key": proxy.getProxyFilename(),
         "cert": proxy.getProxyFilename()
     })
     username = sitedb.dnUserName(userdn)
     return username
Пример #14
0
def pnns():
    """
    Returns all PhEDEx node names, excluding Buffer endpoints
    """
    try:
        sitedb = SiteDBJSON()
        pnns = sorted(sitedb.getAllPhEDExNodeNames(excludeBuffer=True))
    except Exception as exc:
        msg = "ERROR: Could not retrieve PNNs from SiteDB, reason: %s" % str(
            exc)
        raise Exception(msg)
    return pnns
Пример #15
0
    def insertAllSEs(self,
                     siteName,
                     pendingSlots=0,
                     runningSlots=0,
                     ceName=None,
                     plugin=None,
                     taskList=[]):
        """
        _insertAllSEs_

        Insert all SEs into WMBS ResourceControl
        This uses the Services.SiteDB to insert all SEs under a common
        CE.  It is meant to be used with WMS submission.

        Sites will be named siteName_SEName

        It expects a taskList of the following form:

        [{'taskType': taskType, 'priority': priority, 'maxSlots': maxSlots, 'pendingSlots' : pendingSlots}]

        for each entry in the taskList, a threshold is inserted into the database
        for EVERY SE
        """

        from WMCore.Services.SiteDB.SiteDB import SiteDBJSON
        siteDB = SiteDBJSON()

        cmsNames = siteDB.getAllCMSNames()
        for cmsName in cmsNames:
            seNames = siteDB.cmsNametoSE(cmsName)
            for SE in seNames:
                sName = '%s_%s' % (siteName, SE)
                self.insertSite(siteName=sName,
                                pendingSlots=pendingSlots,
                                seName=SE,
                                runningSlots=runningSlots,
                                ceName=ceName,
                                cmsName=cmsName,
                                plugin=plugin)
                for task in taskList:
                    if not task.has_key('maxSlots') or not task.has_key('taskType') \
                           or not task.has_key('priority'):
                        msg = "Incomplete task in taskList for ResourceControl.insertAllSEs\n"
                        msg += task
                        raise ResourceControlException(msg)
                    self.insertThreshold(siteName=sName,
                                         taskType=task['taskType'],
                                         maxSlots=task['maxSlots'],
                                         pendingSlots=task['pendingSlots'],
                                         priority=task['priority'])

        return
Пример #16
0
def sites():
    "Return known CMS site list from SiteDB"
    try:
        # Download a list of all the sites from SiteDB, uses v2 API.
        if os.getenv("WMAGENT_USE_CRIC", False) or os.getenv("WMCORE_USE_CRIC", False):
            cric = CRIC()
            site_list = sorted(cric.getAllPSNs())
        else:
            sitedb = SiteDBJSON()
            site_list = sorted(sitedb.getAllCMSNames())
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return site_list
Пример #17
0
 def wrapped_func(*args, **kwargs):
     if 'sitedb' in services and (
             not args[0].allCMSNames.sites or
         (args[0].allCMSNames.cachetime + 1800 < mktime(gmtime()))):
         args[0].allCMSNames = CMSSitesCache(
             sites=SiteDBJSON(config={
                 'cert': serverCert,
                 'key': serverKey
             }).getAllCMSNames(),
             cachetime=mktime(gmtime()))
     if 'phedex' in services and not args[0].phedex:
         args[0].phedex = PhEDEx(responseType='xml',
                                 dict=args[0].phedexargs)
     return func(*args, **kwargs)
Пример #18
0
def pnns():
    """
    Returns all PhEDEx node names, excluding Buffer endpoints
    """
    if os.getenv("WMAGENT_USE_CRIC", False) or os.getenv("WMCORE_USE_CRIC", False):
        sitedb = CRIC()  # FIXME: rename it to cric
    else:
        sitedb = SiteDBJSON()

    try:
        pnn_list = sorted(sitedb.getAllPhEDExNodeNames(excludeBuffer=True))
    except Exception as exc:
        msg = "ERROR: Could not retrieve PNNs from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return pnn_list
Пример #19
0
def getDNFromUserName(username, log, ckey=None, cert=None):
    """
    Parse site string to know the fts server to use
    """
    dn = ''
    site_db = SiteDBJSON(config={'key': ckey, 'cert': cert})
    try:
        dn = site_db.userNameDn(username)
    except IndexError:
        log.error("user does not exist")
        return dn
    except RuntimeError:
        log.error("SiteDB URL cannot be accessed")
        return dn
    return dn
Пример #20
0
    def setupMCWMSpec(self):
        """Setup MC workflow"""
        self.wmspec = self.createMCWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = None
        self.siteDB = SiteDBJSON()

        # add sites that would normally be added by operator via resource_control
        locationDAO = self.daoFactory(classname="Locations.New")
        self.pnns = []
        for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
            locationDAO.execute(siteName=site, pnn=self.siteDB.cmsNametoPhEDExNode(site)[0])
            self.pnns.append(self.siteDB.cmsNametoPhEDExNode(site)[0])
Пример #21
0
    def execute(self, *args, **kwargs):
        self.logger.info(
            "Data discovery and splitting for %s using user-provided files" %
            kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_user_files']
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs[
                    'task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            raise TaskWorkerException(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            sbj = SiteDBJSON({
                "key": self.config.TaskWorker.cmskey,
                "cert": self.config.TaskWorker.cmscert
            })
            locations = sbj.getAllCMSNames()

        userFileset = Fileset(name=kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." %
                         len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size=1000, events=1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task=kwargs['task'], result=userFileset)
Пример #22
0
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}        
        self.phedexNodes = {'MSS':[], 'Disk':[]}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")
Пример #23
0
def getSiteInfo(config):
    sitedb = SiteDBJSON()
    sites = sitedb.getAllCMSNames()
    sites.sort()
    wildcardKeys = getattr(config, 'wildcardKeys', {
        'T1*': 'T1_*',
        'T2*': 'T2_*',
        'T3*': 'T3_*'
    })
    wildcardSites = {}

    for k in wildcardKeys.keys():
        reValue = wildcardKeys.get(k)
        found = False
        for s in sites:
            if re.search(reValue, s):
                found = True
                if not k in wildcardSites.keys():
                    wildcardSites[k] = []
                wildcardSites[k].append(s)
        if found:
            sites.append(k)
    return sites
Пример #24
0
    def formatOutput(self, task, requestname, datasetfiles, locations):
        """Receives as input the result of the data location
           discovery operations and fill up the WMCore objects."""
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        secmsmap = {}
        sbj = SiteDBJSON({"key":self.config.MyProxy.serverhostkey,
                          "cert":self.config.MyProxy.serverhostcert})

        wmfiles = []
        lumicounter = evecounter = 0
        for lfn, infos in datasetfiles.iteritems():
            wmfile = File(lfn=lfn, events=infos['NumberOfEvents'], size=infos['Size'], checksums=infos['Checksums'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            if locations.has_key(infos['BlockName']):
                for se in locations[infos['BlockName']]:
                    if se not in secmsmap:
                        self.logger.debug("Translating SE %s" %se)
                        try:
                            secmsmap[se] = sbj.seToCMSName(se)
                        except KeyError, ke:
                            self.logger.error("Impossible translating %s to a CMS name through SiteDB" %se)
                            secmsmap[se] = ''
                    if se in secmsmap:
                        if type(secmsmap[se]) == list:
                            wmfile['locations'].extend(secmsmap[se])
                        else:
                            wmfile['locations'].append(secmsmap[se])
                wmfile['workflow'] = requestname
                evecounter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    #self.logger.debug(' - adding run %d and lumis %s' %(run, lumis))
                    wmfile.addRun(Run(run, *lumis))
                    lumicounter += len(lumis)
                wmfiles.append(wmfile)
Пример #25
0
    def __init__(self,
                 whiteList=None,
                 blackList=None,
                 logger=None,
                 mapper=None,
                 dict={}):
        self.logger = logger
        self.kind = 'se'
        self.mapper = mapper
        self.siteDBAPI = SiteDBJSON(dict)
        if type(whiteList) == type("string"):
            if whiteList:
                whiteList = whiteList.split(',')
            else:
                whiteList = []
        elif type(whiteList) == type([]):
            pass
        else:
            whiteList = []

        if type(blackList) == type("string"):
            if blackList:
                blackList = blackList.split(',')
            else:
                blackList = []
        elif type(blackList) == type([]):
            pass
        else:
            blackList = []

        logger.debug('Input whitelist: %s' % ', '.join(whiteList))
        logger.debug('Input blacklist: %s' % ', '.join(blackList))
        self.blacklist = set(self.expandList(blackList))
        self.whitelist = set(self.expandList(whiteList))
        logger.debug('Converted whitelist: %s' % ', '.join(self.whitelist))
        logger.debug('Converted blacklist: %s' % ', '.join(self.blacklist))
Пример #26
0
 def __init__(self, whiteList=None, blackList=None, logger=None, dict={}):
     if logger: dict['logger'] = logger
     self.siteDBAPI = SiteDBJSON(dict)
     super(CEBlackWhiteListParser,
           self).__init__(whiteList, blackList, logger,
                          self.siteDBAPI.cmsNametoCE, dict)
Пример #27
0
def getFiles(datasetName,
             runBlacklist,
             runWhitelist,
             blockBlacklist,
             blockWhitelist,
             dbsUrl,
             fakeLocation=False):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint=dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    class BlockBuster(threading.Thread):
        def __init__(self, **args):
            threading.Thread.__init__(self)
            for k, v in args.items():
                setattr(self, k, v)
            self.major_failure = False

        def run(self):
            self.files = {}
            logging = self.l
            has_parent = self.hp
            fakeLocation = self.fl
            blockName = self.bn
            blockBlacklist = self.bbl
            blockWhitelist = self.bwl

            if blockBlacklist and blockName in blockBlacklist:
                return
            if blockWhitelist and blockName not in blockWhitelist:
                return

            phedexReader = PhEDEx()
            siteDB = SiteDBJSON()
            dbsReader = DBSReader(endpoint=self.dbs)
            replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                               subscribed='y')
            blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
            if has_parent:
                try:
                    blockFileParents = dbsReader.listFilesInBlockWithParents(
                        blockName)
                except:
                    print blockName, "does not appear to have a parent, even though it should. Very suspicious"
                    blockFileParents = dbsReader.listFilesInBlock(blockName)
            else:
                blockFileParents = dbsReader.listFilesInBlock(blockName)

            blockLocations = set()
            # load block locations
            if len(replicaInfo["phedex"]["block"]) > 0:
                for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                    PNN = replica["node"]
                    PSNs = siteDB.PNNtoPSN(PNN)
                    blockLocations.add(PNN)
                    #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs)

            # We cannot upload docs without location, so force it in case it's empty
            if not blockLocations:
                if fakeLocation:
                    #logging.info("\t\t %s\tno location", blockName)
                    blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN'])
                elif not has_parent:  ## this should be the source
                    logging.info("Blockname: %s\tno location, ABORT",
                                 blockName)
                    self.major_failure = True
                    #sys.exit(1)

            #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

            # for each file on the block
            for blockFile in blockFiles:
                parentLFNs = []
                # populate parent information
                if blockFileParents and "ParentList" in blockFileParents[0]:
                    for fileParent in blockFileParents[0]["ParentList"]:
                        parentLFNs.append(fileParent["LogicalFileName"])
                runInfo = {}
                # Lumis not included in file
                for lumiSection in blockFile["LumiList"]:
                    if runBlacklist and lumiSection[
                            "RunNumber"] in runBlacklist:
                        continue
                    if runWhitelist and lumiSection[
                            "RunNumber"] not in runWhitelist:
                        continue

                    if lumiSection["RunNumber"] not in runInfo.keys():
                        runInfo[lumiSection["RunNumber"]] = []

                    runInfo[lumiSection["RunNumber"]].append(
                        lumiSection["LumiSectionNumber"])
                if len(runInfo.keys()) > 0:
                    self.files[blockFile["LogicalFileName"]] = {
                        "runs": runInfo,
                        "events": blockFile["NumberOfEvents"],
                        "size": blockFile["FileSize"],
                        "locations": list(blockLocations),
                        "parents": parentLFNs
                    }
            return

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset", datasetName, "parts", outputDatasetParts
    try:
        # retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" %
                           datasetName)

    has_parent = False
    try:
        parents = dbsReader.listDatasetParents(datasetName)
        if parents: has_parent = True
    except:
        print "Dataset with no parent"
        pass

    bthreads = []
    # traverse each block
    for blockName in blockNames:
        bthreads.append(
            BlockBuster(bn=blockName,
                        hp=has_parent,
                        fl=fakeLocation,
                        bbl=blockBlacklist,
                        bwl=blockWhitelist,
                        l=logging,
                        dbs=dbsUrl))

    print len(bthreads), "block query created"
    bthreads = ThreadBuster(bthreads, 40, 2., verbose=False)

    for t in bthreads:
        if t.major_failure:
            print "There was a major failure in processing block files"
            sys.exit(1)
        files.update(t.files)

    print len(files)
    return files
Пример #28
0
        def run(self):
            self.files = {}
            logging = self.l
            has_parent = self.hp
            fakeLocation = self.fl
            blockName = self.bn
            blockBlacklist = self.bbl
            blockWhitelist = self.bwl

            if blockBlacklist and blockName in blockBlacklist:
                return
            if blockWhitelist and blockName not in blockWhitelist:
                return

            phedexReader = PhEDEx()
            siteDB = SiteDBJSON()
            dbsReader = DBSReader(endpoint=self.dbs)
            replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                               subscribed='y')
            blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
            if has_parent:
                try:
                    blockFileParents = dbsReader.listFilesInBlockWithParents(
                        blockName)
                except:
                    print blockName, "does not appear to have a parent, even though it should. Very suspicious"
                    blockFileParents = dbsReader.listFilesInBlock(blockName)
            else:
                blockFileParents = dbsReader.listFilesInBlock(blockName)

            blockLocations = set()
            # load block locations
            if len(replicaInfo["phedex"]["block"]) > 0:
                for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                    PNN = replica["node"]
                    PSNs = siteDB.PNNtoPSN(PNN)
                    blockLocations.add(PNN)
                    #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs)

            # We cannot upload docs without location, so force it in case it's empty
            if not blockLocations:
                if fakeLocation:
                    #logging.info("\t\t %s\tno location", blockName)
                    blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN'])
                elif not has_parent:  ## this should be the source
                    logging.info("Blockname: %s\tno location, ABORT",
                                 blockName)
                    self.major_failure = True
                    #sys.exit(1)

            #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

            # for each file on the block
            for blockFile in blockFiles:
                parentLFNs = []
                # populate parent information
                if blockFileParents and "ParentList" in blockFileParents[0]:
                    for fileParent in blockFileParents[0]["ParentList"]:
                        parentLFNs.append(fileParent["LogicalFileName"])
                runInfo = {}
                # Lumis not included in file
                for lumiSection in blockFile["LumiList"]:
                    if runBlacklist and lumiSection[
                            "RunNumber"] in runBlacklist:
                        continue
                    if runWhitelist and lumiSection[
                            "RunNumber"] not in runWhitelist:
                        continue

                    if lumiSection["RunNumber"] not in runInfo.keys():
                        runInfo[lumiSection["RunNumber"]] = []

                    runInfo[lumiSection["RunNumber"]].append(
                        lumiSection["LumiSectionNumber"])
                if len(runInfo.keys()) > 0:
                    self.files[blockFile["LogicalFileName"]] = {
                        "runs": runInfo,
                        "events": blockFile["NumberOfEvents"],
                        "size": blockFile["FileSize"],
                        "locations": list(blockLocations),
                        "parents": parentLFNs
                    }
            return
Пример #29
0
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[
                    infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    +
                    "because you specified useParents=True but some your files have no"
                    + "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=size,
                          checksums=checksums,
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError:
                        self.logger.error(
                            "Impossible translating %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' %
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
Пример #30
0
 def __init__(self, *args, **kwargs):
     TaskAction.__init__(self, *args, **kwargs)
     self.sbj = SiteDBJSON({
         "key": self.config.TaskWorker.cmskey,
         "cert": self.config.TaskWorker.cmscert
     })