Пример #1
0
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[
                    infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    +
                    "because you specified useParents=True but some your files have no"
                    + "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=size,
                          checksums=checksums,
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError:
                        self.logger.error(
                            "Impossible translating %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' %
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
Пример #2
0
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()


    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS','T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.failUnless(sorted(results) == sorted(target))
        
        target = ['T1_US_FNAL_Disk']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL_Disk")
        self.failUnless(sorted(results) == sorted(target))

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = [u'srm-cms-disk.gridpp.rl.ac.uk', u'srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testCmsNamePatterntoSE(self):
        """
        Tests CmsNamePatterntoSE
        """
        target = [u'srm-eoscms.cern.ch', u'srm-eoscms.cern.ch', u'storage01.lcg.cscs.ch', u'eoscmsftp.cern.ch']
        results = self.mySiteDB.cmsNametoSE("%T2_CH")
        self.failUnless(sorted(results) == sorted(target))

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = [u'T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.failUnless(results == target)
        
        target = sorted([u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.failUnless(sorted(results) == sorted(target))
        
        target = sorted([u'T0_CH_CERN', u'T1_CH_CERN'])
        results = sorted(self.mySiteDB.seToCMSName("srm-cms.cern.ch"))
        self.failUnless(sorted(results) == sorted(target))
        
        target = sorted([u'T2_CH_CERN_AI'])
        results = sorted(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"))
        self.failUnless(sorted(results) == sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.failUnless(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.failUnless(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.failUnless(result == ['T2_UK_London_IC'])
        return
    
    def testCMSNametoList(self):
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.failUnless(result == [u'cmssrm.fnal.gov', u'cmssrmdisk.fnal.gov'])
Пример #3
0
        def run(self):
            self.files = {}
            logging = self.l
            has_parent = self.hp
            fakeLocation = self.fl
            blockName = self.bn
            blockBlacklist = self.bbl
            blockWhitelist = self.bwl

            if blockBlacklist and blockName in blockBlacklist:
                return
            if blockWhitelist and blockName not in blockWhitelist:
                return

            phedexReader = PhEDEx()
            siteDB = SiteDBJSON()
            dbsReader = DBSReader(endpoint=self.dbs)
            replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                               subscribed='y')
            blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
            if has_parent:
                try:
                    blockFileParents = dbsReader.listFilesInBlockWithParents(
                        blockName)
                except:
                    print blockName, "does not appear to have a parent, even though it should. Very suspicious"
                    blockFileParents = dbsReader.listFilesInBlock(blockName)
            else:
                blockFileParents = dbsReader.listFilesInBlock(blockName)

            blockLocations = set()
            # load block locations
            if len(replicaInfo["phedex"]["block"]) > 0:
                for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                    PNN = replica["node"]
                    PSNs = siteDB.PNNtoPSN(PNN)
                    blockLocations.add(PNN)
                    #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs)

            # We cannot upload docs without location, so force it in case it's empty
            if not blockLocations:
                if fakeLocation:
                    #logging.info("\t\t %s\tno location", blockName)
                    blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN'])
                elif not has_parent:  ## this should be the source
                    logging.info("Blockname: %s\tno location, ABORT",
                                 blockName)
                    self.major_failure = True
                    #sys.exit(1)

            #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

            # for each file on the block
            for blockFile in blockFiles:
                parentLFNs = []
                # populate parent information
                if blockFileParents and "ParentList" in blockFileParents[0]:
                    for fileParent in blockFileParents[0]["ParentList"]:
                        parentLFNs.append(fileParent["LogicalFileName"])
                runInfo = {}
                # Lumis not included in file
                for lumiSection in blockFile["LumiList"]:
                    if runBlacklist and lumiSection[
                            "RunNumber"] in runBlacklist:
                        continue
                    if runWhitelist and lumiSection[
                            "RunNumber"] not in runWhitelist:
                        continue

                    if lumiSection["RunNumber"] not in runInfo.keys():
                        runInfo[lumiSection["RunNumber"]] = []

                    runInfo[lumiSection["RunNumber"]].append(
                        lumiSection["LumiSectionNumber"])
                if len(runInfo.keys()) > 0:
                    self.files[blockFile["LogicalFileName"]] = {
                        "runs": runInfo,
                        "events": blockFile["NumberOfEvents"],
                        "size": blockFile["FileSize"],
                        "locations": list(blockLocations),
                        "parents": parentLFNs
                    }
            return
Пример #4
0
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def  __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        EmulatorHelper.setEmulators(phedex=False, dbs=False, siteDB=False, requestMgr=True)
        self.mySiteDB = SiteDBJSON()


    def tearDown(self):
        """
        _tearDown_
        """
        super(SiteDBTest, self).tearDown()
        EmulatorHelper.resetEmulators()
        return


    def testCmsNametoPhEDExNode(self):
        """
        #Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertTrue(results == target)
        target = sorted([u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T0_CH_CERN', u'T1_CH_CERN'])
        results = sorted(self.mySiteDB.seToCMSName("srm-cms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T2_CH_CERN_AI'])
        results = sorted(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"))
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testCheckAndConvertSENameToPNN(self):
        """
        Test the conversion of SE name to PNN for single
        and multiple sites/PNNs using checkAndConvertSENameToPNN
        """

        fnalSE = u'cmsdcadisk01.fnal.gov'
        purdueSE = u'srm.rcac.purdue.edu'
        fnalPNNs = [u'T1_US_FNAL_Buffer', u'T1_US_FNAL_MSS', u'T1_US_FNAL_Disk']
        purduePNN = [u'T2_US_Purdue']

        pnnList = fnalPNNs + purduePNN
        seList = [fnalSE, purdueSE]

        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(fnalSE), fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN([fnalSE]), fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(purdueSE), purduePNN)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN([purdueSE]), purduePNN)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(seList), purduePNN + fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(pnnList), pnnList)
        return

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """

        result = self.mySiteDB.PNNstoPSNs(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
Пример #5
0
class RequestQuery:

    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
        
    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        Get from the list of available CMSSW releases
        return a dictionary of ScramArchitecture by CMSSW
        """
        
        # Set temporary conection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response=br.open(url)
        soup = BeautifulSoup(response.read())
        
        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw={}
        
        # Fill the dictionary
        for arch in soup.find_all('architecture'): 
            for cmssw in arch.find_all('project'): 
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel]=[]
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)
        
        return archByCmssw
      
    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list block origin sites.
        """
        
        sites=[]
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True,dataset=data)
        
        seList = []
        for block in response:
            if block['origin_site_name'] not in seList:
                seList.append(block['origin_site_name'])
        
        siteNames = []
        for node in self.nodeMappings['phedex']['node']:
            if node['se'] in seList:
                siteNames.append(node['name']) 
        
        return siteNames, seList
    
    def setGlobalTagFromOrigin(self, dbs_url,input_dataset):
        """
        Get the global tag of the dataset from the source dbs url. If it is not set, then set global tag to 'UNKNOWN'
        """
        
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)
        
        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'
            
        return globalTag
    
    def isDataAtUrl(self, dbs_url,input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)
        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True
         
    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary by values
        """   
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
                
        return d
    
    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary by labels
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d
    
    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version = 1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the needed
        information for creating a requeston ReqMgr.
        Input:
            - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, For example: "phys01" for 
             https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        """

        scramArchByCMSSW = self.getScramArchByCMSSW()
        self.nodeMappings = self.phedex.getNodeMap()
        task = ticket
        print("Processing ticket: %s" % task)
        
        #splitting input dataset       
        input_primary_dataset = input_dataset.split('/')[1].replace(' ','')
        input_processed_dataset = input_dataset.split('/')[2].replace(' ','')
        data_tier = input_dataset.split('/')[3].replace(' ','')
                
        # Transform input value to a valid DBS url
        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
        dbs_url = dbs_base_url+dbs_url+"/DBSReader"
        release_id = cmssw_release
                
        # check if deprecated release was used
        release = cmssw_release
        # check if release has not ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check if dataset is not at dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url,input_dataset)
        except:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url))
                    
        ## Get Physics Group
        group_squad = 'cms-storeresults-'+group_name.replace("-","_").lower()

        ## Get Dataset Version
        dataset_version = str(version)

        # Set default Adquisition Era for StoreResults 
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name)==0:
            new_dataset = input_processed_dataset.replace(group_name,"",1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)
                        
        # Get dataset site info:
        phedex_map, se_names = self.getDatasetOriginSites(dbs_url,input_dataset)
        sites = set([self.mySiteDB.PNNtoPSN(node) for node in phedex_map])

        infoDict = {}
        # Build store results json
        # First add all the defaults values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged" 
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-","_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"                                        
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url
        
        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version                    
        infoDict["SiteWhitelist"] = list(sites)
        
        # Create report for Migration2Global
        report = {}
         
        #Fill json file, if status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = list(sites)
        report["se_names"] = list(se_names)

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ##check if file already exists
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'
        if not os.access(filename,os.F_OK):
            jsonfile = open(filename,'w')
            request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request,sort_keys=True, indent=4))
            jsonfile.close

        return

    def removeJSONFile(self,task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'

        if os.access(filename,os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report
        """
        print("%20s %5s %10s %50s %50s" %( 'Ticket','json','local DBS','Sites','se_names')) 
        print("%20s %5s %10s %50s %50s" %( '-'*20,'-'*5,'-'*10,'-'*50,'-'*50 ))
        
        json = report["json"]
        ticket = report["task"]
        #status = report["ticketStatus"]
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        se_names = ', '.join(report["se_names"])
        print("%20s %5s %10s %50s %50s" %(ticket,json,localUrl,site,se_names))  
Пример #6
0
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """
    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        #Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertTrue(results == target)
        target = sorted([u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T0_CH_CERN', u'T1_CH_CERN'])
        results = sorted(self.mySiteDB.seToCMSName("srm-cms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T2_CH_CERN_AI'])
        results = sorted(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"))
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """

        result = self.mySiteDB.PNNstoPSNs(
            ['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
Пример #7
0
    def formatOutput(self, task, requestname, datasetfiles, locations):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        file_counter = 0
        uniquelumis = set()
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[
                    infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    +
                    "because you specified useParents=True but some your files have no"
                    + "parents.\nExample: " + lfn)
            ## Createa a WMCore File object.
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=infos['Size'],
                          checksums=infos['Checksums'],
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError, ke:
                        self.logger.error(
                            "Impossible translating %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException, ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print "Couldn't map SE to site: %s" % pnn
                        print "got problem: %s" % ex
                        print "got another problem: %s" % ex.__dict__
                if pnn and pnn in pnn_psn_map:
                    if type(pnn_psn_map[pnn]) == list:
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
Пример #8
0
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """
    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CMS Name to PhEDEx Node Name
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(
            ['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return

    def testPSNtoPNNMap(self):
        """
        _PSNtoPNNMap_

        Test API to get a map of PSNs and PNNs 
        """
        result = self.mySiteDB.PSNtoPNNMap()
        self.assertTrue(
            [psn for psn in result.keys() if psn.startswith('T1_')])
        self.assertTrue(
            [psn for psn in result.keys() if psn.startswith('T2_')])
        self.assertTrue(
            [psn for psn in result.keys() if psn.startswith('T3_')])
        self.assertTrue(len(result) > 50)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T1.*')
        self.assertFalse(
            [psn for psn in result.keys() if not psn.startswith('T1_')])
        self.assertTrue(len(result) < 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T2.*')
        self.assertFalse(
            [psn for psn in result.keys() if not psn.startswith('T2_')])
        self.assertTrue(len(result) > 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T3.*')
        self.assertFalse(
            [psn for psn in result.keys() if not psn.startswith('T3_')])
        self.assertTrue(len(result) > 10)

        return

    def testGetAllPhEDExNodeNames(self):
        """
        _testGetAllPhEDExNodeNames_

        Test API to get all PhEDEx Node Names 
        """
        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=True)
        self.assertFalse([pnn for pnn in result if pnn.endswith('_Buffer')])

        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=False)
        self.assertTrue(
            len([pnn for pnn in result if pnn.endswith('_Buffer')]) > 5)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='T1.*',
                                                     excludeBuffer=True)
        self.assertFalse([pnn for pnn in result if not pnn.startswith('T1_')])
        self.assertTrue(len(result) > 10)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='.*',
                                                     excludeBuffer=True)
        self.assertTrue([pnn for pnn in result if pnn.startswith('T1_')])
        self.assertTrue([pnn for pnn in result if pnn.startswith('T2_')])
        self.assertTrue([pnn for pnn in result if pnn.startswith('T3_')])
        self.assertTrue(len(result) > 60)

        return
Пример #9
0
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """
    def setUp(self):
        """
        Setup for unit tests
        """
        EmulatorHelper.setEmulators(siteDB=True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        EmulatorHelper.resetEmulators()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.failUnless(sorted(results) == sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.failUnless(result == 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = [u'srm-cms-disk.gridpp.rl.ac.uk', u'srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testCmsNamePatterntoSE(self):
        """
        Tests CmsNamePatterntoSE
        """
        target = [u'T2_XX_SiteA', u'T2_XX_SiteB', u'T2_XX_SiteC']
        results = self.mySiteDB.cmsNametoSE("%T2_XX")
        self.failUnless(sorted(results) == sorted(target))

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = [u'T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.failUnless(results == target)
        target = sorted([
            u'T2_CH_CERN', u'T2_CH_CERN_AI', u'T2_CH_CERN_HLT',
            u'T2_CH_CERN_T0'
        ])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.failUnless(sorted(results) == sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.failUnless(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.failUnless(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.failUnless(result == ['T2_UK_London_IC'])
        return
Пример #10
0
def getFiles(datasetName,
             runBlacklist,
             runWhitelist,
             blockBlacklist,
             blockWhitelist,
             dbsUrl,
             fakeLocation=False):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint=dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset", datasetName, "parts", outputDatasetParts
    try:
        #retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" %
                           datasetName)

    #traverse each block
    for blockName in blockNames:
        #deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        #existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                           subscribed='y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)

        blockLocations = set()
        #load block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.PNNtoPSN(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    se = siteDB.cmsNametoSE(cmsName)
                    blockLocations.update(se)
                    logging.debug("cmsName %s mapped to se %s", cmsName, se)
                logging.debug("PhEDEx node %s, cmsSites %s, blockLocations %s",
                              node, cmsSites, blockLocations)

        # We cannot upload docs without location, so force it in case it's empty
        if fakeLocation and not blockLocations:
            blockLocations.update(
                [u'cmssrmdisk.fnal.gov', u'srm-eoscms.cern.ch'])
        logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

        #for each file on the block
        for blockFile in blockFiles:
            parentLFNs = []
            #get parent information about file
            #blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
            blockFileParents = dbsReader.listFilesInBlock(blockName)
            #populate parent information
            if blockFileParents and "ParentList" in blockFileParents[0]:
                for fileParent in blockFileParents[0]["ParentList"]:
                    parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            #Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection[
                        "RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(
                    lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {
                    "runs": runInfo,
                    "events": blockFile["NumberOfEvents"],
                    "size": blockFile["FileSize"],
                    "locations": list(blockLocations),
                    "parents": parentLFNs
                }
    return files