def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block"""
    blockName = match['Inputs'].keys()[0]  # TODO: Allow more than one

    if match['ACDC']:
        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        collection = acdc.getDataCollection(acdcInfo['collection'])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'],
                                       user=wmspec.getOwner().get("name"),
                                       group=wmspec.getOwner().get("group"))
        block = {}
        block["Files"] = fileLists
        return blockName, block
    else:
        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
        return blockName, dbsBlockDict[blockName]
def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block"""
    blockName = match['Inputs'].keys()[0]  # TODO: Allow more than one

    if match['ACDC']:
        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        collection = acdc.getDataCollection(acdcInfo['collection'])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'],
                                       user=wmspec.getOwner().get("name"),
                                       group=wmspec.getOwner().get("group"))
        block = {}
        block["Files"] = fileLists
        return blockName, block
    else:
        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
        if wmspec.locationDataSourceFlag():
            blockInfo = dbsBlockDict[blockName]
            seElements = []
            for cmsSite in match['Inputs'].values()[0]:  # TODO: Allow more than one
                ses = self.SiteDB.cmsNametoSE(cmsSite)
                seElements.extend(ses)
            seElements = list(set(seElements))
            blockInfo['StorageElements'] = seElements
        return blockName, dbsBlockDict[blockName]
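# Illustrative sketch only (not part of the source): a plausible shape for the
# 'match' dict consumed by _getDBSBlock() above, to make the ACDC and DBS
# branches easier to follow. The keys ('Inputs', 'TaskName', 'Dbs', 'ACDC' and
# its server/database/collection/fileset entries) come from the code above;
# all values here are hypothetical.
exampleMatch = {
    'Inputs': {'/acdc/owner_ACDC0_task_X/:collection:fileset/0/31055': ['T1_US_FNAL']},
    'TaskName': 'SomeTaskName',  # hypothetical task name
    'Dbs': None,                 # DBS URL, used only for the non-ACDC branch
    'ACDC': {'server': 'https://cmsweb.cern.ch/couchdb',
             'database': 'acdcserver',
             'collection': 'owner_task_collection',  # hypothetical
             'fileset': '/owner_task/fileset'},      # hypothetical
}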
# Imports needed to run this script standalone, following the WMCore package
# layout (verify the paths against the WMCore version in use):
import sys
import time

from WMCore.ACDC.DataCollectionService import DataCollectionService
from WMCore.WMSpec.WMWorkload import WMWorkloadHelper
from WMCore.WorkQueue.DataStructs.ACDCBlock import ACDCBlock
from WMCore.WorkQueue.WMBSHelper import WMBSHelper


def main():
    start = time.time()
    # blockName = match['Inputs'].keys()[0]
    blockName = "/acdc/vlimant_ACDC0_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_190218_145226_481/:pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222:SUS-RunIIFall18wmLHEGS-00025_0/0/31055"
    # acdcInfo = match['ACDC']
    acdcInfo = {"database": "acdcserver",
                "fileset": "/pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222/SUS-RunIIFall18wmLHEGS-00025_0",
                "collection": "pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222",
                "server": "https://cmsweb.cern.ch/couchdb"}
    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

    splitedBlockName = ACDCBlock.splitBlockName(blockName)
    print("Split block name: %s" % splitedBlockName)

    fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   splitedBlockName['Offset'],
                                   splitedBlockName['NumOfFiles'])
    print("Retrieved %d unique files from the ACDCServer" % len(fileLists))

    block = {}
    block["Files"] = fileLists

    # Report which retrieved files contain the lumi sections of interest
    wantedLumis = set([252052, 240646])
    for f in fileLists:
        for run in f['runs']:
            maskDict = run.json()
            lumisSet = set(maskDict['Lumis'].keys())
            if wantedLumis.intersection(lumisSet):
                print("File: %s with events: %s, contains these lumis: %s"
                      % (f['lfn'], f['events'], wantedLumis.intersection(lumisSet)))
    # with open("chunkfiles.json", 'w') as fo:
    #     json.dump(block, fo)

    end = time.time()
    print("Spent %s secs running so far" % (end - start))
    sys.exit(1)

    # NOTE: the early sys.exit(1) above makes the WMBSHelper section below
    # unreachable as written; remove it to exercise the subscription step.
    ### Now doing the WMBSHelper stuff
    reqUrl = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
    requestName = "vlimant_ACDC0_task_HIG-RunIIFall17wmLHEGS-01122__v1_T_180808_130708_5376"
    wmspec = WMWorkloadHelper()
    wmspec.loadSpecFromCouch(reqUrl, requestName)
    taskName = "HIG-RunIIFall17DRPremix-00788_0"
    mask = None
    cacheDir = "/data/srv/wmagent/v1.1.14.patch6/install/wmagent/WorkQueueManager/cache"
    # wmbsHelper = WMBSHelper(wmspec, match['TaskName'], blockName, mask, self.params['CacheDir'])
    wmbsHelper = WMBSHelper(wmspec, taskName, blockName, mask, cacheDir)
    sub, numFilesAdded = wmbsHelper.createSubscriptionAndAddFiles(block=block)


if __name__ == "__main__":
    main()
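# Illustrative sketch only: the approximate shape of one file record returned
# by DataCollectionService.getChunkFiles(), inferred from the fields the
# script above reads (f['lfn'], f['events'], f['runs']) and from the fields
# asserted in testChunking() below. The values themselves are hypothetical.
exampleChunkFile = {
    "lfn": "/store/unmerged/hypothetical/file.root",  # hypothetical LFN
    "size": 1024,
    "events": 1024,
    "merged": False,
    "parents": set(),
    "locations": ["cmssrm.fnal.gov"],
    "runs": [],  # list of Run objects, each carrying .run and .lumis
}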
def testChunking(self):
    """
    _testChunking_

    Insert a workload and files that have several distinct sets of locations.
    Verify that the chunks are created correctly and that they only group
    files that have the same set of locations.  Also verify that the chunks
    are pulled out of ACDC correctly.
    """
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database="wmcore-acdc-datacollectionsvc")

    def getJob():
        job = Job()
        job["task"] = "/ACDCTest/reco"
        job["workflow"] = "ACDCTest"
        job["location"] = "cmssrm.fnal.gov"
        job["owner"] = "cmsdataops"
        job["group"] = "cmsdataops"
        return job

    testFileA = File(lfn=makeUUID(), size=1024, events=1024)
    testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileA.addRun(Run(1, 1, 2))
    testFileB = File(lfn=makeUUID(), size=1024, events=1024)
    testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileB.addRun(Run(1, 3, 4))
    testFileC = File(lfn=makeUUID(), size=1024, events=1024)
    testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileC.addRun(Run(1, 5, 6))
    testJobA = getJob()
    testJobA.addFile(testFileA)
    testJobA.addFile(testFileB)
    testJobA.addFile(testFileC)

    testFileD = File(lfn=makeUUID(), size=1024, events=1024)
    testFileD.setLocation(["cmssrm.fnal.gov"])
    testFileD.addRun(Run(2, 1, 2))
    testFileE = File(lfn=makeUUID(), size=1024, events=1024)
    testFileE.setLocation(["cmssrm.fnal.gov"])
    testFileE.addRun(Run(2, 3, 4))
    testJobB = getJob()
    testJobB.addFile(testFileD)
    testJobB.addFile(testFileE)

    testFileF = File(lfn=makeUUID(), size=1024, events=1024,
                     parents=set(["/some/parent/F"]))
    testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileF.addRun(Run(3, 1, 2))
    testFileG = File(lfn=makeUUID(), size=1024, events=1024,
                     parents=set(["/some/parent/G"]))
    testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileG.addRun(Run(3, 3, 4))
    testFileH = File(lfn=makeUUID(), size=1024, events=1024,
                     parents=set(["/some/parent/H"]))
    testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileH.addRun(Run(3, 5, 6))
    testJobC = getJob()
    testJobC.addFile(testFileF)
    testJobC.addFile(testFileG)
    testJobC.addFile(testFileH)

    testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileI.addRun(Run(4, 1, 2))
    testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileJ.addRun(Run(4, 3, 4))
    testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileK.addRun(Run(4, 5, 6))
    testJobD = getJob()
    testJobD.addFile(testFileI)
    testJobD.addFile(testFileJ)
    testJobD.addFile(testFileK)

    dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])

    chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)
    self.assertEqual(len(chunks), 4,
                     "Error: There should be four chunks: %s" % len(chunks))

    goldenMetaData = {1: {"lumis": 2,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 1024},
                      2: {"lumis": 4,
                          "locations": ["cmssrm.fnal.gov"],
                          "events": 2048},
                      3: {"lumis": 6,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                          "events": 3072},
                      5: {"lumis": 10,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 5120}}

    testFiles = [testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
    lastFile = testFileA
    for testFile in testFiles:
        if lastFile["lfn"] < testFile["lfn"]:
            lastFile = testFile
    testFiles.remove(lastFile)

    goldenFiles = {1: [lastFile],
                   2: [testFileD, testFileE],
                   3: [testFileF, testFileG, testFileH],
                   5: testFiles}

    for chunk in chunks:
        chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                         chunk["offset"], chunk["files"])
        self.assertEqual(chunkMetaData["files"], chunk["files"],
                         "Error: Metadata doesn't match.")
        self.assertEqual(chunkMetaData["lumis"], chunk["lumis"],
                         "Error: Metadata doesn't match.")
        self.assertEqual(chunkMetaData["events"], chunk["events"],
                         "Error: Metadata doesn't match.")
        self.assertEqual(chunkMetaData["locations"], chunk["locations"],
                         "Error: Metadata doesn't match.")
        self.assertTrue(chunk["files"] in goldenMetaData.keys(),
                        "Error: Extra chunk found.")
        self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                         "Error: Lumis in chunk is wrong.")
        self.assertEqual(chunk["locations"],
                         goldenMetaData[chunk["files"]]["locations"],
                         "Error: Locations in chunk is wrong.")
        self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                         "Error: Events in chunk is wrong.")
        del goldenMetaData[chunk["files"]]

        chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                       chunk["offset"], chunk["files"])
        self.assertTrue(chunk["files"] in goldenFiles.keys(),
                        "Error: Extra chunk found.")
        goldenChunkFiles = goldenFiles[chunk["files"]]
        self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

        for chunkFile in chunkFiles:
            foundFile = None
            for goldenChunkFile in goldenChunkFiles:
                if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                    foundFile = goldenChunkFile
                    break
            self.assertTrue(foundFile is not None,
                            "Error: Missing chunk file: %s, %s"
                            % (chunkFiles, goldenChunkFiles))
            self.assertEqual(foundFile["parents"], chunkFile["parents"],
                             "Error: File parents should match.")
            self.assertEqual(foundFile["merged"], chunkFile["merged"],
                             "Error: File merged status should match.")
            self.assertEqual(foundFile["locations"], chunkFile["locations"],
                             "Error: File locations should match.")
            self.assertEqual(foundFile["events"], chunkFile["events"],
                             "Error: File events should match: %s" % chunk["files"])
            self.assertEqual(foundFile["size"], chunkFile["size"],
                             "Error: File size should match.")
            self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]),
                             "Error: Wrong number of runs.")
            for run in foundFile["runs"]:
                runMatch = False
                for chunkRun in chunkFile["runs"]:
                    if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                        runMatch = True
                        break
                self.assertTrue(runMatch, "Error: Run information is wrong.")

        del goldenFiles[chunk["files"]]

    singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
    self.assertEqual(singleChunk,
                     {"offset": 0,
                      "files": 11,
                      "events": 11264,
                      "lumis": 22,
                      "locations": set(["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"])},
                     "Error: Single chunk metadata is wrong")
    return
def testChunking(self):
    """
    _testChunking_

    Insert a workload and files that have several distinct sets of locations.
    Verify that the chunks are created correctly and that they only group
    files that have the same set of locations.  Also verify that the chunks
    are pulled out of ACDC correctly.
    """
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database="wmcore-acdc-datacollectionsvc")

    testFileA = File(lfn=makeUUID(), size=1024, events=1024)
    testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileA.addRun(Run(1, 1, 2))
    testFileB = File(lfn=makeUUID(), size=1024, events=1024)
    testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileB.addRun(Run(1, 3, 4))
    testFileC = File(lfn=makeUUID(), size=1024, events=1024)
    testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileC.addRun(Run(1, 5, 6))
    testJobA = self.getMinimalJob()
    testJobA.addFile(testFileA)
    testJobA.addFile(testFileB)
    testJobA.addFile(testFileC)

    testFileD = File(lfn=makeUUID(), size=1024, events=1024)
    testFileD.setLocation(["cmssrm.fnal.gov"])
    testFileD.addRun(Run(2, 1, 2))
    testFileE = File(lfn=makeUUID(), size=1024, events=1024)
    testFileE.setLocation(["cmssrm.fnal.gov"])
    testFileE.addRun(Run(2, 3, 4))
    testJobB = self.getMinimalJob()
    testJobB.addFile(testFileD)
    testJobB.addFile(testFileE)

    testFileF = File(lfn=makeUUID(), size=1024, events=1024,
                     parents={"/some/parent/F"})
    testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileF.addRun(Run(3, 1, 2))
    testFileG = File(lfn=makeUUID(), size=1024, events=1024,
                     parents={"/some/parent/G"})
    testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileG.addRun(Run(3, 3, 4))
    testFileH = File(lfn=makeUUID(), size=1024, events=1024,
                     parents={"/some/parent/H"})
    testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
    testFileH.addRun(Run(3, 5, 6))
    testJobC = self.getMinimalJob()
    testJobC.addFile(testFileF)
    testJobC.addFile(testFileG)
    testJobC.addFile(testFileH)

    testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileI.addRun(Run(4, 1, 2))
    testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileJ.addRun(Run(4, 3, 4))
    testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
    testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
    testFileK.addRun(Run(4, 5, 6))
    testJobD = self.getMinimalJob()
    testJobD.addFile(testFileI)
    testJobD.addFile(testFileJ)
    testJobD.addFile(testFileK)

    dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])

    chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)
    self.assertEqual(len(chunks), 4,
                     "Error: There should be four chunks: %s" % len(chunks))

    goldenMetaData = {1: {"lumis": 2,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 1024},
                      2: {"lumis": 4,
                          "locations": ["cmssrm.fnal.gov"],
                          "events": 2048},
                      3: {"lumis": 6,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                          "events": 3072},
                      5: {"lumis": 10,
                          "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                          "events": 5120}}

    testFiles = [testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
    lastFile = testFileA
    for testFile in testFiles:
        if lastFile["lfn"] < testFile["lfn"]:
            lastFile = testFile
    testFiles.remove(lastFile)

    goldenFiles = {1: [lastFile],
                   2: [testFileD, testFileE],
                   3: [testFileF, testFileG, testFileH],
                   5: testFiles}

    for chunk in chunks:
        chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                         chunk["offset"], chunk["files"])
        self.assertEqual(chunkMetaData["files"], chunk["files"])
        self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
        self.assertEqual(chunkMetaData["events"], chunk["events"])
        self.assertEqual(chunkMetaData["locations"], chunk["locations"])
        self.assertTrue(chunk["files"] in goldenMetaData.keys(),
                        "Error: Extra chunk found.")
        self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                         "Error: Lumis in chunk is wrong.")
        self.assertEqual(chunk["locations"],
                         goldenMetaData[chunk["files"]]["locations"],
                         "Error: Locations in chunk is wrong.")
        self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                         "Error: Events in chunk is wrong.")
        del goldenMetaData[chunk["files"]]

        chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                       chunk["offset"], chunk["files"])
        self.assertTrue(chunk["files"] in goldenFiles.keys(),
                        "Error: Extra chunk found.")
        goldenChunkFiles = goldenFiles[chunk["files"]]
        self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

        for chunkFile in chunkFiles:
            foundFile = None
            for goldenChunkFile in goldenChunkFiles:
                if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                    foundFile = goldenChunkFile
                    break
            self.assertIsNotNone(foundFile,
                                 "Error: Missing chunk file: %s, %s"
                                 % (chunkFiles, goldenChunkFiles))
            self.assertEqual(set(foundFile["parents"]), chunkFile["parents"],
                             "Error: File parents should match.")
            self.assertEqual(foundFile["merged"], chunkFile["merged"],
                             "Error: File merged status should match.")
            self.assertEqual(foundFile["locations"], chunkFile["locations"],
                             "Error: File locations should match.")
            self.assertEqual(foundFile["events"], chunkFile["events"])
            self.assertEqual(foundFile["size"], chunkFile["size"])
            self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]),
                             "Error: Wrong number of runs.")
            for run in foundFile["runs"]:
                runMatch = False
                for chunkRun in chunkFile["runs"]:
                    if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                        runMatch = True
                        break
                self.assertTrue(runMatch, "Error: Run information is wrong.")

        del goldenFiles[chunk["files"]]

    singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
    self.assertEqual(singleChunk,
                     {"offset": 0,
                      "files": 11,
                      "events": 11264,
                      "lumis": 22,
                      "locations": {"castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"}},
                     "Error: Single chunk metadata is wrong")
    return