Пример #1
0
    def testAddFile(self):
        """
        _testAddFile_

        Testcase for the addFile method of the Fileset class
        """
        # First test - Add file and check if it's there
        testfile = File('/tmp/lfntest', 9999, 9, 9)
        self.fileset.addFile(testfile)
        self.assertTrue(
            testfile in self.fileset.listNewFiles(),
            "Couldn't add file to fileset - fileset.addfile method not working"
        )

        # Second test - Add a file with the same lfn but a new location, and
        # check that the existing entry gets updated rather than duplicated.
        testFileSame = File('/tmp/lfntest', 9999, 9, 9)
        # Pass the location wrapped in a list: set('dummyse.dummy.com') would
        # build a set of single characters, not a one-element location set.
        testFileSame.setLocation(set(['dummyse.dummy.com']))
        self.fileset.addFile(testFileSame)
        self.assertTrue(
            testFileSame in self.fileset.getFiles(),
            'Same file copy failed - fileset.addFile not updating location of already existing files'
        )
        self.assertTrue(
            testfile in self.fileset.getFiles(),
            'Same file copy failed - fileset.addFile unable to remove previous file from list'
        )

        # Third test - the updated file must also still be listed as new
        self.assertTrue(
            testFileSame in self.fileset.listNewFiles(),
            'Same file copy failed - fileset.addFile not adding file to fileset.newFiles'
        )
Пример #2
0
    def testG_LumiMask(self):
        """
        _testG_LumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Three files with 100 events per lumi:
        #  - file1: one run of 8 lumis (10-17)
        #  - file2: two runs of 2 lumis each (20-21, 30-31)
        #  - file3: one run of 5 lumis (40-44)
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        fileA.addRun(Run(1, *[10 + lumi for lumi in range(8)]))
        fileA.setLocation("somese.cern.ch")
        fileB.addRun(Run(2, *[20 + lumi for lumi in range(2)]))
        fileB.addRun(Run(3, *[30 + lumi for lumi in range(2)]))
        fileB.setLocation("somese.cern.ch")
        fileC.addRun(Run(4, *[40 + lumi for lumi in range(5)]))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        for inputFile in (fileA, fileB, fileC):
            testFileset.addFile(inputFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        # 850 events per job: the first job takes runs 1 and 2 plus the first
        # masked lumi of run 4; the second job gets the remaining lumi 41.
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})
Пример #3
0
    def setUp(self):
        """
        Create a dummy fileset and populate it with random files,
        in order to use it for the testcase methods
        """
        # Log to a file named after this test module.
        logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    datefmt='%m-%d %H:%M',
                    filename=__file__.replace('.py', '.log'),
                    filemode='w')
        self.logger = logging.getLogger('FilesetClassTest')

        # Setup the initial testcase environment:
        initialfile = File('/tmp/lfn1', 1000, 1, 1, 1)
        self.initialSet = set()
        self.initialSet.add(initialfile)

        # Create a Fileset containing the initial file.
        self.fileset = Fileset(name='self.fileset', files=self.initialSet)
        # Populate the fileset with 999 randomly named files.
        for _ in range(1, 1000):
            lfn = '/store/data/%s/%s/file.root' % (random.randint(1000, 9999),
                                                   random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            # 'newFile' instead of 'file': don't shadow the builtin.
            newFile = File(lfn=lfn, size=size, events=events, checksums={"cksum": "1"})
            newFile.addRun(Run(run, *[lumi]))
            self.fileset.addFile(newFile)
Пример #4
0
    def testComparison(self):
        """
        testComparison

        Verify File equality/inequality semantics.
        ACHTUNG! File.__eq__() focuses only on self['lfn'],
                 it does not check for any other key/property
        """
        fileOne = File(lfn="lfn")
        fileOneCopy = File(lfn="lfn")

        # Same lfn means equal, even for distinct objects.
        self.assertEqual(fileOne, fileOneCopy)
        self.assertTrue(fileOne == fileOneCopy)
        self.assertTrue(fileOne is fileOne)
        self.assertTrue(fileOne is not fileOneCopy)

        # A File also compares equal to its bare lfn string, both ways round.
        self.assertEqual(fileOne, "lfn")
        self.assertTrue(fileOne == "lfn")
        self.assertEqual("lfn", fileOne)
        self.assertTrue("lfn" == fileOne)

        fileTwo = File(lfn="lfn-2")
        self.assertNotEqual(fileOne, fileTwo)
        self.assertTrue(fileOne != fileTwo)
        self.assertNotEqual(fileOne, "lfn-2")
        self.assertTrue(fileOne != "lfn-2")
        self.assertNotEqual("lfn-2", fileOne)
        self.assertTrue("lfn-2" != fileOne)

        # Ordering comparisons (e.g. fileOne < fileTwo) worked in py2 via
        # dict.__cmp__ but raise TypeError in py3, so none are asserted here.

        return
Пример #5
0
    def setUp(self):
        """
        _setUp_

        Build two FileBased subscriptions: one over a ten-file fileset with
        two locations per file, and one over a single-file fileset.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            aFile = File(makeUUID(), size=1000, events=100)
            for site in ('blenheim', 'malpaquet'):
                aFile.setLocation(site)
            self.multipleFileFileset.addFile(aFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        loneFile = File("/some/file/name", size=1000, events=100)
        loneFile.setLocation('blenheim')
        self.singleFileFileset.addFile(loneFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="FileBased",
                                                     type="Processing")
        self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                                   workflow=testWorkflow,
                                                   split_algo="FileBased",
                                                   type="Processing")

        return
Пример #6
0
    def setUp(self):
        """
        _setUp_

        Create four FixedDelay subscriptions: ten files with distinct runs,
        a single file, ten files spread over a few lumis of one run, and
        ten files sharing a single run/lumi.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(i, *[45 + i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            # Integer division groups the files three-per-lumi (45..48);
            # plain '/' would yield float lumi numbers on Python 3.
            newFile.addRun(Run(1, *[45 + i // 3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name="TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")

        return
Пример #7
0
    def setUp(self):
        """
        _setUp_

        Build three filesets/subscriptions: ten single-site files
        (SizeBased), one single file (SizeBased), and a mixed fileset of
        five single-site plus five dual-site files (EventBased).
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            self.multipleFileFileset.addFile(
                File(makeUUID(), size=1000, events=100,
                     locations=set(["somese.cern.ch"])))

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.addFile(
            File("/some/file/name", size=1000, events=100,
                 locations=set(["somese.cern.ch"])))

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        for _ in range(5):
            siteFile = File(makeUUID(), size=1000, events=100,
                            locations=set(["somese.cern.ch"]))
            siteFile.setLocation("somese.cern.ch")
            self.multipleSiteFileset.addFile(siteFile)
        for _ in range(5):
            siteFile = File(makeUUID(), size=1000, events=100)
            siteFile.setLocation(["somese.cern.ch", "otherse.cern.ch"])
            self.multipleSiteFileset.addFile(siteFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="SizeBased",
                                                     type="Processing")
        self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                                   workflow=testWorkflow,
                                                   split_algo="SizeBased",
                                                   type="Processing")
        self.multipleSiteSubscription = Subscription(fileset=self.multipleSiteFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
        return
Пример #8
0
    def createTestJob(self):
        """
        _createTestJob_

        Build a Job whose two input files each carry one parent file.
        """
        job = Job(name="TestJob")
        for lfn, parentLfn in (("/some/file/one", "/some/parent/one"),
                               ("/some/file/two", "/some/parent/two")):
            job.addFile(File(lfn=lfn, parents=set([File(lfn=parentLfn)])))
        return job
Пример #9
0
def createFile():
    """
    _createFile_

    Build a File carrying randomized size/event metadata, one random
    parent, and a random location string.
    """
    randomFile = File(lfn=makeUUID(),
                      size=random.randrange(1024, 1048576, 1024),
                      events=random.randrange(10, 100000, 50),
                      parents=[File(lfn=makeUUID())],
                      locations=makeUUID())
    for key, value in (("first_event", 0), ("last_event", 0), ("id", 1)):
        randomFile[key] = value
    return randomFile
Пример #10
0
    def setUp(self):
        """
        _setUp_

        Build three EventBased subscriptions: ten files, a single file, and
        a zero-event file, plus the splitter performance parameters.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            aFile = File(makeUUID(), size=1000, events=100)
            aFile.setLocation('se01')
            self.multipleFileFileset.addFile(aFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        aFile = File("/some/file/name", size=1000, events=100)
        aFile.setLocation('se02')
        self.singleFileFileset.addFile(aFile)

        self.emptyFileFileset = Fileset(name="TestFileset3")
        aFile = File("/some/file/name", size=1000, events=0)
        aFile.setLocation('se03')
        self.emptyFileFileset.addFile(aFile)

        # All three subscriptions share the same workflow and algorithm.
        testWorkflow = Workflow()
        commonArgs = dict(workflow=testWorkflow,
                          split_algo="EventBased",
                          type="Processing")
        self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                     **commonArgs)
        self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                                   **commonArgs)
        self.emptyFileSubscription = Subscription(fileset=self.emptyFileFileset,
                                                  **commonArgs)

        self.eventsPerJob = 100
        # timePerEvent deliberately left unset (None), as in the original.
        self.performanceParams = {'timePerEvent': None,
                                  'memoryRequirement': 2300,
                                  'sizePerEvent': 400}

        return
Пример #11
0
    def testCommit(self):
        """
        _testCommit_

        Testcase for the commit method of the Fileset class
        """
        localTestFileSet = Fileset('LocalTestFileset', self.initialSet)
        fsSize = len(localTestFileSet.getFiles(type="lfn"))
        # Dummy file to test
        fileTestCommit = File('/tmp/filetestcommit', 0, 1, 1)
        # File is added to the newfiles attribute of localTestFileSet
        localTestFileSet.addFile(fileTestCommit)
        # unittest assertions instead of bare asserts (which vanish under
        # python -O); the original messages also had missing spaces from
        # implicit string concatenation.
        self.assertEqual(fsSize, len(localTestFileSet.getFiles(type="lfn")) - 1,
                         'file not added correctly to test fileset')
        newfilestemp = localTestFileSet.newfiles
        self.assertIn(fileTestCommit, newfilestemp,
                      'test file not in the new files list')
        # After commit, the dummy file is supposed to move from newfiles to files
        localTestFileSet.commit()
        # First, the new files must now be part of fileset.files
        self.assertTrue(newfilestemp.issubset(localTestFileSet.files),
                        'Test file not present at fileset.files - '
                        'fileset.commit not working properly')
        # Second, fileset.newfiles must be empty
        self.assertEqual(localTestFileSet.newfiles, set(),
                         'Test file not present at fileset.newfiles '
                         '- fileset.commit not working properly')
Пример #12
0
    def populateCouchDB(self):
        """
        _populateCouchDB_

        Populate the ACDC records: two owners, four collections (the name
        "Thunderstruck" is reused across owners), four filesets and five
        files shared by every fileset.
        """
        svc = CouchService(url=self.testInit.couchUrl,
                           database=self.testInit.couchDbName)

        ownerA = svc.newOwner("somegroup", "someuserA")
        ownerB = svc.newOwner("somegroup", "someuserB")

        # Collections A/B belong to ownerA, C/D to ownerB; A, C and D all
        # share the collection name "Thunderstruck".
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionA.setOwner(ownerA)
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Struckthunder")
        testCollectionB.setOwner(ownerA)
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionC.setOwner(ownerB)
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionD.setOwner(ownerB)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testCollectionA.addFileset(testFilesetA)
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testCollectionB.addFileset(testFilesetB)
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionC.addFileset(testFilesetC)
        testFilesetD = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetD")
        # NOTE(review): testFilesetD is attached to collection C, not D --
        # the same pattern appears elsewhere in this file, so it looks
        # deliberate, but confirm before relying on it.
        testCollectionC.addFileset(testFilesetD)

        testFiles = []
        for i in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFiles.append(testFile)

        # The sleeps space out the insertion timestamps -- presumably so the
        # fileset records can be ordered by time; TODO confirm.
        testFilesetA.add(testFiles)
        time.sleep(1)
        testFilesetB.add(testFiles)
        time.sleep(1)
        testFilesetC.add(testFiles)
        time.sleep(2)
        testFilesetD.add(testFiles)
Пример #13
0
    def testFileset(self):
        """
        _testFileset_

        Verify that converting an ACDC fileset to a DataStructs fileset works
        correctly.
        """
        testCollection = CouchCollection(database=self.testInit.couchDbName,
                                         url=self.testInit.couchUrl,
                                         name="Thunderstruck")
        testFileset = CouchFileset(database=self.testInit.couchDbName,
                                   url=self.testInit.couchUrl,
                                   name="TestFileset")
        testCollection.addFileset(testFileset)

        # Insert five random files, remembering them by lfn.
        expected = {}
        for _ in range(5):
            lfn = makeUUID()
            newFile = File(lfn=lfn,
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
            expected[lfn] = newFile
            testFileset.add([newFile])

        # Every converted file must match the one we inserted.
        for convertedFile in testFileset.fileset().files:
            lfn = convertedFile["lfn"]
            self.assertTrue(lfn in expected, "Error: File missing.")
            self.assertEqual(convertedFile["events"], expected[lfn]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(convertedFile["size"], expected[lfn]["size"],
                             "Error: Wrong file size.")
        return
Пример #14
0
    def getChunkFiles(self,
                      collectionName,
                      filesetName,
                      chunkOffset,
                      chunkSize=100,
                      user="******",
                      group="cmsdataops"):
        """
        _getChunkFiles_

        Retrieve a chunk of files from the given collection and task.
        """
        rawFiles = self._getFilesetInfo(collectionName, filesetName, user, group,
                                        chunkOffset, chunkSize)

        chunkFiles = []
        for info in mergeFakeFiles(rawFiles):
            chunkFile = File(lfn=info["lfn"],
                             size=info["size"],
                             events=info["events"],
                             parents=set(info["parents"]),
                             locations=set(info["locations"]),
                             merged=info["merged"])
            # Rebuild each run with its lumi list.
            for runInfo in info["runs"]:
                run = Run(runInfo["run_number"])
                run.extend(runInfo["lumis"])
                chunkFile.addRun(run)
            chunkFiles.append(chunkFile)

        return chunkFiles
Пример #15
0
    def setUp(self):
        """
        _setUp_

        Initial Setup for the Job Testcase: build 999 random input files
        and a dummy Job holding them.
        """
        self.inputFiles = []

        for _ in range(1, 1000):
            lfn = "/store/data/%s/%s/file.root" % (random.randint(
                1000, 9999), random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            # 'newFile' instead of 'file': don't shadow the builtin.
            newFile = File(lfn=lfn,
                           size=size,
                           events=events,
                           checksums={"cksum": "1"})
            newFile.addRun(Run(run, *[lumi]))
            self.inputFiles.append(newFile)

        self.dummyJob = Job(files=self.inputFiles)
        return
Пример #16
0
    def testAddRun(self):
        """
        This tests the addRun() function of a DataStructs File object
        """
        testLFN = "lfn"
        testSize = "1024"
        testEvents = "100"
        # checksums is a dict keyed by algorithm, matching testDefinition;
        # the original passed the bare string "1" here.
        testCksum = {"cksum": "1"}
        testParents = "parent"

        testLumi = 1
        testRunNumber = 1000000

        testFile = File(lfn=testLFN,
                        size=testSize,
                        events=testEvents,
                        checksums=testCksum,
                        parents=testParents)
        testRun = Run(testRunNumber, testLumi)

        testFile.addRun(testRun)

        # assertIn survives python -O, unlike a bare assert.
        self.assertIn(testRun, testFile['runs'],
                      "Run not added properly to run in File.addRun()")

        return
Пример #17
0
    def testListCollectionsFilesets(self):
        """
        _testListCollectionsFilesets_

        Verify that collections and filesets in ACDC can be listed.
        """
        svc = CouchService(url=self.testInit.couchUrl,
                           database=self.testInit.couchDbName)

        # Collections A, C and D share the name "Thunderstruck"; B has a
        # different name and its fileset must NOT show up in the listing.
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Struckthunder")
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testCollectionA.addFileset(testFilesetA)
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testCollectionB.addFileset(testFilesetB)
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionC.addFileset(testFilesetC)
        testFilesetD = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetD")
        # NOTE(review): fileset D is attached to collection C (not D), yet it
        # is still expected in the golden list below -- presumably
        # listFilesets() matches by collection name; confirm.
        testCollectionC.addFileset(testFilesetD)

        testFiles = []
        for i in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFiles.append(testFile)

        testFilesetA.add(testFiles)
        testFilesetB.add(testFiles)
        testFilesetC.add(testFiles)
        testFilesetD.add(testFiles)

        # Listing filesets for collection D must yield exactly A, C and D.
        goldenFilesetNames = ["TestFilesetA", "TestFilesetC", "TestFilesetD"]
        for fileset in svc.listFilesets(testCollectionD):
            self.assertTrue(fileset["name"] in goldenFilesetNames,
                            "Error: Missing fileset.")
            goldenFilesetNames.remove(fileset["name"])
        self.assertEqual(len(goldenFilesetNames), 0,
                         "Error: Missing filesets.")

        return
Пример #18
0
    def setupACDCDatabase(self, collectionName, taskPath,
                          user, group):
        """
        _setupACDCDatabase_

        Populate an ACDC database with bogus records
        associated to certain collection name, user and task path.
        """
        # NOTE(review): the CouchService targets "%s_acdc" % self.couchDBName
        # while the collection/fileset use self.testInit.couchDbName -- these
        # may or may not resolve to the same database; confirm in the test
        # initialization code.
        acdcServer = CouchService(url = self.testInit.couchUrl,
                                  database = "%s_acdc" % self.couchDBName)
        owner = acdcServer.newOwner(group, user)
        testCollection = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = collectionName)
        testCollection.setOwner(owner)
        # The fileset is named after the task path so records can be found
        # per task.
        testFileset = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = taskPath)
        testCollection.addFileset(testFileset)

        # Five files with random sizes and event counts.
        testFiles = []
        for _ in range(5):
            testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
            testFiles.append(testFile)

        testFileset.add(testFiles)
Пример #19
0
    def execute(self, *args, **kwargs):
        """
        Build a single-file fake MC fileset for the task described in
        kwargs['task'] and return it wrapped in a Result.
        """
        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        # MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        if hasattr(self.config.Sites, 'available'):
            # Honour an explicit site list when one is configured.
            newFile.setLocation(self.config.Sites.available)
        else:
            # Otherwise any CMS site known to SiteDB is a valid location.
            sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                              "cert":self.config.TaskWorker.cmscert})
            newFile.setLocation(sbj.getAllCMSNames())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        # Fixed typo: the original wrote 'MCFackBlock'; the refactored
        # version of this method uses 'MCFakeBlock'.
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
Пример #20
0
    def execute(self, *args, **kwargs):  #pylint: disable=unused-argument
        """
        Build a one-file fake fileset describing the task's MC production
        and return it wrapped in a Result.
        """
        # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
        # but that would confuse WMCore, therefore cast to int
        totalevents = int(kwargs['task']['tm_totalunits'])
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Default of 100 events per lumi: stored as a task property because
        # the splitting uses it independently of the fake file below.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        # MC comes with only one MCFakeFile, covering lumis 1-10 of run 1.
        fakeFile = File("MCFakeFile", size=1000, events=totalevents)
        fakeFile.setLocation(self.getListOfSites())
        fakeFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        fakeFile["block"] = 'MCFakeBlock'
        fakeFile["first_event"] = firstEvent
        fakeFile["last_event"] = lastEvent

        mcFileset = Fileset(name="MCFakeFileSet")
        mcFileset.addFile(fakeFile)

        return Result(task=kwargs['task'], result=mcFileset)
Пример #21
0
    def testDefinition(self):
        """
        This tests the definition of a DataStructs File object:
        every constructor argument must be stored under its key.
        """
        testLFN = "lfn"
        testSize = "1024"
        testEvents = "100"
        testCksum = {"cksum": "1"}
        testParents = "parent"

        testFile = File(lfn=testLFN,
                        size=testSize,
                        events=testEvents,
                        checksums=testCksum,
                        parents=testParents)

        # Each constructor argument lands under the matching key.
        expectations = {'lfn': testLFN,
                        'size': testSize,
                        'events': testEvents,
                        'checksums': testCksum,
                        'parents': testParents}
        for key, value in expectations.items():
            self.assertEqual(testFile[key], value)

        return
Пример #22
0
    def testListFiles(self):
        """
        _testListFiles_

        Verify that the files iterator works correctly.
        """
        testCollection = CouchCollection(database=self.testInit.couchDbName,
                                         url=self.testInit.couchUrl,
                                         name="Thunderstruck")
        testCollection.setOwner(self.owner)
        testFileset = CouchFileset(database=self.testInit.couchDbName,
                                   url=self.testInit.couchUrl,
                                   name="TestFileset")
        testCollection.addFileset(testFileset)

        testFiles = {}
        for i in range(5):
            lfn = makeUUID()
            testFile = File(lfn=lfn,
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFiles[lfn] = testFile
            testFileset.add([testFile])

        # 'listedFile' avoids shadowing the builtin 'file'; membership is
        # tested against the dict directly rather than via .keys().
        for listedFile in testFileset.listFiles():
            self.assertTrue(listedFile["lfn"] in testFiles,
                            "Error: File missing.")
            self.assertEqual(listedFile["events"],
                             testFiles[listedFile["lfn"]]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(listedFile["size"],
                             testFiles[listedFile["lfn"]]["size"],
                             "Error: Wrong file size.")
        return
Пример #23
0
    def doBlock( self, entity, fileset ):
        """
        _doBlock_

        Query the PhEDEx data service for the given block, build a File for
        every file in the block, and add to *fileset* only the files that
        have at least one replica.  (Python 2 code: print statements.)
        """

        # Fetch block information from the PhEDEx node URL.
        connection = urlopen( self.nodeURL + "&block=%s" % quote( entity ) )
        aString = connection.read()
        connection.close()

        # Crude sanity check on the payload (expects '{"phedex...' shape).
        if aString[2:8] != "phedex":
            print "PhEDExNotifier: bad string from server follows."
            print "%s" % aString

        # SECURITY NOTE(review): the response is parsed with eval() after a
        # "null" -> "None" substitution.  eval on data fetched from the
        # network is unsafe even with empty globals/locals; a JSON parser
        # should be used instead -- flagged, not changed here.
        phedex = eval( aString.replace( "null", "None" ), {}, {} )

        # Only the first block is considered; warn if more were returned.
        blocks = phedex[ 'phedex' ][ 'block' ]
        if len( blocks ) != 1:
            print "PhEDExNotifier: Found %d blocks, expected 1, will only consider first block" % len( blocks)

        files = blocks[0][ 'file' ]
        for file in files:
            lfn = file[ 'name' ]
            # Event count and run/lumi info come from separate helpers.
            events = self.getEvents( lfn )
            (runs,lumis) = self.getRunLumi( lfn )
            fileToAdd = File( lfn, file[ 'bytes'], events, runs[0], lumis[0] )
            replicas = file[ 'replica' ]
            # Files without any replica are silently skipped.
            if len( replicas ) > 0:
                locations = []
                for replica in replicas:
                    locations.append( replica[ 'node' ] )
                fileToAdd.setLocation( locations )
                fileset.addFile( fileToAdd )
Пример #24
0
    def generateFakeMCFile(self,
                           numEvents=100,
                           firstEvent=1,
                           lastEvent=100,
                           firstLumi=1,
                           lastLumi=10,
                           existingSub=None):
        """
        Build the single MCFakeFile that MC workflows use and attach it to a
        subscription.  If *existingSub* is given, the file is added to its
        fileset and the same subscription is returned; otherwise a fresh
        Production/EventBased subscription is created around the file.
        """
        # MC comes with only one MCFakeFile
        mcFile = File("MCFakeFileTest", size=1000, events=numEvents)
        mcFile.setLocation('se01')
        # A single-lumi request includes lastLumi; a range excludes it
        # (mirrors the original two-branch behaviour).
        lumiStop = lastLumi + 1 if firstLumi == lastLumi else lastLumi
        mcFile.addRun(Run(1, *range(firstLumi, lumiStop)))
        mcFile["first_event"] = firstEvent
        mcFile["last_event"] = lastEvent

        if existingSub is not None:
            existingSub['fileset'].addFile(mcFile)
            return existingSub

        singleMCFileset = Fileset(name="MCTestFileset")
        singleMCFileset.addFile(mcFile)
        return Subscription(fileset=singleMCFileset,
                            workflow=Workflow(),
                            split_algo="EventBased",
                            type="Production")
Пример #25
0
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        # Fileset 1: ten files, each at two locations, each file carrying
        # run i with lumis (i*100) .. (i*100)+19.
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            lumis = []
            for lumi in range(20):
                lumis.append((i * 100) + lumi)
                # NOTE(review): addRun is called inside the lumi loop, so run
                # i is added 20 times with a growing lumi list.  This may be
                # intentional (Run equality includes lumis) — confirm before
                # hoisting it out of the loop.
                newFile.addRun(Run(i, *lumis))
            self.multipleFileFileset.addFile(newFile)

        # Fileset 2: one file at one location with two disjoint lumi ranges
        # in run 13 (useful for lumi-mask splitting tests).
        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('blenheim')
        lumis = list(range(50, 60)) + list(range(70, 80))
        newFile.addRun(Run(13, *lumis))
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        # Resource-estimation parameters consumed by the job splitters.
        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }

        return
Пример #26
0
    def testDropCount(self):
        """
        _testDropCount_

        Verify that dropping a fileset and counting the files in a fileset works
        correctly.
        """
        collThunder = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl,
                                      name="Thunderstruck")
        collStruck = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl,
                                     name="StruckThunder")

        # Five random files, shared by all three filesets below.
        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        filesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFilesetA")
        filesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFilesetB")
        filesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFilesetC")
        collThunder.addFileset(filesetA)
        collStruck.addFileset(filesetB)
        collStruck.addFileset(filesetC)
        for fset in (filesetA, filesetB, filesetC):
            fset.add(sharedFiles)

        # Dropping C should leave StruckThunder with only fileset B.
        filesetC.drop()

        reloadedStruck = CouchCollection(database=self.testInit.couchDbName,
                                         url=self.testInit.couchUrl,
                                         name="StruckThunder")
        reloadedStruck.populate()

        self.assertEqual(len(reloadedStruck["filesets"]), 1,
                         "Error: There should be one fileset in this collection.")
        self.assertEqual(reloadedStruck["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")

        # Thunderstruck was untouched: still one fileset of five files.
        reloadedThunder = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        reloadedThunder.populate()

        self.assertEqual(len(reloadedThunder["filesets"]), 1,
                         "Error: There should be one fileset in this collection.")
        self.assertEqual(reloadedThunder["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")
        return
Пример #27
0
    def stuffACDCDatabase(self, numFiles = 50, lumisPerFile = 20, lumisPerACDCRecord = 2):
        """
        _stuffACDCDatabase_

        Fill the ACDC database with ACDC records, both for processing
        and merge.

        :param numFiles: number of distinct LFNs to create per fileset
        :param lumisPerFile: number of lumis per file; records are cut into
                             slices of *lumisPerACDCRecord* lumis each
        :param lumisPerACDCRecord: lumi-slice size of each ACDC record
        """
        # Processing records: merged files, available at every valid location.
        self._queueACDCRecords('/%s/DataProcessing' % self.workflowName,
                               '/store/data/a',
                               numFiles, lumisPerFile, lumisPerACDCRecord,
                               merged = True)
        # Merge-input records: unmerged files at one randomly chosen location.
        self._queueACDCRecords('/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName,
                               '/store/unmerged/b',
                               numFiles, lumisPerFile, lumisPerACDCRecord,
                               merged = False)
        self.acdcDB.commit()

        return

    def _queueACDCRecords(self, filesetName, lfnBase, numFiles, lumisPerFile,
                          lumisPerACDCRecord, merged):
        """
        Queue one ACDC document per (file, lumi-slice) pair into self.acdcDB.

        Factored out of stuffACDCDatabase, which previously duplicated this
        loop verbatim for the processing and merge filesets.
        """
        owner = 'unknown'
        group = 'unknown'
        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '%s/%d' % (lfnBase, i)
                if merged:
                    locations = self.validLocations
                else:
                    # Unmerged files live at a single random location.
                    locations = set([choice(self.validLocations)])
                acdcFile = File(lfn = lfn, size = 100, events = 250,
                                locations = locations, merged = int(merged))
                # Run i+1 with the lumi slice [j, j+lumisPerACDCRecord),
                # clamped to the file's last lumi.
                run = Run(i + 1, *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {'collection_name' : self.workflowName,
                           'collection_type' : 'ACDC.CollectionTypes.DataCollection',
                           'files' : {lfn : acdcFile},
                           'fileset_name' : filesetName,
                           'owner' : {'user': owner,
                                      'group' : group}}
                self.acdcDB.queue(acdcDoc)
Пример #28
0
    def createTestJob(self):
        """
        Create a test job to pass to the DashboardInterface.

        The job carries two input files, each with a single lumi in run 1,
        and a fixed id of 1.
        """
        testJob = Job(name = "ThisIsASillyName")

        # Two input files, one lumi apiece.
        for lfn, lumi in (("/this/is/a/lfnA", 45), ("/this/is/a/lfnB", 46)):
            inputFile = File(lfn = lfn, size = 1024, events = 10)
            inputFile.addRun(Run(1, *[lumi]))
            testJob.addFile(inputFile)

        testJob['id'] = 1

        return testJob
Пример #29
0
    def testDefinition(self):
        """
        This tests the definition of a DataStructs File object

        A default-constructed File must be empty; a fully-specified one must
        carry every constructor argument through to its dict fields.
        """
        emptyFile = File()
        self.assertEqual(emptyFile['lfn'], "")
        self.assertEqual(emptyFile['size'], 0)
        self.assertEqual(emptyFile['events'], 0)
        self.assertEqual(emptyFile['checksums'], {})
        self.assertItemsEqual(emptyFile['parents'], {})
        self.assertItemsEqual(emptyFile['locations'], {})
        self.assertFalse(emptyFile['merged'])

        param = {
            "lfn": "my_lfn",
            "size": 1024,
            "events": 100,
            "checksums": {
                'adler32': 'BLAH',
                'cksum': '12345'
            },
            "parents": "my_parent",
            "locations": {"PNN_Location"},
            "merged": True
        }
        # The dict keys match the constructor's keyword names exactly, so the
        # whole mapping can be splatted in.
        fullFile = File(**param)

        self.assertEqual(fullFile['lfn'], param['lfn'])
        self.assertEqual(fullFile['size'], param['size'])
        self.assertEqual(fullFile['events'], param['events'])
        self.assertItemsEqual(fullFile['checksums'], param['checksums'])
        self.assertEqual(fullFile['parents'], param['parents'])
        self.assertItemsEqual(fullFile['locations'], param['locations'])
        self.assertTrue(fullFile['merged'])

        return
Пример #30
0
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it.

        Queries DBS for the input dataset's files, resolves each block's
        storage locations (cached per block), splits the resulting fileset
        file-based, and returns a list of JobDefinition instances.
        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(
            analysisDataset=self.inputDataset(),
            retriveList=['retrive_block', 'retrive_run'])

        # Per-block location cache: each block is looked up at most once.
        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            # "not in" replaces dict.has_key(), which was removed in Python 3.
            if block not in blocks:
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            # Plain loop instead of a side-effect-only list comprehension.
            for location in blocks[block]:
                wmbsFile['locations'].add(location)
            wmbsFile['block'] = block
            thefiles.addFile(wmbsFile)

        work = Workflow()
        subs = Subscription(fileset=thefiles,
                            workflow=work,
                            split_algo='FileBased',
                            type="Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job=self.splitSize)

        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            # Collect the SE names of every input file for this job
            # (plain loop instead of a side-effect-only comprehension).
            for jobFile in job.listFiles():
                jobDef['SENames'].extend(list(jobFile['locations']))
            jobDefs.append(jobDef)

        return jobDefs