Exemplo n.º 1
0
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS Url, dataset name and list of blockNames,
    generate an XML structure for injection

    """
    spec = XMLInjectionSpec(dbsUrl)


    reader = DBSReader(dbsUrl, version = "DBS_2_0_9")

    dataset = spec.getDataset(datasetPath)

    for block in blockNames:
        blockContent = reader.getFileBlock(block)
        isOpen = reader.blockIsOpen(block)
        if isOpen:
            xmlBlock = dataset.getFileblock(block, "y")
        else:
            xmlBlock = dataset.getFileblock(block, "n")

        #Any Checksum from DBS is type cksum
        for x in blockContent[block]['Files']:
            checksums = {'cksum' : x['Checksum']}
            if x.get('Adler32') not in (None, ''):
                checksums['adler32'] = x['Adler32']
            xmlBlock.addFile(x['LogicalFileName'], checksums, x['FileSize'])

    xml = spec.save()
    return xml
Exemplo n.º 2
0
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS2 Url, dataset name and list of blockNames,
    generate an XML structure for injection

    """
    spec = XMLInjectionSpec(dbsUrl)

    reader = DBSReader(dbsUrl, version="DBS_2_0_9")

    dataset = spec.getDataset(datasetPath)

    for block in blockNames:
        blockContent = reader.getFileBlock(block)
        isOpen = reader.blockIsOpen(block)
        if isOpen:
            xmlBlock = dataset.getFileblock(block, "y")
        else:
            xmlBlock = dataset.getFileblock(block, "n")

        #Any Checksum from DBS is type cksum
        for x in blockContent[block]['Files']:
            checksums = {'cksum': x['Checksum']}
            if x.get('Adler32') not in (None, ''):
                checksums['adler32'] = x['Adler32']
            xmlBlock.addFile(x['LogicalFileName'], checksums, x['FileSize'])

    xml = spec.save()
    return xml
Exemplo n.º 3
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integration")
    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        for endpoint in [
                self.endpoint,
                'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:'
        ]:
            self.dbs = DBSReader(endpoint)
            details = self.dbs.listDatasetFileDetails(DATASET)
            self.assertEqual(len(details), 49)
            self.assertTrue(TESTFILE in details)
            self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
            self.assertEqual(details[TESTFILE]['Size'], 286021145)
            self.assertEqual(
                details[TESTFILE]['BlockName'],
                '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace'
            )
            self.assertEqual(details[TESTFILE]['Checksums'], {
                'Checksum': '22218315',
                'Adler32': 'a41a1446',
                'Md5': 'NOTSET'
            })
            self.assertTrue(173658 in details[TESTFILE]['Lumis'])
            self.assertEqual( sorted(details[TESTFILE]['Lumis'][173658]),
                sorted( map( long, [8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37,  \
                                    25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55, \
                                    57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54,  \
                                    63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,\
                                    108, 98, 103, 109, 105]))
            )

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [
            x['Name'] for x in block['StorageElementList']
            if x['Name'].find('cern.ch') > -1
        ]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertFalse(
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET,
                                        blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(files))
        self.assertEqual(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60',
            files[0]['Block']['Name'])
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader(
            'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [
            x for x in self.dbs.listFileBlockLocation(BLOCK)
            if x and x.find('cern.ch') > -1
        ]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK,
                                                                BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(len(block), 1)
        block = block[
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
            parents[0]['Name'])
        sites = [
            x for x in parents[0]['StorageElementList']
            if x.find("cern.ch") > -1
        ]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents(
                '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'
            ))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Exemplo n.º 4
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integrtion")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [
            x['Name'] for x in block['StorageElementList']
            if x['Name'].find('cern.ch') > -1
        ]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertFalse(
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET,
                                        blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(files))
        self.assertEqual(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60',
            files[0]['Block']['Name'])
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        self.dbs = DBSReader(self.endpoint)
        # assume one site is cern
        sites = [
            x for x in self.dbs.listFileBlockLocation(BLOCK)
            if x.find('cern.ch') > -1
        ]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(len(block), 1)
        block = block[
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
            parents[0]['Name'])
        sites = [
            x for x in parents[0]['StorageElementList']
            if x.find("cern.ch") > -1
        ]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents(
                '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'
            ))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Exemplo n.º 5
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listDatatiers()
        self.assertTrue('RAW' in results)
        self.assertTrue('GEN-SIM-RECO' in results)
        self.assertTrue('GEN-SIM' in results)
        self.assertFalse('RAW-ALAN' in results)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertEqual([173657], runs)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], None)
        runs = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertTrue(173657 in runs)
        self.assertEqual(runs[173657], None)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(
            details[TESTFILE]['BlockName'],
            '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
        self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]), [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
            54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
            71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
            88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
            104, 105, 106, 107, 108, 109, 110, 111
        ])

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [
            x['Name'] for x in block['PhEDExNodeList']
            if x['Name'].find('CH_CERN') > -1
        ]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError,
                          self.dbs.getFileBlocksInfo,
                          DATASET,
                          blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET,
                                        blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists,
                          DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        )
        self.assertEqual(4, len(files))
        self.assertEqual(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
            files[0]['block_name'])
        self.assertEqual(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
            files[0]['BlockName'])
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader(
            'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [
            x for x in self.dbs.listFileBlockLocation(BLOCK)
            if x and x.find('CH_CERN') > -1
        ]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK,
                                                                BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        )
        self.assertEqual(len(block), 1)
        block = block[
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0']
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        )
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0',
            parents[0]['Name'])
        sites = [
            x for x in parents[0]['PhEDExNodeList'] if x.find("CH_CERN") > -1
        ]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents(
                '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'
            ))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath,
                          BLOCK + 'asas')
Exemplo n.º 6
0
class DBSReaderTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = DBSReader(endpoint)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        runs = self.dbs.listRuns(dataset = DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset = DATASET, block = BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integrtion")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList'] if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName = BLOCK, onlyClosedBlocks = True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        files = self.dbs.listFilesInBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(files))
        self.assertEqual('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60', files[0]['Block']['Name'])
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x.find('cern.ch') > -1]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        block = self.dbs.getFileBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(len(block), 1)
        block = block['/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        parents = self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        self.assertFalse(self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Exemplo n.º 7
0
class DBSReaderTest(EmulatedUnitTestCase):

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        super(DBSReaderTest, self).setUp()
        return

    def tearDown(self):
        """
        _tearDown_


        :return:
        """

        super(DBSReaderTest, self).tearDown()
        return

    @attr("integration")
    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listDatatiers()
        self.assertTrue('RAW' in results)
        self.assertTrue('GEN-SIM-RECO' in results)
        self.assertTrue('GEN-SIM' in results)
        self.assertFalse('RAW-ALAN' in results)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertEqual([173657], runs)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], None)
        runs = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertTrue(173657 in runs)
        self.assertEqual(runs[173657], None)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(details[TESTFILE]['BlockName'], '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
        self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]),
                         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                          27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                          51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
                          75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
                          99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111])

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['PhEDExNodeList'] if x['Name'].find('CH_CERN') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET, blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(4, len(files))
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0', files[0]['block_name'])
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0', files[0]['BlockName'])
        self.assertEqual('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4]+'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x and x.find('CH_CERN') > -1]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(len(block), 1)
        block = block['/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0']
        self.assertEqual('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(1, len(parents))
        self.assertEqual('/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0', parents[0]['Name'])
        sites = [x for x in parents[0]['PhEDExNodeList'] if x.find("CH_CERN") > -1]
        self.assertTrue(sites)

        self.assertFalse(self.dbs.listBlockParents('/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
Exemplo n.º 8
0
class DBSReaderTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs      = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset = DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset = DATASET, block = BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset = DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset = DATASET, block = BLOCK)
        self.assertEqual({173657 : 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integration")
    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        for endpoint in [self.endpoint, 'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:']:
            self.dbs = DBSReader(endpoint)
            details = self.dbs.listDatasetFileDetails(DATASET)
            self.assertEqual(len(details), 49)
            self.assertTrue(TESTFILE in details)
            self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
            self.assertEqual(details[TESTFILE]['Size'], 286021145)
            self.assertEqual(details[TESTFILE]['BlockName'], '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
            self.assertEqual(details[TESTFILE]['Checksums'],
                {'Checksum': '22218315', 'Adler32': 'a41a1446', 'Md5': 'NOTSET'}
            )
            self.assertTrue( 173658 in details[TESTFILE]['Lumis'])
            self.assertEqual( sorted(details[TESTFILE]['Lumis'][173658]),
                sorted( map( long, [8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37,  \
                                    25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55, \
                                    57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54,  \
                                    63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,\
                                    108, 98, 103, 109, 105]))
            )

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList'] if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName = BLOCK, onlyClosedBlocks = True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(files))
        self.assertEqual('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60', files[0]['Block']['Name'])
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4]+'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x and x.find('cern.ch') > -1]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(len(block), 1)
        block = block['/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        self.assertFalse(self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Exemplo n.º 9
0
class DBSWriter:
    """
    _DBSWriter_

    General API for writing data to DBS


    """
    def __init__(self, url, **contact):
        args = {"url": url, "level": 'ERROR'}
        args.update(contact)
        try:
            self.dbs = DbsApi(args)
            self.args = args
            self.version = args.get('version', None)
            self.globalDBSUrl = args.get('globalDBSUrl', None)
            self.globalVersion = args.get('globalVersion', None)
            if self.globalDBSUrl:
                globalArgs = {'url': url, 'level': 'ERROR'}
                globalArgs.update(contact)
                self.globalDBS = DbsApi(globalArgs)

        except DbsException as ex:
            msg = "Error in DBSWriterError with DbsApi\n"
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        self.reader = DBSReader(**args)

    def createDatasets(self, workflowSpec):
        """
        _createDatasets_

        Create All the output datasets found in the workflow spec instance
        provided

        """
        try:
            workflowSpec.payload.operate(
                _CreateDatasetOperator(self.dbs, workflowSpec))
        except DbsException as ex:
            msg = "Error in DBSWriter.createDatasets\n"
            msg += "For Workflow: %s\n" % workflowSpec.workflowName()
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        return

    def insertFilesForDBSBuffer(self,
                                files,
                                procDataset,
                                algos,
                                jobType="NotMerge",
                                insertDetectorData=False,
                                maxFiles=100,
                                maxSize=99999999,
                                timeOut=None,
                                fileCommitLength=5):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        #Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [
            DBSWriterObjects.createAlgorithmForInsert(dict(algo))
            for algo in algos
        ]

        #print ialgos

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(
                        lrun)  #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                #Checksum = str(outFile['cksum']),
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
                #BranchHash = outFile['BranchHash'],
            )
            #Set checksums by hand
            #dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile['checksums'].keys():
                #This should be a dictionary with a cktype key and cksum value
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])

            #This check comes from ProdAgent, not sure if its required
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []
        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs, procDataset, seName)
            fileBlock['files'] = []
            #if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock=fileBlock,
                                    maxFiles=maxFiles,
                                    maxSize=maxSize,
                                    timeOut=timeOut,
                                    algos=ialgos,
                                    filesToCommit=filesToCommit,
                                    procDataset=procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs, procDataset, seName)
                    fileBlock['files'] = []
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    logging.debug("Inserted files: %s to FileBlock: %s" \
                                  % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % (
                        [x['LogicalFileName'] for x in insertFiles], )
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

        if len(filesToCommit) > 0:
            try:
                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                filesToCommit = []
                logging.debug("Inserted files: %s to FileBlock: %s" \
                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % ([x['LogicalFileName']
                                   for x in insertFiles], )
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        ## Do bulk inserts now for DBS
        #filesToCommit = []
        #count         = 0
        #count2        = 0
        #for file in insertFiles:
        #    count += 1
        #    #Try and close the box
        #    logging.error("Should have a file")
        #    logging.error(len(filesToCommit))
        #    count2 += len(filesToCommit)
        #    if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
        #                            maxSize = maxSize, timeOut = timeOut, algos = ialgos,
        #                            filesToCommit = filesToCommit, procDataset = procDataset):
        #        fileBlock['OpenForWriting'] = '0'
        #        if not fileBlock in affectedBlocks:
        #            affectedBlocks.append(fileBlock)
        #
        #
        #
        #        # Then we need a new block
        #        try:
        #            fileBlock = DBSWriterObjects.getDBSFileBlock(
        #                self.dbs,
        #                procDataset,
        #                seName)
        #            fileBlock['files'] = []
        #        except DbsException, ex:
        #            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
        #            msg += "Cannot retrieve FileBlock for dataset:\n"
        #            msg += " %s\n" % procDataset['Path']
        #            msg += "%s\n" % formatEx(ex)
        #            raise DBSWriterError(msg)
        #    #At this point, we should commit the block as is
        #    fileBlock['files'].append(file['LogicalFileName'])
        #    if jobType == "MergeSpecial":
        #        for file in fileList:
        #            file['Block'] = fileBlock
        #            msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file))
        #            logging.debug(msg)
        #            try:
        #                #
        #                #
        #                # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well)
        #                self.dbs.insertMergedFile(file['ParentList'],
        #                                          file)
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert merged file:\n"
        #                msg += "  %s\n" % file['LogicalFileName']
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #            logging.debug("Inserted merged file: %s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name']))
        #    else:
        #        filesToCommit.append(file)
        #        if len(filesToCommit) >= fileCommitLength:
        #            # Only commit the files if there are more of them then the maximum length
        #            try:
        #                logging.error("About to commit %i files" %(len(filesToCommit)))
        #                count2 += len(filesToCommit)
        #                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #                filesToCommit = []
        #                logging.debug("Inserted files: %s to FileBlock: %s" \
        #                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert processed files:\n"
        #                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #
        #
        #
        #
        ## If we still have files to commit, commit them
        #logging.error("Got to the end of the loop")
        #logging.error(len(filesToCommit))
        #logging.error(count2)
        #if len(filesToCommit) > 0:
        #    try:
        #        logging.error("About to insert some files")
        #        self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #        filesToCommit = []
        #        logging.debug("Inserted files: %s to FileBlock: %s" \
        #                      % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #    except DbsException, ex:
        #        msg = "Error in DBSWriter.insertFiles\n"
        #        msg += "Cannot insert processed files:\n"
        #        msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #        msg += "%s\n" % formatEx(ex)
        #        raise DBSWriterError(msg)

        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)

    def insertFiles(self, fwkJobRep, insertDetectorData=False):
        """
        _insertFiles_

        Process the files in the FwkJobReport instance and insert
        them into the associated datasets

        A list of affected fileblock names is returned both for merged
        and unmerged fileblocks. Only merged blocks will have to be managed.
        #for merged file
        #blocks to facilitate management of those blocks.
        #This list is not populated for processing jobs since we dont really
        #care about the processing job blocks.

        """

        insertLists = {}
        orderedHashes = []
        affectedBlocks = set()

        if len(fwkJobRep.files) <= 0:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "No files found in FrameWorkJobReport for:\n"
            msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
            msg += " Workflow: %s" % fwkJobRep.workflowSpecId
            raise DBSWriterError(msg)

        for outFile in fwkJobRep.sortFiles():
            #  //
            # // Convert each file into a DBS File object
            #//
            seName = None
            if "SEName" in outFile:
                if outFile['SEName']:
                    seName = outFile['SEName']
                    logging.debug("SEname associated to file is: %s" % seName)


## remove the fallback to site se-name if no SE is associated to File
## because it's likely that there is some stage out problem if there
## is no SEName associated to the file.
#            if not seName:
#                if fwkJobRep.siteDetails.has_key("se-name"):
#                   seName = fwkJobRep.siteDetails['se-name']
#                   seName = str(seName)
#                   logging.debug("site SEname: %s"%seName)
            if not seName:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to files in FrameWorkJobReport for "
                #                msg += "No SEname found in FrameWorkJobReport for "
                msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
                msg += " Workflow: %s" % fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)
            try:
                if (insertDetectorData):
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType, self.dbs)
                else:
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles:\n"
                msg += "Error creating DbsFile instances for file:\n"
                msg += "%s\n" % outFile['LFN']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if len(dbsFiles) <= 0:
                msg = "No DbsFile instances created. Not enough info in the FrameWorkJobReport for"
                msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
                msg += " Workflow: %s" % fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)

            for f in dbsFiles:
                datasetName = makeDBSDSName(f)
                hashName = "%s-%s" % (seName, datasetName)

                if hashName not in insertLists:
                    insertLists[hashName] = _InsertFileList(
                        seName, datasetName)
                insertLists[hashName].append(f)

                if not orderedHashes.count(hashName):
                    orderedHashes.append(hashName)

        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        for hash in orderedHashes:

            fileList = insertLists[hash]
            procDataset = fileList[0]['Dataset']

            try:
                fileBlock = DBSWriterObjects.getDBSFileBlock(
                    self.dbs, procDataset, fileList.seName)

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset
                msg += "In Storage Element:\n %s\n" % fileList.seName
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if fwkJobRep.jobType == "Merge":
                #  //
                # // Merge files
                #//
                for mergedFile in fileList:
                    mergedFile['Block'] = fileBlock
                    affectedBlocks.add(fileBlock['Name'])
                    msg = "calling: self.dbs.insertMergedFile(%s, %s)" % (str(
                        mergedFile['ParentList']), str(mergedFile))
                    logging.debug(msg)
                    try:
                        self.dbs.insertMergedFile(mergedFile['ParentList'],
                                                  mergedFile)

                    except DbsException as ex:
                        msg = "Error in DBSWriter.insertFiles\n"
                        msg += "Cannot insert merged file:\n"
                        msg += "  %s\n" % mergedFile['LogicalFileName']
                        msg += "%s\n" % formatEx(ex)
                        raise DBSWriterError(msg)
                    logging.debug(
                        "Inserted merged file: %s to FileBlock: %s" %
                        (mergedFile['LogicalFileName'], fileBlock['Name']))
            else:
                #  //
                # // Processing files
                #//
                affectedBlocks.add(fileBlock['Name'])
                msg = "calling: self.dbs.insertFiles(%s, %s, %s)" % (
                    str(procDataset), str(list(fileList)), str(fileBlock))
                logging.debug(msg)

                try:
                    self.dbs.insertFiles(procDataset, list(fileList),
                                         fileBlock)
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % ([x['LogicalFileName']
                                       for x in fileList], )

                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
                logging.debug("Inserted files: %s to FileBlock: %s" %
                              (([x['LogicalFileName']
                                 for x in fileList]), fileBlock['Name']))

        return list(affectedBlocks)

    def manageFileBlock(self,
                        fileBlock,
                        maxFiles=100,
                        maxSize=None,
                        timeOut=None,
                        algos=[],
                        filesToCommit=[],
                        procDataset=None):
        """
        _manageFileBlock_

        Check to see wether the fileblock with the provided name
        is closeable based on number of files or total size.

        If the block equals or exceeds wither the maxFiles or maxSize
        parameters, close the block and return True, else do nothing and
        return False

        """

        #  //
        # // Check that the block exists, and is open before we close it
        #//

        fileblockName = fileBlock['Name']

        blockInstance = self.dbs.listBlocks(block_name=fileblockName)
        if len(blockInstance) > 1:
            msg = "Multiple Blocks matching name: %s\n" % fileblockName
            msg += "Unable to manage file block..."
            raise DBSWriterError(msg)

        if len(blockInstance) == 0:
            msg = "Block name %s not found\n" % fileblockName
            msg += "Cant manage a non-existent fileblock"
            raise DBSWriterError(msg)
        blockInstance = blockInstance[0]
        isClosed = blockInstance.get('OpenForWriting', '1')
        if isClosed != '1':
            msg = "Block %s already closed" % fileblockName
            logging.warning(msg)
            # Now we need to commit files
            if len(filesToCommit) > 0:
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    raise DBSWriterError(msg)

            # Attempting to migrate to global
            if self.globalDBSUrl:

                self.dbs.dbsMigrateBlock(
                    srcURL=self.args['url'],
                    dstURL=self.globalDBSUrl,
                    block_name=fileblockName,
                    srcVersion=self.version,
                    dstVersion=self.globalVersion,
                )
                #for algo in algos:
                #    self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]),
                #                                  algorithm = algo)
                logging.info(
                    "Migrated block %s to global due to pre-closed status" %
                    (fileblockName))
            else:
                logging.error(
                    "Should've migrated block %s because it was already closed, but didn't"
                    % (fileblockName))
            return True

        #  //
        # // We have an open block, sum number of files and file sizes
        #//

        #fileCount = int(blockInstance.get('NumberOfFiles', 0))
        fileCount = len(fileBlock['files'])
        totalSize = float(blockInstance.get('BlockSize', 0))

        msg = "Fileblock: %s\n ==> Size: %s Files: %s\n" % (
            fileblockName, totalSize, fileCount)
        logging.warning(msg)

        #  //
        # // Test close block conditions
        #//
        closeBlock = False
        if timeOut:
            if int(time.time()) - int(blockInstance['CreationDate']) > timeOut:
                closeBlock = True
                msg = "Closing Block based on timeOut: %s" % fileblockName
                logging.debug(msg)
        if fileCount >= maxFiles:
            closeBlock = True
            msg = "Closing Block Based on files: %s" % fileblockName
            logging.debug(msg)

        if maxSize != None:
            if totalSize >= maxSize:
                closeBlock = True
                msg = "Closing Block Based on size: %s" % fileblockName
                logging.debug(msg)

        if closeBlock:
            # Now we need to commit files
            if len(filesToCommit) > 0:
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    #logging.debug("Inserted files: %s to FileBlock: %s" \
                    #              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    #msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
            #  //
            # // Close the block
            #//
            self.dbs.closeBlock(
                DBSWriterObjects.createDBSFileBlock(fileblockName))
            if self.globalDBSUrl:
                self.dbs.dbsMigrateBlock(srcURL=self.args['url'],
                                         dstURL=self.globalDBSUrl,
                                         block_name=fileblockName,
                                         srcVersion=self.version,
                                         dstVersion=self.globalVersion)
                for algo in algos:
                    pass
                    #self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]),
                    #                              algorithm = algo)

                logging.info("Migrated block %s to global" % (fileblockName))
            else:
                logging.error("Should've migrated block %s, but didn't" %
                              (fileblockName))
        return closeBlock

    def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
        """
        _migrateDatasetBlocks_

        Migrate the list of fileblocks provided by blocks, belonging
        to the dataset specified by the dataset path to this DBS instance
        from the inputDBSUrl provided

        - *inputDBSUrl* : URL for connection to input DBS
        - *datasetPath* : Name of dataset in input DBS (must exist in input
                          DBS)
        - *blocks*      : list of block names to be migrated (must exist
                          in input DBS)

        """
        if len(blocks) == 0:
            msg = "FileBlocks not provided.\n"
            msg += "You must provide the name of at least one fileblock\n"
            msg += "to be migrated"
            raise DBSWriterError(msg)
        #  //
        # // Hook onto input DBSUrl and verify that the dataset & blocks
        #//  exist
        reader = DBSReader(inputDBSUrl)

        inputBlocks = reader.listFileBlocks(datasetPath)

        for block in blocks:
            #  //
            # // Test block exists at source
            #//
            if block not in inputBlocks:
                msg = "Block name:\n ==> %s\n" % block
                msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
                msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
                raise DBSWriterError(msg)

            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if not self.reader.blockIsOpen(block):
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Migration of that block"
                    logging.warning(msg)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(datasetPath, block)
            except DbsException as ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    datasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            xferData = _remapBlockParentage(datasetPath, xferData)

            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException as ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    datasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            del xferData

        return

    def importDatasetWithExistingParents(self,
                                         sourceDBS,
                                         sourceDatasetPath,
                                         targetDBS,
                                         onlyClosed=True):
        """
        _importDataset_

        Import a dataset into the local scope DBS.
        It complains if the parent dataset ar not there!!

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath,
                                               onlyClosed,
                                               locations=False)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if not str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(
                    sourceDatasetPath, block)
            except DbsException as ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException as ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            del xferData

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block, sename)

        return

    def importDataset(self,
                      sourceDBS,
                      sourceDatasetPath,
                      targetDBS,
                      onlyClosed=True):
        """
        _importDataset_

        Import a dataset into the local scope DBS with full parentage hirerarchy
        (at least not slow because branches info is dropped)

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath,
                                               onlyClosed,
                                               locations=False)
        blkCounter = 0
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            blkCounter = blkCounter + 1
            msg = "Importing block %s of %s: %s " % (blkCounter,
                                                     len(inputBlocks), block)
            logging.debug(msg)
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDataset\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:

                self.dbs.migrateDatasetContents(sourceDBS,
                                                targetDBS,
                                                sourceDatasetPath,
                                                block_name=block,
                                                noParentsReadOnly=False)
            except DbsException as ex:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block, sename)

        return

    def importDatasetWithoutParentage(self,
                                      sourceDBS,
                                      sourceDatasetPath,
                                      targetDBS,
                                      onlyClosed=True):
        """
        _importDataset_

        Import a dataset into the local scope DBS with one level parentage,
        however it has severe limitation on its use due to the "ReadOnly" concept.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath,
                                               onlyClosed,
                                               locations=False)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithoutParentage\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:
                self.dbs.migrateDatasetContents(sourceDBS,
                                                targetDBS,
                                                sourceDatasetPath,
                                                block_name=block,
                                                noParentsReadOnly=True)
            except DbsException as ex:
                msg = "Error in DBSWriter.importDatasetWithoutParentage\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                msg = "Error in DBSWriter.importDatasetWithoutParentage\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block, sename)

        return

    def getOutputDatasetsWithPSet(payloadNode):
        """
        _getOutputDatasetsWithPSet_

        Extract all the information about output datasets from the
        payloadNode object provided, including the {{}} format PSet cfg

        Returns a list of DatasetInfo objects including App details
        from the node.

        """
        result = []

        for item in payloadNode._OutputDatasets:
            resultEntry = DatasetInfo()
            resultEntry.update(item)
            resultEntry["ApplicationName"] = payloadNode.application[
                'Executable']
            resultEntry["ApplicationProject"] = payloadNode.application[
                'Project']
            resultEntry["ApplicationVersion"] = payloadNode.application[
                'Version']
            resultEntry["ApplicationFamily"] = item.get(
                "OutputModuleName", "AppFamily")

            try:
                config = payloadNode.cfgInterface
                psetStr = config.originalContent()
                resultEntry['PSetContent'] = psetStr
            except Exception as ex:
                resultEntry['PSetContent'] = None

            result.append(resultEntry)

        return _sortDatasets(result)
Exemplo n.º 10
0
class DBSWriter:
    """
    _DBSWriter_

    General API for writing data to DBS


    """
    def __init__(self, url,  **contact):
        args = { "url" : url, "level" : 'ERROR'}
        args.update(contact)
        try:
            self.dbs           = DbsApi(args)
            self.args          = args
            self.version       = args.get('version', None)
            self.globalDBSUrl  = args.get('globalDBSUrl', None)
            self.globalVersion = args.get('globalVersion', None)
            if self.globalDBSUrl:
                globalArgs = {'url': url, 'level': 'ERROR'}
                globalArgs.update(contact)
                self.globalDBS = DbsApi(globalArgs)

        except DbsException as ex:
            msg = "Error in DBSWriterError with DbsApi\n"
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        self.reader = DBSReader(**args)

    def createDatasets(self, workflowSpec):
        """
        _createDatasets_

        Create All the output datasets found in the workflow spec instance
        provided

        """
        try:
            workflowSpec.payload.operate(
                _CreateDatasetOperator(self.dbs, workflowSpec)
                )
        except DbsException as ex:
            msg = "Error in DBSWriter.createDatasets\n"
            msg += "For Workflow: %s\n" % workflowSpec.workflowName()
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        return

    def insertFilesForDBSBuffer(self, files, procDataset, algos,
                                jobType = "NotMerge", insertDetectorData = False,
                                maxFiles = 100, maxSize = 99999999, timeOut = None,
                                fileCommitLength = 5):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles =  []
        addedRuns=[]
        pnn = None

        #Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos ]

        #print ialgos

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun=long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber = lrun,
                    NumberOfEvents = 0,
                    NumberOfLumiSections = 0,
                    TotalLuminosity = 0,
                    StoreNumber = 0,
                    StartOfRun = 0,
                    EndOfRun = 0,
                    )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(lrun) #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                            LumiSectionNumber = long(alsn),
                            StartEventNumber = 0,
                            EndEventNumber = 0,
                            LumiStartTime = 0,
                            LumiEndTime = 0,
                            RunNumber = lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                              #Checksum = str(outFile['cksum']),
                              NumberOfEvents = outFile['events'],
                              LogicalFileName = outFile['lfn'],
                              FileSize = int(outFile['size']),
                              Status = "VALID",
                              ValidationStatus = 'VALID',
                              FileType = 'EDM',
                              Dataset = procDataset,
                              TierList = DBSWriterObjects.makeTierList(procDataset['Path'].split('/')[3]),
                              AlgoList = ialgos,
                              LumiList = lumiList,
                              ParentList = outFile.getParentLFNs(),
                              #BranchHash = outFile['BranchHash'],
                            )
            #Set checksums by hand
            #dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile['checksums'].keys():
                #This should be a dictionary with a cktype key and cksum value
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])



            #This check comes from ProdAgent, not sure if its required
            if len(outFile["locations"]) > 0:
                pnn = list(outFile["locations"])[0]
                logging.debug("PNN associated to file is: %s"%pnn)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No PNN associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks


        sumSize   = 0
        sumFiles  = 0
        tmpFiles  = []
        blockList = []
        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs,
                procDataset,
                pnn)
            fileBlock['files'] = []
            #if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)



        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
                                    maxSize = maxSize, timeOut = timeOut, algos = ialgos,
                                    filesToCommit = filesToCommit, procDataset = procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs,
                        procDataset,
                        pnn)
                    fileBlock['files'] = []
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                    # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    logging.debug("Inserted files: %s to FileBlock: %s" \
                                  % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)


        if len(filesToCommit) > 0:
            try:
                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                filesToCommit = []
                logging.debug("Inserted files: %s to FileBlock: %s" \
                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)


        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)




        ## Do bulk inserts now for DBS
        #filesToCommit = []
        #count         = 0
        #count2        = 0
        #for file in insertFiles:
        #    count += 1
        #    #Try and close the box
        #    logging.error("Should have a file")
        #    logging.error(len(filesToCommit))
        #    count2 += len(filesToCommit)
        #    if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
        #                            maxSize = maxSize, timeOut = timeOut, algos = ialgos,
        #                            filesToCommit = filesToCommit, procDataset = procDataset):
        #        fileBlock['OpenForWriting'] = '0'
        #        if not fileBlock in affectedBlocks:
        #            affectedBlocks.append(fileBlock)
        #
        #
        #
        #        # Then we need a new block
        #        try:
        #            fileBlock = DBSWriterObjects.getDBSFileBlock(
        #                self.dbs,
        #                procDataset,
        #                seName)
        #            fileBlock['files'] = []
        #        except DbsException, ex:
        #            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
        #            msg += "Cannot retrieve FileBlock for dataset:\n"
        #            msg += " %s\n" % procDataset['Path']
        #            msg += "%s\n" % formatEx(ex)
        #            raise DBSWriterError(msg)
        #    #At this point, we should commit the block as is
        #    fileBlock['files'].append(file['LogicalFileName'])
        #    if jobType == "MergeSpecial":
        #        for file in fileList:
        #            file['Block'] = fileBlock
        #            msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file))
        #            logging.debug(msg)
        #            try:
        #                #
        #                #
        #                # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well)
        #                self.dbs.insertMergedFile(file['ParentList'],
        #                                          file)
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert merged file:\n"
        #                msg += "  %s\n" % file['LogicalFileName']
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #            logging.debug("Inserted merged file: %s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name']))
        #    else:
        #        filesToCommit.append(file)
        #        if len(filesToCommit) >= fileCommitLength:
        #            # Only commit the files if there are more of them then the maximum length
        #            try:
        #                logging.error("About to commit %i files" %(len(filesToCommit)))
        #                count2 += len(filesToCommit)
        #                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #                filesToCommit = []
        #                logging.debug("Inserted files: %s to FileBlock: %s" \
        #                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert processed files:\n"
        #                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #
        #
        #
        #
        ## If we still have files to commit, commit them
        #logging.error("Got to the end of the loop")
        #logging.error(len(filesToCommit))
        #logging.error(count2)
        #if len(filesToCommit) > 0:
        #    try:
        #        logging.error("About to insert some files")
        #        self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #        filesToCommit = []
        #        logging.debug("Inserted files: %s to FileBlock: %s" \
        #                      % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #    except DbsException, ex:
        #        msg = "Error in DBSWriter.insertFiles\n"
        #        msg += "Cannot insert processed files:\n"
        #        msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #        msg += "%s\n" % formatEx(ex)
        #        raise DBSWriterError(msg)


        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)








    def insertFiles(self, fwkJobRep, insertDetectorData = False):
        """
        _insertFiles_

        Process the files in the FwkJobReport instance and insert
        them into the associated datasets

        A list of affected fileblock names is returned both for merged
        and unmerged fileblocks. Only merged blocks will have to be managed.
        #for merged file
        #blocks to facilitate management of those blocks.
        #This list is not populated for processing jobs since we dont really
        #care about the processing job blocks.

        """

        insertLists = {}
        orderedHashes = []
        affectedBlocks = set()

        if len(fwkJobRep.files)<=0:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "No files found in FrameWorkJobReport for:\n"
            msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId
            msg += " Workflow: %s"%fwkJobRep.workflowSpecId
            raise DBSWriterError(msg)


        for outFile in fwkJobRep.sortFiles():
            #  //
            # // Convert each file into a DBS File object
            #//
            pnn = None
            if "PNN" in outFile:
                if outFile['PNN'] :
                    pnn = outFile['PNN']
                    logging.debug("PNN associated to file is: %s"%pnn)
## remove the fallback to site se-name if no SE is associated to File
## because it's likely that there is some stage out problem if there
## is no SEName associated to the file.
#            if not seName:
#                if fwkJobRep.siteDetails.has_key("se-name"):
#                   seName = fwkJobRep.siteDetails['se-name']
#                   seName = str(seName)
#                   logging.debug("site SEname: %s"%seName)
            if not pnn:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No PNN associated to files in FrameWorkJobReport for "
#                msg += "No SEname found in FrameWorkJobReport for "
                msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId
                msg += " Workflow: %s"%fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)
            try:
                if ( insertDetectorData ):
                    dbsFiles = DBSWriterObjects.createDBSFiles(outFile,
                                                               fwkJobRep.jobType,
                                                               self.dbs)
                else:
                    dbsFiles = DBSWriterObjects.createDBSFiles(outFile,
                                                               fwkJobRep.jobType)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles:\n"
                msg += "Error creating DbsFile instances for file:\n"
                msg += "%s\n" % outFile['LFN']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if len(dbsFiles)<=0:
                msg="No DbsFile instances created. Not enough info in the FrameWorkJobReport for"
                msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId
                msg += " Workflow: %s"%fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)

            for f in dbsFiles:
                datasetName = makeDBSDSName(f)
                hashName = "%s-%s" % (pnn, datasetName)

                if hashName not in insertLists:
                    insertLists[hashName] = _InsertFileList(pnn,
                                                            datasetName)
                insertLists[hashName].append(f)

                if not orderedHashes.count(hashName):
                    orderedHashes.append(hashName)


        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        for hash in orderedHashes:

            fileList = insertLists[hash]
            procDataset = fileList[0]['Dataset']


            try:
                fileBlock = DBSWriterObjects.getDBSFileBlock(
                    self.dbs,
                    procDataset,
                    fileList.pnn)

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset
#                msg += "In Storage Element:\n %s\n" % fileList.seName
                msg += "In PNN:\n %s\n" % fileList.pnn
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if fwkJobRep.jobType == "Merge":
                #  //
                # // Merge files
                #//
                for mergedFile in fileList:
                    mergedFile['Block'] = fileBlock
                    affectedBlocks.add(fileBlock['Name'])
                    msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(mergedFile['ParentList']),str(mergedFile))
                    logging.debug(msg)
                    try:
                        self.dbs.insertMergedFile(mergedFile['ParentList'],
                                                  mergedFile)

                    except DbsException as ex:
                        msg = "Error in DBSWriter.insertFiles\n"
                        msg += "Cannot insert merged file:\n"
                        msg += "  %s\n" % mergedFile['LogicalFileName']
                        msg += "%s\n" % formatEx(ex)
                        raise DBSWriterError(msg)
                    logging.debug("Inserted merged file: %s to FileBlock: %s"%(mergedFile['LogicalFileName'],fileBlock['Name']))
            else:
                #  //
                # // Processing files
                #//
                affectedBlocks.add(fileBlock['Name'])
                msg="calling: self.dbs.insertFiles(%s, %s, %s)" % (str(procDataset),str(list(fileList)),str(fileBlock))
                logging.debug(msg)

                try:
                    self.dbs.insertFiles(procDataset, list(fileList),
                                         fileBlock)
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % (
                        [ x['LogicalFileName'] for x in fileList ],
                        )

                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
                logging.debug("Inserted files: %s to FileBlock: %s"%( ([ x['LogicalFileName'] for x in fileList ]),fileBlock['Name']))

        return list(affectedBlocks)



    def manageFileBlock(self, fileBlock, maxFiles = 100, maxSize = None,
                        timeOut = None, algos = [], filesToCommit = [],
                        procDataset = None):
        """
        _manageFileBlock_

        Check to see wether the fileblock with the provided name
        is closeable based on number of files or total size.

        If the block equals or exceeds wither the maxFiles or maxSize
        parameters, close the block and return True, else do nothing and
        return False

        """

        #  //
        # // Check that the block exists, and is open before we close it
        #//

        fileblockName = fileBlock['Name']

        blockInstance = self.dbs.listBlocks(block_name=fileblockName)
        if len(blockInstance) > 1:
            msg = "Multiple Blocks matching name: %s\n" % fileblockName
            msg += "Unable to manage file block..."
            raise DBSWriterError(msg)

        if len(blockInstance) == 0:
            msg = "Block name %s not found\n" % fileblockName
            msg += "Cant manage a non-existent fileblock"
            raise DBSWriterError(msg)
        blockInstance = blockInstance[0]
        isClosed = blockInstance.get('OpenForWriting', '1')
        if isClosed != '1':
            msg = "Block %s already closed" % fileblockName
            logging.warning(msg)
            # Now we need to commit files
            if len(filesToCommit) > 0:
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []


                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    raise DBSWriterError(msg)

            # Attempting to migrate to global
            if self.globalDBSUrl:

                self.dbs.dbsMigrateBlock(srcURL = self.args['url'],
                                         dstURL = self.globalDBSUrl,
                                         block_name = fileblockName,
                                         srcVersion = self.version,
                                         dstVersion = self.globalVersion,
                                         )
                #for algo in algos:
                #    self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]),
                #                                  algorithm = algo)
                logging.info("Migrated block %s to global due to pre-closed status" %(fileblockName))
            else:
                logging.error("Should've migrated block %s because it was already closed, but didn't" % (fileblockName))
            return True



        #  //
        # // We have an open block, sum number of files and file sizes
        #//

        #fileCount = int(blockInstance.get('NumberOfFiles', 0))
        fileCount = len(fileBlock['files'])
        totalSize = float(blockInstance.get('BlockSize', 0))

        msg = "Fileblock: %s\n ==> Size: %s Files: %s\n" % (
            fileblockName, totalSize, fileCount)
        logging.warning(msg)

        #  //
        # // Test close block conditions
        #//
        closeBlock = False
        if timeOut:
            if int(time.time()) - int(blockInstance['CreationDate']) > timeOut:
                closeBlock = True
                msg = "Closing Block based on timeOut: %s" % fileblockName
                logging.debug(msg)
        if fileCount >= maxFiles:
            closeBlock = True
            msg = "Closing Block Based on files: %s" % fileblockName
            logging.debug(msg)

        if maxSize != None:
            if totalSize >= maxSize:
                closeBlock = True
                msg = "Closing Block Based on size: %s" % fileblockName
                logging.debug(msg)


        if closeBlock:
            # Now we need to commit files
            if len(filesToCommit) > 0:
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    #logging.debug("Inserted files: %s to FileBlock: %s" \
                    #              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    #msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
            #  //
            # // Close the block
            #//
            self.dbs.closeBlock(
                DBSWriterObjects.createDBSFileBlock(fileblockName)
                )
            if self.globalDBSUrl:
                self.dbs.dbsMigrateBlock(srcURL = self.args['url'],
                                         dstURL = self.globalDBSUrl,
                                         block_name = fileblockName,
                                         srcVersion = self.version,
                                         dstVersion = self.globalVersion
                                         )
                for algo in algos:
                    pass
                    #self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]),
                    #                              algorithm = algo)

                logging.info("Migrated block %s to global" %(fileblockName))
            else:
                logging.error("Should've migrated block %s, but didn't" % (fileblockName))
        return closeBlock



    def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
        """
        _migrateDatasetBlocks_

        Migrate the list of fileblocks provided by blocks, belonging
        to the dataset specified by the dataset path to this DBS instance
        from the inputDBSUrl provided

        - *inputDBSUrl* : URL for connection to input DBS
        - *datasetPath* : Name of dataset in input DBS (must exist in input
                          DBS)
        - *blocks*      : list of block names to be migrated (must exist
                          in input DBS)

        """
        if len(blocks) == 0:
            msg = "FileBlocks not provided.\n"
            msg += "You must provide the name of at least one fileblock\n"
            msg += "to be migrated"
            raise DBSWriterError(msg)
        #  //
        # // Hook onto input DBSUrl and verify that the dataset & blocks
        #//  exist
        reader = DBSReader(inputDBSUrl)

        inputBlocks = reader.listFileBlocks(datasetPath)

        for block in blocks:
            #  //
            # // Test block exists at source
            #//
            if block not in inputBlocks:
                msg = "Block name:\n ==> %s\n" % block
                msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
                msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
                raise DBSWriterError(msg)

            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if not self.reader.blockIsOpen(block):
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Migration of that block"
                    logging.warning(msg)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(datasetPath,  block)
            except DbsException as ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    datasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            xferData = _remapBlockParentage(datasetPath, xferData)

            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException as ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    datasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            del xferData


        return

    def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True):
        """
        _importDataset_

        Import a dataset into the local scope DBS.
        It complains if the parent dataset ar not there!!

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations = False)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if not str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for pnn in locations:
                        self.dbs.addReplicaToBlock(block,pnn)
                        logging.info(pnn)
                    continue


            try:
                xferData = reader.dbs.listDatasetContents(
                    sourceDatasetPath,  block
                    )
            except DbsException as ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException as ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            del xferData

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            for pnn in locations:
                self.dbs.addReplicaToBlock(block,pnn)

        return

    def importDataset(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True):
        """
        _importDataset_

        Import a dataset into the local scope DBS with full parentage hirerarchy
        (at least not slow because branches info is dropped)

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations = False)
        blkCounter=0
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            blkCounter=blkCounter+1
            msg="Importing block %s of %s: %s " % (blkCounter,len(inputBlocks),block)
            logging.debug(msg)
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDataset\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for pnn in locations:
                        self.dbs.addReplicaToBlock(block,pnn)
                        logging.info(pnn)
                    continue

            try:

                self.dbs.migrateDatasetContents(sourceDBS, targetDBS, sourceDatasetPath, block_name=block, noParentsReadOnly = False)
            except DbsException as ex:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            for pnn in locations:
                self.dbs.addReplicaToBlock(block,pnn)

        return


    def importDatasetWithoutParentage(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True):
        """
        _importDataset_

        Import a dataset into the local scope DBS with one level parentage,
        however it has severe limitation on its use due to the "ReadOnly" concept.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations = False)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithoutParentage\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for pnn in locations:
                        self.dbs.addReplicaToBlock(block,pnn)
                        logging.info(pnn)
                    continue

            try:
                self.dbs.migrateDatasetContents(sourceDBS, targetDBS, sourceDatasetPath, block_name=block, noParentsReadOnly = True )
            except DbsException as ex:
                msg = "Error in DBSWriter.importDatasetWithoutParentage\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                msg = "Error in DBSWriter.importDatasetWithoutParentage\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            for pnn in locations:
                self.dbs.addReplicaToBlock(block,pnn)

        return


    def getOutputDatasetsWithPSet(payloadNode):
        """
        _getOutputDatasetsWithPSet_

        Extract all the information about output datasets from the
        payloadNode object provided, including the {{}} format PSet cfg

        Returns a list of DatasetInfo objects including App details
        from the node.

        """
        result = []

        for item in payloadNode._OutputDatasets:
            resultEntry = DatasetInfo()
            resultEntry.update(item)
            resultEntry["ApplicationName"] = payloadNode.application['Executable']
            resultEntry["ApplicationProject"] = payloadNode.application['Project']
            resultEntry["ApplicationVersion"] = payloadNode.application['Version']
            resultEntry["ApplicationFamily"] = item.get("OutputModuleName", "AppFamily")

            try:
                config = payloadNode.cfgInterface
                psetStr = config.originalContent()
                resultEntry['PSetContent'] = psetStr
            except Exception as ex:
                resultEntry['PSetContent'] = None

            result.append(resultEntry)

        return _sortDatasets(result)
Exemplo n.º 11
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        # self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets("Jet*")
        self.assertTrue("Jet" in results)
        self.assertTrue("JetMET" in results)
        self.assertTrue("JetMETTau" in results)
        self.assertFalse(self.dbs.listPrimaryDatasets("DoesntExist"))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v1")
        self.assertEqual(1, len(dataset))
        self.assertEqual(["/Jet/Run2011A-v1/RAW"], dataset[0]["PathList"])
        self.assertEqual("Run2011A-v1", dataset[0]["Name"])
        self.assertFalse(self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v666"))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets("Jet", "RAW")
        self.assertTrue("Run2011A-v1" in datasets)
        self.assertTrue("Run2011B-v1" in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets("Jet", "blah"))
        self.assertFalse(self.dbs.listProcessedDatasets("blah", "RAW"))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integrtion")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset["path"], DATASET)
        self.assertEqual(dataset["block"], "")
        self.assertEqual(dataset["NumberOfEvents"], "22075")
        self.assertEqual(dataset["NumberOfBlocks"], "46")
        self.assertEqual(dataset["total_size"], "4001680824")
        self.assertEqual(dataset["NumberOfFiles"], "49")
        self.assertEqual(dataset["NumberOfLumis"], "7223")

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block["path"], "")
        self.assertEqual(block["block"], BLOCK)
        self.assertEqual(block["NumberOfEvents"], "377")
        self.assertEqual(block["NumberOfBlocks"], "1")
        self.assertEqual(block["total_size"], "150780132")
        self.assertEqual(block["NumberOfFiles"], "2")
        self.assertEqual(block["NumberOfLumis"], "94")

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + "blah")
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + "asas")

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block["Name"] in [x["Name"] for x in blocks])
        self.assertEqual(BLOCK, block["Name"])
        # self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block["BlockSize"])
        self.assertEqual(2, block["NumberOfFiles"])
        # possibly fragile but assume block located at least at cern
        sites = [x["Name"] for x in block["StorageElementList"] if x["Name"].find("cern.ch") > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + "blah")
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + "asas"))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + "#somethingelse"))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x["LogicalFileName"] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + "#blah")

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        )
        self.assertEqual(1, len(files))
        self.assertEqual(
            "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60", files[0]["Block"]["Name"]
        )
        self.assertEqual(
            "/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root",
            files[0]["ParentList"][0]["LogicalFileName"],
        )

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + "asas")

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + "asas")

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        self.dbs = DBSReader(self.endpoint)
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x.find("cern.ch") > -1]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + "blah"))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block["Files"]))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + "asas")

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        )
        self.assertEqual(len(block), 1)
        block = block["/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"]
        self.assertEqual(
            "/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root",
            block["Files"][0]["ParentList"][0]["LogicalFileName"],
        )

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + "asas")

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents("/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60")
        self.assertEqual(1, len(parents))
        self.assertEqual("/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60", parents[0]["Name"])
        sites = [x for x in parents[0]["StorageElementList"] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents("/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl")
        )

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + "asas"))