Example #1
    def __call__(self):
        """
        _operator()_

        Load PU dataset information from DBS

        """
        
        
        reader = DBSReader(self.dbsUrl)
        blocks = reader.listFileBlocks(self.dataset, False)
        
        for block in blocks:
            #  //
            # // Populate locations
            #//
            locations = reader.listFileBlockLocation(block)
            if locations:
                self.blockSites[block] = locations
            for location in locations:
                if not self.sites.has_key(location):
                    self.sites[location] = set()
                self.sites[location].add(block)
            #  //
            # // Populate File list for block
            #//
            self[block] = reader.lfnsInBlock(block)

        return
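
For orientation, here is a stripped-down, hypothetical sketch of the same DBSReader calls outside the class. The DBS URL, dataset path and import path are placeholders/assumptions, not values taken from the example above.

# Minimal standalone sketch of the DBSReader calls used in the example above.
# The import path and all literal values are assumptions; adjust to your setup.
from ProdCommon.DataMgmt.DBS.DBSReader import DBSReader

reader = DBSReader("https://cmsdbs.example.com/servlet/DBSServlet")       # placeholder DBS URL
for block in reader.listFileBlocks("/Primary/Processed/TIER", False):     # False: include open blocks
    locations = reader.listFileBlockLocation(block)    # storage elements hosting this block
    lfns = reader.lfnsInBlock(block)                   # logical file names in this block
    print("%s: %s files at %s" % (block, len(lfns), locations))
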
Example #2
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(analysisDataset = self.inputDataset(),
                                        retriveList = [ 'retrive_block',
                                                        'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if not blocks.has_key(block):
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            [ wmbsFile['locations'].add(x) for x in blocks[block] ]
            wmbsFile['block'] = block
            thefiles.addFile(
                wmbsFile
                )


        work = Workflow()
        subs = Subscription(
            fileset = thefiles,
            workflow = work,
            split_algo = 'FileBased',
            type = "Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job = self.splitSize)



        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            [ jobDef['SENames'].extend(list(x['locations']))
              for x  in job.listFiles() ]
            jobDefs.append(jobDef)


        return jobDefs
Example #3
def tmdbInjectBlock(dbsUrl,
                    datasetPath,
                    blockName,
                    phedexConfig,
                    workingDir="/tmp",
                    nodes=None,
                    storageElements=None):
    """
    _tmdbInjectBlock_

    Util Method for injecting a fileblock into TMDB

    

    """

    fileName = blockName.replace("/", "_")
    fileName = fileName.replace("#", "")
    dropXML = "%s/%s-PhEDExDrop.xml" % (workingDir, fileName)

    xmlContent = makePhEDExDrop(dbsUrl, datasetPath, blockName)
    handle = open(dropXML, 'w')
    handle.write(xmlContent)
    handle.close()

    reader = DBSReader(dbsUrl)

    if not storageElements:
        storageElements = reader.listFileBlockLocation(blockName)

    tmdbInject(phedexConfig, dropXML, nodes, *storageElements)

    return
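
A hypothetical driver for the helper above: it lists the blocks of a dataset with DBSReader.listFileBlocks and feeds each one to tmdbInjectBlock, leaving storageElements unset so the function resolves the block location itself. All literal values (URLs, dataset, PhEDEx config path) are placeholders, not taken from the source.

# Hypothetical usage sketch for tmdbInjectBlock; every literal value is a placeholder.
# Assumes DBSReader is imported as in the snippets above.
dbsUrl = "https://cmsdbs.example.com/servlet/DBSServlet"
datasetPath = "/Primary/Processed/TIER"
phedexConfig = "/path/to/phedex-config.xml"

reader = DBSReader(dbsUrl)
for blockName in reader.listFileBlocks(datasetPath, True):      # True: closed blocks only
    # storageElements=None lets tmdbInjectBlock look up the block location itself
    tmdbInjectBlock(dbsUrl, datasetPath, blockName, phedexConfig,
                    workingDir="/tmp", nodes=None, storageElements=None)
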
Example #4
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset '+self.dataset_to_check+' in DBS url '+ self.DBSURL+'\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:   
            primds=self.dataset_to_check.split('/')[1]
            procds=self.dataset_to_check.split('/')[2]
            tier=self.dataset_to_check.split('/')[3]
            datasets=dbsreader.matchProcessedDatasets(primds,tier,procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList'))==0:
                print "===== Empty dataset yet /%s/%s with tiers %s"%(dataset.get('PrimaryDataset')['Name'],dataset.get('Name'),dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot=0
                    print "=== dataset %s"%datasetpath
                    ### FEDE #######
                    if dataset['Description'] != None:
                        print "=== dataset description = ", dataset['Description']
                    ################    
                    blocks=dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList=dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" %block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" %block['OpenForWriting']
                        print "      Number of files: %s"%block['NumberOfFiles']
                        print "      Number of Bytes: %s"%block['BlockSize']
                        print "      Number of Events: %s"%block['NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files=dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s"%(file['FileSize'],file['NumberOfEvents'],file['LogicalFileName'],file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n"%(nevttot,datasetpath)
        if not common.debugLevel:
            common.logger.info('You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='+self.dataset_to_check+' -USER.dbs_url_for_publication='+self.DBSURL+' -debug')
Example #5
def manageDatasetBlocks(datasetPath,
                        localDBS,
                        globalDBS,
                        phedexConfig=None,
                        phedexNodes=None):
    """
    _manageDatasetBlocks_

    Trawl through the dataset for all remaining open blocks, and then close them,
    migrate them to global and inject them into PhEDEx if phedexConfig is not None, using
    the optional list of PhEDEx nodes if provided.


    """
    dbs = DBSReader(localDBS)
    blocks = dbs.listFileBlocks(datasetPath)

    for block in blocks:
        if dbs.blockIsOpen(block):
            blockMgr = BlockManager(block, localDBS, globalDBS, datasetPath)
            blockMgr.closeBlock()
            blockMgr.migrateToGlobalDBS()
            if phedexConfig != None:
                blockMgr.injectBlockToPhEDEx(phedexConfig, phedexNodes)

    return
Example #6
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS2 Url, dataset name and list of blockNames,
    generate an XML structure for injection

    """
    spec = XMLInjectionSpec(dbsUrl, 
                            datasetPath)


    reader = DBSReader(dbsUrl)

    for block in blockNames:
        blockContent = reader.getFileBlock(block)
        isOpen = reader.blockIsOpen(block)
        
        if isOpen:
            xmlBlock = spec.getFileblock(block, "y")
        else:
            xmlBlock = spec.getFileblock(block, "n")

        for x in blockContent[block]['Files']:
            checksums = {'cksum' : x['Checksum']}
            if x.get('Adler32') not in (None, ''):
                checksums['adler32'] = x['Adler32'] 
            xmlBlock.addFile(x['LogicalFileName'], checksums, x['FileSize'])

    improv = spec.save()
    xmlString = improv.makeDOMElement().toprettyxml()
    return xmlString
Example #7
    def __call__(self):
        """
        _operator()_

        Load PU dataset information from DBS

        """

        reader = DBSReader(self.dbsUrl)
        blocks = reader.listFileBlocks(self.dataset, False)

        for block in blocks:
            #  //
            # // Populate locations
            #//
            locations = reader.listFileBlockLocation(block)
            if locations:
                self.blockSites[block] = locations
            for location in locations:
                if not self.sites.has_key(location):
                    self.sites[location] = set()
                self.sites[location].add(block)
            #  //
            # // Populate File list for block
            #//
            self[block] = reader.lfnsInBlock(block)

        return
Example #8
def tmdbInjectBlock(dbsUrl, datasetPath, blockName, phedexConfig,
                    workingDir="/tmp", nodes=None, storageElements=None):
    """
    _tmdbInjectBlock_

    Util Method for injecting a fileblock into TMDB

    

    """

    fileName = blockName.replace("/","_")
    fileName = fileName.replace("#","")
    dropXML = "%s/%s-PhEDExDrop.xml" % (workingDir, fileName)
    
    xmlContent = makePhEDExDrop(dbsUrl, datasetPath, blockName)
    handle = open(dropXML, 'w')
    handle.write(xmlContent)
    handle.close()

    reader = DBSReader(dbsUrl)
    
    if not storageElements:
        storageElements = reader.listFileBlockLocation(blockName)
    
    tmdbInject(phedexConfig, dropXML, nodes, *storageElements )

    return
Example #9
def validateDataset( datasetPath, dbsUrl):
    """
    _validateDataset_
    
    Util method to check that the datasetPath provided
    exists in the dbsUrl provided
    
    """
    
    datasetDetails = DatasetConventions.parseDatasetPath(datasetPath)
    for key in ['Primary', 'DataTier', 'Processed']:
        if datasetDetails[key] == None:
            msg = "Invalid Dataset Name: \n ==> %s\n" % datasetPath
            msg += "Does not contain %s information" % key
            raise WorkflowMakerError(msg)
                

    datasets = []
    try:
        reader = DBSReader(dbsUrl)
        datasets = reader.matchProcessedDatasets(
            datasetDetails['Primary'],
            datasetDetails['DataTier'],
            datasetDetails['Processed'])
    except Exception, ex:
        msg = "Error calling DBS to validate dataset:\n%s\n" % datasetPath
        msg += str(ex)
        raise WorkflowMakerError(msg)
Example #10
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(
            analysisDataset=self.inputDataset(),
            retriveList=['retrive_block', 'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if not blocks.has_key(block):
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            [wmbsFile['locations'].add(x) for x in blocks[block]]
            wmbsFile['block'] = block
            thefiles.addFile(wmbsFile)

        work = Workflow()
        subs = Subscription(fileset=thefiles,
                            workflow=work,
                            split_algo='FileBased',
                            type="Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job=self.splitSize)

        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            [
                jobDef['SENames'].extend(list(x['locations']))
                for x in job.listFiles()
            ]
            jobDefs.append(jobDef)

        return jobDefs
Example #11
def createJobSplitter(dataset, dbsUrl, onlyClosedBlocks=False, siteWhitelist=[], blockWhitelist=[], withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.


    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)

            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s \n" % (blockName, locations)
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents == True:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData["Files"]:
            totalEvents += fileInfo["NumberOfEvents"]
            fileList.add(fileInfo["LogicalFileName"])
            if withParents:
                parList = [x["LogicalFileName"] for x in fileInfo["ParentList"]]

                newBlock.addFile(fileInfo["LogicalFileName"], fileInfo["NumberOfEvents"], parList)
            else:
                newBlock.addFile(fileInfo["LogicalFileName"], fileInfo["NumberOfEvents"])

        logging.debug("Block %s contains %s events in %s files" % (blockName, totalEvents, len(fileList)))

    return result
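
A short, hypothetical call sketch for the splitter above; the dataset path, DBS URL and whitelist entries are placeholders, and nothing beyond the signature shown above is assumed about the returned JobSplitter.

# Hypothetical invocation of createJobSplitter; all literal values are placeholders.
import logging
logging.basicConfig(level=logging.DEBUG)        # surface the "Excluding block ..." messages

splitter = createJobSplitter(
    dataset="/Primary/Processed/TIER",
    dbsUrl="https://cmsdbs.example.com/servlet/DBSServlet",
    onlyClosedBlocks=True,              # skip blocks still open for writing
    siteWhitelist=["se.example.org"],   # keep only blocks with a replica at these sites
    blockWhitelist=[],                  # empty list disables block filtering
    withParents=False)
# splitter is the JobSplitter populated block by block via newFileblock()/addFile()
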
Example #12
    def index(self, dataset):
        html = """<html><body><h2>Local DBS Dataset Listing</h2>\n """
        html += "<h4>Dataset: %s<h4>\n" % dataset

        reader = DBSReader(self.localDBS)

        html += "<h4>Block Details</h4>\n"
        html += "<table>\n"
        html += "<tr><th>Block</th><th>SEName</th><th>Files</th>"
        html += "<th>Events</th></tr>\n"
        try:
            blocks = reader.getFileBlocksInfo(dataset)
        except Exception, ex:
            html += "</table>\n"
            html += "<p> Error accessing dataset information: %s</p>" % str(ex)
            html += """</body></html>"""
            return html
Example #13
    def loadLFNs(self, **dbsContacts):
        """
        Get the list of LFNs from the DBS

        """
        # Clear any previously loaded LFNs (removing while iterating would skip entries).
        del self[:]

        dbsUrl = dbsContacts.get('DBSURL', None)
        if dbsUrl == None:
            dbsUrl = getLocalDBSURL()
        reader = DBSReader(dbsUrl)
        fileList = reader.getFiles(self.dataset)

        for block in fileList.values():
            result = [ x['LogicalFileName'] for x in block['Files']]
            self.extend(result)
        return
Example #14
    def processingComplete(self):
        """
        _processingComplete_

        look at the processing jobs for the workflow, and return True
        if all processing jobs are complete

        """
        intermediateDBS = self.workflowSpec.parameters['DBSURL']
        outputDataset   = self.workflowSpec.outputDatasets()[0].name()

        allJobs      = WEUtils.jobsForWorkflow(self.workflow, "Merge")
        finishedJobs = WEUtils.jobsForWorkflow(self.workflow, "Merge", "finished")
        totalProcessing = len(allJobs)
        totalComplete   = len(finishedJobs)

        logging.info("%s: %s/%s jobs complete" %
                      (self.workflow,totalComplete,totalProcessing))

        if totalProcessing == 0: # Protection for non-sensical situation
            return False

        if totalComplete < totalProcessing:
            return False

        # Check to make sure local DBS knows about all output files
        try:
            reader = DBSReader(intermediateDBS)
            blockList = reader.getFiles(dataset = outputDataset)
        except:
            logging.info("Dataset not in DBS yet")
            return False

        totalRegistered = 0
        for block in blockList:
            totalRegistered += len(blockList[block]['Files'])

        logging.info("%s: %s/%s jobs registered" %
                      (self.workflow,totalRegistered,totalProcessing))
        if totalRegistered < totalProcessing:
            return False

        return True
Example #15
    def loadSites(self, **dbsContacts):
        """
        Get the list of sites hosting the PU from DBS/DLS
                                                                                                              
        """
        dbsUrl = dbsContacts.get('DBSURL', None)
        if dbsUrl == None:
            dbsUrl = getLocalDBSURL()
        
        reader = DBSReader(dbsUrl)

        locations = []        
        blocks =  reader.listFileBlocks(self.dataset, True)

        for block in blocks:
            try:
                locations = reader.listFileBlockLocation(block)
            except Exception, ex:
                msg = "Unable to find DLS Locations for Block: %s\n" %  block
                msg += str(ex)
                logging.warning(msg)
                continue
Example #16
def manageDatasetBlocks(datasetPath, localDBS, globalDBS, phedexConfig = None, phedexNodes = None):
    """
    _manageDatasetBlocks_

    Trawl through the dataset for all remaining open blocks, and then close them,
    migrate them to global and inject them into PhEDEx if phedexConfig is not None, using
    the optional list of PhEDEx nodes if provided.


    """
    dbs = DBSReader(localDBS)
    blocks = dbs.listFileBlocks(datasetPath)

    for block in blocks:
        if dbs.blockIsOpen(block):
            blockMgr = BlockManager(block, localDBS, globalDBS, datasetPath)
            blockMgr.closeBlock()
            blockMgr.migrateToGlobalDBS()
            if phedexConfig != None:
                blockMgr.injectBlockToPhEDEx(phedexConfig, phedexNodes)

    return
Example #17
def splitDatasetByRun(datasetName, dbsUrl):
    """
    _splitDatasetByRun_

    Chop up a dataset into a set of jobs with 1 job per run

    """
    reader = DBSReader(dbsUrl)
    result = []
    for run in listRunsInDataset(reader, datasetName):
        files = listFilesInRun(reader, datasetName, run)
        job = JobDefinition()
        job['LFNS'] = files
        job['RunNumber'] = run
        result.append(job)
    return result
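
A small, hypothetical consumer of the per-run split above; the dataset path and DBS URL are placeholders. Each JobDefinition returned carries the run number and the LFNs belonging to that run.

# Hypothetical consumer of splitDatasetByRun; the literal values are placeholders.
jobs = splitDatasetByRun("/Primary/Processed/TIER",
                         "https://cmsdbs.example.com/servlet/DBSServlet")
for job in jobs:
    print("Run %s: %s files" % (job['RunNumber'], len(job['LFNS'])))
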
Example #18
class DBSWriter:
    """
    _DBSWriter_

    General API for writing data to DBS


    """
    def __init__(self, url, **contact):
        args = {"url": url, "level": 'ERROR'}
        args.update(contact)
        try:
            self.dbs = DbsApi(args)
        except DbsException, ex:
            msg = "Error in DBSWriterError with DbsApi\n"
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        self.reader = DBSReader(**args)
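
A construction sketch (the URL is a placeholder): the writer wraps a DbsApi and keeps a DBSReader on the same endpoint, so a single handle provides both read and write access.

# Hypothetical construction of the writer; the URL is a placeholder.
writer = DBSWriter("https://cmsdbs.example.com/servlet/DBSServlet", level='ERROR')
reader = writer.reader      # DBSReader created in __init__ on the same endpoint
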
Example #19
    def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True):
        """
        _importDataset_

        Import a dataset into the local scope DBS.
        It complains if the parent datasets are not there!!

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported
        
        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block,sename)
                        logging.info(sename)
                    continue

            
            try:
                xferData = reader.dbs.listDatasetContents(
                    sourceDatasetPath,  block
                    )
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
Example #20
    def importDatasetWithExistingParents(self,
                                         sourceDBS,
                                         sourceDatasetPath,
                                         targetDBS,
                                         onlyClosed=True):
        """
        _importDataset_

        Import a dataset into the local scope DBS.
        It complains if the parent datasets are not there!!

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported
        
        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(
                    sourceDatasetPath, block)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
Example #21
    def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
        """
        _migrateDatasetBlocks_

        Migrate the list of fileblocks provided by blocks, belonging
        to the dataset specified by the dataset path to this DBS instance
        from the inputDBSUrl provided

        - *inputDBSUrl* : URL for connection to input DBS
        - *datasetPath* : Name of dataset in input DBS (must exist in input
                          DBS)
        - *blocks*      : list of block names to be migrated (must exist
                          in input DBS)

        """
        if len(blocks) == 0:
            msg = "FileBlocks not provided.\n"
            msg += "You must provide the name of at least one fileblock\n"
            msg += "to be migrated"
            raise DBSWriterError(msg)
        #  //
        # // Hook onto input DBSUrl and verify that the dataset & blocks
        #//  exist
        reader = DBSReader(inputDBSUrl)
        
        inputBlocks = reader.listFileBlocks(datasetPath)
        
        for block in blocks:
            #  //
            # // Test block exists at source
            #// 
            if block not in inputBlocks:
                msg = "Block name:\n ==> %s\n" % block
                msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
                msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
                raise DBSWriterError(msg)

            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if not self.reader.blockIsOpen(block):
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Migration of that block"
                    logging.warning(msg)
                    continue
                
            try:
                xferData = reader.dbs.listDatasetContents(datasetPath,  block)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    datasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            
            xferData = _remapBlockParentage(datasetPath, xferData)
            
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    datasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
Example #22
def createJobSplitter(dataset,
                      dbsUrl,
                      onlyClosedBlocks=False,
                      siteWhitelist=[],
                      blockWhitelist=[],
                      withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.


    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (
                    blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)

            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s \n" % (
                    blockName,
                    locations,
                )
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents == True:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData['Files']:
            totalEvents += fileInfo['NumberOfEvents']
            fileList.add(fileInfo['LogicalFileName'])
            if withParents:
                parList = [
                    x['LogicalFileName'] for x in fileInfo['ParentList']
                ]

                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'], parList)
            else:
                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'])

        logging.debug("Block %s contains %s events in %s files" % (
            blockName,
            totalEvents,
            len(fileList),
        ))

    return result
Example #23
    def makeBlockList(self, onlyClosedBlocks = False, sites=None,
        providedOnlyBlocks=None):
        """
        _makeBlockList_


        Generate the list of blocks for the workflow.

        1. Get the list of all blocks from the DBS
        2. Compare to list of blocks in persistency file
        3. Obtain the intersection of the new blocks and the providedOnlyBlocks list.
        4. Set OnlyBlocks parameter to intersection obtained.
        
        """
        #reader = DBSReader(self.dbsUrl)
        # At this point, blocks should be in local DBS
        localDBS = getLocalDBSURL()
        reader = DBSReader(localDBS)
        dbsBlocks = reader.listFileBlocks(self.inputDataset(), onlyClosedBlocks)
        
        if self.persistData.blocks != []:
            remover = lambda x : x not in self.persistData.blocks
            newBlocks = filter(remover, dbsBlocks)
        else:
            newBlocks = dbsBlocks

        #  //
        # // Skipping blocks without site info
        #//
        msg = "Filtering blocks according to Site information..."
        logging.info(msg)
        blocksAtSites = []
        for block in newBlocks:
            locations = reader.listFileBlockLocation(block)
            if not locations:
                msg = "\nSkipping block: "
                msg += "No site info available for block %s " % block
                logging.info(msg)
            elif sites is not None:
                locationInSites = False
                for location in locations:
                    if location in sites:
                        locationInSites = True
                        break
                if locationInSites:
                    blocksAtSites.append(block)
                else:
                    msg = "\nSkipping block: "
                    msg += "Block %s has no replicas in %s" % (block,
                        ", ".join(sites))
                    logging.info(msg)
            else:
                blocksAtSites.append(block)
        newBlocks = blocksAtSites

        if len(newBlocks) == 0:
            msg = "No New Blocks found for dataset\n"
            raise RuntimeError, msg

        #  //
        # // Check presence of provided Blocks in newBlocks
        #//
        blocksToProcess = []
        if providedOnlyBlocks is not None :
            providedOnlyBlocksList = providedOnlyBlocks.split(',')
            msg = "OnlyBlocks setting provided. Processing it..."
            logging.info(msg)
            msg = "OnlyBlocks list contains %s Blocks." % (
                len(providedOnlyBlocksList))
            logging.info(msg)
            blockCount = 1
            for block in providedOnlyBlocksList :
                if block.strip() in newBlocks :
                    blocksToProcess.append(block.strip())
                    msg = "Block %s: Adding Block %s" % (
                        blockCount, block)
                    msg += " to the Whitelist"
                    logging.info(msg)
                else:
                    msg = "Block %s: Skipping Block %s " % (
                        blockCount, block)
                    msg += "It's no New or it has been processed"
                    msg += " already."
                    logging.info(msg)
                blockCount += 1
        else :
            blocksToProcess = newBlocks
            msg = "OnlyBlocks setting not provided. Processing"
            msg += " all New Blocks for Dataset\n"
            logging.info(msg)

        if len(blocksToProcess) == 0 :
            msg = "OnlyBlocks list does not match any New Blocks"
            msg += " found for Dataset\n"
            raise RuntimeError, msg
        
        blockList = str(blocksToProcess)
        blockList = blockList.replace("[", "")
        blockList = blockList.replace("]", "")
        blockList = blockList.replace("\'", "")
        blockList = blockList.replace("\"", "")
        self.workflow.parameters['OnlyBlocks'] = blockList
        self.persistData.blocks.extend(blocksToProcess)
        return
Example #24
    def importDataset(self,
                      sourceDBS,
                      sourceDatasetPath,
                      targetDBS,
                      onlyClosed=True,
                      skipNoSiteError=False):
        """
        _importDataset_

        Import a dataset into the local scope DBS with full parentage hierarchy
        (at least not slow because branches info is dropped). Parents are also
        imported. This method imports block by block, then each time a block
        is imported, its parent blocks will be imported first.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        - *onlyClosed* : Only closed blocks will be imported if set to True

        - *skipNoSiteError* : If this is True, then this method won't raise an
                              Exception if a block has no site information in 
                              sourceDBS.

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        blkCounter = 0
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            blkCounter = blkCounter + 1
            msg = "Importing block %s of %s: %s " % (blkCounter,
                                                     len(inputBlocks), block)
            logging.debug(msg)
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        # we don't skip the error raising
                        if not skipNoSiteError:
                            msg = "Error in DBSWriter.importDataset\n"
                            msg += "Block has no locations defined: %s" % block
                            raise DBSWriterError(msg)
                        msg = "Block has no locations defined: %s" % block
                        logging.info(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:

                self.dbs.dbsMigrateBlock(sourceDBS,
                                         targetDBS,
                                         block_name=block)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                # we don't skip the error raising
                if not skipNoSiteError:
                    msg = "Error in DBSWriter.importDataset\n"
                    msg += "Block has no locations defined: %s" % block
                    raise DBSWriterError(msg)
                msg = "Block has no locations defined: %s" % block
                logging.info(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block, sename)
Example #25
    def makeFileList(self, onlyClosedBlocks = False, sites=None,
        providedOnlyBlocks=None, providedOnlyFiles=None):
        """
        _makeFileList_


        Generate the list of blocks for the workflow.

        1. Get the list of all blocks from the DBS
        2. Compare to list of blocks in persistency file
        3. Obtain the intersection of the new blocks and the providedOnlyBlocks
           list.
        4. Set OnlyBlocks parameter to intersection obtained.
        
        """
        #reader = DBSReader(self.dbsUrl)
        # At this point, blocks should be in local DBS
        localDBS = getLocalDBSURL()
        reader = DBSReader(localDBS)

        #  //
        # // Querying list of blocks from DBS
        #//
        msg = "Querying for closed blocks in Local DBS: %s ..." % localDBS
        logging.info(msg)
        dbsBlocks = reader.listFileBlocks(self.inputDataset(),
                                            onlyClosedBlocks)
        msg = "Retrieved %s close blocks from Local DBS" % len(dbsBlocks)
        logging.info(msg)

        #  //
        # // Constructing mapping structures block-file
        #//
        filesToBlocks = {}
        blocksToFiles = {}
        dbsFiles = reader.dbs.listFiles(path=self.inputDataset())
        for dbsfile in dbsFiles:
            if dbsfile['Block']['Name'] in dbsBlocks:
                filesToBlocks[dbsfile['LogicalFileName']] = \
                                                    dbsfile['Block']['Name']
                blocksToFiles.setdefault(dbsfile['Block']['Name'], []
                                         ).append(dbsfile['LogicalFileName'])

        # OnlyFiles?
        if providedOnlyFiles is not None and \
            providedOnlyFiles.strip().lower() != 'auto':
            msg = "Using OnlyFiles list:"
            msg += " %s files." % len(providedOnlyFiles.split(','))
            logging.info(msg)
            onlyFiles = [x.strip() for x in providedOnlyFiles.split(',') if x]
        # OnlyFiles=auto
        elif providedOnlyFiles is not None:
            msg = "Automatically generating OnlyFiles list from DBS..."
            logging.info(msg)
            onlyFiles = self.createOnlyFilesFromWorkflow()
        # OnlyBlocks
        elif providedOnlyBlocks is not None:
            msg = "Using OnlyBLocks list:"
            msg += " %s blocks." % len(providedOnlyBlocks.split(','))
            logging.info(msg)
            onlyFiles = []
            for block in \
                    [x.strip() for x in providedOnlyBlocks.split(',') if x]:
                onlyFiles.extend(blocksToFiles[block])
        # Processing everything in DBS
        else:
            msg = "Processing whole input dataset..."
            logging.info(msg)
            onlyFiles = []
            for block in dbsBlocks:
                onlyFiles.extend(blocksToFiles[block])

        if not onlyFiles:
            msg = "No files were found for the input dataset: " + \
                self.inputDataset()
            raise RuntimeError, msg

        #  //
        # // Filter files that were already processed
        #//
        if self.persistData.blocks:
            msg = "Filtering files that were already processed for this"
            msg += " workflow..."
            logging.info(msg)
            processedFiles = self.persistData.getFiles()
            msg = "Persistency file has %s file(s)" % len(processedFiles)
            logging.info(msg)
            remover  = lambda x: x not in processedFiles
            onlyFiles = filter(remover, onlyFiles)
            msg = "%s file(s) were removed" % \
                                    str(len(processedFiles) - len(onlyFiles))
            logging.info(msg)

        if not onlyFiles:
            msg = "No New files were found for the input dataset: " + \
                self.inputDataset()
            raise RuntimeError, msg

        #  //
        # // Filter files in blocks without site info
        #//
        msg = "Filtering blocks according to Site information..."
        logging.info(msg)
        candidateBlocks = {}
        for file in onlyFiles:
            candidateBlocks.setdefault(filesToBlocks[file], []).append(file)
        blocksAtSites = []
        for block in candidateBlocks:
            locations = reader.listFileBlockLocation(block)
            if not locations:
                msg = "Excluding block without site info ==> %s" % block
                logging.info(msg)
            elif sites is not None:
                locationInSites = False
                for location in locations:
                    if location in sites:
                        locationInSites = True
                        break
                if locationInSites:
                    blocksAtSites.append(block)
                else:
                    msg = "Excluding block without replicas"
                    msg += " in %s ==> %s" % (block, ", ".join(sites))
                    logging.info(msg)
            else:   
                blocksAtSites.append(block)
        if len(blocksAtSites) == 0:
            msg = "No block has site information."
            raise RuntimeError, msg

        #  //
        # // Constructing OnlyBlocks and OnlyFiles list
        #//
        onlyBlocks = {}
        for block in blocksAtSites:
            onlyBlocks[block] = candidateBlocks[block]
        onlyFiles = []
        for block in onlyBlocks:
            onlyFiles.extend(onlyBlocks[block])

        msg = "\n ==> Files to process: %s" % len(onlyFiles)
        msg += "\n ==> Blocks to process: %s" % len(onlyBlocks)
        logging.info(msg)
    
        blockList = ",".join(onlyBlocks.keys())
        fileList = ",".join(onlyFiles)
        self.workflow.parameters['OnlyBlocks'] = blockList
        self.workflow.parameters['OnlyFiles'] = fileList
        self.persistData.update(onlyBlocks)
        return
Example #26
    def publishDataset(self,file):
        """
        """
        try:
            jobReport = readJobReport(file)[0]
            self.exit_status = '0'
        except IndexError:
            self.exit_status = '1'
            msg = "Error: Problem with "+file+" file"
            common.logger.info(msg)
            return self.exit_status

        if (len(self.dataset_to_import) != 0):
           for dataset in self.dataset_to_import:
               common.logger.info("--->>> Importing parent dataset in the dbs: " +dataset)
               status_import=self.importParentDataset(self.globalDBS, dataset)
               if (status_import == 1):
                   common.logger.info('Problem with parent '+ dataset +' import from the global DBS '+self.globalDBS+ ' to the local one '+self.DBSURL)
                   self.exit_status='1'
                   return self.exit_status
               else:
                   common.logger.info('Import ok of dataset '+dataset)

        
        if (len(jobReport.files) <= 0) :
            self.exit_status = '1'
            msg = "Error: No EDM file to publish in xml file"+file+" file"
            common.logger.info(msg)
            return self.exit_status
        else:
            msg = "fjr contains some files to publish" 
            common.logger.debug(msg)

        #### datasets creation in dbs
        #// DBS to contact write and read of the same dbs
        dbsReader = DBSReader(self.DBSURL,level='ERROR')
        dbswriter = DBSWriter(self.DBSURL)
        #####

        self.published_datasets = [] 
        for fileinfo in jobReport.files:
            datasets_info=fileinfo.dataset
            if len(datasets_info)<=0:
                self.exit_status = '1'
                msg = "Error: No info about dataset in the xml file "+file
                common.logger.info(msg)
                return self.exit_status
            else:
                for dataset in datasets_info:
                    #### for production data
                    self.processedData = dataset['ProcessedDataset']
                    if (dataset['PrimaryDataset'] == 'null'):
                        dataset['PrimaryDataset'] = self.userprocessedData
                    elif self.datasetpath.upper() != 'NONE':
                        dataset['ParentDataset']= self.datasetpath

                    dataset['PSetContent']=self.content
                    cfgMeta = {'name' : self.pset , 'Type' : 'user' , 'annotation': 'user cfg', 'version' : 'private version'} # add real name of user cfg
                    common.logger.info("PrimaryDataset = %s"%dataset['PrimaryDataset'])
                    common.logger.info("ProcessedDataset = %s"%dataset['ProcessedDataset'])
                    common.logger.info("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER")
                    
                    self.dataset_to_check="/"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER"


                    self.published_datasets.append(self.dataset_to_check)

                    common.logger.log(10-1,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset']))
                    
                    #### check if dataset already exists in the DBS
                    result = dbsReader.matchProcessedDatasets(dataset['PrimaryDataset'], 'USER', dataset['ProcessedDataset'])
                    if (len(result) != 0):
                       result = dbsReader.listDatasetFiles(self.dataset_to_check)

                    primary = DBSWriterObjects.createPrimaryDataset( dataset, dbswriter.dbs)
                    common.logger.log(10-1,"Primary:  %s "%primary)
                    print "primary = ", primary 

                    algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs)
                    common.logger.log(10-1,"Algo:  %s "%algo)

                    processed = DBSWriterObjects.createProcessedDataset(primary, algo, dataset, dbswriter.dbs)
                    common.logger.log(10-1,"Processed:  %s "%processed)
                    print "processed = ", processed 

                    common.logger.log(10-1,"Inserted primary %s processed %s"%(primary,processed))
                    #######################################################################################
                
        common.logger.log(10-1,"exit_status = %s "%self.exit_status)
        return self.exit_status
Example #27
    sys.exit(1)
if (block != None) and (blockFileList != None) and (datasetPath != None):
    print "\n options --block or --blockFileList or --datasetPath are mutually exclusive"
    print usage
    sys.exit(1)

print ">>>>> DBS URL : %s "%(url,)

import logging
logging.disable(logging.INFO)
#  //
# // Get API to DBS
#//
args = {'url' : url , 'level' : 'ERROR'}
dbsapi = DbsApi(args)
dbsreader = DBSReader(url)

#  //
# // Close FileBlock method
#//
def closeDBSFileBlock(ablock):   
  print "Closing block %s"%ablock
  dbsblock = DbsFileBlock( Name = ablock)
  dbsapi.closeBlock(dbsblock)

### --block option: close single block
if (block != None):
  closeDBSFileBlock(block)

## --blockFileList option: close list of blocks from a file
if (blockFileList != None) :
Example #28
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset ' +
                           self.dataset_to_check + ' in DBS url ' +
                           self.DBSURL + '\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:
            primds = self.dataset_to_check.split('/')[1]
            procds = self.dataset_to_check.split('/')[2]
            tier = self.dataset_to_check.split('/')[3]
            datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList')) == 0:
                print "===== Empty dataset yet /%s/%s with tiers %s" % (
                    dataset.get('PrimaryDataset')['Name'], dataset.get('Name'),
                    dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot = 0
                    print "=== dataset %s" % datasetpath
                    ### FEDE #######
                    if dataset['Description'] != None:
                        print "=== dataset description = ", dataset[
                            'Description']
                    ################
                    blocks = dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList = dbsreader.listFileBlockLocation(
                            block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" % block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" % block[
                            'OpenForWriting']
                        print "      Number of files: %s" % block[
                            'NumberOfFiles']
                        print "      Number of Bytes: %s" % block['BlockSize']
                        print "      Number of Events: %s" % block[
                            'NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files = dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s" % (
                                    file['FileSize'], file['NumberOfEvents'],
                                    file['LogicalFileName'], file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n" % (
                        nevttot, datasetpath)
        if not common.debugLevel:
            common.logger.info(
                'You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='
                + self.dataset_to_check + ' -USER.dbs_url_for_publication=' +
                self.DBSURL + ' -debug')
Example #29
    def publishDataset(self, file):
        """
        """
        try:
            jobReport = readJobReport(file)[0]
            self.exit_status = '0'
        except IndexError:
            self.exit_status = '1'
            msg = "Error: Problem with " + file + " file"
            common.logger.info(msg)
            return self.exit_status

        if (len(self.dataset_to_import) != 0):
            for dataset in self.dataset_to_import:
                common.logger.info(
                    "--->>> Importing parent dataset in the dbs: " + dataset)
                status_import = self.importParentDataset(
                    self.globalDBS, dataset)
                if (status_import == 1):
                    common.logger.info('Problem with parent ' + dataset +
                                       ' import from the global DBS ' +
                                       self.globalDBS + ' to the local one ' +
                                       self.DBSURL)
                    self.exit_status = '1'
                    return self.exit_status
                else:
                    common.logger.info('Import ok of dataset ' + dataset)

        if (len(jobReport.files) <= 0):
            self.exit_status = '1'
            msg = "Error: No EDM file to publish in xml file" + file + " file"
            common.logger.info(msg)
            return self.exit_status
        else:
            msg = "fjr contains some files to publish"
            common.logger.debug(msg)

        #### datasets creation in dbs
        #// DBS to contact write and read of the same dbs
        dbsReader = DBSReader(self.DBSURL, level='ERROR')
        dbswriter = DBSWriter(self.DBSURL)
        #####

        self.published_datasets = []
        for fileinfo in jobReport.files:
            datasets_info = fileinfo.dataset
            if len(datasets_info) <= 0:
                self.exit_status = '1'
                msg = "Error: No info about dataset in the xml file " + file
                common.logger.info(msg)
                return self.exit_status
            else:
                for dataset in datasets_info:
                    #### for production data
                    self.processedData = dataset['ProcessedDataset']
                    if (dataset['PrimaryDataset'] == 'null'):
                        dataset['PrimaryDataset'] = self.userprocessedData
                    elif self.datasetpath.upper() != 'NONE':
                        dataset['ParentDataset'] = self.datasetpath

                    dataset['PSetContent'] = self.content
                    cfgMeta = {
                        'name': self.pset,
                        'Type': 'user',
                        'annotation': 'user cfg',
                        'version': 'private version'
                    }  # add real name of user cfg
                    common.logger.info("PrimaryDataset = %s" %
                                       dataset['PrimaryDataset'])
                    common.logger.info("ProcessedDataset = %s" %
                                       dataset['ProcessedDataset'])
                    common.logger.info("<User Dataset Name> = /" +
                                       dataset['PrimaryDataset'] + "/" +
                                       dataset['ProcessedDataset'] + "/USER")

                    self.dataset_to_check = "/" + dataset[
                        'PrimaryDataset'] + "/" + dataset[
                            'ProcessedDataset'] + "/USER"

                    self.published_datasets.append(self.dataset_to_check)

                    common.logger.log(
                        10 - 1, "--->>> Inserting primary: %s processed : %s" %
                        (dataset['PrimaryDataset'],
                         dataset['ProcessedDataset']))

                    #### check if dataset already exists in the DBS
                    result = dbsReader.matchProcessedDatasets(
                        dataset['PrimaryDataset'], 'USER',
                        dataset['ProcessedDataset'])
                    if (len(result) != 0):
                        result = dbsReader.listDatasetFiles(
                            self.dataset_to_check)

                    primary = DBSWriterObjects.createPrimaryDataset(
                        dataset, dbswriter.dbs)
                    common.logger.log(10 - 1, "Primary:  %s " % primary)
                    print "primary = ", primary

                    algo = DBSWriterObjects.createAlgorithm(
                        dataset, cfgMeta, dbswriter.dbs)
                    common.logger.log(10 - 1, "Algo:  %s " % algo)

                    processed = DBSWriterObjects.createProcessedDataset(
                        primary, algo, dataset, dbswriter.dbs)
                    common.logger.log(10 - 1, "Processed:  %s " % processed)
                    print "processed = ", processed

                    common.logger.log(
                        10 - 1, "Inserted primary %s processed %s" %
                        (primary, processed))
                    #######################################################################################

        common.logger.log(10 - 1, "exit_status = %s " % self.exit_status)
        return self.exit_status
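
A minimal driver sketch for the method above: `publisher` is a hypothetical instance of the publishing class shown here (configured with DBSURL and the related attributes), and the framework job report file name is only a placeholder.

# Hedged usage sketch: 'publisher' and the report file name are placeholders.
fjr = "crab_fjr_1.xml"
status = publisher.publishDataset(fjr)
if status == '0':
    print "Published datasets registered in %s:" % publisher.DBSURL
    for name in publisher.published_datasets:
        print "  %s" % name
else:
    print "publishDataset failed for job report %s" % fjr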
Example #30
0
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("MergeSize = %s" % self.mergeSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        logging.debug("Connection to DBS at: %s" % self.dbsUrl)

        reader = DBSReader(self.dbsUrl)
        blockList = reader.dbs.listBlocks(dataset = self.inputDataset())
        jobDefs = []

        for block in blockList:
            blockName = block['Name']
            logging.debug("Getting files for block %s" % blockName)
            locations = reader.listFileBlockLocation(blockName)
            fileList  = reader.dbs.listFiles(blockName = blockName)
            if not fileList: # Skip empty blocks
                continue

            thefiles = Fileset(name='FilesToSplit')
            for f in fileList:
                f['Block']['StorageElementList'].extend(locations)
                wmbsFile = File(f['LogicalFileName'])
                [ wmbsFile['locations'].add(x) for x in locations ]
                wmbsFile['block'] = blockName
                wmbsFile['size']  = f['FileSize']
                thefiles.addFile(wmbsFile)

            work = Workflow()
            subs = Subscription(
                fileset = thefiles,
                workflow = work,
                split_algo = 'MergeBySize',
                type = "Merge")
            logging.debug("Info for Subscription %s" % subs)
            splitter = SplitterFactory()
            jobfactory = splitter(subs)

            jobGroups = jobfactory(
                merge_size=self.mergeSize,                # min in Bytes
                all_files=True                            # merge all files
                )
            if not jobGroups:
                raise(SyntaxError)
            for jobGroup in jobGroups:
                for job in jobGroup.getJobs():
                    jobDef = JobDefinition()
                    jobDef['LFNS'].extend(job.getFiles(type='lfn'))
                    jobDef['SkipEvents'] = 0
                    jobDef['MaxEvents'] = -1
                    [ jobDef['SENames'].extend(list(x['locations']))
                        for x in job.getFiles() ]
                    jobDefs.append(jobDef)

        return jobDefs
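
A short, hedged sketch of how the returned job definitions might be inspected; `mergeSensor` is a hypothetical name for an instance of the component that owns processDataset, configured with dbsUrl, mergeSize and inputDataset as in the snippet above.

# Hedged sketch: 'mergeSensor' stands in for an instance of the class above.
jobDefs = mergeSensor.processDataset()
print "Created %s merge job definitions" % len(jobDefs)
for index, jobDef in enumerate(jobDefs):
    print "Job %s: %s LFNs at SEs %s" % (
        index, len(jobDef['LFNS']), ", ".join(sorted(set(jobDef['SENames']))))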
Example #31
0
    def importDataset(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True, skipNoSiteError=False):
        """
        _importDataset_

        Import a dataset into the local-scope DBS with its full parentage
        hierarchy (kept reasonably fast because branch information is
        dropped). Parents are also imported. This method imports block by
        block; before each block is imported, its parent blocks are imported
        first.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        - *onlyClosed* : Only closed blocks will be imported if set to True

        - *skipNoSiteError* : If this is True, this method won't raise an
                              exception if a block has no site information in
                              sourceDBS.

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        blkCounter=0
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            blkCounter = blkCounter + 1
            msg = "Importing block %s of %s: %s" % (blkCounter, len(inputBlocks), block)
            logging.debug(msg)
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        # we don't skip the error raising
                        if not skipNoSiteError:
                            msg = "Error in DBSWriter.importDataset\n"
                            msg += "Block has no locations defined: %s" % block
                            raise DBSWriterError(msg)
                        msg = "Block has no locations defined: %s" % block
                        logging.info(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block,sename)
                        logging.info(sename)
                    continue

            try:

                self.dbs.dbsMigrateBlock(sourceDBS, targetDBS, block_name=block)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
                    
            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                # we don't skip the error raising
                if not skipNoSiteError:
                    msg = "Error in DBSWriter.importDataset\n"
                    msg += "Block has no locations defined: %s" % block
                    raise DBSWriterError(msg)
                msg = "Block has no locations defined: %s" % block
                logging.info(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block,sename)
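
A hedged call sketch for importDataset: the DBSWriter construction mirrors the other examples, while both URLs and the dataset path below are placeholders rather than real endpoints.

# Placeholders only; substitute real DBS endpoints and an existing dataset path.
sourceDBS = "https://source-dbs.example.org/DBS/servlet/DBSServlet"
targetDBS = "https://local-dbs.example.org/DBS/servlet/DBSServlet"
writer = DBSWriter(targetDBS)
writer.importDataset(sourceDBS,
                     "/Primary/Processed/RECO",
                     targetDBS,
                     onlyClosed = True,         # import only closed blocks
                     skipNoSiteError = True)    # tolerate blocks without location info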
Example #32
0
    def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
        """
        _migrateDatasetBlocks_

        Migrate the list of fileblocks provided by blocks, belonging
        to the dataset specified by the dataset path to this DBS instance
        from the inputDBSUrl provided

        - *inputDBSUrl* : URL for connection to input DBS
        - *datasetPath* : Name of dataset in input DBS (must exist in input
                          DBS)
        - *blocks*      : list of block names to be migrated (must exist
                          in input DBS)

        """
        if len(blocks) == 0:
            msg = "FileBlocks not provided.\n"
            msg += "You must provide the name of at least one fileblock\n"
            msg += "to be migrated"
            raise DBSWriterError(msg)
        #  //
        # // Hook onto input DBSUrl and verify that the dataset & blocks
        #//  exist
        reader = DBSReader(inputDBSUrl)

        inputBlocks = reader.listFileBlocks(datasetPath)

        for block in blocks:
            #  //
            # // Test block exists at source
            #//
            if block not in inputBlocks:
                msg = "Block name:\n ==> %s\n" % block
                msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
                msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
                raise DBSWriterError(msg)

            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed dont attempt transfer
                if not self.reader.blockIsOpen(block):
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Migration of that block"
                    logging.warning(msg)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(datasetPath, block)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    datasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            xferData = _remapBlockParentage(datasetPath, xferData)

            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    datasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
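
A hedged usage sketch for migrateDatasetBlocks; `writer` is again a DBSWriter bound to the target instance, and the URL, dataset path and block name are placeholders.

# Placeholders only; the block must exist in the input DBS instance.
writer = DBSWriter("https://local-dbs.example.org/DBS/servlet/DBSServlet")
writer.migrateDatasetBlocks(
    inputDBSUrl = "https://source-dbs.example.org/DBS/servlet/DBSServlet",
    datasetPath = "/Primary/Processed/RECO",
    blocks = ["/Primary/Processed/RECO#0000-1111-2222-3333"])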
Example #33
0
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("MergeSize = %s" % self.mergeSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        logging.debug("Connection to DBS at: %s" % self.dbsUrl)

        reader = DBSReader(self.dbsUrl)
        blockList = reader.dbs.listBlocks(dataset=self.inputDataset())
        jobDefs = []

        for block in blockList:
            blockName = block['Name']
            logging.debug("Getting files for block %s" % blockName)
            locations = reader.listFileBlockLocation(blockName)
            fileList = reader.dbs.listFiles(blockName=blockName)
            if not fileList:  # Skip empty blocks
                continue

            thefiles = Fileset(name='FilesToSplit')
            for f in fileList:
                f['Block']['StorageElementList'].extend(locations)
                wmbsFile = File(f['LogicalFileName'])
                [wmbsFile['locations'].add(x) for x in locations]
                wmbsFile['block'] = blockName
                wmbsFile['size'] = f['FileSize']
                thefiles.addFile(wmbsFile)

            work = Workflow()
            subs = Subscription(fileset=thefiles,
                                workflow=work,
                                split_algo='MergeBySize',
                                type="Merge")
            logging.debug("Info for Subscription %s" % subs)
            splitter = SplitterFactory()
            jobfactory = splitter(subs)

            jobGroups = jobfactory(
                merge_size=self.mergeSize,  # min in Bytes
                all_files=True  # merge all files
            )
            if not jobGroups:
                raise (SyntaxError)
            for jobGroup in jobGroups:
                for job in jobGroup.getJobs():
                    jobDef = JobDefinition()
                    jobDef['LFNS'].extend(job.getFiles(type='lfn'))
                    jobDef['SkipEvents'] = 0
                    jobDef['MaxEvents'] = -1
                    [
                        jobDef['SENames'].extend(list(x['locations']))
                        for x in job.getFiles()
                    ]
                    jobDefs.append(jobDef)

        return jobDefs
Example #34
0
    print usage
    sys.exit(1)
if (block != None) and (datasetPath != None):
    print "\n options --block or --datasetPath are mutually exclusive"
    print usage
    sys.exit(1)

print ">>>>> DBS URL : %s" % (url)

import logging
logging.disable(logging.INFO)

#  //
# // Get API to DBS
#//
args = {'url': url, 'level': 'ERROR'}
dbsapi = DbsApi(args)

#  //
# // Delete dataset
#//
if (datasetPath):
    print "Deleting datasetPath=%s" % datasetPath
    dbsapi.deleteProcDS(datasetPath)

if (block):
    dbsreader = DBSReader(url)
    getdatasetPath = dbsreader.blockToDatasetPath(block)
    print "Deleting block=%s from datasetPath=%s" % (block, getdatasetPath)
    dbsapi.deleteBlock(getdatasetPath, block)
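
The option parsing for this script is cut off above; judging from the checks that remain, an invocation would presumably look like the lines below (the script name and the --url flag are inferred, not taken from the original usage text).

# Hypothetical invocations of the deletion script above:
#   python dbsDeleteDataset.py --url=<DBS writer URL> --datasetPath=/Primary/Processed/Tier
#   python dbsDeleteDataset.py --url=<DBS writer URL> --block=/Primary/Processed/Tier#<block-id>
# --datasetPath drops the whole processed dataset via deleteProcDS, while
# --block first resolves the owning dataset with blockToDatasetPath and then
# calls deleteBlock on that (dataset, block) pair.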
Example #35
0
    print usage
    sys.exit(1)


print ">>>>> DBS URL : %s"%(url)

import logging
logging.disable(logging.INFO)

#  //
# // Get API to DBS
#//
args = {'url' : url , 'level' : 'ERROR'}
dbsapi = DbsApi(args)

#  //
# // Delete dataset
#//
if (datasetPath):
    print "Deleting datasetPath=%s" % datasetPath
    dbsapi.deleteProcDS(datasetPath)

if (block):
    dbsreader = DBSReader(url)
    getdatasetPath = dbsreader.blockToDatasetPath(block)
    print "Deleting block=%s from datasetPath=%s" % (block, getdatasetPath)
    dbsapi.deleteBlock(getdatasetPath, block)



Example #36
0
        sys.exit(1)

if datasetPath == None:
    print "--datasetPath option not provided. For example : --datasetPath /primarydataset/processeddataset/datatier"
    print usage
    sys.exit()
print ">>>>> DBS URL : %s"%(url,)


import logging
logging.disable(logging.INFO)

#  //
# // Get API to DBS
#//
dbsreader = DBSReader(url)
#  //
# // Get list of datasets
#//
if datasetPath:
    primds = datasetPath.split('/')[1]
    procds = datasetPath.split('/')[2]
    tier = datasetPath.split('/')[3]
    # print " matchProcessedDatasets(%s,%s,%s)" % (primds, tier, procds)
    datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
else:
    datasets = dbsreader.matchProcessedDatasets("*", "*", "*")


for dataset in datasets:
#  //