Example #1
def tmdbInjectBlock(dbsUrl,
                    datasetPath,
                    blockName,
                    phedexConfig,
                    workingDir="/tmp",
                    nodes=None,
                    storageElements=None):
    """
    _tmdbInjectBlock_

    Util Method for injecting a fileblock into TMDB

    

    """

    fileName = blockName.replace("/", "_")
    fileName = fileName.replace("#", "")
    dropXML = "%s/%s-PhEDExDrop.xml" % (workingDir, fileName)

    xmlContent = makePhEDExDrop(dbsUrl, datasetPath, blockName)
    handle = open(dropXML, 'w')
    handle.write(xmlContent)
    handle.close()

    reader = DBSReader(dbsUrl)

    if not storageElements:
        storageElements = reader.listFileBlockLocation(blockName)

    tmdbInject(phedexConfig, dropXML, nodes, *storageElements)

    return
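
The drop-file name above is derived purely from the block name; here is a tiny standalone sketch of that naming, assuming a made-up block name and the default working directory (neither value comes from the example):

# Made-up block name and default workingDir; mirrors the fileName/dropXML
# construction in tmdbInjectBlock above.
blockName = "/Primary/Processed/TIER#0123abcd"
workingDir = "/tmp"
fileName = blockName.replace("/", "_").replace("#", "")
dropXML = "%s/%s-PhEDExDrop.xml" % (workingDir, fileName)
print dropXML   # -> /tmp/_Primary_Processed_TIER0123abcd-PhEDExDrop.xml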
Example #2
    def __call__(self):
        """
        _operator()_

        Load PU dataset information from DBS

        """
        
        
        reader = DBSReader(self.dbsUrl)
        blocks = reader.listFileBlocks(self.dataset, False)
        
        for block in blocks:
            #  //
            # // Populate locations
            #//
            locations = reader.listFileBlockLocation(block)
            if locations:
                self.blockSites[block] = locations
            for location in locations:
                if not self.sites.has_key(location):
                    self.sites[location] = set()
                self.sites[location].add(block)
            #  //
            # // Populate File list for block
            #//
            self[block] = reader.lfnsInBlock(block)

        return
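
For reference, a small self-contained sketch of the data shapes __call__ builds above; the block name, LFNs and storage element name are invented, not read from DBS:

# Invented values mirroring the structures populated above:
# self[block] -> LFN list, self.blockSites -> block locations,
# self.sites -> location -> set of blocks.
blockFiles = {'/ds#blk1': ['/store/pu_a.root', '/store/pu_b.root']}
blockSites = {'/ds#blk1': ['se01.example.org']}
sites = {'se01.example.org': set(['/ds#blk1'])}

for block, lfns in blockFiles.items():
    print "block %s: %s pile-up file(s) at %s" % (
        block, len(lfns), ", ".join(blockSites.get(block, [])))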
Example #3
    def __call__(self):
        """
        _operator()_

        Load PU dataset information from DBS

        """

        reader = DBSReader(self.dbsUrl)
        blocks = reader.listFileBlocks(self.dataset, False)

        for block in blocks:
            #  //
            # // Populate locations
            #//
            locations = reader.listFileBlockLocation(block)
            if locations:
                self.blockSites[block] = locations
            for location in locations:
                if not self.sites.has_key(location):
                    self.sites[location] = set()
                self.sites[location].add(block)
            #  //
            # // Populate File list for block
            #//
            self[block] = reader.lfnsInBlock(block)

        return
Example #4
def tmdbInjectBlock(dbsUrl, datasetPath, blockName, phedexConfig,
                    workingDir="/tmp", nodes=None, storageElements=None):
    """
    _tmdbInjectBlock_

    Util Method for injecting a fileblock into TMDB

    

    """

    fileName = blockName.replace("/","_")
    fileName = fileName.replace("#","")
    dropXML = "%s/%s-PhEDExDrop.xml" % (workingDir, fileName)
    
    xmlContent = makePhEDExDrop(dbsUrl, datasetPath, blockName)
    handle = open(dropXML, 'w')
    handle.write(xmlContent)
    handle.close()

    reader = DBSReader(dbsUrl)
    
    if not storageElements:
        storageElements = reader.listFileBlockLocation(blockName)
    
    tmdbInject(phedexConfig, dropXML, nodes, *storageElements )

    return
Example #5
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset '+self.dataset_to_check+' in DBS url '+ self.DBSURL+'\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:   
            primds=self.dataset_to_check.split('/')[1]
            procds=self.dataset_to_check.split('/')[2]
            tier=self.dataset_to_check.split('/')[3]
            datasets=dbsreader.matchProcessedDatasets(primds,tier,procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
        #  //
        # // Get list of blocks for the dataset and their location
        #//
            if len(dataset.get('PathList'))==0:
                print "===== Empty dataset yet /%s/%s with tiers %s"%(dataset.get('PrimaryDataset')['Name'],dataset.get('Name'),dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot=0
                    print "=== dataset %s"%datasetpath
                    ### FEDE #######
                    if dataset['Description'] != None:
                        print "=== dataset description = ", dataset['Description']
                    ################    
                    blocks=dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList=dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" %block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" %block['OpenForWriting']
                        print "      Number of files: %s"%block['NumberOfFiles']
                        print "      Number of Bytes: %s"%block['BlockSize']
                        print "      Number of Events: %s"%block['NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files=dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s"%(file['FileSize'],file['NumberOfEvents'],file['LogicalFileName'],file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n"%(nevttot,datasetpath)
        if not common.debugLevel:
            common.logger.info('You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='+self.dataset_to_check+' -USER.dbs_url_for_publication='+self.DBSURL+' -debug')
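
The check above relies on the usual /PrimaryDataset/ProcessedDataset/DataTier layout of a dataset path; a minimal standalone sketch of that split, using a made-up dataset name:

# Made-up dataset name, used only to illustrate the split performed above.
dataset_to_check = "/MyPrimary/MyProcessed-v1/GEN-SIM-RECO"
parts = dataset_to_check.split('/')   # ['', 'MyPrimary', 'MyProcessed-v1', 'GEN-SIM-RECO']
if len(parts) < 4:
    raise ValueError("the provided dataset name is not correct")
primds, procds, tier = parts[1], parts[2], parts[3]
print primds, procds, tier   # MyPrimary MyProcessed-v1 GEN-SIM-RECO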
Example #6
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(analysisDataset = self.inputDataset(),
                                        retriveList = [ 'retrive_block',
                                                        'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if not blocks.has_key(block):
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            [ wmbsFile['locations'].add(x) for x in blocks[block] ]
            wmbsFile['block'] = block
            thefiles.addFile(
                wmbsFile
                )


        work = Workflow()
        subs = Subscription(
            fileset = thefiles,
            workflow = work,
            split_algo = 'FileBased',
            type = "Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job = self.splitSize)



        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            [ jobDef['SENames'].extend(list(x['locations']))
              for x  in job.listFiles() ]
            jobDefs.append(jobDef)


        return jobDefs
Example #7
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(
            analysisDataset=self.inputDataset(),
            retriveList=['retrive_block', 'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if not blocks.has_key(block):
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            [wmbsFile['locations'].add(x) for x in blocks[block]]
            wmbsFile['block'] = block
            thefiles.addFile(wmbsFile)

        work = Workflow()
        subs = Subscription(fileset=thefiles,
                            workflow=work,
                            split_algo='FileBased',
                            type="Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job=self.splitSize)

        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            [
                jobDef['SENames'].extend(list(x['locations']))
                for x in job.listFiles()
            ]
            jobDefs.append(jobDef)

        return jobDefs
Example #8
def createJobSplitter(dataset, dbsUrl, onlyClosedBlocks=False, siteWhitelist=[], blockWhitelist=[], withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.


    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)

            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s \n" % (blockName, locations)
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents == True:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData["Files"]:
            totalEvents += fileInfo["NumberOfEvents"]
            fileList.add(fileInfo["LogicalFileName"])
            if withParents:
                parList = [x["LogicalFileName"] for x in fileInfo["ParentList"]]

                newBlock.addFile(fileInfo["LogicalFileName"], fileInfo["NumberOfEvents"], parList)
            else:
                newBlock.addFile(fileInfo["LogicalFileName"], fileInfo["NumberOfEvents"])

        logging.debug("Block %s contains %s events in %s files" % (blockName, totalEvents, len(fileList)))

    return result
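
The site-whitelist branch is the least obvious part of createJobSplitter; the same filtering in a standalone sketch with invented storage element names (filter returns a list in the Python 2 used by these examples):

# Invented storage element names; mirrors the siteWhitelist filtering above.
locations = ["se01.example.org", "se02.example.org"]
siteWhitelist = ["se02.example.org", "se03.example.org"]
siteMatches = filter(lambda x: x in locations, siteWhitelist)
if len(siteMatches) == 0:
    print "block excluded: no whitelisted site hosts it"
else:
    locations = siteMatches   # narrow the block to the matching sites
    print "block kept at:", locations   # ['se02.example.org']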
Example #9
    def loadSites(self, **dbsContacts):
        """
        Get the list of sites hosting the PU from DBS/DLS
                                                                                                              
        """
        dbsUrl = dbsContacts.get('DBSURL', None)
        if dbsUrl == None:
            dbsUrl = getLocalDBSURL()
        
        reader = DBSReader(dbsUrl)

        locations = []
        blocks = reader.listFileBlocks(self.dataset, True)

        for block in blocks:
            try:
                locations = reader.listFileBlockLocation(block)
            except Exception, ex:
                msg = "Unable to find DLS Locations for Block: %s\n" %  block
                msg += str(ex)
                logging.warning(msg)
                continue
Example #10
    def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True):
        """
        _importDataset_

        Import a dataset into the local scope DBS.
        It complains if the parent datasets are not there.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported
        
        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed don't attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block,sename)
                        logging.info(sename)
                    continue

            
            try:
                xferData = reader.dbs.listDatasetContents(
                    sourceDatasetPath,  block
                    )
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
Example #11
    def makeBlockList(self, onlyClosedBlocks = False, sites=None,
        providedOnlyBlocks=None):
        """
        _makeBlockList_


        Generate the list of blocks for the workflow.

        1. Get the list of all blocks from the DBS
        2. Compare to list of blocks in persistency file
        3. Obtain the intersection of the new blocks and the providedOnlyBlocks list.
        4. Set OnlyBlocks parameter to intersection obtained.
        
        """
        #reader = DBSReader(self.dbsUrl)
        # At this point, blocks should be in local DBS
        localDBS = getLocalDBSURL()
        reader = DBSReader(localDBS)
        dbsBlocks = reader.listFileBlocks(self.inputDataset(), onlyClosedBlocks)
        
        if self.persistData.blocks != []:
            remover = lambda x : x not in self.persistData.blocks
            newBlocks = filter(remover, dbsBlocks)
        else:
            newBlocks = dbsBlocks

        #  //
        # // Skipping blocks without site info
        #//
        msg = "Filtering blocks according to Site information..."
        logging.info(msg)
        blocksAtSites = []
        for block in newBlocks:
            locations = reader.listFileBlockLocation(block)
            if not locations:
                msg = "\nSkipping block: "
                msg += "No site info available for block %s " % block
                logging.info(msg)
            elif sites is not None:
                locationInSites = False
                for location in locations:
                    if location in sites:
                        locationInSites = True
                        break
                if locationInSites:
                    blocksAtSites.append(block)
                else:
                    msg = "\nSkipping block: "
                    msg += "Block %s has no replicas in %s" % (block,
                        ", ".join(sites))
                    logging.info(msg)
            else:
                blocksAtSites.append(block)
        newBlocks = blocksAtSites

        if len(newBlocks) == 0:
            msg = "No New Blocks found for dataset\n"
            raise RuntimeError, msg

        #  //
        # // Check presence of provided Blocks in newBlocks
        #//
        blocksToProcess = []
        if providedOnlyBlocks is not None :
            providedOnlyBlocksList = providedOnlyBlocks.split(',')
            msg = "OnlyBlocks setting provided. Processing it..."
            logging.info(msg)
            msg = "OnlyBlocks list contains %s Blocks." % (
                len(providedOnlyBlocksList))
            logging.info(msg)
            blockCount = 1
            for block in providedOnlyBlocksList :
                if block.strip() in newBlocks :
                    blocksToProcess.append(block.strip())
                    msg = "Block %s: Adding Block %s" % (
                        blockCount, block)
                    msg += " to the Whitelist"
                    logging.info(msg)
                else:
                    msg = "Block %s: Skipping Block %s " % (
                        blockCount, block)
                    msg += "It's no New or it has been processed"
                    msg += " already."
                    logging.info(msg)
                blockCount += 1
        else :
            blocksToProcess = newBlocks
            msg = "OnlyBlocks setting not provided. Processing"
            msg += " all New Blocks for Dataset\n"
            logging.info(msg)

        if len(blocksToProcess) == 0 :
            msg = "OnlyBlocks list does not match any New Blocks"
            msg += " found for Dataset\n"
            raise RuntimeError, msg
        
        blockList = str(blocksToProcess)
        blockList = blockList.replace("[", "")
        blockList = blockList.replace("]", "")
        blockList = blockList.replace("\'", "")
        blockList = blockList.replace("\"", "")
        self.workflow.parameters['OnlyBlocks'] = blockList
        self.persistData.blocks.extend(blocksToProcess)
        return
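
The block selection above follows steps 2-4 of the docstring; the same logic in a standalone sketch with made-up block names, which may be easier to follow than the inline version:

# Made-up block names; mirrors steps 2-4 of makeBlockList above.
dbsBlocks = ["/ds#blk1", "/ds#blk2", "/ds#blk3"]
persistedBlocks = ["/ds#blk1"]                      # already processed
providedOnlyBlocks = "/ds#blk2, /ds#blk4"           # user-supplied OnlyBlocks

newBlocks = [b for b in dbsBlocks if b not in persistedBlocks]        # step 2
provided = [b.strip() for b in providedOnlyBlocks.split(',') if b.strip()]
blocksToProcess = [b for b in provided if b in newBlocks]             # step 3
onlyBlocks = ", ".join(blocksToProcess)                               # step 4
print onlyBlocks   # -> /ds#blk2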
Example #12
def createJobSplitter(dataset,
                      dbsUrl,
                      onlyClosedBlocks=False,
                      siteWhitelist=[],
                      blockWhitelist=[],
                      withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.


    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (
                    blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)

            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s \n" % (
                    blockName,
                    locations,
                )
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents == True:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData['Files']:
            totalEvents += fileInfo['NumberOfEvents']
            fileList.add(fileInfo['LogicalFileName'])
            if withParents:
                parList = [
                    x['LogicalFileName'] for x in fileInfo['ParentList']
                ]

                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'], parList)
            else:
                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'])

        logging.debug("Block %s contains %s events in %s files" % (
            blockName,
            totalEvents,
            len(fileList),
        ))

    return result
Example #13
    def makeFileList(self, onlyClosedBlocks = False, sites=None,
        providedOnlyBlocks=None, providedOnlyFiles=None):
        """
        _makeFileList_


        Generate the list of blocks for the workflow.

        1. Get the list of all blocks from the DBS
        2. Compare to list of blocks in persistency file
        3. Obtain the intersection of the new blocks and the providedOnlyBlocks
           list.
        4. Set OnlyBlocks parameter to intersection obtained.
        
        """
        #reader = DBSReader(self.dbsUrl)
        # At this point, blocks should be in local DBS
        localDBS = getLocalDBSURL()
        reader = DBSReader(localDBS)

        #  //
        # // Querying list of blocks from DBS
        #//
        msg = "Querying for closed blocks in Local DBS: %s ..." % localDBS
        logging.info(msg)
        dbsBlocks = reader.listFileBlocks(self.inputDataset(),
                                            onlyClosedBlocks)
        msg = "Retrieved %s close blocks from Local DBS" % len(dbsBlocks)
        logging.info(msg)

        #  //
        # // Constructing mapping structures block-file
        #//
        filesToBlocks = {}
        blocksToFiles = {}
        dbsFiles = reader.dbs.listFiles(path=self.inputDataset())
        for dbsfile in dbsFiles:
            if dbsfile['Block']['Name'] in dbsBlocks:
                filesToBlocks[dbsfile['LogicalFileName']] = \
                                                    dbsfile['Block']['Name']
                blocksToFiles.setdefault(dbsfile['Block']['Name'], []
                                         ).append(dbsfile['LogicalFileName'])

        # OnlyFiles?
        if providedOnlyFiles is not None and \
            providedOnlyFiles.strip().lower() != 'auto':
            msg = "Using OnlyFiles list:"
            msg += " %s files." % len(providedOnlyFiles.split(','))
            logging.info(msg)
            onlyFiles = [x.strip() for x in providedOnlyFiles.split(',') if x]
        # OnlyFiles=auto
        elif providedOnlyFiles is not None:
            msg = "Automatically generating OnlyFiles list from DBS..."
            logging.info(msg)
            onlyFiles = self.createOnlyFilesFromWorkflow()
        # OnlyBlocks
        elif providedOnlyBlocks is not None:
            msg = "Using OnlyBLocks list:"
            msg += " %s blocks." % len(providedOnlyBlocks.split(','))
            logging.info(msg)
            onlyFiles = []
            for block in \
                    [x.strip() for x in providedOnlyBlocks.split(',') if x]:
                onlyFiles.extend(blocksToFiles[block])
        # Processing everything in DBS
        else:
            msg = "Processing whole input dataset..."
            logging.info(msg)
            onlyFiles = []
            for block in dbsBlocks:
                onlyFiles.extend(blocksToFiles[block])

        if not onlyFiles:
            msg = "No files were found for the input dataset: " + \
                self.inputDataset()
            raise RuntimeError, msg

        #  //
        # // Filter files that were already processed
        #//
        if self.persistData.blocks:
            msg = "Filtering files that were already processed for this"
            msg += " workflow..."
            logging.info(msg)
            processedFiles = self.persistData.getFiles()
            msg = "Persistency file has %s file(s)" % len(processedFiles)
            logging.info(msg)
            remover  = lambda x: x not in processedFiles
            onlyFiles = filter(remover, onlyFiles)
            msg = "%s file(s) were removed" % \
                                    str(len(processedFiles) - len(onlyFiles))
            logging.info(msg)

        if not onlyFiles:
            msg = "No New files were found for the input dataset: " + \
                self.inputDataset()
            raise RuntimeError, msg

        #  //
        # // Filter files in blocks without site info
        #//
        msg = "Filtering blocks according to Site information..."
        logging.info(msg)
        candidateBlocks = {}
        for file in onlyFiles:
            candidateBlocks.setdefault(filesToBlocks[file], []).append(file)
        blocksAtSites = []
        for block in candidateBlocks:
            locations = reader.listFileBlockLocation(block)
            if not locations:
                msg = "Excluding block without site info ==> %s" % block
                logging.info(msg)
            elif sites is not None:
                locationInSites = False
                for location in locations:
                    if location in sites:
                        locationInSites = True
                        break
                if locationInSites:
                    blocksAtSites.append(block)
                else:
                    msg = "Excluding block without replicas"
                    msg += " in %s ==> %s" % (block, ", ".join(sites))
                    logging.info(msg)
            else:   
                blocksAtSites.append(block)
        if len(blocksAtSites) == 0:
            msg = "No block has site information."
            raise RuntimeError, msg

        #  //
        # // Constructing OnlyBlocks and OnlyFiles list
        #//
        onlyBlocks = {}
        for block in blocksAtSites:
            onlyBlocks[block] = candidateBlocks[block]
        onlyFiles = []
        for block in onlyBlocks:
            onlyFiles.extend(onlyBlocks[block])

        msg = "\n ==> Files to process: %s" % len(onlyFiles)
        msg += "\n ==> Blocks to process: %s" % len(onlyBlocks)
        logging.info(msg)
    
        blockList = ",".join(onlyBlocks.keys())
        fileList = ",".join(onlyFiles)
        self.workflow.parameters['OnlyBlocks'] = blockList
        self.workflow.parameters['OnlyFiles'] = fileList
        self.persistData.update(onlyBlocks)
        return
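
The two file/block maps built at the top of makeFileList drive all of the later filtering; a standalone sketch of those structures with invented LFNs and block names:

# Invented LFNs and block names; mirrors filesToBlocks / blocksToFiles above.
dbsFiles = [
    {'LogicalFileName': '/store/a.root', 'Block': {'Name': '/ds#blk1'}},
    {'LogicalFileName': '/store/b.root', 'Block': {'Name': '/ds#blk1'}},
    {'LogicalFileName': '/store/c.root', 'Block': {'Name': '/ds#blk2'}},
]
filesToBlocks = {}
blocksToFiles = {}
for dbsfile in dbsFiles:
    lfn = dbsfile['LogicalFileName']
    blk = dbsfile['Block']['Name']
    filesToBlocks[lfn] = blk
    blocksToFiles.setdefault(blk, []).append(lfn)
print blocksToFiles   # {'/ds#blk1': ['/store/a.root', '/store/b.root'], '/ds#blk2': ['/store/c.root']}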
Example #14
    def importDatasetWithExistingParents(self,
                                         sourceDBS,
                                         sourceDatasetPath,
                                         targetDBS,
                                         onlyClosed=True):
        """
        _importDataset_

        Import a dataset into the local scope DBS.
        It complains if the parent datasets are not there.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported
        
        - *targetDBS* : URL for DBS to have dataset imported to

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed don't attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(
                    sourceDatasetPath, block)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
Example #15
#     print " matchProcessedDatasets(%s,%s,%s)"%(primds,tier,procds)
     datasets=dbsreader.matchProcessedDatasets(primds,tier,procds)
else:
     datasets=dbsreader.matchProcessedDatasets("*","*","*")


for dataset in datasets:
#  //
# // Get list of blocks for the dataset and their location
#//
 for datasetpath in dataset.get('PathList'):
   nevttot=0
   print "===== dataset %s"%datasetpath
   blocks=dbsreader.getFileBlocksInfo(datasetpath)
   for block in blocks:
     SEList=dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
     print "== File block %s is located at: %s"%(block['Name'],SEList)
     print "File block name: %s" %block['Name']
     print "File block status: %s" %block['OpenForWriting']
     print "Number of files: %s"%block['NumberOfFiles']
     print "Number of Bytes: %s"%block['BlockSize']
     print "Number of Events: %s"%block['NumberOfEvents']
     if full:
      print "--------- info about files --------"
      print " Size \t Events \t LFN \t FileStatus "
      files=dbsreader.listFilesInBlock(block['Name'])
      for file in files:
        print "%s %s %s %s"%(file['FileSize'],file['NumberOfEvents'],file['LogicalFileName'],file['Status'])
     nevttot = nevttot + block['NumberOfEvents']

   print "\n total events: %s in dataset: %s\n"%(nevttot,datasetpath)
Example #16
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("MergeSize = %s" % self.mergeSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        logging.debug("Connection to DBS at: %s" % self.dbsUrl)

        reader = DBSReader(self.dbsUrl)
        blockList = reader.dbs.listBlocks(dataset=self.inputDataset())
        jobDefs = []

        for block in blockList:
            blockName = block['Name']
            logging.debug("Getting files for block %s" % blockName)
            locations = reader.listFileBlockLocation(blockName)
            fileList = reader.dbs.listFiles(blockName=blockName)
            if not fileList:  # Skip empty blocks
                continue

            thefiles = Fileset(name='FilesToSplit')
            for f in fileList:
                f['Block']['StorageElementList'].extend(locations)
                wmbsFile = File(f['LogicalFileName'])
                [wmbsFile['locations'].add(x) for x in locations]
                wmbsFile['block'] = blockName
                wmbsFile['size'] = f['FileSize']
                thefiles.addFile(wmbsFile)

            work = Workflow()
            subs = Subscription(fileset=thefiles,
                                workflow=work,
                                split_algo='MergeBySize',
                                type="Merge")
            logging.debug("Info for Subscription %s" % subs)
            splitter = SplitterFactory()
            jobfactory = splitter(subs)

            jobGroups = jobfactory(
                merge_size=self.mergeSize,  # min in Bytes
                all_files=True  # merge all files
            )
            if not jobGroups:
                raise (SyntaxError)
            for jobGroup in jobGroups:
                for job in jobGroup.getJobs():
                    jobDef = JobDefinition()
                    jobDef['LFNS'].extend(job.getFiles(type='lfn'))
                    jobDef['SkipEvents'] = 0
                    jobDef['MaxEvents'] = -1
                    [
                        jobDef['SENames'].extend(list(x['locations']))
                        for x in job.getFiles()
                    ]
                    jobDefs.append(jobDef)

        return jobDefs
Example #17
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("MergeSize = %s" % self.mergeSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        logging.debug("Connection to DBS at: %s" % self.dbsUrl)

        reader = DBSReader(self.dbsUrl)
        blockList = reader.dbs.listBlocks(dataset = self.inputDataset())
        jobDefs = []

        for block in blockList:
            blockName = block['Name']
            logging.debug("Getting files for block %s" % blockName)
            locations = reader.listFileBlockLocation(blockName)
            fileList  = reader.dbs.listFiles(blockName = blockName)
            if not fileList: # Skip empty blocks
                continue

            thefiles = Fileset(name='FilesToSplit')
            for f in fileList:
                f['Block']['StorageElementList'].extend(locations)
                wmbsFile = File(f['LogicalFileName'])
                [ wmbsFile['locations'].add(x) for x in locations ]
                wmbsFile['block'] = blockName
                wmbsFile['size']  = f['FileSize']
                thefiles.addFile(wmbsFile)

            work = Workflow()
            subs = Subscription(
                fileset = thefiles,
                workflow = work,
                split_algo = 'MergeBySize',
                type = "Merge")
            logging.debug("Info for Subscription %s" % subs)
            splitter = SplitterFactory()
            jobfactory = splitter(subs)

            jobGroups = jobfactory(
                merge_size=self.mergeSize,                # min in Bytes
                all_files=True                            # merge all files
                )
            if not jobGroups:
                raise(SyntaxError)
            for jobGroup in jobGroups:
                for job in jobGroup.getJobs():
                    jobDef = JobDefinition()
                    jobDef['LFNS'].extend(job.getFiles(type='lfn'))
                    jobDef['SkipEvents'] = 0
                    jobDef['MaxEvents'] = -1
                    [ jobDef['SENames'].extend(list(x['locations']))
                        for x in job.getFiles() ]
                    jobDefs.append(jobDef)

        return jobDefs
Example #18
    def importDataset(self, sourceDBS, sourceDatasetPath, targetDBS,
                      onlyClosed = True, skipNoSiteError=False):
        """
        _importDataset_

        Import a dataset into the local scope DBS with its full parentage
        hierarchy (at least not slowly, since branch info is dropped). Parents
        are also imported. This method imports block by block; each time a
        block is imported, its parent blocks are imported first.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        - *onlyClosed* : Only closed blocks will be imported if set to True

        - *skipNoSiteError* : If this is True, then this method won't raise an
                              Exception if a block has no site information in
                              sourceDBS.

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        blkCounter=0
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            blkCounter=blkCounter+1
            msg="Importing block %s of %s: %s " % (blkCounter,len(inputBlocks),block)
            logging.debug(msg)
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed don't attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(inputBlock['NumberOfFiles']) != "0":
                        # we don't skip the error raising
                        if not skipNoSiteError:
                            msg = "Error in DBSWriter.importDataset\n"
                            msg += "Block has no locations defined: %s" % block
                            raise DBSWriterError(msg)
                        msg = "Block has no locations defined: %s" % block
                        logging.info(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block,sename)
                        logging.info(sename)
                    continue

            try:

                self.dbs.dbsMigrateBlock(sourceDBS, targetDBS, block_name=block)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
                    
            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                # we don't skip the error raising
                if not skipNoSiteError:
                    msg = "Error in DBSWriter.importDataset\n"
                    msg += "Block has no locations defined: %s" % block
                    raise DBSWriterError(msg)
                msg = "Block has no locations defined: %s" % block
                logging.info(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block,sename)
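
The skipNoSiteError switch only matters when a non-empty block comes back with no locations; a standalone sketch of that branch with a made-up block record (RuntimeError stands in for DBSWriterError, which is not defined here):

# Made-up block record and empty location list; mirrors the guard above.
skipNoSiteError = True
inputBlock = {'Name': '/ds#blk1', 'NumberOfFiles': 3}
locations = []   # e.g. nothing registered in DLS yet
if not locations and str(inputBlock['NumberOfFiles']) != "0":
    if not skipNoSiteError:
        # the real code raises DBSWriterError; RuntimeError is a stand-in
        raise RuntimeError("Block has no locations defined: %s"
                           % inputBlock['Name'])
    print "Block has no locations defined: %s" % inputBlock['Name']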
Example #19
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset ' +
                           self.dataset_to_check + ' in DBS url ' +
                           self.DBSURL + '\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:
            primds = self.dataset_to_check.split('/')[1]
            procds = self.dataset_to_check.split('/')[2]
            tier = self.dataset_to_check.split('/')[3]
            datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList')) == 0:
                print "===== Empty dataset yet /%s/%s with tiers %s" % (
                    dataset.get('PrimaryDataset')['Name'], dataset.get('Name'),
                    dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot = 0
                    print "=== dataset %s" % datasetpath
                    ### FEDE #######
                    if dataset['Description'] != None:
                        print "=== dataset description = ", dataset[
                            'Description']
                    ################
                    blocks = dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList = dbsreader.listFileBlockLocation(
                            block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" % block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" % block[
                            'OpenForWriting']
                        print "      Number of files: %s" % block[
                            'NumberOfFiles']
                        print "      Number of Bytes: %s" % block['BlockSize']
                        print "      Number of Events: %s" % block[
                            'NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files = dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s" % (
                                    file['FileSize'], file['NumberOfEvents'],
                                    file['LogicalFileName'], file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n" % (
                        nevttot, datasetpath)
        if not common.debugLevel:
            common.logger.info(
                'You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='
                + self.dataset_to_check + ' -USER.dbs_url_for_publication=' +
                self.DBSURL + ' -debug')
Example #20
    def importDataset(self,
                      sourceDBS,
                      sourceDatasetPath,
                      targetDBS,
                      onlyClosed=True,
                      skipNoSiteError=False):
        """
        _importDataset_

        Import a dataset into the local scope DBS with its full parentage
        hierarchy (at least not slowly, since branch info is dropped). Parents
        are also imported. This method imports block by block; each time a
        block is imported, its parent blocks are imported first.

        - *sourceDBS* : URL for input DBS instance

        - *sourceDatasetPath* : Dataset Path to be imported

        - *targetDBS* : URL for DBS to have dataset imported to

        - *onlyClosed* : Only closed blocks will be imported if set to True

        - *skipNoSiteError* : If this is True, then this method won't raise an
                              Exception if a block has no site information in
                              sourceDBS.

        """
        reader = DBSReader(sourceDBS)
        inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
        blkCounter = 0
        for inputBlock in inputBlocks:
            block = inputBlock['Name']
            #  //
            # // Test block does not exist in target
            #//
            blkCounter = blkCounter + 1
            msg = "Importing block %s of %s: %s " % (blkCounter,
                                                     len(inputBlocks), block)
            logging.debug(msg)
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed don't attempt transfer
                if str(inputBlock['OpenForWriting']) != '1':
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Import of that block"
                    logging.warning(msg)
                    locations = reader.listFileBlockLocation(block)
                    # only empty file blocks can have no location
                    if not locations and str(
                            inputBlock['NumberOfFiles']) != "0":
                        # we don't skip the error raising
                        if not skipNoSiteError:
                            msg = "Error in DBSWriter.importDataset\n"
                            msg += "Block has no locations defined: %s" % block
                            raise DBSWriterError(msg)
                        msg = "Block has no locations defined: %s" % block
                        logging.info(msg)
                    logging.info("Update block locations to:")
                    for sename in locations:
                        self.dbs.addReplicaToBlock(block, sename)
                        logging.info(sename)
                    continue

            try:

                self.dbs.dbsMigrateBlock(sourceDBS,
                                         targetDBS,
                                         block_name=block)
            except DbsException, ex:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    sourceDatasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            locations = reader.listFileBlockLocation(block)
            # only empty file blocks can have no location
            if not locations and str(inputBlock['NumberOfFiles']) != "0":
                # we don't skip the error raising
                if not skipNoSiteError:
                    msg = "Error in DBSWriter.importDataset\n"
                    msg += "Block has no locations defined: %s" % block
                    raise DBSWriterError(msg)
                msg = "Block has no locations defined: %s" % block
                logging.info(msg)
            for sename in locations:
                self.dbs.addReplicaToBlock(block, sename)