Example #1
0
class DBSBufferBlock(object):
    """
    _DBSBufferBlock_

    """
    def __init__(self, name, location, datasetpath):
        """
        Just the necessary objects

        Expects:
          name:  The blockname in full
          location: The PNN of the site the block is at
        """

        self.data = {
            'dataset_conf_list': [],    # List of dataset configurations
            'file_conf_list': [],       # List of files, the configuration for each
            'files': [],                # List of file objects
            'block': {},                # Dict of block info
            'processing_era': {},       # Dict of processing era info
            'acquisition_era': {},      # Dict of acquisition era information
            'primds': {},               # Dict of primary dataset info
            'dataset': {},              # Dict of processed dataset info
            'file_parent_list': [],     # List of file parents
            'dataset_parent_list': [],  # List of parent datasets (DBS requires this as a list although it only allows one parent)
            'close_settings': {}        # Dict of info about block close settings
        }

        self.files = []
        self.encoder = JSONRequests()
        self.status = 'Open'
        self.inBuff = False
        self.startTime = time.time()
        self.name = name
        self.location = location
        self.datasetpath = datasetpath
        self.workflows = set()

        self.data['block']['block_name'] = name
        self.data['block']['origin_site_name'] = location
        self.data['block']['open_for_writing'] = 1

        self.data['block']['create_by'] = "WMAgent"
        self.data['block']['creation_date'] = int(time.time())
        self.data['block']['block_size'] = 0
        self.data['block']['file_count'] = 0
        self.data['block']['block_events'] = 0

        self.data['close_settings'] = {}
        self.data['close_settings']['block_close_max_wait_time'] = None
        self.data['close_settings']['block_close_max_events'] = None
        self.data['close_settings']['block_close_max_size'] = None
        self.data['close_settings']['block_close_max_files'] = None
        return

    def encode(self):
        """
        _encode_

        Turn this into a JSON object for transmission
        to DBS
        """

        return self.encoder.encode(data=self.data)

    def addFile(self, dbsFile, datasetType, primaryDatasetType):
        """
        _addFile_

        Add a DBSBufferFile object to our block
        """
        if dbsFile['id'] in [x['id'] for x in self.files]:
            msg = "Duplicate file inserted into DBSBufferBlock: %i\n" % (
                dbsFile['id'])
            msg += "Ignoring this file for now!\n"
            logging.error(msg)
            logging.debug("Block length: %i", len(self.files))
            l = sorted([x['id'] for x in self.files])
            logging.debug("First file: %s    Last file: %s", l[0], l[-1])
            return

        for setting in self.data['close_settings']:
            if self.data['close_settings'][setting] is None:
                self.data['close_settings'][setting] = dbsFile[setting]

        self.workflows.add(dbsFile['workflow'])

        self.files.append(dbsFile)
        self.data['block']['block_size'] += int(dbsFile['size'])
        self.data['block']['file_count'] += 1
        self.data['block']['block_events'] += int(dbsFile['events'])

        # Assemble information for the file itself
        fileDict = {}
        fileDict['file_type'] = 'EDM'
        fileDict['logical_file_name'] = dbsFile['lfn']
        fileDict['file_size'] = dbsFile['size']
        fileDict['event_count'] = dbsFile['events']
        fileDict['last_modified_by'] = "WMAgent"
        fileDict['last_modification_date'] = int(time.time())
        fileDict['auto_cross_section'] = 0.0

        # Do the checksums
        for cktype in dbsFile['checksums']:
            cksum = dbsFile['checksums'][cktype]
            if cktype.lower() == 'cksum':
                fileDict['check_sum'] = cksum
            elif cktype.lower() == 'adler32':
                fileDict['adler32'] = cksum
            elif cktype.lower() == 'md5':
                fileDict['md5'] = cksum

        # Do the runs
        lumiList = []
        for run in dbsFile.getRuns():
            for lumi in run.lumis:
                dbsLumiDict = {'lumi_section_num': lumi, 'run_num': run.run}
                if run.getEventsByLumi(lumi) is not None:
                    # if the per-lumi event count is known, include it in the DBS upload
                    dbsLumiDict['event_count'] = run.getEventsByLumi(lumi)
                lumiList.append(dbsLumiDict)
        fileDict['file_lumi_list'] = lumiList

        # Append to the files list
        self.data['files'].append(fileDict)

        # If dataset_parent_list is defined, don't add the file parentage.
        # This means the block comes from a StepChain workflow and file parentage will be resolved later.
        if not self.data['dataset_parent_list']:
            # now add file to data
            parentLFNs = dbsFile.getParentLFNs()
            for lfn in parentLFNs:
                self.addFileParent(child=dbsFile['lfn'], parent=lfn)

        # Do the algo
        algo = self.addConfiguration(release=dbsFile['appVer'],
                                     psetHash=dbsFile['psetHash'],
                                     appName=dbsFile['appName'],
                                     outputLabel=dbsFile['appFam'],
                                     globalTag=dbsFile['globalTag'])

        # Now add the file with the algo
        # Try to avoid messing with pointers here
        fileAlgo = {}
        fileAlgo.update(algo)
        fileAlgo['lfn'] = dbsFile['lfn']
        self.data['file_conf_list'].append(fileAlgo)

        if dbsFile.get('acquisition_era', False):
            self.setAcquisitionEra(dbsFile['acquisition_era'])
        elif dbsFile.get('acquisitionEra', False):
            self.setAcquisitionEra(dbsFile['acquisitionEra'])
        if dbsFile.get('processingVer', False):
            self.setProcessingVer(dbsFile['processingVer'])
        elif dbsFile.get('processing_ver', False):
            self.setProcessingVer(dbsFile['processing_ver'])

        # Take care of the dataset
        self.setDataset(datasetName=dbsFile['datasetPath'],
                        primaryType=primaryDatasetType,
                        datasetType=datasetType,
                        physicsGroup=dbsFile.get('physicsGroup', None),
                        prep_id=dbsFile.get('prep_id', None))

        return

    def addFileParent(self, child, parent):
        """
        _addFileParent_

        Add file parents to the data block
        """
        info = {'parent_logical_file_name': parent, 'logical_file_name': child}
        self.data['file_parent_list'].append(info)

        return

    def addBlockParent(self, parent):
        """
        _addBlockParent_

        Add the parents of the block
        """

        self.data['block_parent_list'].append({'block_name': parent})
        return

    def addDatasetParent(self, parent):
        """
        _addDatasetParent_

        Add the parent datasets to the data block
        """
        self.data['dataset_parent_list'].append(parent)
        return

    def setProcessingVer(self, procVer):
        """
        _setProcessingVer_

        Set the block's processing version.
        """
        # compatibility statement for old style proc ver (still needed ?)
        if procVer.count("-") == 1:
            self.data["processing_era"]["processing_version"] = procVer.split(
                "-v")[1]
        else:
            self.data["processing_era"]["processing_version"] = procVer

        self.data["processing_era"]["create_by"] = "WMAgent"
        self.data["processing_era"]["description"] = ""
        return

    def setAcquisitionEra(self, era, date=123456789):
        """
        _setAcquisitionEra_

        Set the acquisition era for the block
        """
        self.data['acquisition_era']['acquisition_era_name'] = era
        self.data['acquisition_era']['start_date'] = date
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Sets the name of the physics group to which the dataset is attached
        """

        self.data['dataset']['physics_group_name'] = group
        return

    def getDatasetPath(self):
        """
        _getDatasetPath_

        Return the datasetpath
        """
        return self.datasetpath

    def getDataset(self):
        """
        _getDataset_

        Return the dataset (None if not set)
        """
        return self.data['dataset'].get('dataset', None)

    def setDataset(self,
                   datasetName,
                   primaryType,
                   datasetType,
                   physicsGroup=None,
                   prep_id=None,
                   overwrite=False):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.getDataset() is not None and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)

        if not datasetType in [
                'VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED'
        ]:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        try:
            if datasetName[0] == '/':
                _, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        # Do the primary dataset
        self.data['primds']['primary_ds_name'] = primary
        self.data['primds']['primary_ds_type'] = primaryType
        self.data['primds']['create_by'] = "WMAgent"
        self.data['primds']['creation_date'] = int(time.time())

        # Do the processed
        self.data['dataset']['physics_group_name'] = physicsGroup
        self.data['dataset']['processed_ds_name'] = processed
        self.data['dataset']['data_tier_name'] = tier
        self.data['dataset']['dataset_access_type'] = datasetType
        self.data['dataset']['dataset'] = datasetName
        self.data['dataset']['prep_id'] = prep_id
        # Add misc meta data.
        self.data['dataset']['create_by'] = "WMAgent"
        self.data['dataset']['last_modified_by'] = "WMAgent"
        self.data['dataset']['creation_date'] = int(time.time())
        self.data['dataset']['last_modification_date'] = int(time.time())
        return

    def addConfiguration(self,
                         release,
                         psetHash,
                         appName='cmsRun',
                         outputLabel='Merged',
                         globalTag='None'):
        """
        _addConfiguration_

        Add the algorithm config to the data block
        """

        algo = {
            'release_version': release,
            'pset_hash': psetHash,
            'app_name': appName,
            'output_module_label': outputLabel,
            'global_tag': globalTag
        }

        if not algo in self.data['dataset_conf_list']:
            self.data['dataset_conf_list'].append(algo)

        return algo

    def getNFiles(self):
        """
        _getNFiles_

        Return the number of files in the block
        """

        return len(self.files)

    def getSize(self):
        """
        _getSize_

        Get size of block
        """
        return self.data['block']['block_size']

    def getNumEvents(self):
        """
        _getNumEvents_

        Get the number of events in the block
        """
        return self.data['block']['block_events']

    def getTime(self):
        """
        _getTime_

        Return the time the block has been running
        """

        return time.time() - self.startTime

    def getMaxBlockTime(self):
        """
        _getMaxBlockTime_

        Return the max time that the block should stay open
        """
        return self.data['close_settings']['block_close_max_wait_time']

    def getMaxBlockSize(self):
        """
        _getMaxBlockSize_

        Return the max size allowed for the block
        """
        return self.data['close_settings']['block_close_max_size']

    def getMaxBlockNumEvents(self):
        """
        _getMaxBlockNumEvents_

        Return the max number of events allowed for the block
        """
        return self.data['close_settings']['block_close_max_events']

    def getMaxBlockFiles(self):
        """
        _getMaxBlockFiles_

        Return the max number of files allowed for the block
        """
        return self.data['close_settings']['block_close_max_files']

    def getName(self):
        """
        _getName_

        Get Name
        """

        return self.name

    def getLocation(self):
        """
        _getLocation_

        Get location
        """

        return self.location

    def getStartTime(self):
        """
        _getStartTime_

        Get the time the block was opened at
        """

        return self.startTime

    def FillFromDBSBuffer(self, blockInfo):
        """
        _FillFromDBSBuffer_

        Take the info provided by LoadBlocks and
        use it to create a block object
        """
        # Blocks loaded out of the buffer should
        # have both a creation time, and should
        # be in the buffer (duh)
        self.startTime = blockInfo.get('creation_date')
        self.inBuff = True

        if 'status' in blockInfo:
            self.status = blockInfo['status']
            if self.status == "Pending":
                self.data['block']['open_for_writing'] = 0

            del blockInfo['status']

        for key in blockInfo:
            self.data['block'][key] = blockInfo.get(key)

    def convertToDBSBlock(self):
        """
        Convert to the DBSBlock structure for upload to DBS.
        TODO: check file lumi events and validate that the event count is not null
        """
        block = {}

        # TODO: instead of listing keys to remove, switch to a list of keys to keep.
        # Ask the DBS team to publish that list (API).
        keyToRemove = [
            'insertedFiles', 'newFiles', 'file_count', 'block_size',
            'origin_site_name', 'creation_date', 'open', 'Name',
            'close_settings'
        ]

        nestedKeyToRemove = [
            'block.block_events', 'block.datasetpath', 'block.workflows'
        ]

        dbsBufferToDBSBlockKey = {
            'block_size': 'BlockSize',
            'creation_date': 'CreationDate',
            'file_count': 'NumberOfFiles',
            'origin_site_name': 'location'
        }

        # clone the new DBSBlock dict after filtering out the data.
        for key in self.data:
            if key in keyToRemove:
                continue
            elif key in dbsBufferToDBSBlockKey:
                block[dbsBufferToDBSBlockKey[key]] = copy.deepcopy(
                    self.data[key])
            else:
                block[key] = copy.deepcopy(self.data[key])

        # delete nested key dictionary
        for nestedKey in nestedKeyToRemove:
            firstkey, subkey = nestedKey.split('.', 1)
            if firstkey in block and subkey in block[firstkey]:
                del block[firstkey][subkey]

        return block

    def setPendingAndCloseBlock(self):
        "set the block status as Pending for upload as well as closed"
        # Pending means ready to upload
        self.status = "Pending"
        # close block on DBS3 status
        self.data['block']['open_for_writing'] = 0
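
A minimal sketch (not part of the class above) of how a caller might combine the close-settings getters of DBSBufferBlock to decide whether a block is ready to be closed. The helper name shouldCloseBlock and the exact comparison operators are illustrative assumptions; only the getter methods come from the class itself.

def shouldCloseBlock(block):
    """Return True if any configured close threshold of a DBSBufferBlock has been reached."""
    # Each threshold may be None (unset), in which case it is ignored.
    maxTime = block.getMaxBlockTime()         # block_close_max_wait_time
    maxSize = block.getMaxBlockSize()         # block_close_max_size
    maxEvents = block.getMaxBlockNumEvents()  # block_close_max_events
    maxFiles = block.getMaxBlockFiles()       # block_close_max_files

    if maxTime is not None and block.getTime() > maxTime:
        return True
    if maxSize is not None and block.getSize() >= maxSize:
        return True
    if maxEvents is not None and block.getNumEvents() >= maxEvents:
        return True
    if maxFiles is not None and block.getNFiles() >= maxFiles:
        return True
    return False
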
Example #2
0
class DBSBlock:
    """
    DBSBlock

    Class for holding all the necessary equipment for a DBSBlock
    """

    def __init__(self, name, location, das):
        """
        Just the necessary objects

        Expects:
          name:  The blockname in full
          location: The SE-name of the site the block is at
        """
        
        
        self.data      = {'dataset_conf_list':    [],   # List of dataset configurations
                          'file_conf_list':       [],   # List of files, with the configuration for each
                          'files':                [],   # List of file objects
                          'block':                {},   # Dict of block info
                          'block_parent_list':    [],   # List of block parents
                          'processing_era':       {},   # Dict of processing era info
                          'ds_parent_list':       [],   # List of parent datasets
                          'acquisition_era':      {},   # Dict of acquisition era information
                          'primds':               {},   # Dict of primary dataset info
                          'dataset':              {},   # Dict of processed dataset info
                          'physics_group_name':   {},   # Physics Group Name
                          'file_parent_list':     []}   # List of file parents
                          
        self.files     = [] 
        self.encoder   = JSONRequests()
        self.status    = 'Open'
        self.inBuff    = False
        self.startTime = time.time()
        self.name      = name
        self.location  = location
        self.das       = das

        self.data['block']['block_name']       = name
        self.data['block']['origin_site_name'] = location
        self.data['block']['open_for_writing'] = 0  # If we're sending a block, it better be open

        return                      


    def encode(self):
        """
        _encode_

        Turn this into a JSON object for transmission
        to DBS
        """

        return self.encoder.encode(data = self.data)



    def addFile(self, dbsFile):
        """
        _addFile_
        
        Add a DBSBufferFile object to our block
        """

        
        
        if dbsFile['id'] in [x['id'] for x in self.files]:
            msg =  "Duplicate file inserted into DBSBlock: %i\n" % (dbsFile['id'])
            msg += "Ignoring this file for now!\n"
            logging.error(msg)
            logging.debug("Block length: %i" % len(self.files))
            l = [x['id'] for x in self.files]
            l.sort()
            logging.debug("First file: %s    Last file: %s" % (l[0], l[-1]))
            return
        
        self.files.append(dbsFile)
        # Assemble information for the file itself
        fileDict = {}
        fileDict['file_type']              =  'EDM'
        fileDict['logical_file_name']      = dbsFile['lfn']
        fileDict['file_size']              = dbsFile['size']
        fileDict['event_count']            = dbsFile['events']
        # Do the checksums
        for cktype in dbsFile['checksums'].keys():
            cksum = dbsFile['checksums'][cktype]
            if cktype.lower() == 'cksum':
                fileDict['check_sum'] = cksum
            elif cktype.lower() == 'adler32':
                fileDict['adler32'] = cksum
            elif cktype.lower() == 'md5':
                fileDict['md5'] = cksum

        # Do the runs
        lumiList = []
        for run in dbsFile.getRuns():
            for lumi in run.lumis:
                lumiList.append({'lumi_section_num': lumi, 'run_num': run.run})
        fileDict['file_lumi_list'] = lumiList
                
        # Append to the files list
        self.data['files'].append(fileDict)
        
        # now add file to data
        parentLFNs = dbsFile.getParentLFNs()
        for lfn in parentLFNs:
            self.addFileParent(child = dbsFile['lfn'], parent = lfn)
            
            
        # Do the algo
        algo = self.addConfiguration(release = dbsFile['appVer'],
                                     psetHash = dbsFile['psetHash'],
                                     appName = dbsFile['appName'],
                                     outputLabel = dbsFile['appFam'],
                                     globalTag = dbsFile['globalTag'])
        
        # Now add the file with the algo
        # Try to avoid messing with pointers here
        fileAlgo = {}
        fileAlgo.update(algo)
        fileAlgo['lfn'] = dbsFile['lfn']
        self.data['file_conf_list'].append(fileAlgo)

        if dbsFile.get('acquisition_era', False):
            self.setAcquisitionEra(dbsFile['acquisition_era'])
        elif dbsFile.get('acquisitionEra', False):
            self.setAcquisitionEra(dbsFile['acquisitionEra'])
        if dbsFile.get('processingVer', False):
            self.setProcessingVer(dbsFile['processingVer'])
        elif dbsFile.get('processing_ver', False):
            self.setProcessingVer(dbsFile['processing_ver'])

        # Take care of the dataset
        self.setDataset(datasetName  = dbsFile['datasetPath'],
                        primaryType  = dbsFile.get('primaryType', 'DATA'),
                        datasetType  = dbsFile.get('datasetType', 'PRODUCTION'),
                        physicsGroup = dbsFile.get('physicsGroup', None))
       
        return

    def addFileParent(self, child, parent):
        """
        _addFileParent_
        
        Add file parents to the data block
        """
        info = {'parent_logical_file_name': parent,
                'logical_file_name': child}
        self.data['file_parent_list'].append(info)
        
        return

    def addBlockParent(self, parent):
        """
        _addBlockParent_
        
        Add the parents of the block
        """

        self.data['block_parent_list'].append({'block_name': parent})
        return

    def addDatasetParent(self, parent):
        """
        _addDatasetParent_

        Add the parent datasets to the data block
        """

        self.data['ds_parent_list'].append({'parent_dataset': parent})
        return

    def setProcessingVer(self, era):
        """
        _setProcessingVer_

        Set the block's processing version
        """

        self.data['processing_era']['processing_version'] = era
        return

    def setAcquisitionEra(self, era, date = 123456789):
        """
        _setAcquisitionEra_

        Set the acquisition era for the block
        """

        self.data['acquisition_era']['acquisition_era_name'] = era
        self.data['acquisition_era']['start_date']           = date
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Sets the name of the physics group to which the dataset is attached
        """

        self.data['dataset']['physics_group_name'] = group
        return

    def hasDataset(self):
        """
        _hasDataset_

        Check and see if the dataset has been properly set
        """

        return self.data['dataset'].get('dataset', False)
        


    def setDataset(self, datasetName, primaryType,
                   datasetType, physicsGroup = None, overwrite = False, valid = 1):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """

        if self.hasDataset() and not overwrite:
            # Do nothing, we already have a dataset
            return

        if not primaryType in ['MC', 'DATA', 'TEST']:
            msg = "Invalid primaryDatasetType %s\n" % primaryType
            logging.error(msg)
            raise DBSBlockException(msg)

        if not datasetType in ['VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED']:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBlockException(msg)

        try:
            if datasetName[0] == '/':
                junk, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception as ex:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBlockException(msg)

        # Do the primary dataset
        self.data['primds']['primary_ds_name'] = primary
        self.data['primds']['primary_ds_type'] = primaryType


        # Do the processed
        self.data['dataset']['physics_group_name']  = physicsGroup
        self.data['dataset']['processed_ds_name']   = processed
        self.data['dataset']['data_tier_name']      = tier
        self.data['dataset']['dataset_access_type'] = datasetType
        self.data['dataset']['dataset']             = datasetName

        return
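
A standalone sketch of the dataset-path handling used by setDataset() above: a DBS dataset path has the form /<primary>/<processed>/<tier>, with the leading slash optional in this code. The helper name parseDatasetPath and the sample path are illustrative assumptions.

def parseDatasetPath(datasetName):
    """Split a dataset path into (primary, processed, tier); raise ValueError if malformed."""
    try:
        if datasetName.startswith('/'):
            # A leading '/' produces an empty first element when splitting.
            _, primary, processed, tier = datasetName.split('/')
        else:
            primary, processed, tier = datasetName.split('/')
    except ValueError:
        raise ValueError("Invalid dataset name %s" % datasetName)
    return primary, processed, tier

# Illustrative usage:
# parseDatasetPath('/Cosmics/Run2010A-v1/RAW')  ->  ('Cosmics', 'Run2010A-v1', 'RAW')
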
Example #3
0
class ProcessPool:
    def __init__(self, slaveClassName, totalSlaves, componentDir,
                 config, namespace = 'WMComponent', inPort = '5555',
                 outPort = '5558'):
        """
        __init__

        Constructor for the process pool.  The slave class name must be based
        inside the WMComponent namespace.  For example, the JobAccountant would
        pass in 'JobAccountant.AccountantWorker' to run the AccountantWorker
        class.  All log files will be stored in the component directory that is
        passed in.  Each slave will have its own log file.

        Note that the config is only used to determine database connection
        parameters.  It is not passed to the slave class.
        """
        self.enqueueIndex = 0
        self.dequeueIndex = 0
        self.runningWork  = 0

        #Use the Services.Requests JSONizer, which handles __to_json__ calls
        self.jsonHandler = JSONRequests()

        # heartbeat should be registered at this point
        if getattr(config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))

        self.slaveClassName = slaveClassName
        self.componentDir   = componentDir
        self.config         = config
        # Grab the Python version from the running interpreter.
        # Assume the naming convention pythonA.B, i.e., python2.4 for v2.4.X
        majorVersion = sys.version_info[0]
        minorVersion = sys.version_info[1]

        if majorVersion and minorVersion:
            self.versionString = "python%i.%i" % (majorVersion, minorVersion)
        else:
            self.versionString = "python2.6"

        self.workers   = []
        self.nSlaves   = totalSlaves
        self.namespace = namespace
        self.inPort    = inPort
        self.outPort   = outPort


        # Pickle the config
        self.configPath = os.path.join(componentDir, '%s_config.pkl' % slaveClassName)
        if os.path.exists(self.configPath):
            # Then we note it and overwrite it
            msg =  "Something's in the way of the ProcessPool config: %s" % self.configPath
            logging.error(msg)
        with open(self.configPath, 'wb') as f:
            cPickle.dump(config, f)

        # Set up ZMQ
        try:
            context = zmq.Context()
            self.sender = context.socket(zmq.PUSH)
            self.sender.bind("tcp://*:%s" % inPort)
            self.sink = context.socket(zmq.PULL)
            self.sink.bind("tcp://*:%s" % outPort)
        except zmq.ZMQError:
            # Try this again in a moment to see
            # if it's just being held by something pre-existing
            import time
            time.sleep(1)
            logging.error("Blocked socket on startup: Attempting sleep to give it time to clear.")
            try:
                context = zmq.Context()
                self.sender = context.socket(zmq.PUSH)
                self.sender.bind("tcp://*:%s" % inPort)
                self.sink = context.socket(zmq.PULL)
                self.sink.bind("tcp://*:%s" % outPort)
            except Exception as ex:
                msg =  "Error attempting to open TCP sockets\n"
                msg += str(ex)
                logging.error(msg)
                import traceback
                print(traceback.format_exc())
                raise ProcessPoolException(msg)

        # Now actually create the slaves
        self.createSlaves()


        return


    def createSlaves(self):
        """
        _createSlaves_

        Create the slaves by using the values from __init__()
        Moving it into a separate function allows us to restart
        all of them.
        """

        totalSlaves    = self.nSlaves
        slaveClassName = self.slaveClassName
        config         = self.config
        namespace      = self.namespace
        inPort         = self.inPort
        outPort        = self.outPort

        slaveArgs = [self.versionString, __file__, self.slaveClassName, inPort,
                     outPort, self.configPath, self.componentDir, self.namespace]

        count = 0
        while totalSlaves > 0:
            # For each requested worker, create a slave process.
            # That process re-runs this module (WMCore.ProcessPool) and opens
            # a process pool that loads the designated class.
            slaveProcess = subprocess.Popen(slaveArgs, stdin = subprocess.PIPE,
                                            stdout = subprocess.PIPE)
            self.workers.append(slaveProcess)
            totalSlaves -= 1
            count += 1


        return

    def _subProcessName(self, slaveClassName, sequence):
        """ subProcessName for heartbeat
            could change to use process ID as a suffix
        """
        return "%s_%s" % (slaveClassName, sequence + 1)

    def __del__(self):
        """
        __del__

        Kill all the worker processes by sending them an invalid JSON object.
        This will cause them to shut down.
        """
        self.close()
        return

    def close(self):
        """
        _close_

        Close shuts down all the active systems by:

        a) Sending STOP commands for all workers
        b) Closing the pipes
        c) Shutting down the workers themselves
        """
        for i in range(self.nSlaves):
            try:
                encodedWork = self.jsonHandler.encode('STOP')
                self.sender.send(encodedWork)
            except Exception as ex:
                # Might be already failed.  Nothing you can
                # really do about that.
                logging.error("Failure killing running process: %s" % str(ex))
                pass

        try:
            self.sender.close()
        except Exception:
            # We can't really do anything if we fail
            pass
        try:
            self.sink.close()
        except Exception:
            # We can't do anything if we fail
            pass

        # Now close the workers by hand
        for worker in self.workers:
            try:
                worker.join()
            except Exception as ex:
                try:
                    worker.terminate()
                except Exception as ex2:
                    logging.error("Failure to join or terminate process")
                    logging.error(str(ex))
                    logging.error(str(ex2))
                    continue
        self.workers = []
        return

    def enqueue(self, work, list = False):
        """
        _enqueue_

        Assign work to the worker processes.  The work parameter must be a
        list where each item in the list can be serialized into JSON.

        If list is True, the entire list is sent as one piece of work.
        """
        if len(self.workers) < 1:
            # Someone's shut down the system
            msg = "Attempting to send work after system failure and shutdown!\n"
            logging.error(msg)
            raise ProcessPoolException(msg)

        if not list:
            for w in work:
                encodedWork = self.jsonHandler.encode(w)
                self.sender.send(encodedWork)
                self.runningWork += 1
        else:
            encodedWork = self.jsonHandler.encode(work)
            self.sender.send(encodedWork)
            self.runningWork += 1

        return



    def dequeue(self, totalItems = 1):
        """
        _dequeue_

        Retrieve completed work from the slave workers.  This method will block
        until enough work has been completed.
        """
        completedWork = []

        if totalItems > self.runningWork:
            msg = "Asked to dequeue more work then is running!\n"
            msg += "Failing"
            logging.error(msg)
            raise ProcessPoolException(msg)

        while totalItems > 0:
            try:
                output = self.sink.recv()
                decode = self.jsonHandler.decode(output)
                if isinstance(decode, dict) and decode.get('type', None) == 'ERROR':
                    # Then we had some kind of error
                    msg = decode.get('msg', 'Unknown Error in ProcessPool')
                    logging.error("Received Error Message from ProcessPool Slave")
                    logging.error(msg)
                    self.close()
                    raise ProcessPoolException(msg)
                completedWork.append(decode)
                self.runningWork -= 1
                totalItems -= 1
            except Exception as ex:
                msg =  "Exception while getting slave outputin ProcessPool.\n"
                msg += str(ex)
                logging.error(msg)
                break

        return completedWork


    def restart(self):
        """
        _restart_

        Delete everything and restart all pools
        """

        self.close()
        self.createSlaves()
        return
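
A minimal usage sketch for the pool above, under stated assumptions: WMCore is installed and ProcessPool is importable from WMCore.ProcessPool (the module named in the comments above), `config` is a WMAgent configuration object providing the database connection and Agent section, and the slave class name ('JobAccountant.AccountantWorker', taken from the constructor docstring), worker count, and component directory are illustrative.

from WMCore.ProcessPool import ProcessPool  # assumed import path

def runSlaveWork(config, workUnits):
    """Send JSON-serializable work items to the slaves and collect their results."""
    pool = ProcessPool('JobAccountant.AccountantWorker',  # slave class inside the WMComponent namespace
                       totalSlaves=4,
                       componentDir='/tmp/procpool',      # illustrative log/config directory
                       config=config)
    try:
        pool.enqueue(workUnits)                         # one JSON message per item
        return pool.dequeue(totalItems=len(workUnits))  # blocks until all items come back
    finally:
        pool.close()                                    # sends STOP to every slave
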
Example #4
0
        try:
            logging.error(input)
            output = slaveClass(input)
        except Exception as ex:
            crashMessage = "Slave process crashed with exception: " + str(ex)
            crashMessage += "\nStacktrace:\n"

            stackTrace = traceback.format_tb(sys.exc_info()[2], None)
            for stackFrame in stackTrace:
                crashMessage += stackFrame

            logging.error(crashMessage)
            try:
                output        = {'type': 'ERROR', 'msg': crashMessage}
                encodedOutput = jsonHandler.encode(output)
                sender.send(encodedOutput)
                logging.error("Sent error message and now breaking")
                break
            except Exception as ex:
                logging.error("Failed to send error message")
                logging.error(str(ex))
                del jsonHandler
                sys.exit(1)

        if output is not None:
            if isinstance(output, list):
                for item in output:
                    encodedOutput = jsonHandler.encode(item)
                    sender.send(encodedOutput)
            else:
Example #5
0
class DBSBufferBlock:
    """
    _DBSBufferBlock_

    """

    def __init__(self, name, location, datasetpath):
        """
        Just the necessary objects

        Expects:
          name:  The blockname in full
          location: The PNN of the site the block is at
        """


        self.data      = {'dataset_conf_list':    [],   # List of dataset configurations
                          'file_conf_list':       [],   # List of files, the configuration for each
                          'files':                [],   # List of file objects
                          'block':                {},   # Dict of block info
                          'processing_era':       {},   # Dict of processing era info
                          'acquisition_era':      {},   # Dict of acquisition era information
                          'primds':               {},   # Dict of primary dataset info
                          'dataset':              {},   # Dict of processed dataset info
                          'file_parent_list':     [],   # List of file parents
                          'close_settings':       {}}   # Dict of info about block close settings

        self.files        = []
        self.encoder      = JSONRequests()
        self.status       = 'Open'
        self.inBuff       = False
        self.startTime    = time.time()
        self.name         = name
        self.location     = location
        self.datasetpath  = datasetpath
        self.workflows    = set()

        self.data['block']['block_name']       = name
        self.data['block']['origin_site_name'] = location
        self.data['block']['open_for_writing'] = 1

        self.data['block']['create_by'] = "WMAgent"
        self.data['block']['creation_date'] = int(time.time())
        self.data['block']['block_size'] = 0
        self.data['block']['file_count'] = 0
        self.data['block']['block_events'] = 0

        self.data['close_settings'] = {}
        self.data['close_settings']['block_close_max_wait_time'] = None
        self.data['close_settings']['block_close_max_events'] = None
        self.data['close_settings']['block_close_max_size'] = None
        self.data['close_settings']['block_close_max_files'] = None
        return


    def encode(self):
        """
        _encode_

        Turn this into a JSON object for transmission
        to DBS
        """

        return self.encoder.encode(data = self.data)



    def addFile(self, dbsFile, datasetType, primaryDatasetType):
        """
        _addFile_

        Add a DBSBufferFile object to our block
        """
        if dbsFile['id'] in [x['id'] for x in self.files]:
            msg =  "Duplicate file inserted into DBSBufferBlock: %i\n" % (dbsFile['id'])
            msg += "Ignoring this file for now!\n"
            logging.error(msg)
            logging.debug("Block length: %i" % len(self.files))
            l = sorted([x['id'] for x in self.files])
            logging.debug("First file: %s    Last file: %s" % (l[0], l[-1]))
            return

        for setting in self.data['close_settings']:
            if self.data['close_settings'][setting] is None:
                self.data['close_settings'][setting] = dbsFile[setting]

        self.workflows.add(dbsFile['workflow'])

        self.files.append(dbsFile)
        self.data['block']['block_size'] += int(dbsFile['size'])
        self.data['block']['file_count'] += 1
        self.data['block']['block_events'] += int(dbsFile['events'])

        # Assemble information for the file itself
        fileDict = {}
        fileDict['file_type']              =  'EDM'
        fileDict['logical_file_name']      = dbsFile['lfn']
        fileDict['file_size']              = dbsFile['size']
        fileDict['event_count']            = dbsFile['events']
        fileDict['last_modified_by'] = "WMAgent"
        fileDict['last_modification_date'] = int(time.time())
        fileDict['auto_cross_section'] = 0.0

        # Do the checksums
        for cktype in dbsFile['checksums'].keys():
            cksum = dbsFile['checksums'][cktype]
            if cktype.lower() == 'cksum':
                fileDict['check_sum'] = cksum
            elif cktype.lower() == 'adler32':
                fileDict['adler32'] = cksum
            elif cktype.lower() == 'md5':
                fileDict['md5'] = cksum

        # Do the runs
        lumiList = []
        for run in dbsFile.getRuns():
            for lumi in run.lumis:
                dbsLumiDict = {'lumi_section_num': lumi, 'run_num': run.run}
                if run.getEventsByLumi(lumi) is not None:
                    # if the per-lumi event count is known, include it in the DBS upload
                    dbsLumiDict['event_count'] = run.getEventsByLumi(lumi)
                lumiList.append(dbsLumiDict)
        fileDict['file_lumi_list'] = lumiList

        # Append to the files list
        self.data['files'].append(fileDict)

        # now add file to data
        parentLFNs = dbsFile.getParentLFNs()
        for lfn in parentLFNs:
            self.addFileParent(child = dbsFile['lfn'], parent = lfn)


        # Do the algo
        algo = self.addConfiguration(release = dbsFile['appVer'],
                                     psetHash = dbsFile['psetHash'],
                                     appName = dbsFile['appName'],
                                     outputLabel = dbsFile['appFam'],
                                     globalTag = dbsFile['globalTag'])

        # Now add the file with the algo
        # Try to avoid messing with pointers here
        fileAlgo = {}
        fileAlgo.update(algo)
        fileAlgo['lfn'] = dbsFile['lfn']
        self.data['file_conf_list'].append(fileAlgo)

        if dbsFile.get('acquisition_era', False):
            self.setAcquisitionEra(dbsFile['acquisition_era'])
        elif dbsFile.get('acquisitionEra', False):
            self.setAcquisitionEra(dbsFile['acquisitionEra'])
        if dbsFile.get('processingVer', False):
            self.setProcessingVer(dbsFile['processingVer'])
        elif dbsFile.get('processing_ver', False):
            self.setProcessingVer(dbsFile['processing_ver'])

        # Take care of the dataset
        self.setDataset(datasetName  = dbsFile['datasetPath'],
                        primaryType  = primaryDatasetType,
                        datasetType  = datasetType,
                        physicsGroup = dbsFile.get('physicsGroup', None),
                        prep_id = dbsFile.get('prep_id', None))

        return

    def addFileParent(self, child, parent):
        """
        _addFileParent_

        Add file parents to the data block
        """
        info = {'parent_logical_file_name': parent,
                'logical_file_name': child}
        self.data['file_parent_list'].append(info)

        return

    def addBlockParent(self, parent):
        """
        _addBlockParent_

        Add the parents of the block
        """

        self.data['block_parent_list'].append({'block_name': parent})
        return

    def addDatasetParent(self, parent):
        """
        _addDatasetParent_

        Add the parent datasets to the data block
        """

        self.data['ds_parent_list'].append({'parent_dataset': parent})
        return

    def setProcessingVer(self, procVer):
        """
        _setProcessingVer_

        Set the block's processing version.
        """
        # compatibility statement for old style proc ver (still needed ?)
        if procVer.count("-") == 1:
            self.data["processing_era"]["processing_version"] = procVer.split("-v")[1]
        else:
            self.data["processing_era"]["processing_version"] = procVer

        self.data["processing_era"]["create_by"] = "WMAgent"
        self.data["processing_era"]["description"] = ""
        return

    def setAcquisitionEra(self, era, date = 123456789):
        """
        _setAcquisitionEra_

        Set the acquisition era for the block
        """
        self.data['acquisition_era']['acquisition_era_name'] = era
        self.data['acquisition_era']['start_date']           = date
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Sets the name of the physics group to which the dataset is attached
        """

        self.data['dataset']['physics_group_name'] = group
        return

    def getDatasetPath(self):
        """
        _getDatasetPath_

        Return the datasetpath
        """
        return self.datasetpath

    def getDataset(self):
        """
        _getDataset_

        Return the dataset (None if not set)
        """
        return self.data['dataset'].get('dataset', None)

    def setDataset(self, datasetName, primaryType,
                   datasetType, physicsGroup = None,
                   prep_id  = None, overwrite = False):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.getDataset() is not None and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)

        if not datasetType in ['VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED']:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        try:
            if datasetName[0] == '/':
                junk, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        # Do the primary dataset
        self.data['primds']['primary_ds_name'] = primary
        self.data['primds']['primary_ds_type'] = primaryType
        self.data['primds']['create_by'] = "WMAgent"
        self.data['primds']['creation_date'] = int(time.time())

        # Do the processed
        self.data['dataset']['physics_group_name']  = physicsGroup
        self.data['dataset']['processed_ds_name']   = processed
        self.data['dataset']['data_tier_name']      = tier
        self.data['dataset']['dataset_access_type'] = datasetType
        self.data['dataset']['dataset']             = datasetName
        self.data['dataset']['prep_id'] = prep_id
        # Add misc meta data.
        self.data['dataset']['create_by'] = "WMAgent"
        self.data['dataset']['last_modified_by'] = "WMAgent"
        self.data['dataset']['creation_date'] = int(time.time())
        self.data['dataset']['last_modification_date'] = int(time.time())
        return


    def addConfiguration(self, release, psetHash,
                         appName = 'cmsRun', outputLabel = 'Merged', globalTag = 'None'):
        """
        _addConfiguration_

        Add the algorithm config to the data block
        """

        algo = {'release_version': release,
                'pset_hash': psetHash,
                'app_name': appName,
                'output_module_label': outputLabel,
                'global_tag': globalTag}

        if not algo in self.data['dataset_conf_list']:
            self.data['dataset_conf_list'].append(algo)


        return algo

    def getNFiles(self):
        """
        _getNFiles_

        Return the number of files in the block
        """

        return len(self.files)


    def getSize(self):
        """
        _getSize_

        Get size of block
        """
        return self.data['block']['block_size']

    def getNumEvents(self):
        """
        _getNumEvents_

        Get the number of events in the block
        """
        return self.data['block']['block_events']

    def getTime(self):
        """
        _getTime_

        Return the time the block has been running
        """

        return time.time() - self.startTime

    def getMaxBlockTime(self):
        """
        _getMaxBlockTime_

        Return the max time that the block should stay open
        """
        return self.data['close_settings']['block_close_max_wait_time']

    def getMaxBlockSize(self):
        """
        _getMaxBlockSize_

        Return the max size allowed for the block
        """
        return self.data['close_settings']['block_close_max_size']

    def getMaxBlockNumEvents(self):
        """
        _getMaxBlockNumEvents_

        Return the max number of events allowed for the block
        """
        return self.data['close_settings']['block_close_max_events']

    def getMaxBlockFiles(self):
        """
        _getMaxBlockFiles_

        Return the max number of files allowed for the block
        """
        return self.data['close_settings']['block_close_max_files']

    def getName(self):
        """
        _getName_

        Get Name
        """

        return self.name

    def getLocation(self):
        """
        _getLocation_

        Get location
        """

        return self.location

    def getStartTime(self):
        """
        _getStartTime_

        Get the time the block was opened at
        """

        return self.startTime


    def FillFromDBSBuffer(self, blockInfo):
        """
        _FillFromDBSBuffer_

        Take the info provided by LoadBlocks and
        use it to create a block object
        """
        # Blocks loaded out of the buffer should
        # have both a creation time, and should
        # be in the buffer (duh)
        self.startTime = blockInfo.get('creation_date')
        self.inBuff    = True

        if 'status' in blockInfo.keys():
            self.status = blockInfo['status']
            if self.status == "Pending":
                self.data['block']['open_for_writing'] = 0

            del blockInfo['status']

        for key in blockInfo.keys():
            self.data['block'][key] = blockInfo.get(key)

    def convertToDBSBlock(self):
        """
        Convert to the DBSBlock structure for upload to DBS.
        TODO: check file lumi events and validate that the event count is not null
        """
        block = {}

        # TODO: instead of listing keys to remove, switch to a list of keys to keep.
        # Ask the DBS team to publish that list (API).
        keyToRemove = ['insertedFiles', 'newFiles', 'file_count', 'block_size',
                       'origin_site_name', 'creation_date', 'open',
                       'Name', 'close_settings']

        nestedKeyToRemove = ['block.block_events', 'block.datasetpath', 'block.workflows']

        dbsBufferToDBSBlockKey = {'block_size': 'BlockSize',
                                  'creation_date': 'CreationDate',
                                  'file_count': 'NumberOfFiles',
                                  'origin_site_name': 'location'}

        # clone the new DBSBlock dict after filtering out the data.
        for key in self.data:
            if key in keyToRemove:
                continue
            elif key in dbsBufferToDBSBlockKey.keys():
                block[dbsBufferToDBSBlockKey[key]] = copy.deepcopy(self.data[key])
            else:
                block[key] = copy.deepcopy(self.data[key])

        # delete nested key dictionary
        for nestedKey in nestedKeyToRemove:
            firstkey, subkey = nestedKey.split('.', 1)
            if firstkey in block and subkey in block[firstkey]:
                del block[firstkey][subkey]

        return block

    def setPendingAndCloseBlock(self):
        "set the block status as Pending for upload as well as closed"
        # Pending means ready to upload
        self.status = "Pending"
        # close block on DBS3 status
        self.data['block']['open_for_writing'] = 0
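
A generic sketch of the filter-and-rename step that convertToDBSBlock() above performs, applied to a plain dict. The helper name and the trimmed-down key lists in the usage note are illustrative assumptions, not the authoritative DBS field lists.

import copy

def filterAndRename(data, keysToRemove, renameMap):
    """Deep-copy `data`, dropping the unwanted keys and renaming the mapped ones."""
    result = {}
    for key, value in data.items():
        if key in keysToRemove:
            continue
        result[renameMap.get(key, key)] = copy.deepcopy(value)
    return result

# Illustrative usage:
# filterAndRename({'file_count': 3, 'files': [], 'close_settings': {}},
#                 keysToRemove=['close_settings'],
#                 renameMap={'file_count': 'NumberOfFiles'})
# -> {'NumberOfFiles': 3, 'files': []}
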
Example #6
0
class DBSBlock:
    """
    DBSBlock

    Class for holding all the necessary equipment for a DBSBlock
    """
    def __init__(self, name, location, das, workflow):
        """
        Just the necessary objects

        Expects:
          name:  The blockname in full
          location: The SE-name of the site the block is at
        """

        self.data = {
            'dataset_conf_list': [],  # List of dataset configurations
            'file_conf_list': [],  # List of files, the configuration for each
            'files': [],  # List of file objects
            'block': {},  # Dict of block info
            'processing_era': {},  # Dict of processing era info
            'acquisition_era': {},  # Dict of acquisition era information
            'primds': {},  # Dict of primary dataset info
            'dataset': {},  # Dict of processed dataset info
            'file_parent_list': [],  # List of file parents
            'close_settings': {}  # Dict of info about block close settings
        }

        self.files = []
        self.encoder = JSONRequests()
        self.status = 'Open'
        self.inBuff = False
        self.startTime = time.time()
        self.name = name
        self.location = location
        self.das = das
        self.workflow = workflow

        self.data['block']['block_name'] = name
        self.data['block']['origin_site_name'] = location
        self.data['block']['open_for_writing'] = 1

        self.data['block']['create_by'] = "WMAgent"
        self.data['block']['creation_date'] = int(time.time())
        self.data['block']['block_size'] = 0
        self.data['block']['file_count'] = 0
        self.data['block']['block_events'] = 0

        self.data['close_settings'] = {}
        self.data['close_settings']['block_close_max_wait_time'] = None
        self.data['close_settings']['block_close_max_events'] = None
        self.data['close_settings']['block_close_max_size'] = None
        self.data['close_settings']['block_close_max_files'] = None
        return

    def encode(self):
        """
        _encode_

        Turn this into a JSON object for transmission
        to DBS
        """

        return self.encoder.encode(data=self.data)

    def addFile(self, dbsFile, datasetType, primaryDatasetType):
        """
        _addFile_

        Add a DBSBufferFile object to our block
        """
        if dbsFile['id'] in [x['id'] for x in self.files]:
            msg = "Duplicate file inserted into DBSBlock: %i\n" % (
                dbsFile['id'])
            msg += "Ignoring this file for now!\n"
            logging.error(msg)
            logging.debug("Block length: %i" % len(self.files))
            l = [x['id'] for x in self.files]
            l.sort()
            logging.debug("First file: %s    Last file: %s" % (l[0], l[-1]))
            return

        for setting in self.data['close_settings']:
            if self.data['close_settings'][setting] is None:
                self.data['close_settings'][setting] = dbsFile[setting]

        self.files.append(dbsFile)
        self.data['block']['block_size'] += int(dbsFile['size'])
        self.data['block']['file_count'] += 1
        self.data['block']['block_events'] += int(dbsFile['events'])

        # Assemble information for the file itself
        fileDict = {}
        fileDict['file_type'] = 'EDM'
        fileDict['logical_file_name'] = dbsFile['lfn']
        fileDict['file_size'] = dbsFile['size']
        fileDict['event_count'] = dbsFile['events']
        fileDict['adler32'] = "NOTSET"
        fileDict['md5'] = "NOTSET"
        fileDict['last_modified_by'] = "WMAgent"
        fileDict['last_modification_date'] = int(time.time())
        fileDict['auto_cross_section'] = 0.0

        # Do the checksums
        for cktype in dbsFile['checksums'].keys():
            cksum = dbsFile['checksums'][cktype]
            if cktype.lower() == 'cksum':
                fileDict['check_sum'] = cksum
            elif cktype.lower() == 'adler32':
                fileDict['adler32'] = cksum
            elif cktype.lower() == 'md5':
                fileDict['md5'] = cksum

        # Do the runs
        lumiList = []
        for run in dbsFile.getRuns():
            for lumi in run.lumis:
                lumiList.append({'lumi_section_num': lumi, 'run_num': run.run})
        fileDict['file_lumi_list'] = lumiList

        # Append to the files list
        self.data['files'].append(fileDict)

        # now add file to data
        parentLFNs = dbsFile.getParentLFNs()
        for lfn in parentLFNs:
            self.addFileParent(child=dbsFile['lfn'], parent=lfn)

        # Do the algo
        algo = self.addConfiguration(release=dbsFile['appVer'],
                                     psetHash=dbsFile['psetHash'],
                                     appName=dbsFile['appName'],
                                     outputLabel=dbsFile['appFam'],
                                     globalTag=dbsFile['globalTag'])

        # Now add the file with the algo
        # Try to avoid messing with pointers here
        fileAlgo = {}
        fileAlgo.update(algo)
        fileAlgo['lfn'] = dbsFile['lfn']
        self.data['file_conf_list'].append(fileAlgo)

        if dbsFile.get('acquisition_era', False):
            self.setAcquisitionEra(dbsFile['acquisition_era'])
        elif dbsFile.get('acquisitionEra', False):
            self.setAcquisitionEra(dbsFile['acquisitionEra'])
        if dbsFile.get('processingVer', False):
            self.setProcessingVer(dbsFile['processingVer'])
        elif dbsFile.get('processing_ver', False):
            self.setProcessingVer(dbsFile['processing_ver'])

        # Take care of the dataset
        self.setDataset(datasetName=dbsFile['datasetPath'],
                        primaryType=primaryDatasetType,
                        datasetType=datasetType,
                        physicsGroup=dbsFile.get('physicsGroup', None),
                        prep_id=dbsFile.get('prep_id', None))

        return

    def addFileParent(self, child, parent):
        """
        _addFileParent_

        Add file parents to the data block
        """
        info = {'parent_logical_file_name': parent, 'logical_file_name': child}
        self.data['file_parent_list'].append(info)

        return

    def addBlockParent(self, parent):
        """
        _addBlockParent_

        Add the parents of the block
        """

        self.data['block_parent_list'].append({'block_name': parent})
        return

    def addDatasetParent(self, parent):
        """
        _addDatasetParent_

        Add the parent datasets to the data block
        """

        self.data['ds_parent_list'].append({'parent_dataset': parent})
        return

    def setProcessingVer(self, procVer):
        """
        _setProcessingVer_

        Set the block's processing version.
        """
        if procVer.count("-") == 1:
            junk, self.data["processing_era"]["processing_version"] = procVer.split("-v")
        else:
            self.data["processing_era"]["processing_version"] = procVer

        self.data["processing_era"]["create_by"] = "WMAgent"
        self.data["processing_era"]["description"] = ""
        return

    def setAcquisitionEra(self, era, date=123456789):
        """
        _setAcquisitionEra_

        Set the acquisition era for the block
        """
        self.data['acquisition_era']['acquisition_era_name'] = era
        self.data['acquisition_era']['start_date'] = date
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Sets the name of the physics group to which the dataset is attached
        """

        self.data['dataset']['physics_group_name'] = group
        return

    def hasDataset(self):
        """
        _hasDataset_

        Check and see if the dataset has been properly set
        """
        return self.data['dataset'].get('dataset', False)

    def setDataset(self,
                   datasetName,
                   primaryType,
                   datasetType,
                   physicsGroup=None,
                   prep_id=None,
                   overwrite=False,
                   valid=1):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.hasDataset() and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)

        if datasetType not in ['VALID', 'PRODUCTION', 'INVALID',
                               'DEPRECATED', 'DELETED']:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBlockException(msg)

        try:
            if datasetName[0] == '/':
                junk, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBlockException(msg)

        # Do the primary dataset
        self.data['primds']['primary_ds_name'] = primary
        self.data['primds']['primary_ds_type'] = primaryType
        self.data['primds']['create_by'] = "WMAgent"
        self.data['primds']['creation_date'] = int(time.time())

        # Do the processed
        self.data['dataset']['physics_group_name'] = physicsGroup
        self.data['dataset']['processed_ds_name'] = processed
        self.data['dataset']['data_tier_name'] = tier
        self.data['dataset']['dataset_access_type'] = datasetType
        self.data['dataset']['dataset'] = datasetName
        self.data['dataset']['prep_id'] = prep_id
        # Add misc meta data.
        self.data['dataset']['create_by'] = "WMAgent"
        self.data['dataset']['last_modified_by'] = "WMAgent"
        self.data['dataset']['creation_date'] = int(time.time())
        self.data['dataset']['last_modification_date'] = int(time.time())
        return
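
A minimal standalone sketch (not part of WMCore) of how setDataset() above decomposes a dataset path of the form "/Primary/Processed/TIER"; the path value here is hypothetical:

# Hypothetical dataset path, split exactly as setDataset() does
datasetName = "/MinBias/Run2012A-PromptReco-v1/RECO"
if datasetName[0] == '/':
    junk, primary, processed, tier = datasetName.split('/')
else:
    primary, processed, tier = datasetName.split('/')
print(primary)    # MinBias
print(processed)  # Run2012A-PromptReco-v1
print(tier)       # RECO
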
Example #7
class ProcessPool:
    def __init__(self, slaveClassName, totalSlaves, componentDir,
                 config, slaveInit = None, namespace = None):
        """
        __init__

        Constructor for the process pool.  The slave class name must be based
        inside the WMComponent namespace.  For example, the JobAccountant would
        pass in 'JobAccountant.AccountantWorker' to run the AccountantWorker
        class.  All log files will be stored in the component directory that is
        passed in.  Each slave will have its own log file.

        Note that the config is only used to determine database connection
        parameters.  It is not passed to the slave class.  The slaveInit
        parameter will be serialized and passed to the slave class's
        constructor.
        """
        self.enqueueIndex = 0
        self.dequeueIndex = 0
        self.runningWork  = 0

        #Use the Services.Requests JSONizer, which handles __to_json__ calls
        self.jsonHandler = JSONRequests()
        
        # heartbeat should be registered at this point
        if getattr(config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))
            
        self.slaveClassName = slaveClassName
        self.componentDir   = componentDir
        self.config         = config
        # Grab the Python version of the running interpreter
        # Assume naming convention pythonA.B, i.e., python2.4 for v2.4.X
        majorVersion = sys.version_info[0]
        minorVersion = sys.version_info[1]

        if majorVersion and minorVersion:
            self.versionString = "python%i.%i" % (majorVersion, minorVersion)
        else:
            self.versionString = "python2.4"

        self.workers = []
        self.nSlaves = totalSlaves
        self.slaveInit = slaveInit
        self.namespace = namespace


        # Now actually create the slaves
        self.createSlaves()


        return


    def createSlaves(self):
        """
        _createSlaves_

        Create the slaves by using the values from __init__()
        Moving it into a separate function allows us to restart
        all of them.
        """

        totalSlaves    = self.nSlaves
        slaveClassName = self.slaveClassName
        config         = self.config
        slaveInit      = self.slaveInit
        namespace      = self.namespace
        
        slaveArgs = [self.versionString, __file__, self.slaveClassName]
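        # slaveArgs forms the command line used to spawn each slave below:
        #   pythonX.Y <this file> <slaveClassName>
        # i.e. this module is re-executed as a script once per slave.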
        if hasattr(config.CoreDatabase, "socket"):
            socket = config.CoreDatabase.socket
        else:
            socket = None

        (connectDialect, junk) = config.CoreDatabase.connectUrl.split(":", 1)
        if connectDialect.lower() == "mysql":
            dialect = "MySQL"
        elif connectDialect.lower() == "oracle":
            dialect = "Oracle"
        elif connectDialect.lower() == "sqlite":
            dialect = "SQLite"

        dbConfig = {"dialect": dialect,
                    "connectUrl": config.CoreDatabase.connectUrl,
                    "socket": socket,
                    "componentDir": self.componentDir}
        if namespace:
            # Then add a namespace to the config
            dbConfig['namespace'] = namespace
        encodedDBConfig = self.jsonHandler.encode(dbConfig)
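        # encodedDBConfig is a single JSON line; each slave reads it from its
        # stdin (written below) to set up its own database connection.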

        if slaveInit is None:
            encodedSlaveInit = None
        else:
            encodedSlaveInit = self.jsonHandler.encode(slaveInit)

        count = 0
        while totalSlaves > 0:
            #For each requested worker, create a slave process.
            #That process runs this module (WMCore.ProcessPool) and loads
            #the designated slave class.
            slaveProcess = subprocess.Popen(slaveArgs, stdin = subprocess.PIPE,
                                            stdout = subprocess.PIPE)
            slaveProcess.stdin.write("%s\n" % encodedDBConfig)

            if encodedSlaveInit is None:
                slaveProcess.stdin.write("\n")
            else:
                slaveProcess.stdin.write("%s\n" % encodedSlaveInit)
                
            slaveProcess.stdin.flush()
            self.workers.append(WorkerProcess(subproc = slaveProcess))
            workerName = self._subProcessName(self.slaveClassName, count)
            
            if getattr(self.config.Agent, "useHeartbeat", True):
                self.heartbeatAPI.updateWorkerHeartbeat(workerName, 
                                            pid = slaveProcess.pid)
            totalSlaves -= 1
            count += 1


        return
    
    def _subProcessName(self, slaveClassName, sequence):
        """ subProcessName for heartbeat 
            could change to use process ID as a suffix
        """
        return "%s_%s" % (slaveClassName, sequence + 1)
            
    def __del__(self):
        """
        __del__

        Kill all the worker processes by sending them an invalid JSON object.
        This will cause them to shut down.
        """
        for worker in self.workers:
            try:
                worker.delete()
            except Exception:
                pass

        self.workers = []

        return