Exemplo n.º 1
0
def getOutputDatasetsWithPSet(payloadNode, sorted=False):
    """
    _getOutputDatasetsWithPSet_

    Extract all the information about output datasets from the
    payloadNode object provided, including the {{}} format PSet cfg

    Returns a list of DatasetInfo objects including App details
    from the node.  If sorted is True the list is ordered via
    _sortDatasets, matching getOutputDatasets.

    """
    result = []

    for item in payloadNode._OutputDatasets:
        resultEntry = DatasetInfo()
        resultEntry.update(item)
        resultEntry["ApplicationName"] = payloadNode.application['Executable']
        resultEntry["ApplicationProject"] = payloadNode.application['Project']
        resultEntry["ApplicationVersion"] = payloadNode.application['Version']
        resultEntry["ApplicationFamily"] = item.get("OutputModuleName",
                                                    "AppFamily")

        try:
            config = payloadNode.cfgInterface
            psetStr = config.originalContent()
            resultEntry['PSetContent'] = psetStr
            resultEntry['Conditions'] = config.conditionsTag
        except Exception:
            # Best effort: a node without a usable cfgInterface still
            # yields an entry, just with no PSet content attached.
            resultEntry['PSetContent'] = None

        result.append(resultEntry)

    # Bug fix: the original dropped the accumulated list (no return) and
    # ignored the sorted flag; mirror getOutputDatasets behaviour.
    if sorted:
        result = _sortDatasets(result)
    return result
Exemplo n.º 2
0
def expandDatasetInfo(datasetInfo, requestTimestamp):
    """
    _expandDatasetInfo_

    Given a DatasetInfo, check to see if it contains multi tiers,
    and if it does, expand them to a list of basic tier dataset info objects

    returns a list of datasetInfo objects

    """
    #  //
    # // Single tier: nothing to expand, hand back the same object
    #//  as the only entry in the list
    if not isMultiTier(datasetInfo['DataTier']):
        return [datasetInfo]

    processedDSName = "%s-%s-%s" % (datasetInfo['ApplicationVersion'],
                                    datasetInfo['OutputModuleName'],
                                    requestTimestamp)
    #  //
    # // One DatasetInfo clone per individual tier, all sharing the
    #//  same processed dataset name
    expanded = []
    for dataTier in splitMultiTier(datasetInfo['DataTier']):
        tierInfo = DatasetInfo()
        tierInfo.update(datasetInfo)
        tierInfo['DataTier'] = dataTier
        tierInfo['ProcessedDataset'] = processedDSName
        expanded.append(tierInfo)
    return expanded
Exemplo n.º 3
0
    def __call__(self, node):
        """
        _operator(node)_

        Operate on all output datasets in a Payload Node: build a merged
        DatasetInfo for each one and append it to self.result.

        Datasets carrying a "NoMerge" key are skipped.

        """
        for dataset in node._OutputDatasets:
            #  //
            # // If we need to avoid merging some datasets we
            #//  can add a NoMerge key and this will ignore it
            if "NoMerge" in dataset:  # was has_key: Py2-only, `in` is equivalent
                continue
            newDataset = DatasetInfo()
            newDataset.update(dataset)
            newDataset["ApplicationFamily"] = self.mergeModuleName
            newDataset["ApplicationName"] = self.appName
            newDataset["ApplicationVersion"] = node.application['Version']
            #  //
            # // Merged processed dataset name: strip "-unmerged" when
            #//  present, otherwise tag the name with "-merged"
            procName = dataset["ProcessedDataset"]
            if procName.endswith("-unmerged"):
                procName = procName.replace("-unmerged", "")
            else:
                procName = "%s-merged" % procName
            newDataset["ProcessedDataset"] = procName
            newDataset["ParentDataset"] = dataset.name()
            newDataset['OutputModuleName'] = "%s-Merged" % (
                newDataset['OutputModuleName'], )
            if node.userSandbox is not None:
                newDataset['UserSandbox'] = node.userSandbox
            self.result.append(newDataset)

        return
Exemplo n.º 4
0
    def newDataset(self):
        """
        _newDataset_

        Create an empty DatasetInfo, register it as one of the datasets
        this file is associated with, and return it so the caller can
        populate it.

        """
        datasetEntry = DatasetInfo()
        self.dataset.append(datasetEntry)
        return datasetEntry
Exemplo n.º 5
0
    def addPileupDataset(self, primary, tier, processed):
        """
        _addPileupDataset_

        Register a pileup dataset on this node, identified by its
        primary dataset, data tier and processed dataset name.
        Returns the new DatasetInfo by reference.

        """
        pileupInfo = DatasetInfo()
        pileupInfo['PrimaryDataset'] = primary
        pileupInfo['DataTier'] = tier
        pileupInfo['ProcessedDataset'] = processed
        self._PileupDatasets.append(pileupInfo)
        return pileupInfo
Exemplo n.º 6
0
def getPileupDatasets(payloadNode):
    """
    _getPileupDatasets_

    Extract all pileup dataset info from the node provided.
    Returns a list of DatasetInfo copies, each tagged with the
    name of the node it came from.

    """
    entries = []
    for pileupDS in payloadNode._PileupDatasets:
        entry = DatasetInfo()
        entry.update(pileupDS)
        entry['NodeName'] = payloadNode.name
        entries.append(entry)
    return entries
Exemplo n.º 7
0
    def addOutputDataset(self, primaryDS, processedDS, outputModuleName):
        """
        _addOutputDataset_

        Add a new Output Dataset, specifying the Primary and Processed
        Dataset names and the name of the output module in the PSet
        responsible for writing out files for that dataset.
        Returns the new DatasetInfo by reference.

        """
        outputInfo = DatasetInfo()
        outputInfo['PrimaryDataset'] = primaryDS
        outputInfo['ProcessedDataset'] = processedDS
        outputInfo['OutputModuleName'] = outputModuleName
        self._OutputDatasets.append(outputInfo)
        return outputInfo
Exemplo n.º 8
0
def getInputDatasets(payloadNode, sorted=False):
    """
    _getInputDatasets_

    Extract all the information about input datasets from the
    payloadNode object provided.

    Returns a list of DatasetInfo objects; when sorted is True the
    list is ordered via _sortDatasets.

    """
    datasets = []
    for inputDS in payloadNode._InputDatasets:
        entry = DatasetInfo()
        entry.update(inputDS)
        datasets.append(entry)
    if not sorted:
        return datasets
    return _sortDatasets(datasets)
Exemplo n.º 9
0
    def addInputDataset(self, primaryDS, processedDS):
        """
        _addInputDataset_

        Add a new Input Dataset to this Node.
        Arguments should be:

        - *primaryDS* : The Primary Dataset name of the input dataset

        - *processedDS* : The Processed Dataset name of the input dataset

        The DatasetInfo object is returned by reference for more information
        to be added to it

        InputModuleName should be the mainInputSource of the PSet for
        the main input dataset. At present this is set elsewhere

        """
        inputInfo = DatasetInfo()
        inputInfo['PrimaryDataset'] = primaryDS
        inputInfo['ProcessedDataset'] = processedDS
        self._InputDatasets.append(inputInfo)
        return inputInfo
Exemplo n.º 10
0
def getOutputDatasets(payloadNode, sorted=False):
    """
    _getOutputDatasets_

    Extract all the information about output datasets from the
    payloadNode object provided.

    Returns a list of DatasetInfo objects including App details
    from the node; when sorted is True the list is ordered via
    _sortDatasets.

    """
    # Hoist the application details lookup out of the loop
    appDetails = payloadNode.application
    datasets = []
    for outputDS in payloadNode._OutputDatasets:
        entry = DatasetInfo()
        entry.update(outputDS)
        entry["ApplicationName"] = appDetails['Executable']
        entry["ApplicationProject"] = appDetails['Project']
        entry["ApplicationVersion"] = appDetails['Version']
        datasets.append(entry)

    if sorted:
        datasets = _sortDatasets(datasets)
    return datasets
Exemplo n.º 11
0
    def unpackPayloadNodeData(self, improvNode):
        """
        _unpackPayloadNodeData_

        Unpack PayloadNode data from improv Node provided and
        add information to self.

        Populates, in order: name/type/workflow, application and
        application-control fields, script controls, input/output/pileup
        datasets, input links, the base64-encoded configuration blob,
        the CMSSWConfig interface, and the user sandbox.

        """
        # Mandatory identity attributes -- raises KeyError if absent
        self.name = str(improvNode.attrs["Name"])
        self.type = str(improvNode.attrs["Type"])
        # Workflow attribute is optional; only set when present
        workflowName = improvNode.attrs.get('Workflow', None)
        if workflowName != None:
            self.workflow = str(workflowName)
        #  //
        # // Unpack data for this instance
        #//  App details
        # Queries are rooted at the concrete class name so subclasses
        # read their own element names.
        appDataQ = IMProvQuery("/%s/Application" % self.__class__.__name__)
        appData = appDataQ(improvNode)[0]
        for appField in appData.children:
            field = str(appField.name)
            value = str(appField.attrs['Value'])
            self.application[field] = value
        #  //
        # // App Control details
        #//
        appConDataQ = IMProvQuery("/%s/ApplicationControl/*" %
                                  self.__class__.__name__)
        appConData = appConDataQ(improvNode)
        for appConField in appConData:
            field = str(appConField.name)
            value = str(appConField.attrs['Value'])
            self.applicationControls[field] = value

        #  //
        # // Script Controls
        #//
        scriptConQ = IMProvQuery("/%s/ScriptControls/ScriptList" %
                                 self.__class__.__name__)
        scriptLists = scriptConQ(improvNode)
        for scriptList in scriptLists:
            # Lists without a Name and scripts without a Value are
            # silently skipped.
            listName = scriptList.attrs.get("Name", None)
            if listName == None: continue
            listName = str(listName)
            for script in scriptList.children:
                scriptName = script.attrs.get("Value", None)
                if scriptName == None: continue
                self.scriptControls[listName].append(str(scriptName))

        #  //
        # // Dataset details
        #//  Input Datasets
        inputDSQ = IMProvQuery("/%s/InputDatasets/DatasetInfo" %
                               self.__class__.__name__)
        inputDS = inputDSQ(improvNode)
        #        print improvNode
        for item in inputDS:
            newDS = DatasetInfo()
            newDS.load(item)
            self._InputDatasets.append(newDS)

        #  //
        # // Output Datasets
        #//
        outputDSQ = IMProvQuery("/%s/OutputDatasets/DatasetInfo" %
                                self.__class__.__name__)
        outputDS = outputDSQ(improvNode)
        for item in outputDS:
            newDS = DatasetInfo()
            newDS.load(item)
            self._OutputDatasets.append(newDS)
        #  //
        # // Pileup Datasets
        #//
        pileupDSQ = IMProvQuery("/%s/PileupDatasets/DatasetInfo" %
                                self.__class__.__name__)
        pileupDS = pileupDSQ(improvNode)
        for item in pileupDS:
            newDS = DatasetInfo()
            newDS.load(item)
            self._PileupDatasets.append(newDS)
        #  //
        # // Input Links
        #//
        inpLinkQ = IMProvQuery("/%s/InputLinks/InputLink" %
                               self.__class__.__name__)
        inpLinks = inpLinkQ(improvNode)
        for ilink in inpLinks:
            newLink = InputLink()
            newLink.load(ilink)
            self._InputLinks.append(newLink)

        #  //
        # // Configuration
        #//
        # Configuration is stored as base64-encoded character data;
        # only the first matching node is used.
        # NOTE(review): base64.decodestring was removed in Python 3.9;
        # use base64.decodebytes when porting off Python 2.
        configQ = IMProvQuery("/%s/Configuration" % self.__class__.__name__)
        configNodes = configQ(improvNode)
        if len(configNodes) > 0:
            configNode = configNodes[0]
            self.configuration = base64.decodestring(str(configNode.chardata))

        cfgIntQ = IMProvQuery("/%s/CMSSWConfig" % self.__class__.__name__)
        cfgNodes = cfgIntQ(improvNode)
        if len(cfgNodes) > 0:
            cfgNode = cfgNodes[0]
            self.cfgInterface = CMSSWConfig()
            self.cfgInterface.load(cfgNode)

        #  //
        # // User sandbox
        #//
        # The last matching UserSandbox node wins (note the [-1] index)
        sandboxQ = IMProvQuery("/%s/UserSandbox" % self.__class__.__name__)
        sandboxNodes = sandboxQ(improvNode)
        if len(sandboxNodes) > 0:
            sandboxNode = sandboxNodes[-1]
            self.userSandbox = str(sandboxNode.chardata)

        return