示例#1
0
 def __init__(self, files=[], persistency=None, depth=0):
     new_files = GangaList()
     if isType(files, LHCbDataset):
         for this_file in files:
             new_files.append(copy.deepcopy(this_file))
     elif isType(files, IGangaFile):
         new_files.append(copy.deepcopy(this_file))
     elif type(files) == type([]):
         for this_file in files:
             if type(this_file) == type(''):
                 new_files.append(string_datafile_shortcut_lhcb(this_file, None), False)
             elif isType(this_file, IGangaFile):
                 new_files.append(this_file, False)
             else:
                 new_files.append(strToDataFile(this_file))
     elif type(files) == type(''):
         new_files.append(string_datafile_shortcut_lhcb(this_file, None), False)
     else:
         from Ganga.Core.exceptions import GangaException
         raise GangaException("Unknown object passed to LHCbDataset constructor!")
     new_files._setParent(self)
     super(LHCbDataset, self).__init__()
     # Feel free to turn this on again for debugging but it's potentially quite expensive
     #logger.debug( "Creating dataset with:\n%s" % files )
     self.files = new_files
     self.persistency = persistency
     self.depth = depth
     logger.debug("Dataset Created")
示例#2
0
    def __init__(self, files=None, persistency=None, depth=0):
        super(LHCbDataset, self).__init__()
        if files is None:
            files = []
        new_files = GangaList()
        if isType(files, LHCbDataset):
            for this_file in files:
                new_files.append(deepcopy(this_file))
        elif isType(files, IGangaFile):
            new_files.append(deepcopy(this_file))
        elif isType(files, (list, tuple, GangaList)):
            new_list = []
            for this_file in files:
                if type(this_file) is str:
                    new_file = string_datafile_shortcut_lhcb(this_file, None)
                elif isType(this_file, IGangaFile):
                    new_file = this_file
                else:
                    new_file = strToDataFile(this_file)
                new_list.append(stripProxy(new_file))
            stripProxy(new_files)._list = new_list
        elif type(files) is str:
            new_files.append(string_datafile_shortcut_lhcb(this_file, None), False)
        else:
            raise GangaException("Unknown object passed to LHCbDataset constructor!")
        new_files._setParent(self)

        logger.debug("Processed inputs, assigning files")

        # Feel free to turn this on again for debugging but it's potentially quite expensive
        #logger.debug( "Creating dataset with:\n%s" % files )
        self.files = new_files
        
        logger.debug("Assigned files")

        self.persistency = persistency
        self.depth = depth
        logger.debug("Dataset Created")
示例#3
0
class LHCbDataset(GangaDataset):

    '''Class for handling LHCb data sets (i.e. inputdata for LHCb jobs).

    Example Usage:
    ds = LHCbDataset(["lfn:/some/lfn.file","pfn:/some/pfn.file"])
    ds[0] # DiracFile("/some/lfn.file") - see DiracFile docs for usage
    ds[1] # PhysicalFile("/some/pfn.file")- see PhysicalFile docs for usage
    len(ds) # 2 (number of files)
    ds.getReplicas() # returns replicas for *all* files in the data set
    ds.replicate("CERN-USER") # replicate *all* LFNs to "CERN-USER" SE
    ds.getCatalog() # returns XML catalog slice
    ds.optionsString() # returns Gaudi-sytle options 
    [...etc...]
    '''
    schema = {}
    docstr = 'List of PhysicalFile and DiracFile objects'
    schema['files'] = GangaFileItem(defvalue=[], typelist=['str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'], sequence=1, doc=docstr)
    docstr = 'Ancestor depth to be queried from the Bookkeeping'
    schema['depth'] = SimpleItem(defvalue=0, doc=docstr)
    docstr = 'Use contents of file rather than generating catalog.'
    schema['XMLCatalogueSlice'] = GangaFileItem(defvalue=None, doc=docstr)
    docstr = 'Specify the dataset persistency technology'
    schema['persistency'] = SimpleItem(
        defvalue=None, typelist=['str', 'type(None)'], doc=docstr)
    schema['treat_as_inputfiles'] = SimpleItem(defvalue=False, doc="Treat the inputdata as inputfiles, i.e. copy the inputdata to the WN")

    _schema = Schema(Version(3, 0), schema)
    _category = 'datasets'
    _name = "LHCbDataset"
    _exportmethods = ['getReplicas', '__len__', '__getitem__', 'replicate',
                      'hasLFNs', 'append', 'extend', 'getCatalog', 'optionsString',
                      'getLFNs', 'getFileNames', 'getFullFileNames',
                      'difference', 'isSubset', 'isSuperset', 'intersection',
                      'symmetricDifference', 'union', 'bkMetadata',
                      'isEmpty', 'hasPFNs', 'getPFNs']  # ,'pop']

    def __init__(self, files=None, persistency=None, depth=0, fromRef=False):
        super(LHCbDataset, self).__init__()
        if files is None:
            files = []
        self.files = GangaList()
        process_files = True
        if fromRef:
            self.files._list.extend(files)
            process_files = False
        elif isinstance(files, GangaList):
            def isFileTest(_file):
                return isinstance(_file, IGangaFile)
            areFiles = all([isFileTest(f) for f in files._list])
            if areFiles:
                self.files._list.extend(files._list)
                process_files = False
        elif isinstance(files, LHCbDataset):
            self.files._list.extend(files.files._list)
            process_files = False

        if process_files:
            if isType(files, LHCbDataset):
                for this_file in files:
                    self.files.append(deepcopy(this_file))
            elif isType(files, IGangaFile):
                self.files.append(deepcopy(this_file))
            elif isType(files, (list, tuple, GangaList)):
                new_list = []
                for this_file in files:
                    if type(this_file) is str:
                        new_file = string_datafile_shortcut_lhcb(this_file, None)
                    elif isType(this_file, IGangaFile):
                        new_file = stripProxy(this_file)
                    else:
                        new_file = strToDataFile(this_file)
                    new_list.append(new_file)
                self.files.extend(new_list)
            elif type(files) is str:
                self.files.append(string_datafile_shortcut_lhcb(this_file, None), False)
            else:
                raise GangaException("Unknown object passed to LHCbDataset constructor!")

        self.files._setParent(self)

        logger.debug("Processed inputs, assigning files")

        # Feel free to turn this on again for debugging but it's potentially quite expensive
        #logger.debug( "Creating dataset with:\n%s" % self.files )
        
        logger.debug("Assigned files")

        self.persistency = persistency
        self.depth = depth
        logger.debug("Dataset Created")

    def __getitem__(self, i):
        '''Proivdes scripting (e.g. ds[2] returns the 3rd file) '''
        #this_file = self.files[i]
        # print type(this_file)
        # return this_file
        # return this_file
        # return this_file
        if type(i) == type(slice(0)):
            ds = LHCbDataset(files=self.files[i])
            ds.depth = self.depth
            #ds.XMLCatalogueSlice = self.XMLCatalogueSlice
            return ds
        else:
            return self.files[i]

    def getReplicas(self):
        'Returns the replicas for all files in the dataset.'
        lfns = self.getLFNs()
        cmd = 'getReplicas(%s)' % str(lfns)
        result = get_result(cmd, 'LFC query error. Could not get replicas.')
        return result['Successful']

    def hasLFNs(self):
        'Returns True is the dataset has LFNs and False otherwise.'
        for f in self.files:
            if isDiracFile(f):
                return True
        return False

    def hasPFNs(self):
        'Returns True is the dataset has PFNs and False otherwise.'
        for f in self.files:
            if not isDiracFile(f):
                return True
        return False

    def replicate(self, destSE=''):
        '''Replicate all LFNs to destSE.  For a list of valid SE\'s, type
        ds.replicate().'''

        if not destSE:
            from GangaDirac.Lib.Files.DiracFile import DiracFile
            DiracFile().replicate('')
            return
        if not self.hasLFNs():
            raise GangaException('Cannot replicate dataset w/ no LFNs.')

        retry_files = []

        for f in self.files:
            if not isDiracFile(f):
                continue
            try:
                result = f.replicate( destSE=destSE )
            except Exception as err:
                msg = 'Replication error for file %s (will retry in a bit).' % f.lfn
                logger.warning(msg)
                logger.warning("Error: %s" % str(err))
                retry_files.append(f)

        for f in retry_files:
            try:
                result = f.replicate( destSE=destSE )
            except Exception as err:
                msg = '2nd replication attempt failed for file %s. (will not retry)' % f.lfn
                logger.warning(msg)
                logger.warning(str(err))

    def extend(self, files, unique=False):
        '''Extend the dataset. If unique, then only add files which are not
        already in the dataset.'''
        from Ganga.GPIDev.Base import ReadOnlyObjectError

        if self._parent is not None and self._parent._readonly():
            raise ReadOnlyObjectError('object Job#%s  is read-only and attribute "%s/inputdata" cannot be modified now' % (self._parent.id, getName(self)))

        _external_files = []

        if type(files) is str or isType(files, IGangaFile):
            _external_files = [files]
        elif type(files) in [list, tuple]:
            _external_files = files
        elif isType(files, LHCbDataset):
            _external_files = files.files
        else:
            if not hasattr(files, "__getitem__") or not hasattr(files, '__iter__'):
                _external_files = [files]

        # just in case they extend w/ self
        _to_remove = []
        for this_file in _external_files:
            if hasattr(this_file, 'subfiles'):
                if len(this_file.subfiles) > 0:
                    _external_files = makeGangaListByRef(this_file.subfiles)
                    _to_remove.append(this_file)
            if type(this_file) is str:
                _external_files.append(string_datafile_shortcut_lhcb(this_file, None))
                _to_remove.append(this_file)

        for _this_file in _to_remove:
            _external_files.pop(_external_files.index(_this_file))

        for this_f in _external_files:
            _file = getDataFile(this_f)
            if _file is None:
                _file = this_f
            myName = _file.namePattern
            from GangaDirac.Lib.Files.DiracFile import DiracFile
            if isType(_file, DiracFile):
                myName = _file.lfn
            if unique and myName in self.getFileNames():
                continue
            self.files.append(stripProxy(_file))

    def removeFile(self, input_file):
        try:
            self.files.remove(input_file)
        except:
            raise GangaException('Dataset has no file named %s' % input_file.namePattern)

    def getLFNs(self):
        'Returns a list of all LFNs (by name) stored in the dataset.'
        lfns = []
        if not self:
            return lfns
        for f in self.files:
            if isDiracFile(f):
                subfiles = f.getSubFiles()
                if len(subfiles) == 0:
                    lfns.append(f.lfn)
                else:
                    for file in subfiles:
                        lfns.append(file.lfn)

        #logger.debug( "Returning LFNS:\n%s" % str(lfns) )
        logger.debug("Returning #%s LFNS" % str(len(lfns)))
        return lfns

    def getPFNs(self):
        'Returns a list of all PFNs (by name) stored in the dataset.'
        pfns = []
        if not self:
            return pfns
        for f in self.files:
            if isPFN(f):
                pfns.append(f.namePattern)
        return pfns

    def getFullFileNames(self):
        'Returns all file names w/ PFN or LFN prepended.'
        names = []
        from GangaDirac.Lib.Files.DiracFile import DiracFile
        for f in self.files:
            if isType(f, DiracFile):
                names.append('LFN:%s' % f.lfn)
            else:
                try:
                    names.append('PFN:%s' % f.namePattern)
                except:
                    logger.warning("Cannot determine filename for: %s " % f)
                    raise GangaException("Cannot Get File Name")
        return names

    def getCatalog(self, site=''):
        '''Generates an XML catalog from the dataset (returns the XML string).
        Note: site defaults to config.LHCb.LocalSite
        Note: If the XMLCatalogueSlice attribute is set, then it returns
              what is written there.'''
        if hasattr(self.XMLCatalogueSlice, 'name'):
            if self.XMLCatalogueSlice.name:
                f = open(self.XMLCatalogueSlice.name)
                xml_catalog = f.read()
                f.close()
                return xml_catalog
        if not site:
            site = getConfig('LHCb')['LocalSite']
        lfns = self.getLFNs()
        depth = self.depth
        tmp_xml = tempfile.NamedTemporaryFile(suffix='.xml')
        cmd = 'getLHCbInputDataCatalog(%s,%d,"%s","%s")' \
              % (str(lfns), depth, site, tmp_xml.name)
        result = get_result(cmd, 'LFN->PFN error. XML catalog error.')
        xml_catalog = tmp_xml.read()
        tmp_xml.close()
        return xml_catalog

    def optionsString(self, file=None):
        'Returns the Gaudi-style options string for the dataset (if a filename' \
            ' is given, the file is created and output is written there).'
        if not self or len(self) == 0:
            return ''
        snew = ''
        if self.persistency == 'ROOT':
            snew = '\n#new method\nfrom GaudiConf import IOExtension\nIOExtension(\"%s\").inputFiles([' % self.persistency
        elif self.persistency == 'POOL':
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension(\"%s\").inputFiles([' % self.persistency
        elif self.persistency == None:
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension().inputFiles(['
        else:
            logger.warning(
                "Unknown LHCbDataset persistency technology... reverting to None")
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension().inputFiles(['

        sold = '\nexcept ImportError:\n    #Use previous method\n    from Gaudi.Configuration import EventSelector\n    EventSelector().Input=['
        sdatasetsnew = ''
        sdatasetsold = ''

        dtype_str_default = getConfig('LHCb')['datatype_string_default']
        dtype_str_patterns = getConfig('LHCb')['datatype_string_patterns']
        for f in self.files:
            dtype_str = dtype_str_default
            for this_str in dtype_str_patterns:
                matched = False
                for pat in dtype_str_patterns[this_str]:
                    if fnmatch.fnmatch(f.namePattern, pat):
                        dtype_str = this_str
                        matched = True
                        break
                if matched:
                    break
            sdatasetsnew += '\n        '
            sdatasetsold += '\n        '
            if isDiracFile(f):
                sdatasetsnew += """ \"LFN:%s\",""" % f.lfn
                sdatasetsold += """ \"DATAFILE='LFN:%s' %s\",""" % (f.lfn, dtype_str)
            else:
                sdatasetsnew += """ \"PFN:%s\",""" % f.namePattern
                sdatasetsold += """ \"DATAFILE='PFN:%s' %s\",""" % (f.namePattern, dtype_str)
        if sdatasetsold.endswith(","):
            if self.persistency == 'ROOT':
                sdatasetsnew = sdatasetsnew[:-1] + """\n], clear=True)"""
            else:
                sdatasetsnew = sdatasetsnew[:-1] + """\n    ], clear=True)"""
            sdatasetsold = sdatasetsold[:-1]
            sdatasetsold += """\n    ]"""
        if(file):
            f = open(file, 'w')
            if self.persistency == 'ROOT':
                f.write(snew)
                f.write(sdatasetsnew)
            else:
                f.write(snew)
                f.write(sdatasetsnew)
                f.write(sold)
                f.write(sdatasetsold)
            f.close()
        else:
            if self.persistency == 'ROOT':
                return snew + sdatasetsnew
            else:
                return snew + sdatasetsnew + sold + sdatasetsold

    def _checkOtherFiles(self, other ):
        if isType(other, GangaList) or isType(other, []):
            other_files = LHCbDataset(other).getFullFileNames()
        elif isType(other, LHCbDataset):
            other_files = other.getFullFileNames()
        else:
            raise GangaException("Unknown type for difference")
        return other_files

    def difference(self, other):
        '''Returns a new data set w/ files in this that are not in other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).difference(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def isSubset(self, other):
        '''Is every file in this data set in other?'''
        other_files = self._checkOtherFiles(other)
        return set(self.getFileNames()).issubset(other_files)

    def isSuperset(self, other):
        '''Is every file in other in this data set?'''
        other_files = self._checkOtherFiles(other)
        return set(self.getFileNames()).issuperset(other_files)

    def symmetricDifference(self, other):
        '''Returns a new data set w/ files in either this or other but not
        both.'''
        other_files = other._checkOtherFiles(other)
        files = set(self.getFullFileNames()).symmetric_difference(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def intersection(self, other):
        '''Returns a new data set w/ files common to this and other.'''
        other_files = other._checkOtherFiles(other)
        files = set(self.getFullFileNames()).intersection(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def union(self, other):
        '''Returns a new data set w/ files from this and other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).union(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def bkMetadata(self):
        'Returns the bookkeeping metadata for all LFNs. '
        logger.info("Using BKQuery(bkpath).getDatasetMetadata() with bkpath=the bookkeeping path, will yeild more metadata such as 'TCK' info...")
        cmd = 'bkMetaData(%s)' % self.getLFNs()
        b = get_result(cmd, 'Error removing replica. Replica rm error.')
        return b
示例#4
0
class LHCbDataset(GangaDataset):
    '''Class for handling LHCb data sets (i.e. inputdata for LHCb jobs).

    Example Usage:
    ds = LHCbDataset(["lfn:/some/lfn.file","pfn:/some/pfn.file"])
    ds[0] # DiracFile("/some/lfn.file") - see DiracFile docs for usage
    ds[1] # PhysicalFile("/some/pfn.file")- see PhysicalFile docs for usage
    len(ds) # 2 (number of files)
    ds.getReplicas() # returns replicas for *all* files in the data set
    ds.replicate("CERN-USER") # replicate *all* LFNs to "CERN-USER" SE
    ds.getCatalog() # returns XML catalog slice
    ds.optionsString() # returns Gaudi-sytle options 
    [...etc...]
    '''
    schema = {}
    docstr = 'List of PhysicalFile and DiracFile objects'
    schema['files'] = GangaFileItem(
        defvalue=[],
        typelist=['str', 'Ganga.GPIDev.Adapters.IGangaFile.IGangaFile'],
        sequence=1,
        doc=docstr)
    docstr = 'Ancestor depth to be queried from the Bookkeeping'
    schema['depth'] = SimpleItem(defvalue=0, doc=docstr)
    docstr = 'Use contents of file rather than generating catalog.'
    schema['XMLCatalogueSlice'] = GangaFileItem(defvalue=None, doc=docstr)
    docstr = 'Specify the dataset persistency technology'
    schema['persistency'] = SimpleItem(defvalue=None,
                                       typelist=['str', 'type(None)'],
                                       doc=docstr)
    schema['treat_as_inputfiles'] = SimpleItem(
        defvalue=False,
        doc=
        "Treat the inputdata as inputfiles, i.e. copy the inputdata to the WN")

    _schema = Schema(Version(3, 0), schema)
    _category = 'datasets'
    _name = "LHCbDataset"
    _exportmethods = [
        'getReplicas', '__len__', '__getitem__', 'replicate', 'hasLFNs',
        'append', 'extend', 'getCatalog', 'optionsString', 'getLFNs',
        'getFileNames', 'getFullFileNames', 'difference', 'isSubset',
        'isSuperset', 'intersection', 'symmetricDifference', 'union',
        'bkMetadata', 'isEmpty', 'hasPFNs', 'getPFNs'
    ]  # ,'pop']

    def __init__(self, files=None, persistency=None, depth=0, fromRef=False):
        super(LHCbDataset, self).__init__()
        if files is None:
            files = []
        self.files = GangaList()
        process_files = True
        if fromRef:
            self.files._list.extend(files)
            process_files = False
        elif isinstance(files, GangaList):

            def isFileTest(_file):
                return isinstance(_file, IGangaFile)

            areFiles = all([isFileTest(f) for f in files._list])
            if areFiles:
                self.files._list.extend(files._list)
                process_files = False
        elif isinstance(files, LHCbDataset):
            self.files._list.extend(files.files._list)
            process_files = False

        if process_files:
            if isType(files, LHCbDataset):
                for this_file in files:
                    self.files.append(deepcopy(this_file))
            elif isType(files, IGangaFile):
                self.files.append(deepcopy(files))
            elif isType(files, (list, tuple, GangaList)):
                new_list = []
                for this_file in files:
                    if type(this_file) is str:
                        new_file = string_datafile_shortcut_lhcb(
                            this_file, None)
                    elif isType(this_file, IGangaFile):
                        new_file = stripProxy(this_file)
                    else:
                        new_file = strToDataFile(this_file)
                    new_list.append(new_file)
                self.files.extend(new_list)
            elif type(files) is str:
                self.files.append(string_datafile_shortcut_lhcb(files, None),
                                  False)
            else:
                raise GangaException(
                    "Unknown object passed to LHCbDataset constructor!")

        self.files._setParent(self)

        logger.debug("Processed inputs, assigning files")

        # Feel free to turn this on again for debugging but it's potentially quite expensive
        #logger.debug( "Creating dataset with:\n%s" % self.files )

        logger.debug("Assigned files")

        self.persistency = persistency
        self.depth = depth
        logger.debug("Dataset Created")

    def __getitem__(self, i):
        '''Proivdes scripting (e.g. ds[2] returns the 3rd file) '''
        #this_file = self.files[i]
        # print type(this_file)
        # return this_file
        # return this_file
        # return this_file
        if type(i) == type(slice(0)):
            ds = LHCbDataset(files=self.files[i])
            ds.depth = self.depth
            #ds.XMLCatalogueSlice = self.XMLCatalogueSlice
            return ds
        else:
            return self.files[i]

    def getReplicas(self):
        'Returns the replicas for all files in the dataset.'
        lfns = self.getLFNs()
        cmd = 'getReplicas(%s)' % str(lfns)
        result = get_result(cmd, 'LFC query error. Could not get replicas.')
        return result['Successful']

    def hasLFNs(self):
        'Returns True is the dataset has LFNs and False otherwise.'
        for f in self.files:
            if isDiracFile(f):
                return True
        return False

    def hasPFNs(self):
        'Returns True is the dataset has PFNs and False otherwise.'
        for f in self.files:
            if not isDiracFile(f):
                return True
        return False

    def replicate(self, destSE=''):
        '''Replicate all LFNs to destSE.  For a list of valid SE\'s, type
        ds.replicate().'''

        if not destSE:
            from GangaDirac.Lib.Files.DiracFile import DiracFile
            DiracFile().replicate('')
            return
        if not self.hasLFNs():
            raise GangaException('Cannot replicate dataset w/ no LFNs.')

        retry_files = []

        for f in self.files:
            if not isDiracFile(f):
                continue
            try:
                result = f.replicate(destSE=destSE)
            except Exception as err:
                msg = 'Replication error for file %s (will retry in a bit).' % f.lfn
                logger.warning(msg)
                logger.warning("Error: %s" % str(err))
                retry_files.append(f)

        for f in retry_files:
            try:
                result = f.replicate(destSE=destSE)
            except Exception as err:
                msg = '2nd replication attempt failed for file %s. (will not retry)' % f.lfn
                logger.warning(msg)
                logger.warning(str(err))

    def extend(self, files, unique=False):
        '''Extend the dataset. If unique, then only add files which are not
        already in the dataset.'''
        from Ganga.GPIDev.Base import ReadOnlyObjectError

        if self._parent is not None and self._parent._readonly():
            raise ReadOnlyObjectError(
                'object Job#%s  is read-only and attribute "%s/inputdata" cannot be modified now'
                % (self._parent.id, getName(self)))

        _external_files = []

        if type(files) is str or isType(files, IGangaFile):
            _external_files = [files]
        elif type(files) in [list, tuple]:
            _external_files = files
        elif isType(files, LHCbDataset):
            _external_files = files.files
        else:
            if not hasattr(files, "__getitem__") or not hasattr(
                    files, '__iter__'):
                _external_files = [files]

        # just in case they extend w/ self
        _to_remove = []
        for this_file in _external_files:
            if hasattr(this_file, 'subfiles'):
                if len(this_file.subfiles) > 0:
                    _external_files = makeGangaListByRef(this_file.subfiles)
                    _to_remove.append(this_file)
            if type(this_file) is str:
                _external_files.append(
                    string_datafile_shortcut_lhcb(this_file, None))
                _to_remove.append(this_file)

        for _this_file in _to_remove:
            _external_files.pop(_external_files.index(_this_file))

        for this_f in _external_files:
            _file = getDataFile(this_f)
            if _file is None:
                _file = this_f
            myName = _file.namePattern
            from GangaDirac.Lib.Files.DiracFile import DiracFile
            if isType(_file, DiracFile):
                myName = _file.lfn
            if unique and myName in self.getFileNames():
                continue
            self.files.append(stripProxy(_file))

    def removeFile(self, input_file):
        try:
            self.files.remove(input_file)
        except:
            raise GangaException('Dataset has no file named %s' %
                                 input_file.namePattern)

    def getLFNs(self):
        'Returns a list of all LFNs (by name) stored in the dataset.'
        lfns = []
        if not self:
            return lfns
        for f in self.files:
            if isDiracFile(f):
                subfiles = f.getSubFiles()
                if len(subfiles) == 0:
                    lfns.append(f.lfn)
                else:
                    for file in subfiles:
                        lfns.append(file.lfn)

        #logger.debug( "Returning LFNS:\n%s" % str(lfns) )
        logger.debug("Returning #%s LFNS" % str(len(lfns)))
        return lfns

    def getPFNs(self):
        'Returns a list of all PFNs (by name) stored in the dataset.'
        pfns = []
        if not self:
            return pfns
        for f in self.files:
            if isPFN(f):
                pfns.append(f.namePattern)
        return pfns

    def getFullFileNames(self):
        'Returns all file names w/ PFN or LFN prepended.'
        names = []
        from GangaDirac.Lib.Files.DiracFile import DiracFile
        for f in self.files:
            if isType(f, DiracFile):
                names.append('LFN:%s' % f.lfn)
            else:
                try:
                    names.append('PFN:%s' % f.namePattern)
                except:
                    logger.warning("Cannot determine filename for: %s " % f)
                    raise GangaException("Cannot Get File Name")
        return names

    def getCatalog(self, site=''):
        '''Generates an XML catalog from the dataset (returns the XML string).
        Note: site defaults to config.LHCb.LocalSite
        Note: If the XMLCatalogueSlice attribute is set, then it returns
              what is written there.'''
        if hasattr(self.XMLCatalogueSlice, 'name'):
            if self.XMLCatalogueSlice.name:
                f = open(self.XMLCatalogueSlice.name)
                xml_catalog = f.read()
                f.close()
                return xml_catalog
        if not site:
            site = getConfig('LHCb')['LocalSite']
        lfns = self.getLFNs()
        depth = self.depth
        tmp_xml = tempfile.NamedTemporaryFile(suffix='.xml')
        cmd = 'getLHCbInputDataCatalog(%s,%d,"%s","%s")' \
              % (str(lfns), depth, site, tmp_xml.name)
        result = get_result(cmd, 'LFN->PFN error. XML catalog error.')
        xml_catalog = tmp_xml.read()
        tmp_xml.close()
        return xml_catalog

    def optionsString(self, file=None):
        'Returns the Gaudi-style options string for the dataset (if a filename' \
            ' is given, the file is created and output is written there).'
        if not self or len(self) == 0:
            return ''
        snew = ''
        if self.persistency == 'ROOT':
            snew = '\n#new method\nfrom GaudiConf import IOExtension\nIOExtension(\"%s\").inputFiles([' % self.persistency
        elif self.persistency == 'POOL':
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension(\"%s\").inputFiles([' % self.persistency
        elif self.persistency == None:
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension().inputFiles(['
        else:
            logger.warning(
                "Unknown LHCbDataset persistency technology... reverting to None"
            )
            snew = '\ntry:\n    #new method\n    from GaudiConf import IOExtension\n    IOExtension().inputFiles(['

        sold = '\nexcept ImportError:\n    #Use previous method\n    from Gaudi.Configuration import EventSelector\n    EventSelector().Input=['
        sdatasetsnew = ''
        sdatasetsold = ''

        dtype_str_default = getConfig('LHCb')['datatype_string_default']
        dtype_str_patterns = getConfig('LHCb')['datatype_string_patterns']
        for f in self.files:
            dtype_str = dtype_str_default
            for this_str in dtype_str_patterns:
                matched = False
                for pat in dtype_str_patterns[this_str]:
                    if fnmatch.fnmatch(f.namePattern, pat):
                        dtype_str = this_str
                        matched = True
                        break
                if matched:
                    break
            sdatasetsnew += '\n        '
            sdatasetsold += '\n        '
            if isDiracFile(f):
                sdatasetsnew += """ \"LFN:%s\",""" % f.lfn
                sdatasetsold += """ \"DATAFILE='LFN:%s' %s\",""" % (f.lfn,
                                                                    dtype_str)
            else:
                sdatasetsnew += """ \"%s\",""" % f.accessURL()[0]
                sdatasetsold += """ \"DATAFILE='%s' %s\",""" % (
                    f.accessURL()[0], dtype_str)
        if sdatasetsold.endswith(","):
            if self.persistency == 'ROOT':
                sdatasetsnew = sdatasetsnew[:-1] + """\n], clear=True)"""
            else:
                sdatasetsnew = sdatasetsnew[:-1] + """\n    ], clear=True)"""
            sdatasetsold = sdatasetsold[:-1]
            sdatasetsold += """\n    ]"""
        if (file):
            f = open(file, 'w')
            if self.persistency == 'ROOT':
                f.write(snew)
                f.write(sdatasetsnew)
            else:
                f.write(snew)
                f.write(sdatasetsnew)
                f.write(sold)
                f.write(sdatasetsold)
            f.close()
        else:
            if self.persistency == 'ROOT':
                return snew + sdatasetsnew
            else:
                return snew + sdatasetsnew + sold + sdatasetsold

    def _checkOtherFiles(self, other):
        if isType(other, GangaList) or isType(other, []):
            other_files = LHCbDataset(other).getFullFileNames()
        elif isType(other, LHCbDataset):
            other_files = other.getFullFileNames()
        else:
            raise GangaException("Unknown type for difference")
        return other_files

    def difference(self, other):
        '''Returns a new data set w/ files in this that are not in other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).difference(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def isSubset(self, other):
        '''Is every file in this data set in other?'''
        other_files = self._checkOtherFiles(other)
        return set(self.getFileNames()).issubset(other_files)

    def isSuperset(self, other):
        '''Is every file in other in this data set?'''
        other_files = self._checkOtherFiles(other)
        return set(self.getFileNames()).issuperset(other_files)

    def symmetricDifference(self, other):
        '''Returns a new data set w/ files in either this or other but not
        both.'''
        other_files = other._checkOtherFiles(other)
        files = set(self.getFullFileNames()).symmetric_difference(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def intersection(self, other):
        '''Returns a new data set w/ files common to this and other.'''
        other_files = other._checkOtherFiles(other)
        files = set(self.getFullFileNames()).intersection(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def union(self, other):
        '''Returns a new data set w/ files from this and other.'''
        other_files = self._checkOtherFiles(other)
        files = set(self.getFullFileNames()).union(other_files)
        data = LHCbDataset()
        data.extend(list(files))
        data.depth = self.depth
        return data

    def bkMetadata(self):
        'Returns the bookkeeping metadata for all LFNs. '
        logger.info(
            "Using BKQuery(bkpath).getDatasetMetadata() with bkpath=the bookkeeping path, will yeild more metadata such as 'TCK' info..."
        )
        cmd = 'bkMetaData(%s)' % self.getLFNs()
        b = get_result(cmd, 'Error removing replica. Replica rm error.')
        return b
示例#5
0
文件: Objects.py 项目: kreczko/ganga
    def __set__(self, _obj, _val):

        obj = stripProxy(_obj)
        val = stripProxy(_val)

        from Ganga.GPIDev.Lib.GangaList.GangaList import makeGangaList, GangaList

        cs = self._bind_method(obj, self._checkset_name)
        if cs:
            cs(val)
        this_filter = self._bind_method(obj, self._filter_name)
        if this_filter:
            val = this_filter(val)

        # LOCKING
        obj._getWriteAccess()

        # self._check_getter()

        item = stripProxy(obj._schema[self._name])

        def cloneVal(v):
            return self.__cloneVal(v, obj)

        obj_reg = obj._getRegistry()
        full_reg = obj_reg is not None and hasattr(obj_reg, 'dirty_flush_counter')

        ### THIS CODE HERE CHANGES THE DIRTY FLUSH COUNTER SUCH THAT GANGALIST OBJECTS ARE WRITTEN TO DISK ATOMICALLY
        ### THIS COMES WITH A MAJOR PERFORMANCE IMPROVEMENT IN SOME CODE THAT REALLY SHOULDN'T BE SLOW
        ### Maybe delete this if/when we aren't using XML repo in future as allowing a really dirty repo is probably bad m'kay

        ## NB Should really tie in the default here to defaults from Core
        old_count = 10
        if hasattr(val, '__len__') and full_reg:
            old_count = obj_reg.dirty_flush_counter
            val_len = 2*len(val) + 10
            obj_reg.dirty_flush_counter = val_len
            obj_len = 1
            if len(val) > 0:
                bare_val = stripProxy(val)
                if hasattr(bare_val, '_schema'):
                    obj_len = len(stripProxy(bare_val._schema).datadict.keys())
                    obj_len = obj_len*2
            val_len = val_len * obj_len

        if item['sequence']:
            _preparable = True if item['preparable'] else False
            if len(val) == 0:
                val = GangaList()
            else:
                if isType(item, Schema.ComponentItem):
                        val = makeGangaList(val, cloneVal, parent=obj, preparable=_preparable)
                else:
                        val = makeGangaList(val, parent=obj, preparable=_preparable)
        else:
            if isType(item, Schema.ComponentItem):
                val = cloneVal(val)

        if hasattr(val, '_setParent'):
            val._setParent(obj)
        
        ### RESET DIRTY COUNT
        if full_reg:
            obj_reg.dirty_flush_counter = old_count

        obj._data[self._name] = val

        obj._setDirty()
示例#6
0
    def __atomic_set__(self, _obj, _val):
        ## self: attribute being changed or Ganga.GPIDev.Base.Objects.Descriptor in which case getName(self) gives the name of the attribute being changed
        ## _obj: parent class which 'owns' the attribute
        ## _val: value of the attribute which we're about to set

        #if hasattr(_obj, getName(self)):
        #    if not isinstance(getattr(_obj, getName(self)), GangaObject):
        #        if type( getattr(_obj, getName(self)) ) == type(_val):
        #            object.__setattr__(_obj, getName(self), deepcopy(_val))
        #            return
#
#        if not isinstance(_obj, GangaObject) and type(_obj) == type(_val):
#            _obj = deepcopy(_val)
#            return

        obj = _obj
        temp_val = _val

        from Ganga.GPIDev.Lib.GangaList.GangaList import makeGangaList

        if hasattr(obj, '_checkset_name'):
            checkSet = self._bind_method(obj, self._checkset_name)
            if checkSet is not None:
                checkSet(temp_val)
        if hasattr(obj, '_filter_name'):
            this_filter = self._bind_method(obj, self._filter_name)
            if this_filter:
                val = this_filter(temp_val)
            else:
                val = temp_val
        else:
            val = temp_val

        # LOCKING
        obj._getWriteAccess()

        #self._check_getter()

        item = obj._schema[getName(self)]

        def cloneVal(v):
            GangaList = _getGangaList()
            if isinstance(v, (list, tuple, GangaList)):
                new_v = GangaList()
                for elem in v:
                    new_v.append(self.__cloneVal(elem, obj))
                return new_v
            else:
                return self.__cloneVal(v, obj)

        ## If the item has been defined as a sequence great, let's continue!
        if item['sequence']:
            _preparable = True if item['preparable'] else False
            if len(val) == 0:
                GangaList = _getGangaList()
                new_val = GangaList()
            else:
                if isinstance(item, Schema.ComponentItem):
                    new_val = makeGangaList(val, cloneVal, parent=obj, preparable=_preparable)
                else:
                    new_val = makeGangaList(val, parent=obj, preparable=_preparable)
        else:
            ## Else we need to work out what we've got.
            if isinstance(item, Schema.ComponentItem):
                GangaList = _getGangaList()
                if isinstance(val, (list, tuple, GangaList)):
                    ## Can't have a GangaList inside a GangaList easily so lets not
                    if isinstance(_obj, GangaList):
                        newListObj = []
                    else:
                        newListObj = GangaList()

                    self.__createNewList(newListObj, val, cloneVal)
                    #for elem in val:
                    #    newListObj.append(cloneVal(elem))
                    new_val = newListObj
                else:
                    new_val = cloneVal(val)
            else:
                new_val = val
                pass
            #val = deepcopy(val)

        if isinstance(new_val, Node):
            new_val._setParent(obj)

        obj.setNodeAttribute(getName(self), new_val)

        obj._setDirty()