Пример #1
0
    def __getitem__(self, x):
        """Retrieve an object from the registry: registry[x].

        If 'x' is an int it is treated as an object id and the matching
        object is returned wrapped in a proxy (RegistryIndexError if no
        such id exists).
        If 'x' is a string it is treated as an object name; the name must
        match exactly one object, otherwise RegistryKeyError is raised.
        Any other type raises RegistryAccessError.
        """
        # Lookup by id.
        if isinstance(x, int):
            try:
                return addProxy(self.objects[x])
            except IndexError:
                raise RegistryIndexError('list index out of range')

        # Lookup by name: the name must identify exactly one object.
        if isinstance(x, str):
            ids = [j.id for j in self.objects.values() if j.name == x]
            if len(ids) > 1:
                raise RegistryKeyError('object "%s" not unique' % x)
            if len(ids) == 0:
                raise RegistryKeyError('object "%s" not found' % x)
            return addProxy(self.objects[ids[0]])

        raise RegistryAccessError('Expected int or string (job name).')
Пример #2
0
    def __getitem__(self, x):
        """Retrieve the job object from the registry: registry[x].

        An int 'x' is an object id: the proxied object is returned, or
        RegistryIndexError is raised. A string 'x' is an object name and
        must match exactly one object (RegistryKeyError otherwise). Any
        other type raises RegistryAccessError.
        """
        if isinstance(x, int):
            try:
                return addProxy(self.objects[x])
            except IndexError:
                raise RegistryIndexError('list index out of range')

        if isinstance(x, str):
            matching_ids = []
            for obj in self.objects.values():
                if obj.name == x:
                    matching_ids.append(obj.id)
            if len(matching_ids) > 1:
                raise RegistryKeyError('object "%s" not unique' % x)
            if not matching_ids:
                raise RegistryKeyError('object "%s" not found' % x)
            return addProxy(self.objects[matching_ids[0]])

        raise RegistryAccessError('Expected int or string (job name).')
Пример #3
0
    def createNewJob(self):
        """Create any jobs required for this unit.

        Builds a new GPI Job whose backend is cloned from the parent
        transform. Each remaining field is deep-copied from this unit when
        set here, falling back to the parent transform's value otherwise.
        """
        j = GPI.Job()

        j.backend = self._getParent().backend.clone()

        # copy from ourselves or the parent transform depending on what's
        # specified
        fields = [
            'application', 'splitter', 'inputfiles', 'inputdata',
            'inputsandbox', 'outputfiles', 'postprocessors'
        ]

        for f in fields:

            # postprocessors need special handling: "set" is judged via
            # process_objects, and the copy is taken through the proxy
            if (f == "postprocessors"
                    and len(getattr(self, f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(addProxy(self).postprocessors)
            elif (f != "postprocessors" and getattr(self, f)):
                setattr(j, f, copy.deepcopy(getattr(self, f)))
            # fall back to the parent transform's values when unset here
            elif (f == "postprocessors"
                  and len(getattr(self._getParent(), f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(
                    addProxy(self._getParent()).postprocessors)
            elif (f != "postprocessors" and getattr(self._getParent(), f)):
                setattr(j, f, copy.deepcopy(getattr(self._getParent(), f)))

        return j
Пример #4
0
    def select(self, minid=None, maxid=None, **attrs):
        """Return a new registry slice filtered by id range and attributes.

        minid/maxid may be ints or Job objects; for a Job the master job's
        id is used, and maxid defaults to minid. Extra keyword arguments
        are forwarded to do_select as attribute filters and are also
        flattened into the slice's descriptive name.
        """
        import repr  # Python 2 'repr' module; used for this_repr below
        from Ganga.GPIDev.Lib.Job.Job import Job

        # Accept Job objects as bounds: use the master job's id if present
        if isType(minid, Job):
            if minid.master is not None:
                minid = minid.master.id
            else:
                minid = minid.id
            if maxid is None:
                maxid = minid

        if isType(maxid, Job):
            if maxid.master is not None:
                maxid = maxid.master.id
            else:
                maxid = maxid.id

        logger = getLogger()

        this_repr = repr.Repr()
        from Ganga.GPIDev.Base.Proxy import addProxy
        attrs_str = ""
        ## Loop through all possible input combinations to construct a string
        ## representation of the attrs from possible inputs; the additional
        ## arguments are flattened into the single-line attrs_str
        for a in attrs:
            if isclass(attrs[a]):
                this_attr = addProxy(attrs[a]())
            else:
                from Ganga.GPIDev.Base.Objects import GangaObject
                if isType(attrs[a], GangaObject):
                    this_attr = addProxy(attrs[a])
                else:
                    if type(attrs[a]) is str:
                        from Ganga.GPIDev.Base.Proxy import getRuntimeGPIObject
                        this_attr = getRuntimeGPIObject(attrs[a], True)
                    else:
                        this_attr = attrs[a]
            # strip every line so the (possibly multi-line) repr collapses
            # cleanly; the original loop rebound the loop variable, a no-op
            flat_str = ''.join(
                line.strip() for line in str(this_attr).split('\n'))
            attrs_str += ", %s=\"%s\"" % (a, flat_str)

        logger.debug("Attrs_Str: %s" % attrs_str)
        slice_name = "%s.select(minid='%s', maxid='%s'%s)" % (
            self.name, this_repr.repr(minid), this_repr.repr(maxid), attrs_str)
        logger.debug("Constructing slice: %s" % slice_name)
        this_slice = self.__class__(slice_name)

        def append(id, obj):
            this_slice.objects[id] = obj

        self.do_select(append, minid, maxid, **attrs)
        return this_slice
Пример #5
0
 def setParameter(self, **args):
     """Use: setParameter(processName="HWW") to set the processName in all applications to "HWW"
        Warns if applications are not affected because they lack the parameter"""
     # items() instead of the Python-2-only iteritems()
     for name, parm in args.items():
         for tf in [t for t in self.transforms if t.application]:
             if name in tf.application.getNodeData():
                 addProxy(tf.application).__setattr__(name, parm)
             else:
                 # "%s" instead of "%i": tf.name need not be an integer, and
                 # "%i" would raise TypeError when the warning is formatted
                 logger.warning("Transform %s was not affected!", tf.name)
Пример #6
0
 def setParameter(self, **args):
     """Use: setParameter(processName="HWW") to set the processName in all applications to "HWW"
        Warns if applications are not affected because they lack the parameter"""
     # items() instead of the Python-2-only iteritems()
     for name, parm in args.items():
         for tf in [t for t in self.transforms if t.application]:
             if name in tf.application.getNodeData():
                 addProxy(tf.application).__setattr__(name, parm)
             else:
                 # "%s" instead of "%i": tf.name need not be an integer, and
                 # "%i" would raise TypeError when the warning is formatted
                 logger.warning("Transform %s was not affected!", tf.name)
Пример #7
0
    def select(self, minid=None, maxid=None, **attrs):
        """Return a new registry slice filtered by id range and attributes.

        minid/maxid may be ints or Job objects; for a Job the master job's
        id is used, and maxid defaults to minid. Extra keyword arguments
        are forwarded to do_select as attribute filters and are also
        flattened into the slice's descriptive name.
        """
        import repr
        from Ganga.GPIDev.Lib.Job.Job import Job
        from inspect import isclass  # hoisted: was imported on every loop pass

        # Accept Job objects as bounds: use the master job's id if present
        if isType(minid, Job):
            if minid.master:
                minid = minid.master.id
            else:
                minid = minid.id
            if maxid is None:
                maxid = minid

        if isType(maxid, Job):
            if maxid.master:
                maxid = maxid.master.id
            else:
                maxid = maxid.id

        logger = getLogger()

        this_repr = repr.Repr()
        from Ganga.GPIDev.Base.Proxy import addProxy
        attrs_str = ""
        ## Loop through all possible input combinations to construct a string
        ## representation of the attrs from possible inputs; the additional
        ## arguments are flattened into the single-line attrs_str
        for a in attrs:
            if isclass(attrs[a]):
                this_attr = addProxy(attrs[a]())
            else:
                from Ganga.GPIDev.Base.Objects import GangaObject
                if isType(attrs[a], GangaObject):
                    this_attr = addProxy(attrs[a])
                else:
                    if type(attrs[a]) is str:
                        from Ganga.GPIDev.Base.Proxy import getRuntimeGPIObject
                        this_attr = getRuntimeGPIObject(attrs[a], True)
                    else:
                        this_attr = attrs[a]
            # strip every line so the (possibly multi-line) repr collapses
            # cleanly; the original loop rebound the loop variable, a no-op
            flat_str = ''.join(
                line.strip() for line in str(this_attr).split('\n'))
            attrs_str += ", %s=\"%s\"" % (a, flat_str)

        logger.debug("Attrs_Str: %s" % attrs_str)
        slice_name = "%s.select(minid='%s', maxid='%s'%s)" % (
            self.name, this_repr.repr(minid), this_repr.repr(maxid), attrs_str)
        logger.debug("Constructing slice: %s" % slice_name)
        this_slice = self.__class__(slice_name)

        def append(id, obj):
            this_slice.objects[id] = obj
        self.do_select(append, minid, maxid, **attrs)
        return this_slice
Пример #8
0
    def createNewJob(self):
        """Create any jobs required for this unit.

        Backend, application and (when present) inputdata are cloned from
        this unit/parent transform; input/output files are deep-copied so
        the job cannot mutate the transform's own objects.
        """
        import copy
        j = makeRegisteredJob()
        j.backend = self._getParent().backend.clone()
        j.application = self._getParent().application.clone()
        if self.inputdata:
            j.inputdata = self.inputdata.clone()

        j.inputfiles = copy.deepcopy(self._getParent().inputfiles)

        trf = self._getParent()
        task = trf._getParent()  # NOTE(review): 'task' is unused below — confirm it is needed
        j.inputsandbox = self._getParent().inputsandbox

        j.outputfiles = copy.deepcopy(self._getParent().outputfiles)
        # postprocessors are only copied (via the proxy) when some are defined
        if len(self._getParent().postprocessors.process_objects) > 0:
            j.postprocessors = copy.deepcopy(
                addProxy(self._getParent()).postprocessors)

        if trf.splitter:
            j.splitter = trf.splitter.clone()

            # change the first event for GaussSplitter so each unit
            # generates its own event range
            from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter
            if isType(trf.splitter, GaussSplitter):
                events_per_unit = j.splitter.eventsPerJob * \
                    j.splitter.numberOfJobs
                j.splitter.firstEventNumber += self.getID() * events_per_unit

        else:
            j.splitter = SplitByFiles()

        return j
Пример #9
0
    def split(self, job):
        """Split 'job' into one subjob per value set.

        The attributes to vary are given either as a single 'attribute'
        with a list of 'values', or as 'multi_attrs' (a dict mapping a
        dotted attribute path to its per-subjob value list); setting both
        raises ApplicationConfigurationError.
        """

        subjobs = []

        # sort out multiple arg splitting: the two modes are exclusive
        if (self.attribute != '' or len(self.values) > 0) and len(self.multi_attrs) > 0:
            raise ApplicationConfigurationError("Setting both 'attribute'/'values' and 'multi_attrs' is unsupported")

        if self.attribute != '':
            # single-attribute mode: normalise to the list-of-lists shape
            attrlist = [self.attribute]
            values = []
            for v in self.values:
                values.append([v])
        else:
            # check we have enough values in the dictionary: every attribute
            # must supply the same number of per-subjob values
            numjobs = -1
            attrlist = []
            for attr in self.multi_attrs:
                if numjobs == -1:
                    numjobs = len(self.multi_attrs[attr])
                else:
                    if len(self.multi_attrs[attr]) != numjobs:
                        raise ApplicationConfigurationError(
                            "Number of values for '%s' doesn't equal others '%d'" % (attr, numjobs))

                attrlist.append(attr)

            # now get everything organised: transpose multi_attrs into one
            # value list per subjob
            values = []
            for i in range(0, numjobs):
                valtmp = []
                for attr in attrlist:
                    valtmp.append(self.multi_attrs[attr][i])
                values.append(valtmp)

        # check we have enough values to cover the attributes
        for vallist in values:
            if len(attrlist) != len(vallist):
                raise ApplicationConfigurationError(
                    "Number of attributes to split over doesn't equal number of values in list '%s'" % vallist)

        # now perform the split
        for vallist in values:

            # for each list of values, set the attributes
            j = addProxy(self.createSubjob(job))

            for i in range(0, len(attrlist)):
                # attribute paths are dotted (e.g. 'application.args'):
                # walk down to the owning object, then set the final attr
                attrs = attrlist[i].split('.')
                obj = j
                for attr in attrs[:-1]:
                    obj = getattr(obj, attr)
                attr = attrs[-1]
                setattr(obj, attr, vallist[i])
                logger.debug('set %s = %s to subjob.' %
                             (attrlist[i], getattr(obj, attr)))

            subjobs.append(stripProxy(j))

        return subjobs
Пример #10
0
def browseBK(gui=True):
    """Launch the LHCb bookkeeping browser and return an LHCbDataset.

    Opens the bookkeeping browser (GUI by default). After browsing and
    selecting datafiles, clicking "Save as ..." quits the browser and the
    selection comes back as a proxied LHCbDataset, e.g.::

        l = browseBK()             # keep the selection in a variable
        j.inputdata = browseBK()   # feed it straight into a job

    Returns None (with a warning) when the browser cannot be imported.
    """
    import Ganga.Utility.logging
    from Ganga.GPIDev.Base.Proxy import addProxy

    logger = Ganga.Utility.logging.getLogger()
    try:
        from GangaLHCb.Lib.DIRAC.Bookkeeping import Bookkeeping
        from Ganga.GPI import LHCbDataset
    except ImportError:
        logger.warning('Could not start Bookkeeping Browser')
        return None
    return addProxy(Bookkeeping().browse(gui))
Пример #11
0
    def createNewJob(self):
        """Create any jobs required for this unit.

        Backend, application and (when present) inputdata are cloned from
        this unit/parent transform; input/output files are deep-copied so
        the job cannot mutate the transform's own objects.
        """
        import copy
        j = GPI.Job()
        j.backend = self._getParent().backend.clone()
        j.application = self._getParent().application.clone()
        if self.inputdata:
            j.inputdata = self.inputdata.clone()

        j.inputfiles = copy.deepcopy(self._getParent().inputfiles)

        trf = self._getParent()
        task = trf._getParent()  # NOTE(review): 'task' is unused below — confirm it is needed
        j.inputsandbox = self._getParent().inputsandbox

        j.outputfiles = copy.deepcopy(self._getParent().outputfiles)
        # postprocessors are only copied (via the proxy) when some are defined
        if len(self._getParent().postprocessors.process_objects) > 0:
            j.postprocessors = copy.deepcopy(
                addProxy(self._getParent()).postprocessors)

        if trf.splitter:
            j.splitter = trf.splitter.clone()

            # change the first event for GaussSplitter
            from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter
            if isType(trf.splitter, GaussSplitter):
                events_per_unit = j.splitter.eventsPerJob * \
                    j.splitter.numberOfJobs
                # NOTE(review): overwrites firstEventNumber; a similar
                # implementation elsewhere in this file increments it
                # (+=) instead — confirm which is intended
                j.splitter.firstEventNumber = self.getID() * events_per_unit

        else:
            j.splitter = SplitByFiles()

        return j
Пример #12
0
    def getDataset(self):
        '''Gets the dataset from the bookkeeping for current path, etc.

        Builds and runs a BK "getDataset" query for the configured path,
        dqflag, type and (for type="RunsByDate") date/selection options,
        then wraps the returned LFNs in DiracFile objects inside a proxied
        LHCbDataset. Returns None when no path is set; raises
        GangaException for an invalid type or unsupported options.
        '''
        if not self.path:
            return None
        if self.type not in ['Path', 'RunsByDate', 'Run', 'Production']:
            raise GangaException('Type="%s" is not valid.' % self.type)
        # was "not self.type is 'RunsByDate'": identity comparison against a
        # string literal is interning-dependent — use equality instead
        if self.type != 'RunsByDate':
            if self.startDate:
                msg = 'startDate not supported for type="%s".' % self.type
                raise GangaException(msg)
            if self.endDate:
                msg = 'endDate not supported for type="%s".' % self.type
                raise GangaException(msg)
            if self.selection:
                msg = 'selection not supported for type="%s".' % self.type
                raise GangaException(msg)
        cmd = "getDataset('%s','%s','%s','%s','%s','%s')" % (self.path, self.dqflag, self.type, self.startDate, self.endDate, self.selection)
        from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
        knownLists = [tuple, list, GangaList]
        # a list-valued dqflag is embedded unquoted so it renders as a list
        if isType(self.dqflag, knownLists):
            cmd = "getDataset('%s',%s,'%s','%s','%s','%s')" % (self.path, self.dqflag, self.type, self.startDate,
                                                               self.endDate, self.selection)
        result = get_result(cmd, 'BK query error.', 'BK query error.')

        logger.debug("Finished Running Command")

        files = []
        value = result['Value']
        if 'LFNs' in value:
            files = value['LFNs']
        if not isinstance(files, list):  # i.e. a dict of LFN:Metadata
            files = files.keys()

        logger.debug("Creating DiracFile objects")

        from GangaDirac.Lib.Files.DiracFile import DiracFile

        logger.debug("Creating new list")
        new_files = [DiracFile(lfn=f) for f in files]

        logger.info("Constructing LHCbDataset")

        from GangaLHCb.Lib.LHCbDataset import LHCbDataset
        logger.debug("Imported LHCbDataset")
        ds = LHCbDataset(files=new_files, fromRef=True)

        logger.debug("Returning Dataset")

        return addProxy(ds)
Пример #13
0
def browseBK(gui=True):
    """Launch the LHCb bookkeeping browser and return an LHCbDataset.

    Opens the bookkeeping browser (GUI by default). After browsing and
    selecting datafiles, clicking "Save as ..." quits the browser and the
    selection comes back as a proxied LHCbDataset, e.g.::

        l = browseBK()             # keep the selection in a variable
        j.inputdata = browseBK()   # feed it straight into a job

    Returns None (with a warning) when the browser cannot be imported.
    """
    import Ganga.Utility.logging
    from Ganga.GPIDev.Base.Proxy import addProxy

    logger = Ganga.Utility.logging.getLogger()
    try:
        from GangaLHCb.Lib.Backends.Bookkeeping import Bookkeeping
        from Ganga.GPI import LHCbDataset
    except ImportError:
        logger.warning("Could not start Bookkeeping Browser")
        return None
    return addProxy(Bookkeeping().browse(gui))
Пример #14
0
def _addToInterface(interface, name, _object):
    """Attach _object to interface under name, proxy-wrapping GangaObject
    instances and classes; anything else is attached unchanged."""
    if isType(_object, GangaObject):
        wrapped = addProxy(_object)
    elif isclass(_object) and issubclass(_object, GangaObject):
        wrapped = getProxyClass(_object)
    else:
        wrapped = _object
    setattr(interface, name, wrapped)
Пример #15
0
def _addToInterface(interface, name, _object):
    """Attach _object to interface under name, proxy-wrapping GangaObject
    instances and classes; anything else is attached unchanged."""
    if isType(_object, GangaObject):
        # instance: wrap in an object proxy
        setattr(interface, name, addProxy(_object))
        return
    if isclass(_object) and issubclass(_object, GangaObject):
        # class: expose the proxy class instead
        setattr(interface, name, getProxyClass(_object))
        return
    setattr(interface, name, _object)
Пример #16
0
def _wrap(obj):
    """Wrap obj in its GPI proxy form: GangaObjects get an object proxy,
    RegistrySlices their proxy class, lists are proxied element-wise, and
    anything else is returned unchanged."""
    if isType(obj, GangaObject):
        return addProxy(obj)
    if isType(obj, RegistrySlice):
        return obj._proxyClass(obj)
    if isType(obj, list):
        # list comprehension guarantees a list on both Python 2 and 3
        # (on Python 3, map() would return a lazy iterator)
        return [addProxy(element) for element in obj]
    return obj
Пример #17
0
    def setUp(self):
        """Build 10 lists of 3 random TFiles plus an empty proxied GangaList."""
        # make a list of lists containing GangaObjects
        self.filelist = [[self._makeRandomTFile() for _ in range(3)]
                         for _ in range(10)]

        # make an empty GangaList
        self.gangalist = addProxy(makeGangaList([]))
Пример #18
0
    def testContains(self):
        """Tests __contains__"""

        proxied = [addProxy(item) for item in self.plain1]
        self.assertEqual(proxied, self.proxied1)

        for item in proxied:
            self.assertTrue(isProxy(item))
            self.assertIn(item, self.proxied1, 'Proxied list should contain each proxied object')
Пример #19
0
    def setUp(self):
        """Build 10 lists of 3 random TFiles plus an empty proxied GangaList."""
        # make a list of lists containing GangaObjects
        self.filelist = []
        for _ in range(10):
            triple = [self._makeRandomTFile() for _ in range(3)]
            self.filelist.append(triple)

        # make an empty GangaList
        self.gangalist = addProxy(makeGangaList([]))
Пример #20
0
    def testContains(self):
        """Tests __contains__"""

        plist = []
        for entry in self.plain1:
            plist.append(addProxy(entry))
        assert plist == self.proxied1

        for proxy_obj in plist:
            assert isProxy(proxy_obj)
            assert proxy_obj in self.proxied1, 'Proxied list should contain each proxied object'
Пример #21
0
    def testContains(self):
        """Tests __contains__"""

        wrapped = [addProxy(member) for member in self.plain1]
        self.assertEqual(wrapped, self.proxied1)

        for member in wrapped:
            self.assertTrue(isProxy(member))
            self.assertIn(member, self.proxied1, 'Proxied list should contain each proxied object')
Пример #22
0
    def testContains(self):
        """Tests __contains__"""

        wrapped = []
        for member in self.plain1:
            wrapped.append(addProxy(member))
        assert wrapped == self.proxied1

        for member in wrapped:
            assert isProxy(member)
            assert member in self.proxied1, 'Proxied list should contain each proxied object'
Пример #23
0
    def setUp(self):
        """Run the base fixture, then build 10 lists of 3 random TFiles and
        an empty proxied GangaList."""
        super(TestNestedLists, self).setUp()
        # make a list of lists containing GangaObjects
        self.filelist = []
        self.gangalist = None
        self.filelist = [[self._makeRandomTFile() for _ in range(3)]
                         for _ in range(10)]

        # make an empty GangaList
        self.gangalist = addProxy(makeGangaList([]))
Пример #24
0
    def testRemove(self):
        """Removing a proxied element shrinks both the proxy and impl lists."""
        tfile = addProxy(TFile(name='bar'))
        self.proxied1.insert(7, tfile)
        length_before = len(self.proxied1)

        self.proxied1.remove(tfile)

        assert len(self.proxied1) == length_before - 1
        assert tfile not in self.proxied1
        assert tfile._impl not in self.proxied1._impl
Пример #25
0
    def testRemove(self):
        """Removing a proxied element shrinks both the proxy and impl lists."""
        inserted = addProxy(TFile(name='bar'))
        self.proxied1.insert(7, inserted)
        old_size = len(self.proxied1)

        self.proxied1.remove(inserted)

        assert old_size - 1 == len(self.proxied1)
        assert inserted not in self.proxied1
        assert inserted._impl not in self.proxied1._impl
Пример #26
0
    def split(self, job):
        """Create one subjob per argument set in self.args."""
        subjobs = []

        for arg_set in self.args:
            proxy_job = addProxy(self.createSubjob(job))
            # Add new arguments to subjob
            proxy_job.application.args = arg_set
            logger.debug('Arguments for split job is: ' + str(arg_set))
            subjobs.append(stripProxy(proxy_job))
        return subjobs
Пример #27
0
def getDiracFiles():
    """Return a proxied GangaList of DiracFile objects, one per LFN under
    the user's DIRAC LFN base as reported by dirac-dms-user-lfns."""
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns'
    logger.info('Creating list, this can take a while if you have a large number of SE files, please wait...')
    execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None)
    g = GangaList()
    # filename starts with '-' (the LFN base's leading '/' was replaced);
    # presumably the command writes the file without it, hence filename[1:]
    # — TODO confirm against dirac-dms-user-lfns output naming
    with open(filename[1:], 'r') as lfnlist:
        lfnlist.seek(0)
        g.extend((DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines()))
    return addProxy(g)
Пример #28
0
    def testAppend(self):
        """Appending a proxied TFile works for plain and proxied lists alike."""
        tfile = addProxy(TFile(name='foo'))

        self.plain1.append(tfile)
        assert self.plain1[-1] == tfile
        assert self.plain1.pop() == tfile

        self.proxied1.append(tfile)
        assert self.proxied1[-1] == tfile
        assert self.proxied1[-1] is tfile, 'Identity Test'
        assert isProxy(self.proxied1[-1]), 'Make sure we get back a proxy'
        assert self.proxied1.pop() == tfile
Пример #29
0
    def testAppend(self):
        """Appending a proxied TFile works for plain and proxied lists alike."""
        appended = addProxy(TFile(name='foo'))

        self.plain1.append(appended)
        assert appended == self.plain1[-1]
        assert appended == self.plain1.pop()

        self.proxied1.append(appended)
        assert appended == self.proxied1[-1]
        assert appended is self.proxied1[-1], 'Identity Test'
        assert isProxy(self.proxied1[-1]), 'Make sure we get back a proxy'
        assert appended == self.proxied1.pop()
Пример #30
0
 def __call__(self, this_id):
     """ Retrieve an object by id.

     A numeric string is converted to an int id. Any other string is
     treated as a wildcard (fnmatch) name pattern, which must match at
     most one object; ambiguous patterns raise RegistryKeyError, no
     match returns None. An unknown int id raises RegistryKeyError.
     """
     if isinstance(this_id, str):
         if this_id.isdigit():
             # numeric string: fall through to the int-id lookup below
             this_id = int(this_id)
         else:
             matches = [o for o in self.objects if fnmatch.fnmatch(o._getRegistry()._getName(o), this_id)]
             if len(matches) > 1:
                 logger.error('Multiple Matches: Wildcards are allowed for ease of matching, however')
                 logger.error('                  to keep a uniform response only one item may be matched.')
                 logger.error('                  If you wanted a slice, please use the select method')
                 raise RegistryKeyError("Multiple matches for id='%s':%s" % (this_id, str(map(lambda x: x._getRegistry()._getName(x), matches))))
             if len(matches) < 1:
                 return
             return addProxy(matches[0])
     try:
         return addProxy(self.objects[this_id])
     except KeyError as err:
         logger.debug('Object id=%d not found' % this_id)
         logger.debug("%s" % err)
         raise RegistryKeyError('Object id=%d not found' % this_id)
Пример #31
0
    def split(self, job):
        """Create one subjob per run in self.Files, attaching the run's
        options string as the subjob's splitter data."""
        subjobs = []
        for run in self.Files.keys():
            j = addProxy(self.createSubjob(job))
            jp = stripProxy(j)
            jp._splitter_data = self.optionsString(run)
            subjobs.append(jp)

        # parenthesised so the statement is valid on both Python 2 and 3
        # (the original used the Python-2-only print statement)
        print("Submitting jobs for %d runs" % (len(subjobs)))
        return subjobs
Пример #32
0
    def split(self, job):
        """Create one subjob per argument set, each with its own deep copy
        of the job's application carrying that argument set."""
        subjobs = []

        for arg_set in self.args:
            subjob = addProxy(self.createSubjob(job, ['application']))
            # Add new arguments to subjob
            replacement_app = copy.deepcopy(job.application)
            replacement_app.args = arg_set
            subjob.application = replacement_app
            logger.debug('Arguments for split job is: ' + str(arg_set))
            subjobs.append(stripProxy(subjob))

        return subjobs
Пример #33
0
def getDiracFiles():
    """Return a proxied GangaList of DiracFile objects, one per LFN under
    the user's DIRAC LFN base as reported by dirac-dms-user-lfns."""
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns'
    logger.info(
        'Creating list, this can take a while if you have a large number of SE files, please wait...'
    )
    execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None)
    g = GangaList()
    # filename starts with '-' (the LFN base's leading '/' was replaced);
    # presumably the command writes the file without it, hence filename[1:]
    # — TODO confirm against dirac-dms-user-lfns output naming
    with open(filename[1:], 'r') as lfnlist:
        lfnlist.seek(0)
        g.extend(
            (DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines()))
    return addProxy(g)
Пример #34
0
 def __call__(self, this_id):
     """ Retrieve an object by id.

     A numeric string is converted to an int id. Any other string is
     treated as a wildcard (fnmatch) name pattern, which must match at
     most one object; ambiguous patterns raise RegistryKeyError, no
     match returns None. An unknown int id raises RegistryKeyError.
     """
     if isinstance(this_id, str):
         if this_id.isdigit():
             # numeric string: fall through to the int-id lookup below
             this_id = int(this_id)
         else:
             matches = [
                 o for o in self.objects
                 if fnmatch.fnmatch(o._getRegistry()._getName(o), this_id)
             ]
             if len(matches) > 1:
                 logger.error(
                     'Multiple Matches: Wildcards are allowed for ease of matching, however'
                 )
                 logger.error(
                     '                  to keep a uniform response only one item may be matched.'
                 )
                 logger.error(
                     '                  If you wanted a slice, please use the select method'
                 )
                 raise RegistryKeyError(
                     "Multiple matches for id='%s':%s" %
                     (this_id,
                      str(
                          map(lambda x: x._getRegistry()._getName(x),
                              matches))))
             if len(matches) < 1:
                 return
             return addProxy(matches[0])
     try:
         return addProxy(self.objects[this_id])
     except KeyError as err:
         logger.debug('Object id=%d not found' % this_id)
         # fixed typo: was logger.deubg(...), which raised AttributeError
         # in this error path before the intended RegistryKeyError
         logger.debug("%s" % str(err))
         raise RegistryKeyError('Object id=%d not found' % this_id)
Пример #35
0
    def createNewJob(self):
        """Create any jobs required for this unit.

        Builds a new GPI Job whose backend is cloned from the parent
        transform. Each remaining field is deep-copied from this unit when
        set here, falling back to the parent transform's value otherwise.
        """
        j = GPI.Job()

        j.backend = self._getParent().backend.clone()

        # copy from ourselves or the parent transform depending on what's
        # specified
        fields = ['application', 'splitter', 'inputfiles',
                  'inputdata', 'inputsandbox', 'outputfiles', 'postprocessors']

        for f in fields:

            # postprocessors need special handling: "set" is judged via
            # process_objects, and the copy is taken through the proxy
            if (f == "postprocessors" and len(getattr(self, f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(addProxy(self).postprocessors)
            elif (f != "postprocessors" and getattr(self, f)):
                setattr(j, f, copy.deepcopy(getattr(self, f)))
            # fall back to the parent transform's values when unset here
            elif (f == "postprocessors" and len(getattr(self._getParent(), f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(
                    addProxy(self._getParent()).postprocessors)
            elif (f != "postprocessors" and getattr(self._getParent(), f)):
                setattr(j, f, copy.deepcopy(getattr(self._getParent(), f)))

        return j
Пример #36
0
    def split(self, job):
        """Split the job's CSV input into subsets of self.nbevents lines
        and create one subjob per subset, each with its own CSV input file
        and a unique, matching output file name."""
        import os

        subjobs = []

        subsets = splitCSVFile(job.application.csvfile, self.nbevents)

        # was len(allLines): 'allLines' is undefined in this scope — report
        # the number of subsets (= subjobs) actually being created
        logger.info('Creating %d subjobs ...', len(subsets))

        # Base for the naming of each subjob's CSV file
        # (was os.path.basename(incsvfile): 'incsvfile' is undefined here)
        tmpname = os.path.basename(job.application.csvfile)
        if len(tmpname.split('.')) > 1:
            patterncsv = '.'.join(
                tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
        else:
            patterncsv = tmpname + "_sub%d"

        # Base for the naming of each subjob's output file
        tmpname = os.path.basename(job.application.outputfile)
        if len(tmpname.split('.')) > 1:
            patternout = '.'.join(
                tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
        else:
            patternout = tmpname + "_sub%d"

        for s, sub in enumerate(subsets):
            j = addProxy(self.createSubjob(job))

            j.inputdata = job.inputdata

            subLines = '\n'.join(sub)

            from Ganga.GPIDev.Lib.File import FileBuffer
            thiscsv = patterncsv % s
            # Save in the main job's inputdir now, then the file will be moved to
            # the inputdir of each subjobs.
            job.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines),
                                              executable=0)
            j.application.csvfile = os.path.join(job.inputdir, thiscsv)
            # the output filenames must be unique per subjob
            j.application.outputfile = patternout % s

            subjobs.append(stripProxy(j))

        return subjobs
Пример #37
0
    def split(self, job):
        """Create one subjob per input dataset file."""
        subjobs = []

        filenames = job.inputdata.get_dataset_filenames()

        # (removed an unused 'subsets = []' local and a stale comment about
        # having fewer files than jobs — this splitter is one file per job)
        logger.info('Creating %d subjobs ...', len(filenames))
        for nb in range(len(filenames)):
            j = addProxy(self.createSubjob(job))

            j.inputdata.set_dataset_filenames([filenames[nb]])

            subjobs.append(stripProxy(j))

        return subjobs
Пример #38
0
def loadObject(filename):
    '''
    These are complementary functions to export/load which are already exported to
    the GPI from Ganga.GPIDev.Persistency. The difference being that these functions will
    export the objects using the pickle persistency format rather than a Ganga streaming
    (human readable) format.

    Returns the unpickled object wrapped in a GPI proxy, or None if loading failed.
    '''
    import os
    import pickle
    import traceback
    try:
        # Expand '~' and environment variables so shorthand paths work.
        with open(os.path.expandvars(os.path.expanduser(filename)), 'rb') as f:
            r = pickle.load(f)
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit propagate.
        logger.error("Problem when loading file '%s': %s" % (filename, traceback.format_exc()))
    else:
        return addProxy(r)
Пример #39
0
    def split(self, job):
        """Split 'job' into one subjob per input dataset file."""
        subjobs = []

        filenames = job.inputdata.get_dataset_filenames()

        subsets = []  # retained from the original; not used by this splitter

        logger.info('Creating %d subjobs ...', len(filenames))

        for single_file in filenames:
            sj = addProxy(self.createSubjob(job))
            sj.inputdata.set_dataset_filenames([single_file])
            subjobs.append(stripProxy(sj))

        return subjobs
Пример #40
0
    def split(self, job):
        """Split 'job' into (up to) self.nbjobs subjobs by partitioning its input files.

        All subjobs receive the same number of files, except that when the
        file count is not divisible by nbjobs the last subjob receives the
        remainder. If there are fewer files than nbjobs, one subjob is made
        per file.

        Raises Exception if self.nbjobs is less than 1.
        """
        subjobs = []

        filenames = job.inputdata.get_dataset_filenames()

        logger.info('Creating %d subjobs ...', self.nbjobs)

        if self.nbjobs < 1:
            raise Exception('Number of nbjobs not set properly.')

        subsets = []
        # Less files than number of jobs wanted => easy
        if len(filenames) < self.nbjobs:
            for f in filenames:
                subsets.append([f])
        else:
            isPerfectSplit = (len(filenames) % self.nbjobs) == 0
            if isPerfectSplit:
                # If the number of input files is divisible by nbjobs
                # then all subjobs have the same number of input files
                nbfulljobs = self.nbjobs
            else:
                # Otherwise all subjobs have the same number of input files
                # except the last subjob which has less
                nbfulljobs = self.nbjobs - 1

            # BUGFIX: use integer division — '/' yields a float under
            # Python 3 and float slice indices raise TypeError below.
            persub = len(filenames) // nbfulljobs
            for nb in range(nbfulljobs):
                Low = nb * persub
                High = (nb + 1) * persub
                subsets.append(filenames[Low:High])

            if not isPerfectSplit:
                subsets.append(filenames[High:])

        for sub in subsets:

            j = addProxy(self.createSubjob(job))

            j.inputdata.set_dataset_filenames(sub)

            subjobs.append(stripProxy(j))

        return subjobs
Пример #41
0
    def split(self, job):
        """Split 'job' into (up to) self.nbjobs subjobs by partitioning its input files.

        Full subjobs get the same number of files; when the count is not
        divisible by nbjobs the final subjob takes the remainder. Fewer files
        than nbjobs produces one subjob per file.

        Raises Exception if self.nbjobs is less than 1.
        """
        subjobs = []

        filenames = job.inputdata.get_dataset_filenames()

        logger.info('Creating %d subjobs ...', self.nbjobs)

        if self.nbjobs < 1:
            raise Exception('Number of nbjobs not set properly.')

        subsets = []
        # Less files than number of jobs wanted => easy
        if len(filenames) < self.nbjobs:
            for f in filenames:
                subsets.append([f])
        else:
            isPerfectSplit = (len(filenames) % self.nbjobs) == 0
            if isPerfectSplit:
                # If the number of input files is divisible by nbjobs
                # then all subjobs have the same number of input files
                nbfulljobs = self.nbjobs
            else:
                # Otherwise all subjobs have the same number of input files
                # except the last subjob which has less
                nbfulljobs = self.nbjobs - 1

            # BUGFIX: integer division — plain '/' produces a float under
            # Python 3, and float slice indices raise TypeError below.
            persub = len(filenames) // nbfulljobs
            for nb in range(nbfulljobs):
                Low = nb * persub
                High = (nb + 1) * persub
                subsets.append(filenames[Low:High])

            if not isPerfectSplit:
                subsets.append(filenames[High:])

        for sub in subsets:

            j = addProxy(self.createSubjob(job))

            j.inputdata.set_dataset_filenames(sub)

            subjobs.append(stripProxy(j))

        return subjobs
Пример #42
0
    def split(self, job):
        """Split 'job' by chunking its input CSV into self.nbevents-line subsets.

        One subjob is created per subset. Each subjob gets its own CSV file
        (written into the master job's input workspace, later moved to the
        subjob's inputdir) and a unique output file name derived from the
        master job's output file name.
        """
        import os

        subjobs = []

        subsets = splitCSVFile(job.application.csvfile, self.nbevents)

        # BUGFIX: 'allLines' was an undefined name (NameError); report the
        # number of subsets actually produced.
        logger.info('Creating %d subjobs ...', len(subsets))

        # Base for the naming of each subjob's CSV file.
        # BUGFIX: 'incsvfile' was an undefined name; use the job's csvfile.
        tmpname = os.path.basename(job.application.csvfile)
        if len(tmpname.split('.')) > 1:
            patterncsv = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
        else:
            patterncsv = tmpname + "_sub%d"

        # Base for the naming of each subjob's output file
        tmpname = os.path.basename(job.application.outputfile)
        if len(tmpname.split('.')) > 1:
            patternout = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
        else:
            patternout = tmpname + "_sub%d"

        for s, sub in enumerate(subsets):
            j = addProxy(self.createSubjob(job))

            j.inputdata = job.inputdata

            subLines = '\n'.join(sub)

            from Ganga.GPIDev.Lib.File import FileBuffer
            thiscsv = patterncsv % s
            # Save in the main job's inputdir now, then the file will be moved to
            # the inputdir of each subjobs.
            job.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines), executable=0)
            j.application.csvfile = os.path.join(job.inputdir, thiscsv)
            j.application.outputfile = patternout % s

            subjobs.append(stripProxy(j))

        return subjobs
Пример #43
0
def loadObject(filename):
    '''
    These are complementary functions to export/load which are already exported to
    the GPI from Ganga.GPIDev.Persistency. The difference being that these functions will
    export the objects using the pickle persistency format rather than a Ganga streaming
    (human readable) format.

    Returns the unpickled object wrapped in a GPI proxy, or None on failure.
    '''
    import os
    import pickle
    import traceback
    try:
        # Expand '~' and environment variables so shorthand paths work.
        with open(os.path.expandvars(os.path.expanduser(filename)), 'rb') as f:
            r = pickle.load(f)
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit propagate.
        logger.error("Problem when loading file '%s': %s" %
                     (filename, traceback.format_exc()))
    else:
        return addProxy(r)
Пример #44
0
def exportToGPI(name, _object, doc_section, docstring=None):
    '''
    Publish '_object' as "name" in the Ganga.GPI module and register it with
    the gangadoc system under 'doc_section'.

    GangaObject instances are wrapped in a GPI proxy before being exported;
    anything else is exported as-is. If 'docstring' is given it is used to
    document the object (only use for the "Objects" section), otherwise
    __doc__ is used (via pydoc utilities).

    FIXME: if you try to export the object instance, you should import it with
    a fully qualified path, e.g.
     import X.Y.Z
     X.Y.Z.object = object
     exportToGPI("obj",X.Y.Z.object,"Objects")
    It has been observed that doing exportToGPI("obj",object,"Objects") may
    not work. To be understood.
    '''
    exported = addProxy(_object) if isType(_object, GangaObject) else _object
    setattr(Ganga.GPI, name, exported)

    adddoc(name, getattr(Ganga.GPI, name), doc_section, docstring)
Пример #45
0
def exportToGPI(name, _object, doc_section, docstring=None):
    '''
    Make '_object' publicly available as "name" in the Ganga.GPI module and
    add automatic documentation to the gangadoc system.

    "doc_section" specifies how the object should be documented. If
    'docstring' is specified it is used to document the object (only use for
    the "Objects" section); otherwise __doc__ is used via pydoc utilities.
    GangaObject instances are proxied before export.

    FIXME: exporting an object instance may require importing it with a fully
    qualified path (X.Y.Z.object); exportToGPI("obj",object,"Objects") has
    been observed not to work. To be understood.
    '''
    if isType(_object, GangaObject):
        target = addProxy(_object)
    else:
        target = _object
    setattr(Ganga.GPI, name, target)

    adddoc(name, getattr(Ganga.GPI, name), doc_section, docstring)
Пример #46
0
    def setUp(self):
        """Create the plain/proxied GangaList fixtures and sanity-check proxying."""
        super(TestGangaList, self).setUp()

        self.plain1 = [self._makeRandomTFile() for _ in range(15)]
        self.plain2 = [self._makeRandomTFile() for _ in range(10)]

        self.proxied1 = GangaList()
        self.proxied2 = GangaList()
        self.proxied1.extend(list(self.plain1))
        self.proxied2.extend(list(self.plain2))

        # Re-proxying the stripped object must give back the very same proxy.
        t = TFile()
        assert t is addProxy(stripProxy(t))

        self.assertEqual(len(getProxyAttr(self.proxied1, '_list')), len(self.plain1), "Something's wrong with construction")
        self.assertEqual(len(getProxyAttr(self.proxied2, '_list')), len(self.plain2), "Something's wrong with construction")
Пример #47
0
    def setUp(self):
        """Prepare list fixtures and verify proxy round-tripping."""
        super(TestGangaList, self).setUp()

        self.plain1 = [self._makeRandomTFile() for _ in range(15)]
        self.plain2 = [self._makeRandomTFile() for _ in range(10)]

        self.proxied1 = GangaList()
        self.proxied1.extend(self.plain1[:])
        self.proxied2 = GangaList()
        self.proxied2.extend(self.plain2[:])

        proxy_obj = TFile()
        bare_obj = stripProxy(proxy_obj)
        reproxied = addProxy(bare_obj)
        # Re-proxying the underlying object must yield the identical proxy.
        assert proxy_obj is reproxied

        for proxied, plain in ((self.proxied1, self.plain1), (self.proxied2, self.plain2)):
            self.assertEqual(len(getProxyAttr(proxied, '_list')), len(plain), "Something's wrong with construction")
Пример #48
0
    def getDataset(self):
        '''Gets the dataset from the bookkeeping for current dict.

        Returns an LHCbDataset (GPI-proxied) built from DiracFiles for the
        LFNs matched by the query, or None when no query dict is set.
        '''
        if not self.dict:
            return None
        cmd = 'bkQueryDict(%s)' % self.dict
        result = get_result(cmd, 'BK query error.', 'BK query error.')
        files = []
        value = result['Value']
        if 'LFNs' in value:
            files = value['LFNs']
        # Idiom fix: isinstance() rather than 'type(...) is list'.
        if not isinstance(files, list):
            if 'LFNs' in files:  # i.e. a dict of LFN:Metadata
                files = files['LFNs'].keys()

        from GangaDirac.Lib.Files.DiracFile import DiracFile
        this_list = [DiracFile(lfn=f) for f in files]

        from GangaLHCb.Lib.LHCbDataset import LHCbDataset
        ds = LHCbDataset(files=this_list, fromRef=True)

        return addProxy(ds)
Пример #49
0
    def getDataset(self):
        '''Gets the dataset from the bookkeeping for current dict.

        Returns an LHCbDataset (GPI-proxied) of DiracFiles for the matched
        LFNs, or None when no query dict is set.
        '''
        if not self.dict:
            return None
        cmd = 'bkQueryDict(%s)' % self.dict
        result = get_result(cmd, 'BK query error.', 'BK query error.')
        files = []
        value = result['Value']
        if 'LFNs' in value:
            files = value['LFNs']
        # Idiom fix: isinstance() rather than 'type(...) is list'.
        if not isinstance(files, list):
            if 'LFNs' in files:  # i.e. a dict of LFN:Metadata
                files = files['LFNs'].keys()

        from GangaDirac.Lib.Files.DiracFile import DiracFile
        this_list = [DiracFile(lfn=f) for f in files]

        from GangaLHCb.Lib.LHCbDataset import LHCbDataset
        ds = LHCbDataset(files=this_list, fromRef=True)

        return addProxy(ds)
Пример #50
0
    def split(self, job):
        """Split 'job' into subjobs of self.nbfiles input files each.

        Raises Exception if self.nbfiles is less than 1.
        """
        subjobs = []

        filenames = job.inputdata.get_dataset_filenames()

        if self.nbfiles < 1:
            raise Exception('Number of nbfiles not set properly.')

        subsets = splitNbInputFile(filenames, self.nbfiles)

        # BUGFIX: was len(subjobs), which is always 0 at this point.
        logger.info('Creating %d subjobs ...', len(subsets))

        for sub in subsets:

            j = addProxy(self.createSubjob(job))

            j.inputdata.set_dataset_filenames(sub)

            subjobs.append(stripProxy(j))

        return subjobs
Пример #51
0
    def split(self, job):
        """Split 'job' into subjobs of self.nbfiles input files each.

        Raises Exception if self.nbfiles is less than 1.
        """
        subjobs = []

        filenames = job.inputdata.get_dataset_filenames()

        if self.nbfiles < 1:
            raise Exception('Number of nbfiles not set properly.')

        subsets = splitNbInputFile(filenames, self.nbfiles)

        # BUGFIX: was len(subjobs) — the list is still empty here.
        logger.info('Creating %d subjobs ...', len(subsets))

        for sub in subsets:

            j = addProxy(self.createSubjob(job))

            j.inputdata.set_dataset_filenames(sub)

            subjobs.append(stripProxy(j))

        return subjobs
Пример #52
0
    def getDataset(self):
        """Gets the dataset from the bookkeeping for current dict.

        Returns an LHCbDataset (GPI-proxied) of DiracFiles for the matched
        LFNs, or None when no query dict is set.
        """
        if not self.dict:
            return None
        cmd = "bkQueryDict(%s)" % self.dict
        result = get_result(cmd, "BK query error.", "BK query error.")
        files = []
        value = result["Value"]
        if "LFNs" in value:
            files = value["LFNs"]
        # Idiom fix: isinstance() rather than 'type(...) is list'.
        if not isinstance(files, list):
            if "LFNs" in files:  # i.e. a dict of LFN:Metadata
                files = files["LFNs"].keys()

        from GangaDirac.Lib.Files.DiracFile import DiracFile

        this_list = [DiracFile(lfn=_file) for _file in files]

        from GangaLHCb.Lib.LHCbDataset import LHCbDataset

        ds = LHCbDataset(this_list)

        return addProxy(ds)
Пример #53
0
 def next(self):
     """Return the next element of the wrapped iterator 'self.it', GPI-proxied."""
     return addProxy(next(self.it))
Пример #54
0
 def _export_pop(self, index=-1):
     """Remove and return the item at 'index' (default: last), wrapped in a GPI proxy.

     checkReadOnly() is invoked first — presumably guarding mutation of
     read-only objects (raises before the pop; confirm against its definition).
     """
     self.checkReadOnly()
     return addProxy(self.pop(index))
Пример #55
0
class TestSharedFileGetPut(TestMassStorageGetPut):
    """Testing the get/put/copyTo methods of SharedFile"""
    # Reuse the whole MassStorage test suite, swapping in the proxied SharedFile class.
    fileClass = addProxy(SharedFile)
Пример #56
0
class TestMassStorageGetPut(GangaUnitTest):
    """Testing the get/put/copyTo methods of MassStorage"""

    # Paths of local temp files created by the tests; deleted in tearDownClass.
    _temp_files = []
    # File objects that were put() into storage; their uploads are removed in tearDownClass.
    _managed_files = []

    # Num of sj in tests
    sj_len = 3

    # The (proxied) file class under test; subclasses override this to reuse the suite.
    fileClass = addProxy(MassStorageFile)

    # Where on local storage we want to have our 'MassStorage solution'
    outputFilePath = '/tmp/Test' + _getName(fileClass) + 'GetPut'

    # This sets up a MassStorageConfiguration which works by placing a file on local storage somewhere we can test using standard tools
    MassStorageTestConfig = {
        'defaultProtocol': 'file://',
        'fileExtensions': [''],
        'uploadOptions': {
            'path': outputFilePath,
            'cp_cmd': 'cp',
            'ls_cmd': 'ls',
            'mkdir_cmd': 'mkdir -p'
        },
        'backendPostprocess': {
            'LSF': 'client',
            'LCG': 'client',
            'ARC': 'client',
            'Dirac': 'client',
            'PBS': 'client',
            'Interactive': 'client',
            'Local': 'client',
            'CREAM': 'client'
        }
    }

    def setUp(self):
        """
        Configure the MassStorageFile for the test
        """
        # Disable polling/auto-cleanup so the tests control job lifecycle themselves.
        extra_opts = [('PollThread', 'autostart', 'False'),
                      ('Local', 'remove_workdir', 'False'),
                      ('TestingFramework', 'AutoCleanup', 'False'),
                      ('Output', _getName(self.fileClass),
                       self.MassStorageTestConfig),
                      ('Output', 'FailJobIfNoOutputMatched', 'True')]
        super(TestMassStorageGetPut, self).setUp(extra_opts=extra_opts)

    @staticmethod
    def cleanUp():
        """ Cleanup the current temp jobs """

        from Ganga.GPI import jobs
        for j in jobs:
            shutil.rmtree(j.backend.workdir, ignore_errors=True)
            j.remove()

    @classmethod
    def setUpClass(cls):
        """ This creates a safe place to put the files into 'mass-storage' """
        cls.outputFilePath = tempfile.mkdtemp()
        cls.MassStorageTestConfig['uploadOptions']['path'] = cls.outputFilePath

    @classmethod
    def tearDownClass(cls):
        """ Cleanup the current temp objects """

        for file_ in cls._temp_files:
            os.unlink(file_)
        cls._temp_files = []

        # Remove the uploaded copies from the 'mass storage' area.
        for file_ in cls._managed_files:
            os.unlink(os.path.join(cls.outputFilePath, file_.namePattern))
        cls._managed_files = []

        shutil.rmtree(cls.outputFilePath, ignore_errors=True)

    def test_a_test_put(self):
        """Test that a job can be submitted with inputfiles in the input"""

        MassStorageFile = self.fileClass

        _ext = '.root'
        file_1 = generate_unique_temp_file(_ext)
        file_2 = generate_unique_temp_file(_ext)
        self._temp_files.append(file_1)
        self._temp_files.append(file_2)
        msf_1 = MassStorageFile(file_1)
        msf_2 = MassStorageFile(file_2)
        self._managed_files.append(msf_1)
        self._managed_files.append(msf_2)
        msf_1.put()
        msf_2.put()

        # After put() each file must exist in the storage area.
        for file_ in [msf for msf in (msf_1, msf_2)]:
            assert os.path.isfile(
                os.path.join(self.outputFilePath, file_.namePattern))
            file_.localDir = ''
            assert file_.localDir == ''

    def test_b_test_get(self):
        """Test that the files were made accessible to the WN area and collected as LocalFile objects in outputfiles"""

        from Ganga.GPI import Job

        tmpdir = tempfile.mkdtemp()

        # Test in the case that the files don't have a parent or a localDir
        for file_ in self._managed_files:
            file_.localDir = ''
            try:
                assert file_.localDir == ''
                file_.get()
                print("Unexpected localDir: %s" % file_.localDir)
                failed = False
            except GangaException:
                failed = True
            assert failed

        # Test in the case that the localDir has been set
        for file_ in self._managed_files:
            file_.localDir = tmpdir
            print("localDir: %s" % file_.localDir)
            file_.get()
            assert os.path.isfile(os.path.join(tmpdir, file_.namePattern))
            file_.localDir = ''
            assert file_.localDir == ''

        # Test in the case that the object is 'owned' by a Job

        j = Job()
        outputdir = stripProxy(j).getOutputWorkspace(create=True).getPath()
        j.outputfiles = self._managed_files
        for file_ in j.outputfiles:
            assert stripProxy(file_).getJobObject() is stripProxy(j)
            assert file_.localDir == ''
            file_.get()
            assert os.path.isfile(os.path.join(outputdir, file_.namePattern))

        shutil.rmtree(tmpdir, ignore_errors=True)

        self.cleanUp()

    def test_c_test_copyTo(self):
        """ Test the new copyTo interface"""

        tmpdir = tempfile.mkdtemp()

        for file_ in self._managed_files:
            file_.localDir = ''
            stripProxy(file_).copyTo(tmpdir)
            assert os.path.isfile(os.path.join(tmpdir, file_.namePattern))

        shutil.rmtree(tmpdir, ignore_errors=True)
Пример #57
0
    def testDelItem(self):
        """Test __delitem__"""

        # Delete each proxied element from the proxied list by its index.
        for proxied_item in map(addProxy, self.plain1[:]):
            assert isProxy(proxied_item)
            del self.proxied1[self.proxied1.index(proxied_item)]
Пример #58
0
from __future__ import absolute_import

try:
    import unittest2 as unittest
except ImportError:
    import unittest
import random
import string

from Ganga.GPIDev.Base.Proxy import addProxy, getProxyAttr, isProxy, isType, stripProxy

from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList

GangaList = addProxy(GangaList)
from .TFile import TFile

TFile = addProxy(TFile)

# set the seed for repeatable tests
random.seed(666)

from Ganga.Utility.logging import getLogger

logger = getLogger(modulename=True)


class TestGangaList(unittest.TestCase):
    """Tests for the proxied GangaList container."""

    def __init__(self, *args, **kwargs):
        super(TestGangaList, self).__init__(*args, **kwargs)

        # Fixture container; presumably populated later (e.g. in setUp) — confirm.
        self.plain1 = []
Пример #59
0
    def testInsert(self):
        """Insert a proxied TFile at position 8 and verify it lands there."""
        new_file = addProxy(TFile(name='foo'))
        self.proxied1.insert(8, new_file)
        assert self.proxied1[8] == new_file