def __getitem__(self, x):
    """Retrieve a job object from the registry: registry[x].

    If 'x' is a job id (int), the corresponding job object is returned, or
    RegistryIndexError is raised if no such id exists.
    If 'x' is a name (string), the uniquely matching job is returned;
    RegistryKeyError is raised if the name is not found or is not unique.
    If 'x' is of any other type, RegistryAccessError is raised.
    """
    if isinstance(x, int):
        try:
            return addProxy(self.objects[x])
        except IndexError:
            raise RegistryIndexError('list index out of range')
    if isinstance(x, str):
        ids = []
        for i in self.objects.keys():
            j = self.objects[i]
            if j.name == x:
                ids.append(j.id)
        if len(ids) > 1:
            raise RegistryKeyError('object "%s" not unique' % x)
        if len(ids) == 0:
            raise RegistryKeyError('object "%s" not found' % x)
        return addProxy(self.objects[ids[0]])
    raise RegistryAccessError('Expected int or string (job name).')
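# Illustrative use of the registry indexing above; a minimal sketch assuming the
# standard GPI 'jobs' registry and an existing job named 'analysis' (the id and
# name are examples only):
j_by_id = jobs[42]            # integer id -> job, or RegistryIndexError
j_by_name = jobs['analysis']  # unique name -> job, or RegistryKeyError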
def createNewJob(self): """Create any jobs required for this unit""" j = GPI.Job() j.backend = self._getParent().backend.clone() # copy form ourselves or the parent transform depending on what's # specified fields = [ 'application', 'splitter', 'inputfiles', 'inputdata', 'inputsandbox', 'outputfiles', 'postprocessors' ] for f in fields: if (f == "postprocessors" and len(getattr(self, f).process_objects) > 0): j.postprocessors = copy.deepcopy(addProxy(self).postprocessors) elif (f != "postprocessors" and getattr(self, f)): setattr(j, f, copy.deepcopy(getattr(self, f))) elif (f == "postprocessors" and len(getattr(self._getParent(), f).process_objects) > 0): j.postprocessors = copy.deepcopy( addProxy(self._getParent()).postprocessors) elif (f != "postprocessors" and getattr(self._getParent(), f)): setattr(j, f, copy.deepcopy(getattr(self._getParent(), f))) return j
def select(self, minid=None, maxid=None, **attrs):
    import repr
    from inspect import isclass
    from Ganga.GPIDev.Lib.Job.Job import Job
    if isType(minid, Job):
        if minid.master is not None:
            minid = minid.master.id
        else:
            minid = minid.id
        if maxid is None:
            maxid = minid
    if isType(maxid, Job):
        if maxid.master is not None:
            maxid = maxid.master.id
        else:
            maxid = maxid.id
    logger = getLogger()
    this_repr = repr.Repr()
    from Ganga.GPIDev.Base.Proxy import addProxy
    attrs_str = ""
    ## Loop through all possible input combinations to construct a string representation of the attrs from possible inputs
    ## Required to flatten the additional arguments into a flat string in attrs_str
    for a in attrs:
        if isclass(attrs[a]):
            this_attr = addProxy(attrs[a]())
        else:
            from Ganga.GPIDev.Base.Objects import GangaObject
            if isType(attrs[a], GangaObject):
                this_attr = addProxy(attrs[a])
            else:
                if type(attrs[a]) is str:
                    from Ganga.GPIDev.Base.Proxy import getRuntimeGPIObject
                    this_attr = getRuntimeGPIObject(attrs[a], True)
                else:
                    this_attr = attrs[a]
        full_str = str(this_attr)
        # strip each line before flattening the (possibly multi-line) repr into one string
        split_str = [line.strip() for line in full_str.split('\n')]
        flat_str = ''.join(split_str)
        attrs_str += ", %s=\"%s\"" % (a, flat_str)
    logger.debug("Attrs_Str: %s" % attrs_str)
    logger.debug("Constructing slice: %s" %
                 ("%s.select(minid='%s', maxid='%s'%s)" %
                  (self.name, this_repr.repr(minid), this_repr.repr(maxid), attrs_str)))
    this_slice = self.__class__("%s.select(minid='%s', maxid='%s'%s)" %
                                (self.name, this_repr.repr(minid), this_repr.repr(maxid), attrs_str))

    def append(id, obj):
        this_slice.objects[id] = obj
    self.do_select(append, minid, maxid, **attrs)
    return this_slice
def setParameter(self, **args):
    """Use: setParameter(processName="HWW") to set the processName in all
       applications to "HWW". Warns if an application is not affected because
       it lacks the parameter."""
    for name, parm in args.iteritems():
        for tf in [t for t in self.transforms if t.application]:
            if name in tf.application.getNodeData():
                addProxy(tf.application).__setattr__(name, parm)
            else:
                logger.warning("Transform %s was not affected!", tf.name)
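# A hedged usage sketch for setParameter() above, assuming a task 't' whose
# transforms use an application exposing a 'processName' field (names are examples):
t = tasks(0)
t.setParameter(processName="HWW")  # applied to every transform application that has the field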
def select(self, minid=None, maxid=None, **attrs):
    import repr
    from Ganga.GPIDev.Lib.Job.Job import Job
    if isType(minid, Job):
        if minid.master:
            minid = minid.master.id
        else:
            minid = minid.id
        if maxid is None:
            maxid = minid
    if isType(maxid, Job):
        if maxid.master:
            maxid = maxid.master.id
        else:
            maxid = maxid.id
    logger = getLogger()
    this_repr = repr.Repr()
    from Ganga.GPIDev.Base.Proxy import addProxy
    attrs_str = ""
    ## Loop through all possible input combinations to construct a string representation of the attrs from possible inputs
    ## Required to flatten the additional arguments into a flat string in attrs_str
    for a in attrs:
        from inspect import isclass
        if isclass(attrs[a]):
            this_attr = addProxy(attrs[a]())
        else:
            from Ganga.GPIDev.Base.Objects import GangaObject
            if isType(attrs[a], GangaObject):
                this_attr = addProxy(attrs[a])
            else:
                if type(attrs[a]) is str:
                    from Ganga.GPIDev.Base.Proxy import getRuntimeGPIObject
                    this_attr = getRuntimeGPIObject(attrs[a], True)
                else:
                    this_attr = attrs[a]
        full_str = str(this_attr)
        # strip each line before flattening the (possibly multi-line) repr into one string
        split_str = [line.strip() for line in full_str.split('\n')]
        flat_str = ''.join(split_str)
        attrs_str += ", %s=\"%s\"" % (a, flat_str)
    logger.debug("Attrs_Str: %s" % attrs_str)
    logger.debug("Constructing slice: %s" % ("%s.select(minid='%s', maxid='%s'%s)" %
                 (self.name, this_repr.repr(minid), this_repr.repr(maxid), attrs_str)))
    this_slice = self.__class__("%s.select(minid='%s', maxid='%s'%s)" %
                                (self.name, this_repr.repr(minid), this_repr.repr(maxid), attrs_str))

    def append(id, obj):
        this_slice.objects[id] = obj
    self.do_select(append, minid, maxid, **attrs)
    return this_slice
def createNewJob(self): """Create any jobs required for this unit""" import copy j = makeRegisteredJob() j.backend = self._getParent().backend.clone() j.application = self._getParent().application.clone() if self.inputdata: j.inputdata = self.inputdata.clone() j.inputfiles = copy.deepcopy(self._getParent().inputfiles) trf = self._getParent() task = trf._getParent() j.inputsandbox = self._getParent().inputsandbox j.outputfiles = copy.deepcopy(self._getParent().outputfiles) if len(self._getParent().postprocessors.process_objects) > 0: j.postprocessors = copy.deepcopy( addProxy(self._getParent()).postprocessors) if trf.splitter: j.splitter = trf.splitter.clone() # change the first event for GaussSplitter from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter if isType(trf.splitter, GaussSplitter): events_per_unit = j.splitter.eventsPerJob * \ j.splitter.numberOfJobs j.splitter.firstEventNumber += self.getID() * events_per_unit else: j.splitter = SplitByFiles() return j
def split(self, job): subjobs = [] # sort out multiple arg splitting if (self.attribute != '' or len(self.values) > 0) and len(self.multi_attrs) > 0: raise ApplicationConfigurationError("Setting both 'attribute'/'values' and 'multi_attrs' is unsupported") if self.attribute != '': attrlist = [self.attribute] values = [] for v in self.values: values.append([v]) else: # check we have enough values in the dictionary numjobs = -1 attrlist = [] for attr in self.multi_attrs: if numjobs == -1: numjobs = len(self.multi_attrs[attr]) else: if len(self.multi_attrs[attr]) != numjobs: raise ApplicationConfigurationError( "Number of values for '%s' doesn't equal others '%d'" % (attr, numjobs)) attrlist.append(attr) # now get everything organised values = [] for i in range(0, numjobs): valtmp = [] for attr in attrlist: valtmp.append(self.multi_attrs[attr][i]) values.append(valtmp) # check we have enough values to cover the attributes for vallist in values: if len(attrlist) != len(vallist): raise ApplicationConfigurationError( "Number of attributes to split over doesn't equal number of values in list '%s'" % vallist) # now perform the split for vallist in values: # for each list of values, set the attributes j = addProxy(self.createSubjob(job)) for i in range(0, len(attrlist)): attrs = attrlist[i].split('.') obj = j for attr in attrs[:-1]: obj = getattr(obj, attr) attr = attrs[-1] setattr(obj, attr, vallist[i]) logger.debug('set %s = %s to subjob.' % (attrlist[i], getattr(obj, attr))) subjobs.append(stripProxy(j)) return subjobs
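# Minimal sketch of configuring the splitter above, assuming it is exposed as the
# GenericSplitter-style GPI class; the attribute paths and values are examples only:
s = GenericSplitter()
s.multi_attrs = {'application.args': [['a'], ['b'], ['c']],
                 'application.env':  [{'N': '1'}, {'N': '2'}, {'N': '3'}]}
j = Job(application=Executable(), splitter=s)  # one subjob per index across the value lists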
def browseBK(gui=True):
    """Return an LHCbDataset from the GUI LHCb Bookkeeping.

    Utility function to launch the new LHCb bookkeeping browser from inside Ganga.
    The function returns an LHCbDataset object. After browsing and selecting the
    desired datafiles, click on the "Save as ..." button. The browser will quit
    and save the selected files as an LHCbDataset object.

    Usage:
    # retrieve an LHCbDataset object with the selected files and store
    # them in the variable l
    l = browseBK()

    # retrieve an LHCbDataset object with the selected files and store
    # them in the job's inputdata field, ready for submission
    j.inputdata = browseBK()
    """
    import Ganga.Utility.logging
    from Ganga.GPIDev.Base.Proxy import addProxy
    logger = Ganga.Utility.logging.getLogger()
    try:
        from GangaLHCb.Lib.DIRAC.Bookkeeping import Bookkeeping
        from Ganga.GPI import LHCbDataset
    except ImportError:
        logger.warning('Could not start Bookkeeping Browser')
        return None
    bkk = Bookkeeping()
    return addProxy(bkk.browse(gui))
def createNewJob(self): """Create any jobs required for this unit""" import copy j = GPI.Job() j.backend = self._getParent().backend.clone() j.application = self._getParent().application.clone() if self.inputdata: j.inputdata = self.inputdata.clone() j.inputfiles = copy.deepcopy(self._getParent().inputfiles) trf = self._getParent() task = trf._getParent() j.inputsandbox = self._getParent().inputsandbox j.outputfiles = copy.deepcopy(self._getParent().outputfiles) if len(self._getParent().postprocessors.process_objects) > 0: j.postprocessors = copy.deepcopy( addProxy(self._getParent()).postprocessors) if trf.splitter: j.splitter = trf.splitter.clone() # change the first event for GaussSplitter from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter if isType(trf.splitter, GaussSplitter): events_per_unit = j.splitter.eventsPerJob * \ j.splitter.numberOfJobs j.splitter.firstEventNumber = self.getID() * events_per_unit else: j.splitter = SplitByFiles() return j
def getDataset(self):
    '''Gets the dataset from the bookkeeping for the current path, type, etc.'''
    if not self.path:
        return None
    if self.type not in ['Path', 'RunsByDate', 'Run', 'Production']:
        raise GangaException('Type="%s" is not valid.' % self.type)
    if self.type != 'RunsByDate':
        if self.startDate:
            msg = 'startDate not supported for type="%s".' % self.type
            raise GangaException(msg)
        if self.endDate:
            msg = 'endDate not supported for type="%s".' % self.type
            raise GangaException(msg)
        if self.selection:
            msg = 'selection not supported for type="%s".' % self.type
            raise GangaException(msg)
    cmd = "getDataset('%s','%s','%s','%s','%s','%s')" % (self.path, self.dqflag, self.type,
                                                         self.startDate, self.endDate, self.selection)
    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    knownLists = [tuple, list, GangaList]
    if isType(self.dqflag, knownLists):
        # a list of DQ flags is passed through unquoted
        cmd = "getDataset('%s',%s,'%s','%s','%s','%s')" % (self.path, self.dqflag, self.type,
                                                           self.startDate, self.endDate, self.selection)
    result = get_result(cmd, 'BK query error.', 'BK query error.')
    logger.debug("Finished Running Command")

    files = []
    value = result['Value']
    if 'LFNs' in value:
        files = value['LFNs']
    if not isinstance(files, list):  # i.e. a dict of LFN:Metadata
        files = files.keys()

    logger.debug("Creating DiracFile objects")
    # NB: building this list via GangaObject.__createNewList did not work here,
    # so a plain list comprehension is used instead.
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    logger.debug("Creating new list")
    new_files = [DiracFile(lfn=f) for f in files]
    logger.info("Constructing LHCbDataset")
    from GangaLHCb.Lib.LHCbDataset import LHCbDataset
    logger.debug("Imported LHCbDataset")
    ds = LHCbDataset(files=new_files, fromRef=True)
    logger.debug("Returning Dataset")
    return addProxy(ds)
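# Hedged example of driving the path-based query above, assuming it is exposed as
# the BKQuery GPI object; the bookkeeping path is illustrative only:
bkq = BKQuery(path='/LHCb/Collision11/Beam3500GeV-VeloClosed-MagDown/Real Data/Reco12/Stripping17/90000000/DIMUON.DST',
              dqflag='OK', type='Path')
ds = bkq.getDataset()   # proxied LHCbDataset built from DiracFile LFNs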
def browseBK(gui=True):
    """Return an LHCbDataset from the GUI LHCb Bookkeeping.

    Utility function to launch the new LHCb bookkeeping browser from inside Ganga.
    The function returns an LHCbDataset object. After browsing and selecting the
    desired datafiles, click on the "Save as ..." button. The browser will quit
    and save the selected files as an LHCbDataset object.

    Usage:
    # retrieve an LHCbDataset object with the selected files and store
    # them in the variable l
    l = browseBK()

    # retrieve an LHCbDataset object with the selected files and store
    # them in the job's inputdata field, ready for submission
    j.inputdata = browseBK()
    """
    import Ganga.Utility.logging
    from Ganga.GPIDev.Base.Proxy import addProxy
    logger = Ganga.Utility.logging.getLogger()
    try:
        from GangaLHCb.Lib.Backends.Bookkeeping import Bookkeeping
        from Ganga.GPI import LHCbDataset
    except ImportError:
        logger.warning("Could not start Bookkeeping Browser")
        return None
    bkk = Bookkeeping()
    return addProxy(bkk.browse(gui))
def _addToInterface(interface, name, _object): if isType(_object, GangaObject): setattr(interface, name, addProxy(_object)) elif isclass(_object) and issubclass(_object, GangaObject): setattr(interface, name, getProxyClass(_object)) else: setattr(interface, name, _object)
def _wrap(obj): if isType(obj, GangaObject): return addProxy(obj) if isType(obj, RegistrySlice): return obj._proxyClass(obj) if isType(obj, list): return map(addProxy, obj) return obj
def setUp(self): # make a list of lists containing GangaObjects self.filelist = [] for _ in range(10): self.filelist.append([self._makeRandomTFile() for _ in range(3)]) # make an empty GangaList self.gangalist = addProxy(makeGangaList([]))
def testContains(self): """Tests __contains__""" plist = [addProxy(x) for x in self.plain1] self.assertEqual(plist, self.proxied1) for p in plist: self.assertTrue(isProxy(p)) self.assertIn(p, self.proxied1, 'Proxied list should contain each proxied object')
def testContains(self): """Tests __contains__""" plist = [addProxy(x) for x in self.plain1] assert plist == self.proxied1 for p in plist: assert isProxy(p) assert p in self.proxied1, 'Proxied list should contain each proxied object'
def setUp(self): super(TestNestedLists, self).setUp() # make a list of lists containing GangaObjects self.filelist = [] self.gangalist = None for _ in range(10): self.filelist.append([self._makeRandomTFile() for _ in range(3)]) # make an empty GangaList self.gangalist = addProxy(makeGangaList([]))
def testRemove(self): t = addProxy(TFile(name='bar')) self.proxied1.insert(7, t) list_len = len(self.proxied1) self.proxied1.remove(t) assert len(self.proxied1) == list_len - 1 assert t not in self.proxied1 assert t._impl not in self.proxied1._impl
def split(self, job): subjobs = [] for arg in self.args: j = addProxy(self.createSubjob(job)) # Add new arguments to subjob j.application.args = arg logger.debug('Arguments for split job is: ' + str(arg)) subjobs.append(stripProxy(j)) return subjobs
def getDiracFiles(): import os from GangaDirac.Lib.Files.DiracFile import DiracFile from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns' logger.info('Creating list, this can take a while if you have a large number of SE files, please wait...') execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None) g = GangaList() with open(filename[1:], 'r') as lfnlist: lfnlist.seek(0) g.extend((DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines())) return addProxy(g)
def testAppend(self): t = addProxy(TFile(name='foo')) self.plain1.append(t) assert self.plain1[-1] == t assert self.plain1.pop() == t self.proxied1.append(t) assert self.proxied1[-1] == t assert self.proxied1[-1] is t, 'Identity Test' assert isProxy(self.proxied1[-1]), 'Make sure we get back a proxy' assert self.proxied1.pop() == t
def __call__(self, this_id): """ Retrieve an object by id. """ if isinstance(this_id, str): if this_id.isdigit(): this_id = int(this_id) else: matches = [o for o in self.objects if fnmatch.fnmatch(o._getRegistry()._getName(o), this_id)] if len(matches) > 1: logger.error('Multiple Matches: Wildcards are allowed for ease of matching, however') logger.error(' to keep a uniform response only one item may be matched.') logger.error(' If you wanted a slice, please use the select method') raise RegistryKeyError("Multiple matches for id='%s':%s" % (this_id, str(map(lambda x: x._getRegistry()._getName(x), matches)))) if len(matches) < 1: return return addProxy(matches[0]) try: return addProxy(self.objects[this_id]) except KeyError as err: logger.debug('Object id=%d not found' % this_id) logger.debug("%s" % err) raise RegistryKeyError('Object id=%d not found' % this_id)
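# Illustrative calls of the registry __call__ above; a sketch assuming the GPI
# 'jobs' registry and example ids/names:
jobs(3)        # lookup by id
jobs('3')      # digit strings are converted to an id
jobs('ana*')   # wildcard name lookup; must match exactly one object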
def split(self, job): from Ganga.GPIDev.Lib.Job import Job subjobs = [] for run in self.Files.keys(): j = addProxy(self.createSubjob(job)) # j.splitter = None # j.merger = None jp = stripProxy(j) jp._splitter_data = self.optionsString(run) subjobs.append(jp) print "Submitting jobs for %d runs" % (len(subjobs)) return subjobs
def split(self, job): subjobs = [] for arg in self.args: j = addProxy(self.createSubjob(job,['application'])) # Add new arguments to subjob app = copy.deepcopy(job.application) app.args = arg j.application = app logger.debug('Arguments for split job is: ' + str(arg)) subjobs.append(stripProxy(j)) return subjobs
def getDiracFiles(): import os from GangaDirac.Lib.Files.DiracFile import DiracFile from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns' logger.info( 'Creating list, this can take a while if you have a large number of SE files, please wait...' ) execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None) g = GangaList() with open(filename[1:], 'r') as lfnlist: lfnlist.seek(0) g.extend( (DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines())) return addProxy(g)
def __call__(self, this_id):
    """ Retrieve an object by id.
    """
    if isinstance(this_id, str):
        if this_id.isdigit():
            this_id = int(this_id)
        else:
            matches = [o for o in self.objects
                       if fnmatch.fnmatch(o._getRegistry()._getName(o), this_id)]
            if len(matches) > 1:
                logger.error('Multiple Matches: Wildcards are allowed for ease of matching, however')
                logger.error('                  to keep a uniform response only one item may be matched.')
                logger.error('                  If you wanted a slice, please use the select method')
                raise RegistryKeyError("Multiple matches for id='%s':%s" %
                                       (this_id, str(map(lambda x: x._getRegistry()._getName(x), matches))))
            if len(matches) < 1:
                return
            return addProxy(matches[0])
    try:
        return addProxy(self.objects[this_id])
    except KeyError as err:
        logger.debug('Object id=%d not found' % this_id)
        logger.debug("%s" % str(err))
        raise RegistryKeyError('Object id=%d not found' % this_id)
def createNewJob(self): """Create any jobs required for this unit""" j = GPI.Job() j.backend = self._getParent().backend.clone() # copy form ourselves or the parent transform depending on what's # specified fields = ['application', 'splitter', 'inputfiles', 'inputdata', 'inputsandbox', 'outputfiles', 'postprocessors'] for f in fields: if (f == "postprocessors" and len(getattr(self, f).process_objects) > 0): j.postprocessors = copy.deepcopy(addProxy(self).postprocessors) elif (f != "postprocessors" and getattr(self, f)): setattr(j, f, copy.deepcopy(getattr(self, f))) elif (f == "postprocessors" and len(getattr(self._getParent(), f).process_objects) > 0): j.postprocessors = copy.deepcopy( addProxy(self._getParent()).postprocessors) elif (f != "postprocessors" and getattr(self._getParent(), f)): setattr(j, f, copy.deepcopy(getattr(self._getParent(), f))) return j
def split(self, job):
    import os
    subjobs = []
    subsets = splitCSVFile(job.application.csvfile, self.nbevents)

    logger.info('Creating %d subjobs ...', len(subsets))

    # Base for the naming of each subjob's CSV file
    tmpname = os.path.basename(job.application.csvfile)
    if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patterncsv = tmpname + "_sub%d"

    # Base for the naming of each subjob's output file
    tmpname = os.path.basename(job.application.outputfile)
    if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patternout = tmpname + "_sub%d"

    for s, sub in enumerate(subsets):
        j = addProxy(self.createSubjob(job))
        j.inputdata = job.inputdata

        subLines = '\n'.join(sub)

        from Ganga.GPIDev.Lib.File import FileBuffer
        thiscsv = patterncsv % s
        # Save in the main job's inputdir now, then the file will be moved to
        # the inputdir of each subjob.
        job.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines), executable=0)
        j.application.csvfile = os.path.join(job.inputdir, thiscsv)
        j.application.outputfile = patternout % s  # Prepare the output filenames, which must be unique

        subjobs.append(stripProxy(j))
    return subjobs
def split(self, job):
    subjobs = []
    filenames = job.inputdata.get_dataset_filenames()
    # One subjob per input file
    logger.info('Creating %d subjobs ...', len(filenames))
    for nb in range(len(filenames)):
        j = addProxy(self.createSubjob(job))
        j.inputdata.set_dataset_filenames([filenames[nb]])
        subjobs.append(stripProxy(j))
    return subjobs
def loadObject(filename):
    ''' These are complementary functions to export/load which are already exported
        to the GPI from Ganga.GPIDev.Persistency. The difference is that these
        functions export the objects using the pickle persistency format rather
        than the Ganga streaming (human readable) format.
    '''
    import os
    import pickle
    import traceback
    try:
        with open(os.path.expandvars(os.path.expanduser(filename)), 'rb') as f:
            r = pickle.load(f)
    except Exception:
        logger.error("Problem when loading file '%s': %s" % (filename, traceback.format_exc()))
    else:
        return addProxy(r)
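# Usage sketch for loadObject() above; the path is an example and the matching
# pickle-based save helper (here called saveObject) is assumed to exist alongside it:
# saveObject(jobs(0), '~/job0.pkl')   # hypothetical counterpart
j = loadObject('~/job0.pkl')          # returns the unpickled object wrapped in a GPI proxy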
def split(self,job):
    subjobs = []
    filenames = job.inputdata.get_dataset_filenames()
    # One subjob per input file
    logger.info('Creating %d subjobs ...',len(filenames))
    for nb in range(len(filenames)):
        j = addProxy(self.createSubjob(job))
        j.inputdata.set_dataset_filenames([filenames[nb]])
        subjobs.append(stripProxy(j))
    return subjobs
def split(self, job): subjobs = [] filenames = job.inputdata.get_dataset_filenames() logger.info('Creating %d subjobs ...', self.nbjobs) if self.nbjobs < 1: raise Exception('Number of nbjobs not set properly.') subsets = [] # Less files than number of jobs wanted => easy if len(filenames) < self.nbjobs: for f in filenames: subsets.append([f]) else: isPerfectSplit = (len(filenames) % self.nbjobs) == 0 if isPerfectSplit: # If the number of input files is divisible by nbjobs # then all subjobs have the same number of input files nbfulljobs = self.nbjobs else: # Otherwise all subjobs have the same number of input files # except the last subjob which has less nbfulljobs = self.nbjobs - 1 persub = len(filenames) / nbfulljobs for nb in range(nbfulljobs): Low = nb * persub High = (nb + 1) * persub subsets.append(filenames[Low:High]) if not isPerfectSplit: subsets.append(filenames[High:]) for sub in subsets: j = addProxy(self.createSubjob(job)) j.inputdata.set_dataset_filenames(sub) subjobs.append(stripProxy(j)) return subjobs
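# Worked example of the splitting arithmetic above (numbers are illustrative):
# with 10 input files and nbjobs = 4, 10 % 4 != 0 so nbfulljobs = 3 and
# persub = 10 / 3 = 3 (integer division); the loop produces subsets of
# 3, 3 and 3 files, and the final append collects the remaining file,
# giving 4 subjobs in total.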
def split(self,job): subjobs = [] filenames = job.inputdata.get_dataset_filenames() logger.info('Creating %d subjobs ...',self.nbjobs) if self.nbjobs < 1: raise Exception('Number of nbjobs not set properly.') subsets = [] # Less files than number of jobs wanted => easy if len(filenames) < self.nbjobs: for f in filenames: subsets.append([f]) else: isPerfectSplit = (len(filenames) % self.nbjobs) == 0 if isPerfectSplit: # If the number of input files is divisible by nbjobs # then all subjobs have the same number of input files nbfulljobs = self.nbjobs else: # Otherwise all subjobs have the same number of input files # except the last subjob which has less nbfulljobs = self.nbjobs - 1 persub = len(filenames) / nbfulljobs for nb in range(nbfulljobs): Low = nb*persub High = (nb+1)*persub subsets.append(filenames[Low:High]) if not isPerfectSplit: subsets.append(filenames[High:]) for sub in subsets: j = addProxy(self.createSubjob(job)) j.inputdata.set_dataset_filenames(sub) subjobs.append(stripProxy(j)) return subjobs
def split(self,job):
    import os
    subjobs = []
    subsets = splitCSVFile(job.application.csvfile, self.nbevents)

    logger.info('Creating %d subjobs ...',len(subsets))

    # Base for the naming of each subjob's CSV file
    tmpname = os.path.basename(job.application.csvfile)
    if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1])+"_sub%d."+ tmpname.split('.')[-1]
    else:
        patterncsv = tmpname+"_sub%d"

    # Base for the naming of each subjob's output file
    tmpname = os.path.basename(job.application.outputfile)
    if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1])+"_sub%d."+ tmpname.split('.')[-1]
    else:
        patternout = tmpname+"_sub%d"

    for s,sub in enumerate(subsets):
        j = addProxy(self.createSubjob(job))
        j.inputdata = job.inputdata

        subLines = '\n'.join(sub)

        from Ganga.GPIDev.Lib.File import FileBuffer
        thiscsv = patterncsv % s
        # Save in the main job's inputdir now, then the file will be moved to
        # the inputdir of each subjob.
        job.getInputWorkspace().writefile(FileBuffer(thiscsv,subLines),executable=0)
        j.application.csvfile = os.path.join(job.inputdir,thiscsv)
        j.application.outputfile = patternout % s  # Prepare the output filenames, which must be unique

        subjobs.append(stripProxy(j))
    return subjobs
def exportToGPI(name, _object, doc_section, docstring=None): ''' Make object available publicly as "name" in Ganga.GPI module. Add automatic documentation to gangadoc system. "doc_section" specifies how the object should be documented. If docstring is specified then use it to document the object (only use for "Objects" section). Otherwise use __doc__ (via pydoc utilities). FIXME: if you try to export the object instance, you should import it with fully qualified path, e.g. import X.Y.Z X.Y.Z.object = object exportToGPI("obj",X.Y.Z.object,"Objects") It has been observed that doing exportToGPI("obj",object,"Objects") may not work. To be understood. ''' if isType(_object, GangaObject): setattr(Ganga.GPI, name, addProxy(_object)) else: setattr(Ganga.GPI, name, _object) adddoc(name, getattr(Ganga.GPI, name), doc_section, docstring)
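# Hedged example of publishing an object through exportToGPI() above; the exported
# name, object and docstring are illustrative, and "Objects" is the section named
# in the function's own docstring:
my_obj = SomeGangaObjectSubclass()   # hypothetical GangaObject instance
exportToGPI('myTemplate', my_obj, 'Objects', docstring='A pre-configured template object.')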
def setUp(self): super(TestGangaList, self).setUp() self.plain1 = [self._makeRandomTFile() for _ in range(15)] self.plain2 = [self._makeRandomTFile() for _ in range(10)] self.proxied1 = GangaList() self.proxied1.extend(self.plain1[:]) self.proxied2 = GangaList() self.proxied2.extend(self.plain2[:]) t = TFile() real_t = stripProxy(t) new_proxy_t = addProxy(real_t) #hopefully_t = stripProxy(new_proxy_t) #assert real_t is hopefully_t assert t is new_proxy_t self.assertEqual(len(getProxyAttr(self.proxied1, '_list')), len(self.plain1), "Something's wrong with construction") self.assertEqual(len(getProxyAttr(self.proxied2, '_list')), len(self.plain2), "Something's wrong with construction")
def getDataset(self): '''Gets the dataset from the bookkeeping for current dict.''' if not self.dict: return None cmd = 'bkQueryDict(%s)' % self.dict result = get_result(cmd, 'BK query error.', 'BK query error.') files = [] value = result['Value'] if 'LFNs' in value: files = value['LFNs'] if not type(files) is list: if 'LFNs' in files: # i.e. a dict of LFN:Metadata files = files['LFNs'].keys() from GangaDirac.Lib.Files.DiracFile import DiracFile this_list = [DiracFile(lfn=f) for f in files] from GangaLHCb.Lib.LHCbDataset import LHCbDataset ds = LHCbDataset(files=this_list, fromRef=True) return addProxy(ds)
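# Sketch of a dictionary-driven bookkeeping query using the method above, assuming
# the query object exposes the 'dict' field shown (a BKQueryDict-style object; the
# class name and dictionary contents are assumptions):
bkq = BKQueryDict()
bkq.dict = {'ConfigName': 'LHCb', 'ConfigVersion': 'Collision12',
            'EventType': '90000000', 'DataQualityFlag': 'OK'}
ds = bkq.getDataset()   # proxied LHCbDataset built from the returned LFNs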
def split(self, job):
    subjobs = []
    filenames = job.inputdata.get_dataset_filenames()
    if self.nbfiles < 1:
        raise Exception('Number of nbfiles not set properly.')
    subsets = splitNbInputFile(filenames, self.nbfiles)
    logger.info('Creating %d subjobs ...', len(subsets))
    for sub in subsets:
        j = addProxy(self.createSubjob(job))
        j.inputdata.set_dataset_filenames(sub)
        subjobs.append(stripProxy(j))
    return subjobs
def split(self,job):
    subjobs = []
    filenames = job.inputdata.get_dataset_filenames()
    if self.nbfiles < 1:
        raise Exception('Number of nbfiles not set properly.')
    subsets = splitNbInputFile(filenames, self.nbfiles)
    logger.info('Creating %d subjobs ...',len(subsets))
    for sub in subsets:
        j = addProxy(self.createSubjob(job))
        j.inputdata.set_dataset_filenames(sub)
        subjobs.append(stripProxy(j))
    return subjobs
def getDataset(self): """Gets the dataset from the bookkeeping for current dict.""" if not self.dict: return None cmd = "bkQueryDict(%s)" % self.dict result = get_result(cmd, "BK query error.", "BK query error.") files = [] value = result["Value"] if "LFNs" in value: files = value["LFNs"] if not type(files) is list: if "LFNs" in files: # i.e. a dict of LFN:Metadata files = files["LFNs"].keys() from GangaDirac.Lib.Files.DiracFile import DiracFile this_list = [DiracFile(lfn=_file) for _file in files] from GangaLHCb.Lib.LHCbDataset import LHCbDataset ds = LHCbDataset(this_list) return addProxy(ds)
def next(self): return addProxy(next(self.it))
def _export_pop(self, index=-1): self.checkReadOnly() return addProxy(self.pop(index))
class TestSharedFileGetPut(TestMassStorageGetPut): """Testing the get/put/copyTo methods of SharedFile""" fileClass = addProxy(SharedFile)
class TestMassStorageGetPut(GangaUnitTest): """Testing the get/put/copyTo methods of MassStorage""" _temp_files = [] _managed_files = [] # Num of sj in tests sj_len = 3 fileClass = addProxy(MassStorageFile) # Where on local storage we want to have our 'MassStorage solution' outputFilePath = '/tmp/Test' + _getName(fileClass) + 'GetPut' # This sets up a MassStorageConfiguration which works by placing a file on local storage somewhere we can test using standard tools MassStorageTestConfig = { 'defaultProtocol': 'file://', 'fileExtensions': [''], 'uploadOptions': { 'path': outputFilePath, 'cp_cmd': 'cp', 'ls_cmd': 'ls', 'mkdir_cmd': 'mkdir -p' }, 'backendPostprocess': { 'LSF': 'client', 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'client', 'Interactive': 'client', 'Local': 'client', 'CREAM': 'client' } } def setUp(self): """ Configure the MassStorageFile for the test """ extra_opts = [('PollThread', 'autostart', 'False'), ('Local', 'remove_workdir', 'False'), ('TestingFramework', 'AutoCleanup', 'False'), ('Output', _getName(self.fileClass), self.MassStorageTestConfig), ('Output', 'FailJobIfNoOutputMatched', 'True')] super(TestMassStorageGetPut, self).setUp(extra_opts=extra_opts) @staticmethod def cleanUp(): """ Cleanup the current temp jobs """ from Ganga.GPI import jobs for j in jobs: shutil.rmtree(j.backend.workdir, ignore_errors=True) j.remove() @classmethod def setUpClass(cls): """ This creates a safe place to put the files into 'mass-storage' """ cls.outputFilePath = tempfile.mkdtemp() cls.MassStorageTestConfig['uploadOptions']['path'] = cls.outputFilePath @classmethod def tearDownClass(cls): """ Cleanup the current temp objects """ for file_ in cls._temp_files: os.unlink(file_) cls._temp_files = [] for file_ in cls._managed_files: os.unlink(os.path.join(cls.outputFilePath, file_.namePattern)) cls._managed_files = [] shutil.rmtree(cls.outputFilePath, ignore_errors=True) def test_a_test_put(self): """Test that a job can be submitted with inputfiles in the input""" MassStorageFile = self.fileClass _ext = '.root' file_1 = generate_unique_temp_file(_ext) file_2 = generate_unique_temp_file(_ext) self._temp_files.append(file_1) self._temp_files.append(file_2) msf_1 = MassStorageFile(file_1) msf_2 = MassStorageFile(file_2) self._managed_files.append(msf_1) self._managed_files.append(msf_2) msf_1.put() msf_2.put() for file_ in [msf for msf in (msf_1, msf_2)]: assert os.path.isfile( os.path.join(self.outputFilePath, file_.namePattern)) file_.localDir = '' assert file_.localDir == '' def test_b_test_get(self): """Test that the files were made accessible to the WN area and collected as LocalFile objects in outputfiles""" from Ganga.GPI import Job tmpdir = tempfile.mkdtemp() # Test in the case that the files don't have a parent or a localDir for file_ in self._managed_files: file_.localDir = '' try: assert file_.localDir == '' file_.get() print("Unexpected localDir: %s" % file_.localDir) failed = False except GangaException: failed = True assert failed # Test in the case that the localDir has been set for file_ in self._managed_files: file_.localDir = tmpdir print("localDir: %s" % file_.localDir) file_.get() assert os.path.isfile(os.path.join(tmpdir, file_.namePattern)) file_.localDir = '' assert file_.localDir == '' # Test in the case that the object is 'owned' by a Job j = Job() outputdir = stripProxy(j).getOutputWorkspace(create=True).getPath() j.outputfiles = self._managed_files for file_ in j.outputfiles: assert stripProxy(file_).getJobObject() is stripProxy(j) assert 
file_.localDir == '' file_.get() assert os.path.isfile(os.path.join(outputdir, file_.namePattern)) shutil.rmtree(tmpdir, ignore_errors=True) self.cleanUp() def test_c_test_copyTo(self): """ Test the new copyTo interface""" tmpdir = tempfile.mkdtemp() for file_ in self._managed_files: file_.localDir = '' stripProxy(file_).copyTo(tmpdir) assert os.path.isfile(os.path.join(tmpdir, file_.namePattern)) shutil.rmtree(tmpdir, ignore_errors=True)
def testDelItem(self): """Test __delitem__""" for p in [addProxy(x) for x in self.plain1[:]]: assert isProxy(p) del self.proxied1[self.proxied1.index(p)]
from __future__ import absolute_import try: import unittest2 as unittest except ImportError: import unittest import random import string from Ganga.GPIDev.Base.Proxy import addProxy, getProxyAttr, isProxy, isType, stripProxy from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList GangaList = addProxy(GangaList) from .TFile import TFile TFile = addProxy(TFile) # set the seed for repeatable tests random.seed(666) from Ganga.Utility.logging import getLogger logger = getLogger(modulename=True) class TestGangaList(unittest.TestCase): def __init__(self, *args, **kwargs): super(TestGangaList, self).__init__(*args, **kwargs) self.plain1 = []
def testInsert(self): t = addProxy(TFile(name='foo')) self.proxied1.insert(8, t) assert self.proxied1[8] == t