class TestSchema(unittest.TestCase):

    def setUp(self):
        self.dd = {
            'application': ComponentItem(category='applications'),
            'backend': ComponentItem(category='backends'),
            'name': SimpleItem('', comparable=0),
            'workdir': SimpleItem(defvalue=None, type='string', transient=1, protected=1, comparable=0),
            'status': SimpleItem(defvalue='new', protected=1, comparable=0),
            'id': SimpleItem(defvalue=None, typelist=[str], protected=1, comparable=0),
            'inputbox': FileItem(defvalue=[], sequence=1),
            'outputbox': FileItem(defvalue=[], sequence=1),
            'overriden_copyable': SimpleItem(defvalue=None, protected=1, copyable=1),
            'plain_copyable': SimpleItem(defvalue=None, copyable=0)
        }
        self.s = Schema(Version(1, 0), self.dd)

    def test_items_list(self):
        """
        Make sure all the items are added
        """
        self.assertEqual(self.s.allItems(), self.dd.items())
        self.assertEqual(sorted(self.s.componentItems() + self.s.simpleItems()),
                         sorted(self.dd.items()))

    def test_get_non_existant(self):
        """
        Make sure that fetching a non-existent member raises the correct exception.
        """
        def _get():
            temp = self.s['b']
        self.assertRaises(GangaAttributeError, _get)

    def test_category_name(self):
        class PClass(object):
            _category = 'jobs'
            _name = 'Job'
        self.s._pluginclass = PClass
        self.assertEqual(self.s.name, 'Job')
        self.assertEqual(self.s.category, 'jobs')

    def test_item_types(self):
        self.assertTrue(self.s['id'].isA(SimpleItem))
        self.assertTrue(self.s['application'].isA(ComponentItem))
        self.assertTrue(self.s['inputbox'].isA(ComponentItem))
        self.assertTrue(self.s['inputbox'].isA(FileItem))

    def test_item_attributes(self):
        self.assertTrue(self.s['id']['protected'])
        self.assertFalse(self.s['id']['comparable'])
        self.assertTrue(str in self.s['id']['typelist'])

    def test_implied(self):
        self.assertTrue(self.s['overriden_copyable']['copyable'])
        self.assertFalse(self.s['plain_copyable']['copyable'])
        self.assertFalse(self.s['id']['copyable'])
        self.assertTrue(self.s['application']['copyable'])

class CoreUnit(IUnit):
    _schema = Schema(Version(1, 0), dict(IUnit._schema.datadict.items() + {}.items()))
    _category = 'units'
    _name = 'CoreUnit'
    _exportmethods = IUnit._exportmethods + []

    def __init__(self):
        super(CoreUnit, self).__init__()

    def createNewJob(self):
        """Create any jobs required for this unit"""
        j = makeRegisteredJob()

        j.backend = self._getParent().backend.clone()

        # copy from ourselves or the parent transform depending on what's
        # specified
        fields = ['application', 'splitter', 'inputfiles',
                  'inputdata', 'inputsandbox', 'outputfiles', 'postprocessors']

        for f in fields:
            if (f == "postprocessors" and len(getattr(self, f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(addProxy(self).postprocessors)
            elif (f != "postprocessors" and getattr(self, f)):
                setattr(j, f, copy.deepcopy(getattr(self, f)))
            elif (f == "postprocessors" and len(getattr(self._getParent(), f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(addProxy(self._getParent()).postprocessors)
            elif (f != "postprocessors" and getattr(self._getParent(), f)):
                setattr(j, f, copy.deepcopy(getattr(self._getParent(), f)))

        return j

class Notifier(IPostProcessor):
    """
    Object which emails a user about a job's status once it has finished.
    The default behaviour is to email when a job has failed or when a master job has completed.
    Notes:
    * Ganga must be running to send the email, so this object is only really useful
      if you have a Ganga session running in the background (e.g. a screen session).
    * Will not send emails about failed subjobs if autoresubmit is on.
    """
    _schema = Schema(Version(1, 0), {
        'verbose': SimpleItem(defvalue=False, doc='Email on subjob completion'),
        'address': SimpleItem(defvalue='', doc='Email address', optional=False)
    })
    _category = 'postprocessor'
    _name = 'Notifier'
    order = 3

    def execute(self, job, newstatus):
        """
        Email the user if:
        * the job is a master job, or
        * the job has failed and auto resubmit is off, or
        * the job has not failed and verbose is set to True
        """
        if len(job.subjobs) or (newstatus == 'failed' and job.do_auto_resubmit is False) or (newstatus != 'failed' and self.verbose is True):
            return self.email(job, newstatus)
        return True

    def email(self, job, newstatus):
        """
        Method to email a user about a job
        """
        sender = '*****@*****.**'
        receivers = self.address
        subject = 'Ganga Notification: Job(%s) has %s.' % (job.fqid, newstatus)
        msg_string = """
Dear User,\n
Job(%s) has gone into %s state.\n
Regards, Ganga\n
PS: This is an automated notification from Ganga, if you would like these messages to stop please remove the notifier object from future jobs.
""" % (job.fqid, newstatus)
        msg = email.message_from_string(msg_string)
        msg['Subject'] = subject
        msg['From'] = sender
        msg['To'] = receivers
        string_message = msg.as_string()

        try:
            smtpObj = smtplib.SMTP(config['SMTPHost'])
            smtpObj.sendmail(sender, receivers, string_message)
        except smtplib.SMTPException as e:
            raise PostProcessException(str(e))
        return True

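# Usage sketch (illustrative, not part of the source): attaching a Notifier to
# a job from a Ganga GPI session. Assumes Job and Notifier are exported to the
# GPI and that config['SMTPHost'] names a reachable SMTP server; the address
# below is a placeholder.
j = Job()
j.postprocessors.append(Notifier(address='user@example.com', verbose=True))
j.submit()  # an email is sent when the master job completes or fails
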
class NonProxiedGangaObject(GangaObject):
    """
    This is a class which should not be present in the GPI and should not be wrapped with a proxy
    """
    _schema = Schema(Version(1, 0))
    _category = 'TestGangaObject'
    _name = 'TestGangaObject'

class DefaultSplitter(ISplitter):
    """
    The DefaultSplitter is assigned to all jobs by default and is intended to
    provide a single subjob for every job on submit. It has been implemented
    because it potentially simplifies the internal logic in job management
    significantly.
    This splitter is not expected to be configurable or to split a dataset
    based upon any input. In order to do that, please make use of another splitter.
    """
    _name = "DefaultSplitter"
    # A dummy value is required to avoid a bug when writing the object to an XML repo.
    # The nature of the problem of writing an empty schema should probably be
    # understood more correctly but is difficult to track down -rcurrie
    _schema = Schema(Version(1, 0), {
        'dummy_value': SimpleItem(defvalue=1, hidden=1, visitable=0,
                                  doc='dummy value required to avoid writing an empty schema to the XML repo',
                                  typelist=["int"])
    })

    def split(self, job):
        subjobs = []
        sj = self.createSubjob(job)
        subjobs.append(sj)
        return subjobs

class SimpleGangaObject(GangaObject):
    _schema = Schema(Version(1, 0), {
        'a': SimpleItem(42, typelist=[int]),
    })
    _category = 'TestGangaObject'
    _hidden = True
    _enable_plugin = True

class TestGangaObject(GangaObject):
    """Test Ganga Object. Is used to construct test jobs"""
    _schema = Schema(Version(1, 0), {
        'id': SimpleItem('0', doc='ID Needed for tests'),
        'name': SimpleItem('', doc='optional label which may be any combination of ASCII characters', typelist=['str']),
        'subjobs': ComponentItem('internal', defvalue=[], sequence=1, protected=1,
                                 load_default=0, copyable=0, optional=1, doc='test subjobs'),
    })
    _name = "TestGangaObject"
    _category = "internal"

    def __init__(self, name='TestObjectName', sj=0):
        super(TestGangaObject, self).__init__()
        self.name = name
        for i in range(sj):
            self.subjobs.append(TestGangaObject(name + "." + str(i)))

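# Illustrative check (not from the source): the constructor above builds a
# flat list of nested test objects, naming each child '<name>.<index>'.
tgo = TestGangaObject('root', sj=2)
assert [sub.name for sub in tgo.subjobs] == ['root.0', 'root.1']
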
class GridFileIndex(GangaObject):
    '''
    Data object for indexing a file on the grid.

    @author: Hurng-Chun Lee
    @contact: [email protected]
    '''
    _schema = Schema(Version(1, 0), {
        'id': SimpleItem(defvalue='', doc='the main identity of the file'),
        'name': SimpleItem(defvalue='', doc='the name of the file'),
        'md5sum': SimpleItem(defvalue='', doc='the md5sum of the file'),
        'attributes': SimpleItem(defvalue={}, doc='key:value pairs of file metadata')
    })
    _category = 'GridFileIndex'
    _name = 'GridFileIndex'

    logger = getLogger()

    def __init__(self):
        super(GridFileIndex, self).__init__()

    def __eq__(self, other):
        return other.id == self.id

class SampleGangaObject(GangaObject):
    _schema = Schema(Version(1, 0), {
        'a': SimpleItem(42, typelist=[int]),
        # 'b' is skipped on purpose
        'c': ComponentItem('gangafiles'),
    })
    _category = 'TestGangaObject'
    _name = 'TestGangaObject'

    _exportmethods = ['example', 'check_not_proxy']

    def example(self):
        return 'example_string'

    def check_not_proxy(self, obj):
        assert not Ganga.GPIDev.Base.Proxy.isProxy(obj), 'incoming argument should be proxy-stripped'
        ret = SampleGangaObject()
        assert not Ganga.GPIDev.Base.Proxy.isProxy(ret), 'new object should not be proxy-wrapped'
        return ret

    def not_proxied(self):
        return 'example_string'

def taskify(baseclass, name):
    smajor = baseclass._schema.version.major
    sminor = baseclass._schema.version.minor

    cat = baseclass._category
    if cat == "applications":
        schema_items = _app_schema
        taskclass = TaskApplication
    elif cat == "splitters":
        schema_items = _splitter_schema
        taskclass = TaskSplitter

    classdict = {
        "_schema": Schema(Version(smajor, sminor),
                          dict(baseclass._schema.datadict.items() + schema_items)),
        "_category": cat,
        "_name": name,
        "__init__": __task__init__,
    }

    if '_exportmethods' in baseclass.__dict__:
        classdict['_exportmethods'] = baseclass.__dict__['_exportmethods']

    cls = classobj(name, (taskclass, baseclass), classdict)

    global handler_map
    # Use the same handlers as for the base class
    handler_map.append((getName(baseclass), name))

    return cls

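# Usage sketch (assumption: Executable is a registered Ganga application in
# category 'applications'). taskify() then builds a TaskApplication subclass
# whose schema is the base schema plus the task bookkeeping items.
ExecutableTask = taskify(Executable, 'ExecutableTask')
assert ExecutableTask._category == 'applications'
assert ExecutableTask._name == 'ExecutableTask'
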
class ICommandSet(GangaObject):
    """
    Class used to define shell commands and options for working with credentials
    """
    _schema = Schema(Version(1, 0), {
        "init": SimpleItem(defvalue="", doc="Command for creating/initialising credential"),
        "info": SimpleItem(defvalue="", doc="Command for obtaining information about credential"),
        "destroy": SimpleItem(defvalue="", doc="Command for destroying credential"),
        "init_parameters": SimpleItem(defvalue={}, doc="Dictionary of parameter-value pairs to pass to init command"),
        "destroy_parameters": SimpleItem(defvalue={}, doc="Dictionary of parameter-value pairs to pass to destroy command"),
        "info_parameters": SimpleItem(defvalue={}, doc="Dictionary mapping from Ganga credential properties to command-line options"),
    })
    _category = "credential_commands"
    _name = "ICommandSet"
    _hidden = 1
    _enable_config = 1

    def __init__(self):
        super(ICommandSet, self).__init__()
        return

    def setConfigValues(self):
        """
        Update properties using values from relevant section of config file.
        """
        section = "defaults_%s" % self._name
        config = getConfig(section)
        for attribute in self._schema.datadict.keys():
            try:
                value = config[attribute]
                try:
                    value = eval(value)
                except Exception:
                    pass
                setattr(self, attribute, value)
            except ConfigError:
                pass

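# Configuration sketch (hypothetical section and values): setConfigValues()
# reads the config section 'defaults_<_name>' and tries to eval() each value,
# so a .gangarc fragment for a hypothetical subclass named 'VomsCommand'
# could look like:
#
#   [defaults_VomsCommand]
#   init = voms-proxy-init
#   destroy = voms-proxy-destroy
#   init_parameters = {'valid': '24:00'}
#
# Plain strings fail the eval() and are kept verbatim; the dictionary literal
# is eval()ed into a dict before being set on the object.
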
class ThreadedTestGangaObject(GangaObject):
    _schema = Schema(Version(1, 0), {
        'a': SimpleItem(42, typelist=[int]),
        'b': ComponentItem('TestGangaObject', defvalue='SimpleGangaObject'),
    })
    _category = 'TestGangaObject'
    _hidden = True
    _enable_plugin = True

class ArgSplitter(ISplitter):
    """
    Split job by changing the args attribute of the application.

    This splitter only applies to the applications which have an args attribute
    (e.g. Executable, Root). It is a special case of the GenericSplitter.

    This splitter allows the creation of a series of subjobs where the only
    difference between the jobs is their arguments. Below is an example that
    executes a ROOT script ~/analysis.C

    void analysis(const char* type, int events)
    {
      std::cout << type << " " << events << std::endl;
    }

    with 3 different sets of arguments.

    s = ArgSplitter(args=[['AAA',1],['BBB',2],['CCC',3]])
    r = Root(version='5.10.00',script='~/analysis.C')
    j = Job(application=r, splitter=s)

    Notice how each job takes a list of arguments (in this case a list with a
    string and an integer). The splitter thus takes a list of lists, in this
    case with 3 elements, so there will be 3 subjobs. Running the subjobs will
    produce the output:

    subjob 1 : AAA 1
    subjob 2 : BBB 2
    subjob 3 : CCC 3
    """
    _name = "ArgSplitter"
    _schema = Schema(Version(1, 0), {
        'args': SimpleItem(defvalue=[],
                           typelist=['list', 'Ganga.GPIDev.Lib.GangaList.GangaList.GangaList'],
                           sequence=1,
                           doc='A list of lists of arguments to pass to script')
    })

    def split(self, job):

        subjobs = []

        for arg in self.args:
            j = self.createSubjob(job, ['application'])
            # Add new arguments to subjob
            app = copy.deepcopy(job.application)
            app.args = arg
            j.application = app
            logger.debug('Arguments for split job is: ' + str(arg))
            subjobs.append(stripProxy(j))

        return subjobs

class EmptyGangaObject(GangaObject):
    """Empty Ganga Object. Is used to construct incomplete jobs"""
    _schema = Schema(Version(0, 0), {})
    _name = "EmptyGangaObject"
    _category = "internal"
    _hidden = 1

    def __init__(self):
        super(EmptyGangaObject, self).__init__()

class IncompleteObject(GangaObject):
    """
    This class represents an object that could not be loaded on startup
    """
    _schema = Schema(Version(0, 0), {})
    _name = "IncompleteObject"
    _category = "internal"
    _hidden = 1

    _exportmethods = ['reload', 'remove', '__repr__']

    def __init__(self, registry, this_id):
        """
        This constructs an object which is placed into the objects dict when a
        repo fails to load an object due to some error
        Args:
            registry (Registry): This is the registry the object belongs to
            this_id (int): This is the registry/repo id of the object in the objects dict
        """
        super(IncompleteObject, self).__init__()
        self.registry = registry
        self.id = this_id

    def reload(self):
        """
        This will trigger a re-load of the object from disk, which is useful
        if the object was locked but accessible by Ganga
        TODO: work out if this is still called anywhere
        """
        with self.registry._flush_lock:
            with self.registry._read_lock:
                self.registry._load(self)
                logger.debug("Successfully reloaded '%s' object #%i!" % (self.registry.name, self.id))

    def remove(self):
        """
        This will trigger a delete of the object itself from within the given
        Repository but not the registry
        TODO: work out if this is safe and still called
        """
        with self.registry._flush_lock:
            with self.registry._read_lock:
                if len(self.registry.repository.lock([self.id])) == 0:
                    errstr = "Could not lock '%s' object #%i!" % (self.registry.name, self.id)
                    try:
                        errstr += " Object is locked by session '%s' " % self.registry.repository.get_lock_session(self.id)
                    except Exception as err:
                        logger.debug("Remove Lock error: %s" % err)
                    raise RegistryLockError(errstr)
                self.registry.repository.delete([self.id])

    def __repr__(self):
        """
        This returns a repr of the object in question as inaccessible
        """
        return "Incomplete object in '%s', ID %i. Try reload() or remove()." % (self.registry.name, self.id)

class Im3ShapeSplitter(ISplitter):
    """
    This splitter splits jobs using the Im3ShapeApp application using the size parameter.

    If a splitter is configured with size = 5, split_by_file = True, then it will
    create 5 subjobs per file in the master_job.inputdata
    If a splitter is configured with size = 5, split_by_file = False, then it will
    create 5 subjobs total and configure all subjobs to use all given data.

    In the future there may be support for splitting based upon regex and
    namePatterns in the inputdata to allow a certain subset of data to be put in
    each subjob.
    """
    _name = "Im3ShapeSplitter"
    _schema = Schema(Version(1, 0), {
        'size': SimpleItem(defvalue=5, doc='Size of the tiles which are to be split.'),
        'split_by_file': SimpleItem(defvalue=True, doc='Should we auto-split into subjobs here on a per-file basis?')
    })

    def split(self, job):
        """
        Actually perform the splitting of the given master job. The generated subjobs of the splitting are returned
        Args:
            job (Job): This is the master job object which is to be split and the subjobs of which are returned
        """
        assert isinstance(job.application, Im3ShapeApp)

        subjobs = []

        def getApp(job, rank, size):
            app = copy.deepcopy(job.application)
            app.rank = rank
            app.size = size
            return app

        if self.split_by_file:
            for this_file in job.inputdata:
                for rank in range(0, self.size):
                    j = self.createSubjob(job, ['application'])
                    # Add new arguments to subjob
                    j.application = getApp(job, rank, self.size)
                    j.inputdata = GangaDataset(files=[stripProxy(this_file)])
                    subjobs.append(j)
        else:
            for rank in range(0, self.size):
                j = self.createSubjob(job, ['application'])
                j.application = getApp(job, rank, self.size)
                j.inputdata = job.inputdata
                logger.debug('Rank for split job is: ' + str(rank))
                subjobs.append(j)

        return subjobs

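# Usage sketch (illustrative): with split_by_file=True and size=4, a master
# job with two files in inputdata is split into 2 * 4 = 8 subjobs, each
# carrying one file and one rank in [0, 4).
splitter = Im3ShapeSplitter()
splitter.size = 4
splitter.split_by_file = True
# j.splitter = splitter  # on a Job whose application is an Im3ShapeApp
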
def test_create(self):
    """
    Create a complex schema and make sure all the items are added
    """
    dd = {
        'application': ComponentItem(category='applications'),
        'backend': ComponentItem(category='backends'),
        'name': SimpleItem('', comparable=0),
        'workdir': SimpleItem(defvalue=None, type='string', transient=1, protected=1, comparable=0),
        'status': SimpleItem(defvalue='new', protected=1, comparable=0),
        'id': SimpleItem(defvalue=None, type='string', protected=1, comparable=0),
        'inputbox': FileItem(defvalue=[], sequence=1),
        'outputbox': FileItem(defvalue=[], sequence=1),
        'overriden_copyable': SimpleItem(defvalue=None, protected=1, copyable=1),
        'plain_copyable': SimpleItem(defvalue=None, copyable=0)
    }
    s = Schema(Version(1, 0), dd)

    self.assertEqual(s.allItems(), dd.items())
    self.assertEqual(sorted(s.componentItems() + s.simpleItems()), sorted(dd.items()))

class GangaDatasetSplitter(ISplitter):
    """
    Split job based on files given in GangaDataset inputdata field
    """
    _name = "GangaDatasetSplitter"
    _schema = Schema(Version(1, 0), {
        'files_per_subjob': SimpleItem(defvalue=5, doc='the number of files per subjob', typelist=[int]),
        'maxFiles': SimpleItem(defvalue=-1,
                               doc='Maximum number of files to use in a masterjob (None or -1 = all files)',
                               typelist=[int, None]),
    })

    def split(self, job):
        if not job.inputdata or not isType(job.inputdata, GangaDataset):
            raise ApplicationConfigurationError("No GangaDataset given for GangaDatasetSplitter")

        # find the full file list
        full_list = []
        for f in job.inputdata.files:
            if f.containsWildcards():
                # we have a wildcard so grab the subfiles
                for sf in f.getSubFiles(process_wildcards=True):
                    full_list.append(sf)
            else:
                # no wildcards so just add the file
                full_list.append(f)

        if len(full_list) == 0:
            raise ApplicationConfigurationError("GangaDatasetSplitter couldn't find any files to split over")

        masterType = type(job.inputdata)

        # split based on all the sub files
        fid = 0
        subjobs = []
        filesToRun = len(full_list)
        if not self.maxFiles == -1:
            filesToRun = min(self.maxFiles, filesToRun)

        while fid < filesToRun:
            j = self.createSubjob(job)
            j.inputdata = masterType()
            j.inputdata.treat_as_inputfiles = job.inputdata.treat_as_inputfiles
            for sf in full_list[fid:fid + self.files_per_subjob]:
                j.inputdata.files.append(sf)
            fid += self.files_per_subjob
            subjobs.append(j)

        return subjobs

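# Usage sketch (illustrative): 23 input files with files_per_subjob=5 yield
# ceil(23 / 5) == 5 subjobs; setting maxFiles=10 would cap the split at the
# first 10 files (2 subjobs).
splitter = GangaDatasetSplitter()
splitter.files_per_subjob = 5
# j.inputdata = GangaDataset(files=[...]); j.splitter = splitter; j.submit()
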
class CoreTask(ITask):
    """General, non-experiment-specific Task"""
    _schema = Schema(Version(1, 0), dict(ITask._schema.datadict.items() + {}.items()))
    _category = 'tasks'
    _name = 'CoreTask'
    _exportmethods = ITask._exportmethods + []

    _tasktype = "ITask"

    default_registry = "tasks"

class Dataset(GangaObject):
    _schema = Schema(Version(1, 0), {})
    _category = 'datasets'
    _name = "EmptyDataset"

    def __init__(self):
        super(Dataset, self).__init__()

    # Return True if the dataset is an instance of the default base class.
    # You may override it in your dataset definition but it is not mandatory.
    def isEmpty(self):
        return self._name == Dataset._name

class TestGangaObject(GangaObject):
    _schema = Schema(Version(1, 0), {'a': SimpleItem(42, typelist=['int'])})
    _category = 'TestGangaObject'
    _name = 'TestGangaObject'
    _exportmethods = ['example']

    def example(self):
        return 'example_string'

    def not_proxied(self):
        return 'example_string'

class TaskLocalCopy(Dataset):
    """Dummy dataset to force Tasks to copy the output from a job to local storage somewhere"""
    _schema = Schema(Version(1, 0), {
        'local_location': SimpleItem(defvalue="", doc="Local location to copy files to"),
        'include_file_mask': SimpleItem(defvalue=[], typelist=['str'], sequence=1,
                                        doc='List of regular expressions of which files to include in copy'),
        'exclude_file_mask': SimpleItem(defvalue=[], typelist=['str'], sequence=1,
                                        doc='List of regular expressions of which files to exclude from copy'),
        'files': SimpleItem(defvalue=[], typelist=['str'], sequence=1,
                            doc='List of successfully downloaded files'),
    })
    _category = 'datasets'
    _name = 'TaskLocalCopy'
    _exportmethods = ["isValid", "isDownloaded"]

    def __init__(self):
        super(TaskLocalCopy, self).__init__()

    def isValid(self, fname):
        """Check if this file should be downloaded"""
        for in_re in self.include_file_mask:
            if not re.search(in_re, fname):
                return False
        for out_re in self.exclude_file_mask:
            if re.search(out_re, fname):
                return False
        return True

    def isDownloaded(self, fname):
        """Check if this file is present at the local_location"""
        return os.path.exists(os.path.join(self.local_location, fname))

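# Illustrative behaviour of the masks above (plain re.search regexes): a file
# must match every include mask and no exclude mask to be considered valid.
tlc = TaskLocalCopy()
tlc.include_file_mask = [r'\.root$']
tlc.exclude_file_mask = [r'^tmp_']
assert tlc.isValid('hist.root') is True
assert tlc.isValid('tmp_hist.root') is False
assert tlc.isValid('hist.txt') is False
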
class CRABSplitter(ISplitter):
    """Splitter object for CRAB jobs."""
    schemadic = {}
    schemadic['maxevents'] = SimpleItem(defvalue=None,
                                        typelist=['type(None)', 'int'],
                                        doc='Maximum number of events/task')
    schemadic['inputfiles'] = SimpleItem(defvalue=None,
                                         typelist=['type(None)', 'str'],
                                         doc='Number of input files')
    schemadic['skipevents'] = SimpleItem(defvalue=None,
                                         typelist=['type(None)', 'int'],
                                         doc='Offset for the events')
    _name = 'CRABSplitter'
    _schema = Schema(Version(1, 0), schemadic)

    def parseArguments(self, path):
        """Gets some job arguments from the FJR."""
        splittingData = []
        for job in parse(path).getElementsByTagName("Job"):
            splittingData.append([job.getAttribute("MaxEvents"),
                                  job.getAttribute("InputFiles"),
                                  job.getAttribute("SkipEvents")])
        return splittingData

    def split(self, job):
        """Main splitter for the job."""
        try:
            arguments_path = os.path.join(job.inputdata.ui_working_dir,
                                          'share/arguments.xml')
            splittingData = self.parseArguments(arguments_path)
        except IOError as e:
            raise SplitterError(e)

        subjobs = []
        for maxevents, inputfiles, skipevents in splittingData:
            j = self.createSubjob(job)
            j.master = job
            j.application = job.application
            j.inputdata = job.inputdata
            j.backend = job.backend

            splitter = CRABSplitter()
            splitter.maxevents = maxevents
            splitter.inputfiles = inputfiles
            splitter.skipevents = skipevents
            j.splitter = splitter
            subjobs.append(j)
        return subjobs

class BoxMetadataObject(GangaObject):
    """Internal object to store names"""
    _schema = Schema(Version(1, 0), {
        "name": SimpleItem(defvalue="", copyable=1, doc='the name of this object', typelist=[str])
    })
    _name = "BoxMetadataObject"
    _category = "internal"
    _enable_plugin = True
    _hidden = 1

class CRABDataset(Dataset):
    """Dataset definition for CRAB jobsets."""
    schemadic = {}
    schemadic.update(CRAB().schemadic)
    schemadic.update(TASK().schemadic)
    #schemadic['target_site'] = SimpleItem(defvalue=None,
    #                                      typelist=['type(None)', 'str'],
    #                                      doc='Target site name for the job.')
    #schemadic['taskname'] = SimpleItem(defvalue=None, typelist=['type(None)', 'str'],
    #                                   doc='TaskName of the submitted task, returned from CRAB3 server')
    _schema = Schema(Version(1, 0), schemadic)
    _category = 'datasets'
    _name = 'CRABDataset'

    def __init__(self):
        super(CRABDataset, self).__init__()

class CRABDataset(Dataset):
    """Dataset definition for CRAB jobsets."""
    schemadic = {}
    schemadic.update(CMSSW().schemadic)
    schemadic.update(CRAB().schemadic)
    schemadic.update(GRID().schemadic)
    schemadic.update(USER().schemadic)
    schemadic['target_site'] = SimpleItem(defvalue=None,
                                          typelist=['type(None)', 'str'],
                                          doc='Target site name for the job.')
    _schema = Schema(Version(1, 0), schemadic)
    _category = 'datasets'
    _name = 'CRABDataset'

    def __init__(self):
        super(CRABDataset, self).__init__()

class OptionsFileSplitter(ISplitter):
    '''Split a job based on a list of option-file fragments

    This splitter takes as argument a list of option-file statements and
    generates a subjob for each item in the list. The value of each individual
    list item is appended to the master options file.
    A use case for this splitter would be to change a parameter in an
    algorithm (e.g. a cut) and to create a set of jobs with different cuts.
    '''
    _name = "OptionsFileSplitter"
    docstr = "List of option-file strings, each list item creates a new subjob"
    _schema = Schema(Version(1, 0),
                     {'optsArray': SimpleItem(defvalue=[], doc=docstr)})

    def _create_subjob(self, job, inputdata):
        j = Job()
        j.copyFrom(job)
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputfiles = []
        j.inputdata = inputdata
        return j

    def split(self, job):
        subjobs = []

        inputdata = job.inputdata
        if not job.inputdata:
            share_path = os.path.join(
                expandfilename(getConfig('Configuration')['gangadir']),
                'shared',
                getConfig('Configuration')['user'],
                job.application.is_prepared.name,
                'inputdata',
                'options_data.pkl')

            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                inputdata = pickle.load(f)
                f.close()

        for i in self.optsArray:
            j = self._create_subjob(job, inputdata)
            j._splitter_data = i
            subjobs.append(j)
        return subjobs

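# Usage sketch (illustrative): each entry in optsArray becomes one subjob, with
# the string appended to that subjob's master options file.
splitter = OptionsFileSplitter()
splitter.optsArray = ['cut = 10;\n', 'cut = 20;\n', 'cut = 30;\n']
# j.splitter = splitter  # split(j) would yield three subjobs, one per string
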
class OutputData(GangaObject):
    '''Class for handling outputdata for LHCb jobs.

    Example Usage:
    od = OutputData(["file.1","file.2"])
    od[0] # "file.1" [...etc...]
    '''
    schema = {}
    schema['files'] = SimpleItem(defvalue=[], typelist=['str'], sequence=1)
    schema['location'] = SimpleItem(defvalue='', typelist=['str'])
    _schema = Schema(Version(1, 1), schema)
    _category = 'datasets'
    _name = "OutputData"
    _exportmethods = ['__len__', '__getitem__']

    def __init__(self, files=None):
        if files is None:
            files = []
        super(OutputData, self).__init__()
        self.files = files

    def __construct__(self, args):
        if (len(args) != 1) or (type(args[0]) not in [list, tuple]):
            super(OutputData, self).__construct__(args)
        else:
            self.files = args[0]

    def __len__(self):
        """The number of files in the dataset."""
        result = 0
        if self.files:
            result = len(self.files)
        return result

    def __nonzero__(self):
        """This is always True, as with an object."""
        return True

    def __getitem__(self, i):
        '''Provides scripting (e.g. od[2] returns the 3rd file name)'''
        if type(i) == type(slice(0)):
            return GPIProxyObjectFactory(OutputData(files=self.files[i]))
        else:
            return self.files[i]

class IChecker(IPostProcessor):
    """
    Abstract class which all checkers inherit from.
    """
    _schema = Schema(Version(1, 0), {
        'checkSubjobs': SimpleItem(defvalue=True, doc='Run on subjobs'),
        'checkMaster': SimpleItem(defvalue=True, doc='Run on master')
    })
    _category = 'postprocessor'
    _name = 'IChecker'
    _hidden = 1
    order = 2

    def execute(self, job, newstatus):
        """
        Run the check method. If the check itself raises an exception, pass
        the job anyway, log an ERROR message and record the error in the
        job's debug folder.
        """
        if newstatus == 'completed':
            # If we're the master job and checkMaster is set, check.
            # If we're not the master job and checkSubjobs is set, check.
            if (job.master is None and self.checkMaster) or \
               ((job.master is not None) and self.checkSubjobs):
                try:
                    return self.check(job)
                except Exception as e:
                    with open(os.path.join(job.getDebugWorkspace().getPath(), 'checker_errors.txt'), 'a') as debug_file:
                        debug_file.write('\n Checker has failed with the following error: \n')
                        debug_file.write(str(e))
                    logger.error("%s" % e)
                    return True
        else:
            return True

    def check(self, job):
        """
        Method to check the output of jobs. Should be overridden.
        """
        raise NotImplementedError

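# Minimal subclass sketch (not from the source): a concrete checker only needs
# to implement check(job), returning True to pass the job and False to fail it.
# 'NonEmptyStdoutChecker' is a hypothetical name; inherit_copy() is used on the
# assumption that it is available on Schema for extending a parent schema.
class NonEmptyStdoutChecker(IChecker):
    """Hypothetical checker: fail jobs whose stdout file is empty."""
    _schema = IChecker._schema.inherit_copy()
    _name = 'NonEmptyStdoutChecker'

    def check(self, job):
        stdout_path = os.path.join(job.outputdir, 'stdout')
        return os.path.exists(stdout_path) and os.path.getsize(stdout_path) > 0
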
class GridftpFileIndex(GridFileIndex):
    """
    Data object containing Gridftp file index information.
        - id: gsiftp URI
        - name: basename of the file
        - md5sum: md5 checksum
        - attributes['fpath']: path of the file on local machine

    @author: Hurng-Chun Lee
    @contact: [email protected]
    """
    _schema = Schema(Version(1, 0), gridftp_file_index_schema_datadict)
    _category = 'GridFileIndex'
    _name = 'GridftpFileIndex'

    def __init__(self):
        super(GridftpFileIndex, self).__init__()