def write(self, filename, name='dax'):
    """Generate Pegasus abstract workflow (DAX).

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.

    Returns
    -------
    `Pegasus.ADAG`
        Abstract workflow used by Pegasus' planner.
    """
    dax = ADAG(name)

    # Add files to DAX-level replica catalog.
    catalog = {}
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        f = File(attrs['lfn'])

        # Add physical file names, if any.
        urls = attrs.get('urls')
        if urls is not None:
            urls = urls.split(',')
            sites = attrs.get('sites')
            if sites is None:
                # BUG FIX: the default was built before splitting the URL
                # string, yielding one 'local' per *character*; zip() only
                # truncated the excess by accident.  Build one per URL.
                sites = len(urls) * ['local']
            else:
                sites = sites.split(',')
            for url, site in zip(urls, sites):
                f.addPFN(PFN(url, site))
        catalog[attrs['lfn']] = f
        dax.addFile(f)

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        job = Job(name=attrs['name'], id=task_id)

        # Add job command line arguments replacing any file name with
        # respective Pegasus file object.
        # BUG FIX: the old index()-based substitution replaced only the
        # first occurrence of each LFN; map every argument instead.
        args = attrs.get('args')
        if args:
            job.addArguments(*[catalog.get(arg, arg) for arg in args.split()])

        # Specify job's inputs.
        for file_id in self.graph.predecessors(task_id):
            f = catalog[self.graph.node[file_id]['lfn']]
            job.uses(f, link=Link.INPUT)

        # Specify job's outputs.
        for file_id in self.graph.successors(task_id):
            file_attrs = self.graph.node[file_id]
            f = catalog[file_attrs['lfn']]
            job.uses(f, link=Link.OUTPUT)

            # 'streams' is a bitmask on the file node: bit 0 marks the
            # file as the job's stdout, bit 1 as its stderr.
            streams = file_attrs.get('streams')
            if streams is not None:
                if streams & 1 != 0:
                    job.setStdout(f)
                if streams & 2 != 0:
                    job.setStderr(f)
        dax.addJob(job)

    # Add job dependencies to the DAX: a task depends on every task that
    # produced one of its input files (task -> file -> task in the graph).
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id), child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format.
    with open(filename, 'w') as f:
        dax.writeXML(f)
    # BUG FIX: the docstring promised the ADAG but nothing was returned.
    return dax
# Create an abstract dag cluster = ADAG(config.get('all', 'workflow_name')) input_file = config.get('all', 'input_file') if (input_file == ''): input_file = os.getcwd() else: input_file += '/' + os.getenv('USER') + '/inputs' # Add input file to the DAX-level replica catalog a = File("f.a") a.addPFN( PFN( config.get('all', 'file_url') + input_file + "/f.a", config.get('all', 'file_site'))) cluster.addFile(a) for i in range(1, 3): sleep = Executable(namespace="cluster", name="level" + str(i), version="1.0", os="linux", arch="x86_64", installed=config.getboolean('all', 'executable_installed')) sleep.addPFN( PFN( config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg", config.get('all', 'executable_site'))) sleep.addProfile( Profile(namespace="pegasus",
class PegasusWorkflowModel (WorkflowModel):
    """Workflow model backed by a Pegasus abstract DAG (DAX).

    Wraps a Pegasus ``ADAG`` for a single workflow namespace and tracks the
    files, executables, job transformations, node properties and the
    literal-to-variable map accumulated while the model is built.
    """

    def __init__(self, namespace, wms):
        logger.debug("wms:pegasus:create-workflowmodel: %s" % namespace)
        self.wms = wms
        logger.debug("outputdir: %s", wms.getOutputDir())
        self.adag = ADAG(namespace)
        self.namespace = namespace
        self.files = {}               # fileName -> Pegasus File
        self.exes = {}                # executable name -> Pegasus Executable
        self.propertyMap = {}         # nodeId -> properties object
        self.nodes = {}               # id -> model node
        self.jobTransformations = {}  # jobId -> Transformation
        self.variableMap = {
            'literalToVariable': {},
            'literalToNodeId': {}
        }

    def addToVariableMap(self, literal, variable, id):
        """Record which variable (and node) a literal value originated from."""
        self.variableMap['literalToVariable'][literal] = variable
        self.variableMap['literalToNodeId'][literal] = id
        logger.debug("variablematch: recorded lit=%s var=%s id=%s",
                     literal, variable, id)

    def setProperties(self, nodeId, properties):
        logger.debug("wms:pegasus:dax:setprops: (%s)->(%s)" % (nodeId, properties))
        self.propertyMap[nodeId] = properties

    def getProperties(self, nodeId):
        return self.propertyMap[nodeId]

    def addNode(self, id, node):
        # BUG FIX: the log call referenced an undefined name `properties`,
        # raising NameError whenever debug logging was enabled.
        logger.debug("wms:pegasus:dax:add-node: (%s)->(%s)" % (id, node.getId()))
        self.nodes[id] = node

    def getNode(self, id):
        """Return the node registered under `id`, or None."""
        return self.nodes.get(id)

    def createFile(self, fileName, fileURL=None, site=None):
        """Create (or fetch) a Pegasus File and attach a PFN to it.

        NOTE(review): the original class defined createFile twice; the
        narrow one-argument version was dead code (shadowed by this one)
        and has been removed.
        """
        file = self.getFile(fileName)
        if not file:
            file = File(fileName)
        if not fileURL:
            fileURL = "file://%s/%s" % (self.wms.getOutputDir(), fileName)
            logger.debug("fileurl: %s", fileURL)
        if not site:
            site = "local"
        # A non-empty sequence of URLs may be passed; use its first entry.
        # (basestring: this module targets Python 2.)
        if not isinstance(fileURL, basestring) and len(fileURL) > 0:
            fileURL = fileURL[0]
        logger.debug("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
        file.addPFN(PFN(fileURL, site))
        self.files[fileName] = file
        return file

    def addFileToDAG(self, file):
        if not self.adag.hasFile(file):
            self.adag.addFile(file)

    def removeFileFromDAG(self, file):
        if file in self.adag.files:
            self.adag.files.remove(file)

    def addFile(self, fileName, fileURL=None, site=None):
        """Create a file (see createFile) and register it with the DAG.

        Previously this duplicated createFile's body verbatim; it now
        delegates to it.
        """
        file = self.createFile(fileName, fileURL, site)
        self.adag.addFile(file)
        return file

    def getFile(self, fileName, prefix=""):
        """Look up a previously created file, optionally key-prefixed."""
        key = "%s%s" % (prefix, fileName)
        if key in self.files:
            logger.debug("wms:pegasus:dax:get-file: (%s)" % key)
            return self.files[key]
        return None

    def addExecutable(self, jobId, name, path, version="1.0", exe_os="linux",
                      exe_arch="x86_64", site="local", installed="true"):
        """Register an executable with the DAG and record the matching
        Transformation for `jobId`.

        Raises
        ------
        ValueError
            If `path` is empty.
        """
        e_exe = self.getExecutable(name)
        if not version:
            version = "1.0"
        if not exe_arch:
            exe_arch = "x86_64"
        if not e_exe:
            e_exe = Executable(namespace=self.namespace, name=name,
                               version=version, os=exe_os, arch=exe_arch,
                               installed=installed)
        if not site:
            site = "local"
        if not installed:
            installed = False
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            logger.debug("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], version=[%s], os=[%s], arch=[%s], site=[%s], installed=[%s])" %
                         (name, path, version, exe_os, exe_arch, site, installed))
        # BUG FIX: the empty-path check used to run *after* "file://" was
        # prepended, so it could never fire ("file://" is truthy).
        if not path:
            raise ValueError("empty path for executable: %s at site %s" % (name, site))
        if "://" not in path:
            path = "file://%s" % path
        e_exe.addPFN(PFN(path, site))
        if not installed:
            e_exe.installed = installed
        self.adag.addExecutable(e_exe)
        self.exes[name] = e_exe
        self.jobTransformations[jobId] = Transformation(name, self.namespace, version)
        return e_exe

    def getExecutable(self, name):
        return self.exes.get(name)

    def addSubWorkflow(self, name, transformation=None):
        """Add a sub-workflow (nested DAX) job to the DAG."""
        abstractJob = DAX(name)
        self.adag.addDAX(abstractJob)
        return abstractJob

    def addJob(self, id):
        """Create a Job from the transformation recorded for `id`."""
        transformation = self.jobTransformations[id]
        logger.debug("wms:pegasus:add-job: transformation(%s) jobid(%s)",
                     transformation.name, id)
        abstractJob = Job(name=transformation, id=id)
        self.adag.addJob(abstractJob)
        return abstractJob

    def addProfiles(self, abstractJob, profiles):
        if profiles:
            for astProfile in profiles:
                logger.debug("wms:pegasus:add-profile: (namespace=%s,key=%s,value=%s) to job (%s)",
                             astProfile.namespace, astProfile.key,
                             astProfile.value, abstractJob.name)
                profile = Profile(astProfile.namespace, astProfile.key,
                                  astProfile.value)
                abstractJob.addProfile(profile)

    def addFiles(self, abstractJob, files, link):
        """Attach files to a job; entries are (fileElement, arg, isLeaf) tuples.

        Leaf files are marked transfer=True; duplicates already on the job
        are silently ignored.
        """
        if files:
            for fileKey in files:
                # Renamed from `tuple`, which shadowed the builtin.
                entry = files[fileKey]
                fileElement = entry[0]
                file = fileElement.getDaxNode()
                try:
                    isLeaf = entry[2]
                    if isLeaf:
                        abstractJob.uses(file, link=link, transfer=True)
                    else:
                        abstractJob.uses(file, link=link)
                    arg = entry[1]
                    if arg:
                        abstractJob.addArguments(arg, file)
                except DuplicateError:
                    pass

    def addInputFiles(self, abstractJob, files):
        self.addFiles(abstractJob, files, Link.INPUT)

    def addOutputFiles(self, abstractJob, files):
        self.addFiles(abstractJob, files, Link.OUTPUT)

    def addArguments(self, abstractJob, arguments):
        if arguments:
            abstractJob.addArguments(arguments)

    def addDependency(self, parent, child):
        self.adag.addDependency(Dependency(parent, child))

    def writeExecutable(self, stream):
        """Write the DAX to `stream` and dump the variable map next to it."""
        self.adag.writeXML(stream)
        filename = "%s.%s" % (self.namespace, 'obj')
        filepath = os.path.join(self.wms.getOutputDir(), filename)
        try:
            # `with` replaces the hand-rolled try/finally close.
            with open(filepath, 'w') as output:
                output.write(json.dumps(self.variableMap, indent=3,
                                        sort_keys=True))
                output.flush()
        except IOError:
            traceback.print_stack()

    def getADAG(self):
        return self.adag
# Load the test configuration, supplying defaults for missing options.
config = ConfigParser.ConfigParser({'input_file': '',
                                    'workflow_name': 'horizontal-clustering-test',
                                    'executable_installed': "False",
                                    'clusters_size': "3",
                                    'clusters_maxruntime': "7"})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag
cluster = ADAG(config.get('all', 'workflow_name'))

# Work out where the input file lives: the current directory by default,
# otherwise the configured root extended with the user's inputs directory.
input_file = config.get('all', 'input_file')
if input_file == '':
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a",
             config.get('all', 'file_site')))
cluster.addFile(a)

# Register one clustered executable per level (level1 and level2).
for level in range(1, 3):
    sleep = Executable(namespace="cluster", name="level" + str(level),
                       version="1.0", os="linux", arch="x86",
                       installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(PFN(config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
                     config.get('all', 'executable_site')))
    # Both clustering profiles come straight from the configuration.
    for profile_key, option in (("clusters.size", 'clusters_size'),
                                ("clusters.maxruntime", 'clusters_maxruntime')):
        sleep.addProfile(Profile(namespace="pegasus", key=profile_key,
                                 value=config.get('all', option)))
    cluster.addExecutable(sleep)

# Create four level-1 jobs, each consuming the shared input file and
# carrying an increasing runtime profile.
for idx in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(idx + 1))
    job.addArguments('-i', a)
    job.addProfile(Profile(namespace="pegasus", key="job.runtime",
                           value=str(idx + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob(job)