def updateCatalog (self, masterCat, other):
    newResources = []
    master = GraysonUtil.readFile (masterCat)
    masterLines = master.split ('\n')
    for line in masterLines:
        logger.debug ("  master catalog line: %s", line)
    sub = GraysonUtil.readFile (other)
    subLines = sub.split ('\n')

    # Index master entries by their first whitespace-delimited token (the resource name).
    masterMap = {}
    for line in masterLines:
        parts = line.split (' ')
        if len (parts) > 0:
            resource = parts [0]
            if resource:
                masterMap [resource] = resource
                logger.debug ("  resource : %s", resource)

    # Collect sub-catalog lines whose resource is not already in the master.
    for line in subLines:
        parts = line.split (' ')
        if len (parts) > 0:
            resource = parts [0]
            if resource and resource not in masterMap:
                newResources.append (line)
                logger.debug ("  new resource: %s", resource)

    GraysonUtil.writeFile (masterCat, "%s\n%s" % (master, '\n'.join (newResources)))
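# A minimal usage sketch (paths are hypothetical): merge a per-instance catalog
# into the master, appending only lines whose leading resource token is new.
#
#   self.updateCatalog (masterCat = "outputs/replica-catalog.rc",
#                       other     = "outputs/tmp/replica-catalog.rc")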
def readFlows (self, root):
    fileName = self.flowPath (root)
    text = GraysonUtil.readFile (fileName)
    flows = []
    if text:
        flows = json.loads (text)
    return flows
def get_compile_msgs (request):
    log = request.REQUEST ['log']
    logger.debug ("getting compilation messages: log=%s", log)
    process_username = ViewUtil.get_os_username ()
    text = GraysonUtil.readFileAsString (log)
    text = unicode (text.replace ('\n', '<br/>')) if text else 'An unknown error occurred compiling the model.'
    return ViewUtil.get_text_response (text)
def getExecuteArguments (self, sites, workflow=None, other=[]):
    result = None
    pegasusHome = GraysonUtil.getPegasusHome ()
    args = ["--conf=${outputDir}/${pegasusProperties}",
            "--sites ${sites}",
            "--force",
            "--verbose", "--verbose", "--verbose",
            "--nocleanup",
            "--output local"]
    # If using one of the new data configuration modes, stage through the local site.
    if self.dataConfiguration:
        args.append ("-Dpegasus.data.configuration=%s" % self.dataConfiguration)
        args.append ("--staging-site=local") # TODO - make this more flexible
    for arg in other:
        args.append (arg)
    template = Template (" ".join (args))
    context = {
        "outputDir"         : self.getOutputDir (),
        "pegasusProperties" : PegasusProperties.PEGASUS_PROPERTIES,
        "sites"             : sites
        }
    if workflow:
        context ["outputDax"] = os.path.join (self.getOutputDir (), workflow)
    result = template.substitute (context)
    logger.debug ("generated workflow execute arguments: %s", result)
    return result
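# A minimal usage sketch; the receiver name, site name, and dax file name are
# hypothetical:
#
#   args = wms.getExecuteArguments (sites = "local", workflow = "flow.dax")
#
# string.Template.substitute resolves ${outputDir}, ${pegasusProperties} and
# ${sites}, and raises KeyError if a placeholder has no matching context key.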
def examineWorkflow (self):
    jobstatelog = os.path.join (self.workdir, 'jobstate.log')
    # Use a one-element list so the nested callback can update the scheduler id;
    # a plain assignment inside process() would only rebind a local variable.
    sched_id = [0]
    def process (line):
        finishedTag = 'DAGMAN_FINISHED'
        schedIdTag = 'DAGMAN STARTED'
        index = line.find (schedIdTag)
        if index > -1:
            sched_id [0] = line.split (' ')[-2]
        index = line.find (finishedTag)
        if index > -1:
            self.isComplete = True
    text = GraysonUtil.readFile (jobstatelog, process)
    output = []
    executor = Executor ({
        'condorHome' : os.environ ['CONDOR_HOME'],
        'sched_id'   : sched_id [0]
        })
    executor.execute (command = "${condorHome}/bin/condor_q ${sched_id} -format '%s' JobStatus",
                      pipe = True,
                      processor = lambda n : output.append (n))
    self.isRunning = ''.join (output) == WorkflowStatus.CONDOR_JOB_STATUS__RUNNING
    logger.debug ("WorkflowMonitor - isRunning=%s, isComplete=%s", self.isRunning, self.isComplete)
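# For reference: condor_q's JobStatus attribute is an integer code
# (1 = Idle, 2 = Running, 3 = Removed, 4 = Completed, 5 = Held), so
# WorkflowStatus.CONDOR_JOB_STATUS__RUNNING presumably compares against "2".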
def normalize (self, event):
    if "logdir" in event:
        logdir = event ["logdir"]
        event ["logdir"] = os.path.relpath (logdir, self.workflowRoot)
    return GraysonUtil.relativize (object = event,
                                   keys = [ 'flowId', 'workdir', 'graph' ],
                                   username = event ['clientId'])
def get_flow_file (request):
    username = ''
    if 'addUser' in request.REQUEST:
        user = ViewUtil.get_user (request)
        username = user.username
    path = os.path.join (settings.GRAYSONWEB_WORKFLOW_ROOT, username, request.REQUEST ['path'])
    text = GraysonUtil.readFileAsString (path) if os.path.exists (path) else ''
    logger.debug ("path: %s", path)
    return ViewUtil.get_text_response (text)
def get_job_output (request):
    user = ViewUtil.get_user (request)
    workdir = request.REQUEST ['workdir']
    workflow_id = request.REQUEST ['workflowid']
    job_id = request.REQUEST ['jobid']
    run_id = request.REQUEST ['runid']
    if not run_id:
        run_id = ""
    if not workflow_id:
        workflow_id = ""
    logger.debug ("getting job output: workdir=%s, workflowid: %s, runid: %s, jobid: %s",
                  workdir, workflow_id, run_id, job_id)
    process_username = ViewUtil.get_os_username ()
    workdirPath = GraysonUtil.form_workdir_path (workdir, process_username, workflow_id, run_id)
    workdirPath = ViewUtil.form_workflow_path (user, workdirPath)
    logger.debug ("workdirPath: %s", workdirPath)
    text = ""
    if job_id.startswith ('/'):
        job_id = job_id [1:]
    concrete = os.path.join (workdirPath, job_id)
    logger.debug ("concrete: %s", concrete)
    if os.path.exists (concrete):
        # The job id names a concrete file beneath the work directory.
        text = GraysonUtil.readFile (concrete)
    else:
        # Otherwise treat the job id as a pattern and search the workflow's output files.
        workflow = GridWorkflow (workdirPath)
        outputs = workflow.getOutputFiles (subworkflows = [ workdirPath ], item = job_id)
        jobOutput = None
        if outputs and len (outputs) > 0:
            jobOutput = outputs [0]
        logger.debug ("got job output: %s \n for job_id: %s", jobOutput, job_id)
        if jobOutput:
            text = GraysonUtil.readFileAsString (jobOutput)
    return ViewUtil.get_text_response (text)
def getOutputFiles (self, subworkflows=[], item=None):
    output = []
    path = []
    files = GraysonUtil.getFiles (self.workdir)
    if len (subworkflows) > 0:
        pattern = subworkflows [0]
    else:
        for sub in subworkflows:
            sub = sub.replace (".dax", "")
            path.append (".*?%s" % sub)
        pattern = "".join (path)
    if type (item) == unicode or type (item) == str:
        output = GraysonUtil.findFilesByName (".*?%s/%s" % (pattern, item), files)
    elif type (item) == list:
        # Use a distinct loop variable so it does not shadow the output accumulator.
        for name in item:
            partial = GraysonUtil.findFilesByName (".*?%s/%s" % (pattern, name), files)
            for element in partial:
                output.append (element)
    return output
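# A minimal sketch of the matching above (hypothetical names): with
# subworkflows = [ "work/run0001" ] and item = "job.out", candidate files are
# tested against the regex ".*?work/run0001/job.out"; a list item such as
# [ "a.out", "b.out" ] collects matches for each name in turn.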
def writeMetaDataCatalogs (self):
    # site catalog
    GraysonUtil.writeFile (
        outputPath = os.path.join (self.outputDir, PegasusProperties.SITE_CATALOG),
        data = self.getSiteCatalog ().generateXML ())
    # replica catalog
    GraysonUtil.writeFile (
        outputPath = os.path.join (self.outputDir, PegasusProperties.REPLICA_CATALOG),
        data = self.getReplicaCatalog ().generateRC ())
    # transformation catalog
    GraysonUtil.writeFile (
        outputPath = os.path.join (self.outputDir, PegasusProperties.TRANSFORMATION_CATALOG),
        data = self.getTransformationCatalog ().generateTC ())
    # properties
    GraysonUtil.writeFile (
        outputPath = os.path.join (self.outputDir, PegasusProperties.PEGASUS_PROPERTIES),
        data = self.pegasusProperties.generateProperties (configDir = self.outputDir))
def get_job_status(self, path):
    value = ""
    status = os.path.join(path, "jobstate.log")
    try:
        text = GraysonUtil.readFile(status)
        text = text.split("\n")
        if len(text) > 2:
            # Only the last few lines can carry the DAGMan exit record.
            for line in text[len(text) - 3 :]:
                logger.debug("line: %s", line)
                if "DAGMAN_FINISHED" in line and "0 ***" in line:
                    value = "0"
                    break
                elif "DAGMAN_FINISHED" in line and "1 ***" in line:
                    value = "1"
                    break
    except IOError:
        pass
    return value
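# A minimal usage sketch (the run directory path is hypothetical). An empty
# return value means no DAGMAN_FINISHED record was found (e.g. still running);
# "0" means the DAG finished with exit code 0, "1" with exit code 1:
#
#   status = self.get_job_status("work/user/pegasus/flow/20120101T000000-0000")
#   finished_ok = status == "0"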
def delete_run (request):
    response = { "status" : "ok" }
    workdir = request.REQUEST ["workdir"]
    workflowId = request.REQUEST ["workflowid"]
    runId = request.REQUEST ["runid"]
    workflowName = os.path.basename (workflowId).replace (".dax", "")
    process_username = ViewUtil.get_os_username ()
    workdirPath = workdir
    if runId:
        workdirPath = GraysonUtil.form_workdir_path (workdir, process_username, workflowName, runId)
    user = ViewUtil.get_user (request)
    workdirPath = ViewUtil.form_workflow_path (user, workdirPath)
    logger.debug ("DELETING workflow run: %s", workdirPath)
    try:
        shutil.rmtree (workdirPath)
    except Exception:
        logger.exception ("exception deleting %s", workdirPath)
        traceback.print_exc ()
        response ["status"] = "fail"
    return ViewUtil.get_json_response (response)
def findFlows(self, request):
    user = ViewUtil.get_user(request)
    app_username = user.username
    os_username = ViewUtil.get_os_username()
    logger.debug("connect_flows:user: %s", user.username)
    workflowPath = ViewUtil.form_workflow_path(user)
    files = GraysonUtil.getDirs(of_dir=workflowPath)
    workdirs = GraysonUtil.findFilesByName(r".*?\.grayson_upk$", files)
    response = []
    for workdir in workdirs:
        logger.debug("connect_flows: workdir: %s", workdir)
        conf = None
        try:
            conf = GraysonUtil.readJSONFile(os.path.join(workdir, "grayson.conf"))
        except IOError:
            pass
        if conf:
            outputFile = conf["output-file"]
            files = GraysonUtil.getFiles(workdir, recursive=False)
            runs = GraysonUtil.getDirs(
                os.path.join(workdir, "work", os_username, "pegasus", outputFile.replace(".dax", ""))
            )
            runs.sort()

            def normalize(line):
                return GraysonUtil.getUserRelativePath(line, app_username)

            # Pegasus run directories are timestamped, e.g. 20120101T000000-0000.
            runDirs = GraysonUtil.findFilesByName(r"[0-9]{8}T[0-9]{6}\-[0-9]{4}$", runs)
            item = {
                "flow": normalize(workdir),
                "id": outputFile,
                "runs": map(lambda p: " ".join([os.path.basename(p), self.get_job_status(p)]), runDirs),
                "graphs": GraysonUtil.findFilesByName(".*?.graphml$", files),
                "daxen": map(normalize, GraysonUtil.findFilesByName(r"[a-zA-Z0-9\._\-]+\.dax$", files)),
            }
            response.append(item)
    response.sort(key=lambda k: k["id"])
    return response
def configureLocal (self):
    pegasusLocation = GraysonUtil.getPegasusHome ()
    globusLocation = os.getenv ("GLOBUS_LOCATION")
    if not globusLocation:
        raise ValueError ("GLOBUS_LOCATION must be defined")
    self.addEntry (
        "local",
        {
            "architecture"                : "x86_64", # TODO: inspect environment
            "scratchFileServerProtocol"   : "file",
            "scratchFileServerMountPoint" : "%s/work/outputs" % self.wms.getOutputDir (),
            "scratchInternalMountPoint"   : "%s/work/outputs" % self.wms.getOutputDir (),
            "storageFileServerProtocol"   : "file",
            "storageFileServerMountPoint" : "%s/work/outputs" % self.wms.getOutputDir (),
            "storageMountPoint"           : "%s/work/outputs" % self.wms.getOutputDir (),
            "storageInternalMountPoint"   : "%s/work/outputs" % self.wms.getOutputDir (),
            "pegasusLocation"             : pegasusLocation,
            "globusLocation"              : globusLocation
        })
def get_flow_events (request):
    workdir = request.REQUEST ["workdir"]
    workflowId = request.REQUEST ["workflowid"]
    runId = request.REQUEST ["runid"]
    dax = request.REQUEST ["dax"] if "dax" in request.REQUEST else None
    if not dax:
        dax = os.path.basename (workflowId)
    logger.debug ("dax: %s", dax)
    workflowName = os.path.basename (workflowId).replace (".dax", "")
    process_username = ViewUtil.get_os_username ()
    workdirPath = GraysonUtil.form_workdir_path (workdir, process_username, workflowName, runId)
    user = ViewUtil.get_user (request)
    workdirPath = ViewUtil.form_workflow_path (user, workdirPath)
    logger.debug ("launching monitor: user: %s, workdir: %s, workflowId: %s, runId: %s, dax: %s",
                  user.username, workdirPath, workflowId, runId, dax)
    workflowMonitorDatabase = WorkflowMonitorDatabase ()
    WorkflowMonitor.ensureRunning (workflowRoot = settings.GRAYSONWEB_WORKFLOW_ROOT,
                                   amqpSettings = settings.AMQP_SETTINGS,
                                   eventBufferSize = settings.EVENT_BUFFER_SIZE)
    workflowMonitorDatabase.subscribeToWorkflow (
        settings.GRAYSONWEB_WORKFLOW_ROOT,
        {
            "username"   : user.username,
            "workflowId" : workflowId,
            "workdir"    : workdirPath,
            "daxen"      : dax.split (','),
            "buffer"     : 0
        })
    return ViewUtil.get_json_response ({ "status" : "ok" })
def writeFlows (self, root, flows):
    fileName = self.flowPath (root)
    workflowText = json.dumps (flows, indent=3, sort_keys=True)
    logger.debug (workflowText)
    GraysonUtil.writeFile (fileName, workflowText)
def getFileLines (self, fileName):
    return GraysonUtil.readFile (fileName).split ('\n')
def get_workflow (request):
    workflow = request.REQUEST ['workflow']
    user = ViewUtil.get_user (request)
    workflowPath = ViewUtil.form_workflow_path (user, workflow)
    text = GraysonUtil.readFileAsString (workflowPath)
    return HttpResponse (text, GraysonWebConst.MIME_XML, 200, GraysonWebConst.MIME_XML)
def getfile (request):
    # TODO: construct path dynamically and selectively for security purposes.
    return ViewUtil.get_text_response (GraysonUtil.readFile (request.REQUEST ['file']))
def put_file (request):
    path = request.REQUEST ["path"]
    content = request.REQUEST ["content"]
    logger.debug ("writing file: %s", path)
    GraysonUtil.writeFile (path, content)
    return ViewUtil.get_json_response ({ "status" : "ok" })
def detectEventDetails (self, event, logdir, status):
    jobId = event.name
    aux = { 'sched_id' : event.sched_id }
    if not logdir:
        return aux
    if jobId.startswith ('stage_in') or jobId.startswith ('stage_out'):
        # Transfer job inputs are written four lines per transfer:
        # source site, source file, destination site, destination file.
        path = os.path.join (logdir, "%s.in" % jobId)
        text = GraysonUtil.readFile (path)
        lines = text.split ('\n')
        if lines:
            transfers = []
            pair = 0
            while len (lines) > ((pair * 4) + 4):
                offset = pair * 4
                transfer = {
                    "sourceSite" : lines [offset + 0],
                    "sourceFile" : lines [offset + 1],
                    "destSite"   : lines [offset + 2],
                    "destFile"   : lines [offset + 3],
                    }
                pair += 1
                execution = self.getExecutionData (logdir, jobId)
                if execution:
                    # Extract transfer statistics from the kickstart stdout.
                    stdout = execution ['stdout']
                    transferred = GraysonUtil.getPrecompiledPattern (self.transferBytesPattern, stdout)
                    duration = GraysonUtil.getPrecompiledPattern (self.transferDurationPattern, stdout)
                    rateUp = GraysonUtil.getPrecompiledPattern (self.transferRateUpPattern, stdout)
                    rateDown = GraysonUtil.getPrecompiledPattern (self.transferRateDownPattern, stdout)
                    transfer ["bytes"] = transferred
                    transfer ["time"] = duration
                    transfer ["up"] = rateUp
                    transfer ["down"] = rateDown
                transfers.append (transfer)
            aux ['transfer'] = transfers
    if status == WorkflowStatus.STATUS_FAILED:
        # Attach the tail of stdout/stderr for failed jobs.
        execution = self.getExecutionData (logdir, jobId)
        if execution:
            aux ["detail"] = {
                "stdout" : GraysonUtil.ceilingString (execution ["stdout"], maxLength=500, fromEnd=True),
                "stderr" : GraysonUtil.ceilingString (execution ["stderr"], maxLength=500, fromEnd=True)
                }
    dagLog = glob.glob (os.path.join (logdir, '*.dag.dagman.out'))
    dax = glob.glob (os.path.join (logdir, 'dax', '*.dax'))
    if len (dagLog) > 0 or len (dax) > 0:
        log = {}
        aux ['log'] = log
        if len (dagLog) > 0:
            log ['daglog'] = os.path.basename (dagLog [0])
        if len (dax) > 0:
            log ['dax'] = os.path.basename (dax [0])
    return aux
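# A minimal sketch of the four-line grouping parsed above, on hypothetical
# data; each record is (sourceSite, sourceFile, destSite, destFile):
#
#   lines = [ "local", "file:///tmp/a.dat", "site-x", "gsiftp://host/a.dat", "" ]
#   transfers = [ { "sourceSite" : lines [i],
#                   "sourceFile" : lines [i + 1],
#                   "destSite"   : lines [i + 2],
#                   "destFile"   : lines [i + 3] }
#                 for i in range (0, len (lines) - 4, 4) ]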
def updateCatalog (self, master, other):
    masterText = GraysonUtil.readFile (master)
    text = GraysonUtil.readFile (other)
    GraysonUtil.writeFile (master, "%s\n%s" % (masterText, text))
def execute (self, context={}):
    operatorContext = context ['operator']
    method = operatorContext ["method"]
    inputFile = operatorContext ["input"]
    variable = operatorContext ["variable"]
    index = operatorContext ["index"]
    flow = operatorContext ["flow"]
    version = operatorContext ["version"]
    instanceArgs = operatorContext ["instanceArgs"]
    mapType = context ["mapType"]
    outputBasename = context ["outputName"]
    modelPath = context ["modelPath"]
    outputDir = context ["outputDir"]
    contextModels = context ["contextModels"]
    sites = context ["sites"]
    appHome = context ["appHome"]
    graysonHome = context ["graysonHome"]

    tmpOutputDir = os.path.join (outputDir, "tmp") # avoid overwriting replica catalog.
    contextModels = contextModels.split (os.pathsep)
    models = [ flow ]
    for model in contextModels:
        models.append (model)
    main_flow_name = os.path.join (outputDir, "%s.dax" % flow.replace (".graphml", ""))
    namespace = flow.replace (".graphml", "")
    flowContext = { "namespace" : outputBasename }
    template = Template (self.header)
    text = [ template.substitute (flowContext) ]
    replicaText = []

    # Define the master replica catalog up front so it exists for every branch.
    replicaCatalogName = "replica-catalog.rc"
    masterRC = os.path.join (outputDir, replicaCatalogName)

    def compileInstance (value, c):
        ''' Compile one sub-dax for the given mapped value and register it. '''
        outputname = "%s.%s.dax" % (outputBasename, c)
        definitions = {
            variable               : value,
            index                  : "%s" % c,
            Operator.DYNAMIC_INDEX : "%s" % c,
            "appHome"              : appHome
            }
        logger.debug ("dynamic-map: invoking compiler")
        try:
            output = open (os.path.join (outputDir, outputname), 'w')
            try:
                GraysonCompiler.compile (models = models,
                                         output = output,
                                         modelPath = modelPath.split (os.pathsep),
                                         namespace = namespace,
                                         version = None,
                                         logLevel = "debug",
                                         modelProperties = definitions,
                                         outputdir = tmpOutputDir,
                                         sites = sites,
                                         toLogFile = os.path.join (outputDir, "log.txt"))
            finally:
                if output:
                    output.close ()
        except IOError as e:
            logger.error ("Encountered IOError %s compiling subdax %s", e.__str__ (), outputname)
            raise e
        replicaText.append ('%s file://%s/%s pool="local"' % (outputname, outputDir, outputname))
        subdaxTemplate = Template (self.subdax)
        flowContext ['c'] = c
        flowContext ['outputname'] = outputname
        flowContext ['instanceArgs'] = instanceArgs
        flowContext ['sites'] = "--sites %s" % sites if instanceArgs == "" else ""
        text.append (subdaxTemplate.substitute (flowContext))
        self.updateCatalog (master = masterRC, other = os.path.join (tmpOutputDir, replicaCatalogName))

    if mapType == 'tar':
        # Map each member of a gzipped tar archive to one sub-dax instance.
        tar = tarfile.open (inputFile, "r:gz")
        members = tar.getmembers ()
        c = 0
        for archiveMember in members:
            compileInstance (archiveMember.name, c)
            c += 1
    elif mapType == 'list':
        # Map each line of a text file to one sub-dax instance.
        with open (inputFile, "r") as stream:
            c = 0
            for line in stream:
                compileInstance (line, c)
                c += 1

    text.append (self.footer)
    mainFlowContent = ''.join (text)
    GraysonUtil.writeFile (outputPath = os.path.join (outputDir, main_flow_name),
                           data = mainFlowContent)
    logger.debug ("dynamic-map: writing output dax: %s", mainFlowContent)
    replicaText.append ('%s file://%s pool="local"' % (os.path.basename (main_flow_name), main_flow_name))
    GraysonUtil.writeFile (os.path.join (outputDir, "tmp", replicaCatalogName), '\n'.join (replicaText))
    self.updateCatalog (master = masterRC, other = os.path.join (tmpOutputDir, replicaCatalogName))

    transformationCatalogName = "transformation-catalog.tc"
    masterTC = os.path.join (outputDir, transformationCatalogName)
    self.updateCatalog (master = masterTC, other = os.path.join (tmpOutputDir, transformationCatalogName))
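# For reference, each replica catalog line generated above uses Pegasus's
# plain-text RC format; with hypothetical values it looks like:
#
#   flow.0.dax file:///abs/path/outputs/flow.0.dax pool="local"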