def register_job(self, id):
    """Announce a new job to the dashboard and return its identifiers.

    The monitoring and synchronisation ids for ``id`` are obtained from
    ``self.generate_ids`` and the job's static metadata is sent with
    ``apmonSend`` under this task's id.

    Returns:
        The ``(monitorid, syncid)`` pair produced by ``generate_ids``.
    """
    monitorid, syncid = self.generate_ids(id)

    job_metadata = {
        'taskId': self._taskid,
        'jobId': monitorid,
        'sid': syncid,
        'broker': 'condor',
        'bossId': str(id),
        'SubmissionType': 'Direct',
        'TargetSE': 'cmseos.fnal.gov',
        'localId': '',
        'tool': 'cmsconnect',
        'JSToolVersion': '3.2.1',
        # Falls back to an empty string when HOSTNAME is not exported.
        'tool_ui': os.environ.get('HOSTNAME', ''),
        'scheduler': 'condor',
        'GridName': '/CN=' + self.__fullname,
        'ApplicationVersion': self.__cmssw_version,
        'taskType': 'analysis',
        'vo': 'cms',
        'user': self.__username,
        'CMSUser': self.__username,
        # The dataset path is not reported for individual jobs:
        # 'datasetFull': self.datasetPath,
        'resubmitter': 'user',
        'exe': self.__executable,
    }
    apmonSend(self._taskid, monitorid, job_metadata)

    return monitorid, syncid
def register_job(self, id):
    """Announce a new job to the dashboard and return its identifiers.

    The monitoring and synchronisation ids for ``id`` are obtained from
    ``self.generate_ids`` and the job's static metadata is sent with
    ``apmonSend`` under this task's id.

    The reported ``ApplicationVersion`` is the last path component of the
    ``LOCALRT`` environment variable, or an empty string when that variable
    is unset or empty.

    Returns:
        The ``(monitorid, syncid)`` pair produced by ``generate_ids``.
    """
    monitorid, syncid = self.generate_ids(id)

    # os.path.normpath(None) raises, so a missing LOCALRT must not reach it;
    # fall back to '' exactly like the HOSTNAME lookup below does.
    localrt = os.environ.get('LOCALRT')
    if localrt:
        application_version = os.path.basename(os.path.normpath(localrt))
    else:
        application_version = ''

    apmonSend(self._taskid, monitorid, {
        'taskId': self._taskid,
        'jobId': monitorid,
        'sid': syncid,
        'broker': 'condor',
        'bossId': str(id),
        'SubmissionType': 'Direct',
        'TargetSE': 'ndcms.crc.nd.edu',
        'localId': '',
        'tool': 'lobster',
        'JSToolVersion': '3.2.1',
        'tool_ui': os.environ.get('HOSTNAME', ''),
        'scheduler': 'work_queue',
        'GridName': '/CN=' + self.__fullname,
        'ApplicationVersion': application_version,
        'taskType': 'analysis',
        'vo': 'cms',
        'CMSUser': self.__username,
        'user': self.__username,
        # The dataset path is not reported for individual jobs:
        # 'datasetFull': self.datasetPath,
        'resubmitter': 'user',
        'exe': 'cmsRun',
    })
    return monitorid, syncid
def update_job(self, id, status):
    """Report a status change of job ``id`` to the dashboard.

    Args:
        id: Job identifier understood by ``self.generate_ids``.
        status: Value sent as ``StatusValue``.

    The message carries the current UTC time formatted as
    ``YYYY-MM-DD_HH:MM:SS`` in ``StatusEnterTime``.
    """
    monitorid, syncid = self.generate_ids(id)

    # Spelled out instead of "%F_%T": those shorthands are C-library
    # extensions that Python does not guarantee on every platform, while
    # this expansion yields the same text everywhere.
    entered_at = "{0:%Y-%m-%d_%H:%M:%S}".format(datetime.datetime.utcnow())

    apmonSend(self._taskid, monitorid, {
        'taskId': self._taskid,
        'jobId': monitorid,
        'sid': syncid,
        'StatusValueReason': '',
        'StatusValue': status,
        'StatusEnterTime': entered_at,
        'StatusDestination': 'ndcms.crc.nd.edu',
        'RBname': 'condor',
    })
def update_job(self, id, status):
    """Report a status change of job ``id`` to the dashboard.

    Args:
        id: Job identifier understood by ``self.generate_ids``.
        status: Value sent as ``StatusValue``.

    ``StatusEnterTime`` is the current UTC time formatted as
    ``YYYY-MM-DD_HH:MM:SS``; ``StatusDestination`` is always ``'unknown'``.
    """
    monitorid, syncid = self.generate_ids(id)

    # "%F" and "%T" are platform-specific strftime shorthands; the explicit
    # directives below produce the identical string portably.
    timestamp = "{0:%Y-%m-%d_%H:%M:%S}".format(datetime.datetime.utcnow())

    status_message = {
        'taskId': self._taskid,
        'jobId': monitorid,
        'sid': syncid,
        'StatusValueReason': '',
        'StatusValue': status,
        'StatusEnterTime': timestamp,
        'StatusDestination': 'unknown',
        'RBname': 'condor',
    }
    apmonSend(self._taskid, monitorid, status_message)
def sendToML(self, params, jobid=None, taskid=None):
    """Send ``params`` to MonALISA under the resolved task and job ids.

    Each id is resolved independently; later sources override earlier ones:

    1. the literal ``'unknown'``,
    2. the instance attribute (``self.taskId`` / ``self.jobId``) if not None,
    3. the ``'taskId'`` / ``'jobId'`` entry of ``params`` if the key exists,
    4. the explicit ``taskid`` / ``jobid`` argument if not None.

    Args:
        params: Mapping of values to send; passed to ``apmonSend`` unchanged.
        jobid: Optional job id overriding every other source.
        taskid: Optional task id overriding every other source.
    """
    # Figure out taskId and jobId
    taskId = 'unknown'
    jobId = 'unknown'

    # taskId
    if self.taskId is not None:
        taskId = self.taskId
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if 'taskId' in params:
        taskId = params['taskId']
    if taskid is not None:
        taskId = taskid

    # jobId
    if self.jobId is not None:
        jobId = self.jobId
    if 'jobId' in params:
        jobId = params['jobId']
    if jobid is not None:
        jobId = jobid

    # Send to Monalisa
    apmonSend(taskId, jobId, params)
def register_run(self):
    """Send the task-level ``TaskMeta`` record to the dashboard.

    The record is sent with ``apmonSend`` using the job id ``'TaskMeta'``,
    after which ``self.free()`` is called.

    The reported ``ApplicationVersion`` is the last path component of the
    ``LOCALRT`` environment variable, or an empty string when that variable
    is unset or empty.
    """
    # os.path.normpath(None) raises, so guard against a missing LOCALRT and
    # use the same '' fallback as the HOSTNAME lookup.
    localrt = os.environ.get('LOCALRT')
    if localrt:
        application_version = os.path.basename(os.path.normpath(localrt))
    else:
        application_version = ''

    apmonSend(self._taskid, 'TaskMeta', {
        'taskId': self._taskid,
        'jobId': 'TaskMeta',
        'tool': 'lobster',
        'tool_ui': os.environ.get('HOSTNAME', ''),
        'SubmissionType': 'direct',
        'JSToolVersion': '3.2.1',
        'scheduler': 'work_queue',
        'GridName': '/CN=' + self.__fullname,
        'ApplicationVersion': application_version,
        'taskType': 'analysis',
        'vo': 'cms',
        'CMSUser': self.__username,
        'user': self.__username,
        'datasetFull': '',
        'resubmitter': 'user',
        'exe': 'cmsRun',
    })
    self.free()
def register_run(self):
    """Send the task-level ``TaskMeta`` record to the dashboard.

    The record is sent with ``apmonSend`` using the job id ``'TaskMeta'``,
    after which ``self.free()`` is called.
    """
    task_meta = {
        'taskId': self._taskid,
        'jobId': 'TaskMeta',
        'tool': 'cmsconnect',
        # Falls back to an empty string when HOSTNAME is not exported.
        'tool_ui': os.environ.get('HOSTNAME', ''),
        'SubmissionType': 'direct',
        'JSToolVersion': '3.2.1',
        'scheduler': 'condor',
        'GridName': '/CN=' + self.__fullname,
        'ApplicationVersion': self.__cmssw_version,
        'taskType': 'analysis',
        'vo': 'cms',
        'user': self.__username,
        'CMSUser': self.__username,
        'datasetFull': '',
        'resubmitter': 'user',
        'exe': self.__executable,
    }
    apmonSend(self._taskid, 'TaskMeta', task_meta)

    self.free()
def sendToML(self, params, jobid=None, taskid=None):
    """Forward ``params`` to MonALISA, working out which ids to use.

    For the task id and the job id alike, the value comes from the last of
    these sources that applies: the default ``'unknown'``, the instance
    attribute (``self.taskId`` / ``self.jobId``) when it is not None, the
    ``'taskId'`` / ``'jobId'`` key of ``params`` when present, and finally
    the ``taskid`` / ``jobid`` argument when it is not None.

    Args:
        params: Mapping of values to send; handed to ``apmonSend`` as is.
        jobid: Optional explicit job id (highest priority).
        taskid: Optional explicit task id (highest priority).
    """
    # Figure out taskId and jobId
    taskId = 'unknown'
    jobId = 'unknown'

    # taskId
    if self.taskId is not None:
        taskId = self.taskId
    # Membership test instead of dict.has_key(): the latter was removed in
    # Python 3, the former behaves the same on both major versions.
    if 'taskId' in params:
        taskId = params['taskId']
    if taskid is not None:
        taskId = taskid

    # jobId
    if self.jobId is not None:
        jobId = self.jobId
    if 'jobId' in params:
        jobId = params['jobId']
    if jobid is not None:
        jobId = jobid

    # Send to Monalisa
    apmonSend(taskId, jobId, params)
def reportDash(self, jobReport):
    """Build the dashboard I/O report for ``jobReport`` and send it.

    The report contains the monitor ids, one ``io_<protocol>_<action>``
    entry per storage statistic (value ``"<size>_<time>"``, with ``'NULL'``
    standing in for a component that could not be read as a number) and
    the three throughput figures returned by ``self.storageStat``.  It is
    sent with ``apmonSend`` followed by ``apmonFree``.

    Args:
        jobReport: Job report handed to ``self.n_of_events`` and
            ``self.storageStat``.

    Returns:
        None.
    """
    # The event report is not merged into the dashboard report (that step is
    # disabled), but the call is kept in case it has side effects.
    self.n_of_events(jobReport)
    storage_report, throughput_report = self.storageStat(jobReport)

    dashboard_report = {}
    dashboard_report['MonitorID'] = self.MonitorID
    dashboard_report['MonitorJobID'] = self.MonitorJobID

    # Errors that mean "this statistic is missing or not numeric".  Catching
    # only these keeps KeyboardInterrupt/SystemExit from being swallowed, as
    # the previous bare ``except:`` did.
    unreadable = (IndexError, KeyError, TypeError, ValueError)

    # One dashboard key per protocol/action pair: io_<protocol>_<action>
    for protocol in storage_report.keys():
        for action in storage_report[protocol].keys():
            try:
                size = float(storage_report[protocol][action][2])
            except unreadable:
                size = 'NULL'
            try:
                # NOTE(review): presumably converts milliseconds to seconds
                # -- confirm against storageStat.
                elapsed = float(storage_report[protocol][action][3]) / 1000
            except unreadable:
                elapsed = 'NULL'
            dashboard_report['io_' + protocol + '_' + action] = str(size) + '_' + str(elapsed)

    if self.debug:
        for key in sorted(dashboard_report):
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print('%s = %s' % (key, dashboard_report[key]))

    # IO throughput information
    dashboard_report['io_read_throughput'] = throughput_report['readThr']
    dashboard_report['io_write_throughput'] = throughput_report['writeThr']
    dashboard_report['io_netAvg_throughput'] = throughput_report['avgNetThr']

    # send to DashBoard
    apmonSend(self.MonitorID, self.MonitorJobID, dashboard_report)
    apmonFree()

    if self.debug == 1:
        print(dashboard_report)

    return
monitorid = os.environ.get('Dashboard_monitorid') syncid = os.environ.get('Dashboard_syncid') #Replace MetaId by Dashboard_id _jobid = str(os.environ.get('Dashboard_Id')) monitorid = monitorid.replace('MetaID', _jobid) syncid = syncid.replace('MetaID', _jobid) # Start Dashboard Report hostname = str(socket.gethostname()) parameters = { 'ExeStart': executable, 'SyncCE': str(ce), 'SyncGridJobId': syncid, 'WNHostName': hostname } apmonSend(taskid, monitorid, parameters) apmonFree() ############### # Execute job ############### #Add PWD to PATH environment myenv = os.environ myenv['PATH'] += ':{0}'.format(os.environ.get('PWD')) t0 = os.times() p = subprocess.Popen(executable, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True,
if finit == default and line[26:36] == "Initiating": finit = int(datetime.strptime(line[0:20], "%d-%b-%Y %X").strftime('%s')) elif fopen == default and line[26:38] == "Successfully": fopen = int(datetime.strptime(line[0:20], "%d-%b-%Y %X").strftime('%s')) elif first == default and line[21:24] == "1st": first = int(datetime.strptime(line[-29:-9], "%d-%b-%Y %X").strftime('%s')) return (finit, fopen, first) (config, data) = sys.argv[1:] with open(data, 'rb') as f: (args, files, lumis, stageout, server, taskid, monitorid, syncid, want_summary) = pickle.load(f) apmonSend(taskid, monitorid, { 'ExeStart': 'cmsRun', 'SyncCE': 'ndcms.crc.nd.edu', 'SyncGridJobId': syncid, 'WNHostName': os.environ.get('HOSTNAME', '') }) apmonFree() configfile = config.replace(".py", "_mod.py") shutil.copy2(config, configfile) env = os.environ env['X509_USER_PROXY'] = 'proxy' edit_process_source(configfile, files, lumis, want_summary) # exit_code = subprocess.call('python "{0}" {1}'.format(configfile, ' '.join(map(repr, args))), shell=True, env=env) exit_code = subprocess.call('cmsRun -j report.xml "{0}" {1} > cmssw.log 2>&1'.format(configfile, ' '.join(map(repr, args))), shell=True, env=env)
print "---"
# Store the current time in slot 3 of the task timing info.
# NOTE(review): strftime('%s') is presumably seconds since the epoch (a C
# library extension, not a documented Python directive) -- confirm.
data['task timing info'][3] = int(datetime.now().strftime('%s'))

#
# Start proper CMSSW job
#
# Tell the dashboard that cmsRun is starting, then release the ApMon handle.
parameters = {
    'ExeStart': 'cmsRun',
    'SyncCE': 'ndcms.crc.nd.edu',
    'SyncGridJobId': syncid,
    'WNHostName': os.environ.get('HOSTNAME', '')
}
apmonSend(taskid, monitorid, parameters)
apmonFree()

# Echo the exact shell command before running it.
print "--- Running cmsRun"
print 'cmsRun -j report.xml "{0}" {1} > cmssw.log 2>&1'.format(pset_mod, ' '.join([repr(str(arg)) for arg in args]))
print "---"
# Run cmsRun through the shell; stdout and stderr go to cmssw.log and the
# framework job report to report.xml.  The exit status is kept in `data`.
data['job exit code'] = subprocess.call(
    'cmsRun -j report.xml "{0}" {1} > cmssw.log 2>&1'.format(pset_mod, ' '.join([repr(str(arg)) for arg in args])),
    shell=True,
    env=env)

# Tell the dashboard that cmsRun has finished.
apmonSend(taskid, monitorid, {'ExeEnd': 'cmsRun'})

# cputime stays 0 unless extract_info succeeds.
# NOTE(review): check_execution(data, 190) presumably records code 190 in
# `data` if the guarded block fails -- confirm against its definition.
cputime = 0
with check_execution(data, 190):
    cputime = extract_info(config, data, 'report.xml')
if not executable: executable = "Unknown" # todo: Handle case when taskid, monitorid, syncid are not present taskid = os.environ.get("Dashboard_taskid") monitorid = os.environ.get("Dashboard_monitorid") syncid = os.environ.get("Dashboard_syncid") # Replace MetaId by Dashboard_id _jobid = str(os.environ.get("Dashboard_Id")) monitorid = monitorid.replace("MetaID", _jobid) syncid = syncid.replace("MetaID", _jobid) # Start Dashboard Report hostname = str(socket.gethostname()) parameters = {"ExeStart": executable, "SyncCE": str(ce), "SyncGridJobId": syncid, "WNHostName": hostname} apmonSend(taskid, monitorid, parameters) apmonFree() ############### # Execute job ############### # Add PWD to PATH environment myenv = os.environ myenv["PATH"] += ":{0}".format(os.environ.get("PWD")) t0 = os.times() p = subprocess.Popen(executable, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env=myenv) out, err = p.communicate() t1 = os.times()
def popularityInfos(self, jobReport):
    """Build the data-popularity report for ``jobReport`` and send it.

    The expected input and parent files are read from the ``key=value``
    lines of the file at ``self.inputInfos`` (keys ``inputBlocks``,
    ``inputFiles`` and ``parentFiles``; file lists are ``;``-separated).
    They are compared with ``jobReport.inputFiles``, and every file is
    described by a ``::``-separated record whose second field is

    * ``1`` for a file present in the job report,
    * ``0`` for an expected file named in the first job-report error,
    * ``2`` for any other expected file.

    File names are reported relative to a common base name, which is sent
    separately as ``Basename`` / ``BasenameParent``.  The report is sent with
    ``apmonSend`` followed by ``apmonFree`` and then printed.

    Args:
        jobReport: Job report providing ``inputFiles`` (mappings with
            ``LFN``, ``PFN``, ``FileType`` and ``Runs``) and ``errors``.

    Returns:
        None.
    """

    def common_base(paths):
        # Shared prefix of several paths, or the directory (with trailing
        # slash) of a single path; '' when there are no paths.
        if len(paths) > 1:
            return os.path.commonprefix(paths)
        if len(paths) == 1:
            return "%s/" % os.path.dirname(paths[0])
        return ''

    def relative_name(path, prefix):
        # Same result as ``path.split(prefix)[1]`` whenever that works; the
        # full path otherwise.  str.split('') raises ValueError and a path
        # lacking the prefix has no element [1].
        if prefix and prefix in path:
            return path.split(prefix)[1]
        return path

    def describe_read_files(read_files, pending):
        # Build the "files" and "lumis" strings for files found in the job
        # report and drop each of them from ``pending`` (modified in place).
        files_string = ''
        lumis_string = ''
        for count, (name, attrs) in enumerate(read_files.items(), 1):
            # A reported file may be absent from the expected list.
            if name in pending:
                pending.remove(name)
            files_string += '%s::%d::%s::%s::%d;' % (name, 1, attrs[0], attrs[1], count)
            runs = attrs[2]
            # Only the first run and its lumi sections are reported.
            # list(...) keeps the indexing valid on Python 3 as well.
            first_run = list(runs.keys())[0]
            first_lumis = list(runs.values())[0]
            lumis_string += '%s::%s::%d;' % (first_run, self.makeRanges(first_lumis), count)
        return files_string, lumis_string

    report_dict = {}
    inputList = []
    inputParentList = []
    report_dict['inputBlocks'] = ''

    # --- expected files, as listed in the input-info file ------------------
    if os.path.exists(self.inputInfos):
        # ``with`` closes the file even if a line cannot be parsed.
        with open(self.inputInfos, 'r') as info_file:
            for line in info_file:
                if "inputBlocks" in line:
                    report_dict['inputBlocks'] = line.split("=")[1].strip()
                if "inputFiles" in line:
                    inputList = line.split("=")[1].strip().split(";")
                if "parentFiles" in line:
                    inputParentList = line.split("=")[1].strip().split(";")

    # An empty value splits into [''], which means "no files".
    if inputList == ['']:
        inputList = []
    if inputParentList == ['']:
        inputParentList = []

    basename = common_base(inputList)
    basenameParent = common_base(inputParentList)

    # --- files actually present in the job report --------------------------
    readFile = {}
    readFileParent = {}
    for inputFile in jobReport.inputFiles:
        fileAccess = 'Local'
        if inputFile.get("PFN").find('xrootd') >= 0:
            fileAccess = 'Remote'
        attributes = (inputFile.get("FileType"), fileAccess, inputFile.get("Runs"))
        lfn = inputFile['LFN']
        if lfn.find(basename) >= 0:
            readFile[relative_name(lfn, basename)] = attributes
        else:
            # Was assigned to the undefined name ``readParentFile``, which
            # raised NameError for every parent file.
            readFileParent[relative_name(lfn, basenameParent)] = attributes

    cleanedinputList = [relative_name(name, basename) for name in inputList]
    cleanedParentList = [relative_name(name, basenameParent) for name in inputParentList]

    inputString, LumisString = describe_read_files(readFile, cleanedinputList)
    inputParentString, LumisParentString = describe_read_files(readFileParent, cleanedParentList)

    # --- expected files that are missing from the job report ---------------
    for name in cleanedinputList:
        if len(jobReport.errors) and jobReport.errors[0]["Description"].find(name) >= 0:
            fileAccess = 'Local'
            if jobReport.errors[0]["Description"].find('xrootd') >= 0:
                fileAccess = 'Remote'
            inputString += '%s::%d::%s::%s::%s;' % (name, 0, 'Unknown', fileAccess, 'Unknown')
        else:
            inputString += '%s::%d::%s::%s::%s;' % (name, 2, 'Unknown', 'Unknown', 'Unknown')

    for name in cleanedParentList:
        # Parent records belong in the parent string; two of the three
        # branches used to append them to ``inputString``.
        if len(jobReport.errors) and jobReport.errors[0]["Description"].find(name) >= 0:
            inputParentString += '%s::%d::%s::%s::%s;' % (name, 0, 'Unknown', 'Local', 'Unknown')
        else:
            inputParentString += '%s::%d::%s::%s::%s;' % (name, 2, 'Unknown', 'Unknown', 'Unknown')

    report_dict['inputFiles'] = inputString
    report_dict['parentFiles'] = inputParentString
    report_dict['lumisRange'] = LumisString
    report_dict['lumisParentRange'] = LumisParentString
    report_dict['Basename'] = basename
    report_dict['BasenameParent'] = basenameParent

    # send to DashBoard
    apmonSend(self.MonitorID, self.MonitorJobID, report_dict)
    apmonFree()

    # The ``self.debug == 1`` guard is disabled, so the report is always
    # printed.  Single-argument print(...) gives the same output on
    # Python 2 and Python 3.
    print("Popularity start")
    for k, v in report_dict.items():
        print("%s : %s" % (k, v))
    print("Popularity stop")
    return