def findmatchefficiency(self):
    """Determine the filter (match) efficiency and store it in self.matchefficiency.

    Without a filter the efficiency is simply set to 1.  Otherwise up to 100
    filter jobs are handled, one per numbered subdirectory of self.workdir:
    each is either still running, submitted to LSF, run right here, or has
    results that are read back.  Once none of them is outstanding, the
    efficiency is accepted/processed with uncertainty sqrt(p*(1-p)/processed).

    Returns a status message.
    Raises ValueError if the sample has a filter but doesn't implement filtering.
    """
    # figure out the filter efficiency
    if not self.hasfilter:
        self.matchefficiency = 1
        return "filter efficiency is set to 1 +/- 0"

    if not self.implementsfilter:
        raise ValueError("Can't find match efficiency for {.__name__} which doesn't implement filtering!".format(type(self)))

    mkdir_p(self.workdir)
    stillrunning = False
    ntotal = 0
    npassed = 0
    with cd(self.workdir):
        for jobindex in range(100):
            jobdir = str(jobindex)
            mkdir_p(jobdir)
            with cd(jobdir), KeepWhileOpenFile("runningfilterjob.tmp", message=LSB_JOBID(), deleteifjobdied=True) as kwof:
                if not kwof:
                    # the KeepWhileOpenFile for this job could not be acquired
                    stillrunning = True
                    continue
                if not os.path.exists(self.filterresultsfile):
                    if not LSB_JOBID():
                        # not inside an LSF job: submit one and check again later
                        submitLSF(self.filterefficiencyqueue)
                        stillrunning = True
                        continue
                    if not queuematches(self.filterefficiencyqueue):
                        stillrunning = True
                        continue
                    self.dofilterjob(jobindex)
                processed, accepted = self.getfilterresults(jobindex)
                ntotal += processed
                npassed += accepted

    if stillrunning:
        return "some filter efficiency jobs are still running"

    efficiency = 1.0*npassed / ntotal
    uncertainty = (1.0*npassed * (ntotal-npassed) / ntotal**3) ** .5
    self.matchefficiency = uncertainties.ufloat(efficiency, uncertainty)
    #shutil.rmtree(self.workdir)
    return "match efficiency is measured to be {}".format(self.matchefficiency)
def patchtarball(self):
    """Create a patched version of an existing tarball at self.foreostarball.

    Does nothing (returns None) if the cvmfs, eos or for-eos tarball already
    exists.  self.needspatch supplies the patch arguments: either a mapping of
    keyword arguments for patches.dopatch, or an int, in which case
    self.patchkwargs is used with the int as "oldtarballversion".

    Returns a status message, or None if a tarball already exists.
    Raises AssertionError if no patch is needed or if the patch arguments
    already contain one of the keys filled in here, and RuntimeError if the
    patched tarball doesn't exist afterwards.
    """
    if os.path.exists(self.cvmfstarball) or os.path.exists(self.eostarball) or os.path.exists(self.foreostarball):
        return

    # Raised explicitly instead of "assert False" so the check is not
    # stripped when python runs with -O.
    if not self.needspatch:
        raise AssertionError("patchtarball called, but needspatch is not set")

    mkdir_p(self.workdir)
    with KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
        if not kwof:
            return "job to patch the tarball is already running"

        # Work on a copy: the keys added below must not leak into the stored
        # needspatch/patchkwargs, otherwise a retry after a failed patch
        # would trip over the reserved-key check.
        kwargs = self.needspatch
        if isinstance(kwargs, int):
            kwargs = dict(self.patchkwargs)
            kwargs["oldtarballversion"] = self.needspatch
        else:
            kwargs = dict(kwargs)

        for reserved in "oldfilename", "newfilename", "sample":
            if reserved in kwargs:
                raise AssertionError(kwargs)

        kwargs["oldfilename"] = self.cvmfstarball_anyversion(version=kwargs.pop("oldtarballversion"))
        kwargs["newfilename"] = self.foreostarball
        mkdir_p(os.path.dirname(self.foreostarball))

        patches.dopatch(**kwargs)

        if not os.path.exists(self.foreostarball):
            raise RuntimeError("Patching failed, gridpack doesn't exist")
        # a stored time per event is dropped once the tarball has been patched
        if self.timeperevent is not None:
            del self.timeperevent
        self.needspatch = False

        return "tarball is patched and the new version is in this directory to be copied to eos"
def checkandfixtarball(self):
    """Check the grid name used inside the cvmfs tarball and repack it if it is wrong.

    The tarball is unpacked in a temporary directory and runcmsgrid.sh is run
    once; the grid name is parsed from the output line containing
    'Reading in vegas grid from'.  If it equals self.datasetname + '_grid' a
    file named INTACT is written to self.workdir.  Otherwise the grid is
    copied to the name found in the output, the contents are packed into
    self.tmptarball, and a file named FIXED is written if that tarball exists.

    Returns a status message for the early exits and the intact case, and
    None after the repacking branch.
    Raises RuntimeError if the grid name can't be found in the output, and
    subprocess.CalledProcessError if unpacking the tarball fails.
    """
    mkdir_p(self.workdir)
    lockfile = os.path.join(self.workdir, self.prepid + '.tmp')
    with KeepWhileOpenFile(lockfile, message=LSB_JOBID(), deleteifjobdied=True) as kwof:
        if not kwof:
            return " check in progress"
        if not LSB_JOBID():
            self.submitLSF()
            return "Check if the tarball needs fixing"
        with cdtemp():
            subprocess.call(['cp', self.cvmfstarball, '.'])
            subprocess.check_call(['tar', 'xzvf', self.cvmfstarball])
            subprocess.call(['cp', 'readInput.DAT', 'readInput.DAT_bak'])
            os.system('chmod 755 runcmsgrid.sh')
            try:
                output = subprocess.check_output(
                    ['bash', 'runcmsgrid.sh', '1', '31313', '12'],
                    stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError as e:
                # a failing run still prints the line we're looking for
                output = e.output

            # If several lines match, the last one wins.
            internalgridname = None
            for line in output.split('\n'):
                if 'Reading in vegas grid from' not in line:
                    continue
                internalgridname = str(line.split()[-2].split('CMS_')[1])
            if internalgridname is None:
                # previously this surfaced as a NameError further down
                raise RuntimeError(
                    "Couldn't find the internal grid name in the output of runcmsgrid.sh:\n{}".format(output))

            print("internal tarball name: " + internalgridname)
            if self.datasetname + '_grid' == internalgridname:
                with open(os.path.join(self.workdir, 'INTACT'), 'w') as fout:
                    fout.write(LSB_JOBID())
                return str(self.identifiers) + "'s gridpack is intact"

            # Argument list instead of a shell string, so the names are not
            # subject to shell word splitting or expansion.
            subprocess.call(['cp', self.datasetname + '_grid', internalgridname])
            os.system('mv readInput.DAT_bak readInput.DAT')
            # these two rely on shell globbing, so they stay shell commands
            os.system('rm -r *tgz CMSSW*')
            os.system('tar cvaf ' + self.tmptarball + ' ./*')
            if os.path.exists(self.tmptarball):
                with open(os.path.join(self.workdir, 'FIXED'), 'w') as fout:
                    fout.write(LSB_JOBID())
def getprepid(self):
    """Look up this sample's prepid on McM and store it in self.prepid.

    Does nothing when LSB_JOBID() is set.  Queries 'requests' for the
    dataset name, extension number, pwg and campaign, and ignores any prepid
    listed in self.badprepid.  self.prepid is only assigned when exactly one
    candidate remains.

    Returns None in all cases.
    Raises RuntimeError if more than one prepid matches.
    """
    if LSB_JOBID():
        return
    query = "dataset_name={}&extension={}&prepid={}-{}-*".format(self.datasetname, self.extensionnumber, self.pwg, self.campaign)
    output = restful().get('requests', query=query)
    prepids = {_["prepid"] for _ in output}
    prepids -= frozenset(self.badprepid)
    if not prepids:
        return None
    if len(prepids) != 1:
        # The original format string had two placeholders for three
        # arguments, so the query never made it into the message.
        raise RuntimeError("Multiple prepids for {} ({}, query: {})".format(self, self.datasetname, query))
    self.prepid = prepids.pop()
def getsizeandtime(self):
    """Run the McM test job for this request and extract size and time per event.

    Returns a status message.  The test script is downloaded, executed, and
    its <prepid>_rt.xml report is parsed to set self.sizeperevent and
    self.timeperevent.  self.workdir is removed afterwards.
    """
    mkdir_p(self.workdir)
    # NOTE(review): KeepWhileOpenFile is defined elsewhere; a falsy kwof is
    # treated here as "another job is already doing this" -- confirm.
    with KeepWhileOpenFile(os.path.join(self.workdir, self.prepid+".tmp"), message=LSB_JOBID(), deleteifjobdied=True) as kwof:
        if not kwof:
            return "job to get the size and time is already running"
        # Everything below only runs when LSB_JOBID() is set; otherwise a job is submitted.
        if not LSB_JOBID():
            return "need to get time and size per event, submitting to LSF" if submitLSF(self.timepereventqueue) else "need to get time and size per event, job is pending on LSF"
        if not queuematches(self.timepereventqueue):
            return "need to get time and size per event, but on the wrong queue"
        with cdtemp():
            # Download the test script; the number of events is appended to
            # the URL only if self.neventsfortest is set.
            wget(os.path.join("https://cms-pdmv.cern.ch/mcm/public/restapi/requests/get_test/", self.prepid, str(self.neventsfortest) if self.neventsfortest else "").rstrip("/"), output=self.prepid)
            with open(self.prepid) as f:
                testjob = f.read()
            # SECURITY NOTE(review): eval() is applied to content downloaded
            # from the network.  Presumably the server returns the script as a
            # quoted string literal; if so ast.literal_eval would be a safer
            # way to decode it -- confirm before changing.
            with open(self.prepid, "w") as newf:
                newf.write(eval(testjob))
            # make the script executable and run it
            os.chmod(self.prepid, os.stat(self.prepid).st_mode | stat.S_IEXEC)
            subprocess.check_call(["./"+self.prepid], stderr=subprocess.STDOUT)
            with open(self.prepid+"_rt.xml") as f:
                nevents = totalsize = None
                for line in f:
                    line = line.strip()
                    match = re.match('<TotalEvents>([0-9]*)</TotalEvents>', line)
                    if match:
                        nevents = int(match.group(1))
                    match = re.match('<Metric Name="Timing-tstoragefile-write-totalMegabytes" Value="([0-9.]*)"/>', line)
                    if match:
                        totalsize = float(match.group(1))
                    # The timing metric read from the report depends on
                    # self.year: from 2017 on the time per event is the
                    # inverse of EventThroughput, before that AvgEventTime.
                    if self.year >= 2017:
                        match = re.match('<Metric Name="EventThroughput" Value="([0-9.eE+-]*)"/>', line)
                        if match:
                            self.timeperevent = 1/float(match.group(1))
                    else:
                        match = re.match('<Metric Name="AvgEventTime" Value="([0-9.eE+-]*)"/>', line)
                        if match:
                            self.timeperevent = float(match.group(1))
                # chained comparison: true only if both nevents and totalsize were found
                if nevents is not None is not totalsize:
                    # totalsize is the "totalMegabytes" metric; the factor 1024 converts it
                    # NOTE(review): raises ZeroDivisionError if the report says 0 events
                    self.sizeperevent = totalsize * 1024 / nevents

    shutil.rmtree(self.workdir)

    if not (self.sizeperevent and self.timeperevent):
        return "failed to get the size and time"
    if LSB_JOBID():
        return "size and time per event are found to be {} and {}, run locally to send to McM".format(self.sizeperevent, self.timeperevent)

    self.updaterequest()

    return "size and time per event are found to be {} and {}, sent it to McM".format(self.sizeperevent, self.timeperevent)
def badprepid(self):
    """Return the list of bad prepids stored for this sample.

    The list is read from self.value["badprepid"] (default: empty list).
    Unless LSB_JOBID() is set, every entry for which the 'requests' lookup
    returns nothing is removed from the list, and if anything was removed
    the shortened list is assigned back to self.badprepid.
    """
    with cd(here):
        badprepids = self.value.get("badprepid", [])
        # backwards compatibility: the stored value may be a single string
        if isinstance(badprepids, basestring):
            badprepids = [badprepids]

        before = badprepids[:]
        for prepid in before:
            if not (LSB_JOBID() or restful().get("requests", prepid)):
                badprepids.remove(prepid)

        if badprepids != before:
            self.badprepid = badprepids

        return badprepids
def getprepid(self):
    """Look up the prepid of this cloned request and store it in self.prepid.

    First defers to the parent class's getprepid.  If that leaves
    self.prepid unset and LSB_JOBID() is not set, the query is repeated with
    the dataset name taken from self.originalfullinfo.  self.prepid is only
    assigned when exactly one prepid matches.

    Returns None in all cases.
    Raises RuntimeError if more than one prepid matches.
    """
    super(ClonedRequest, self).getprepid()
    if self.prepid:
        return
    if LSB_JOBID():
        return
    query = "dataset_name={}&extension={}&prepid={}-{}-*".format(
        self.originalfullinfo["dataset_name"], self.extensionnumber, self.pwg, self.campaign)
    output = restful().get('requests', query=query)
    prepids = {_["prepid"] for _ in output}
    if not prepids:
        return None
    if len(prepids) != 1:
        # The original format string had two placeholders for three
        # arguments, so the query never made it into the message.
        raise RuntimeError("Multiple prepids for {} ({}, query: {})".format(
            self, self.datasetname, query))
    self.prepid = prepids.pop()
def createrequest(self, clonequeue):
    """Create a new request for this sample on McM and push its settings there.

    clonequeue is accepted but not used here.  Nothing is sent when
    LSB_JOBID() is set.

    Returns a status message.
    Raises RuntimeError if McM doesn't report success, or if the prepid
    found afterwards differs from the one in McM's answer.
    """
    if LSB_JOBID():
        return "run locally to submit to McM"

    mcm = restful()
    req = {
        "pwg": self.pwg,
        "member_of_campaign": self.campaign,
        "mcdb_id": 0,
        "dataset_name": self.datasetname,
        "extension": self.extensionnumber,
    }
    answer = mcm.put("requests", req)
    if not answer or not answer.get("results"):
        failure = "Failed to create the request on McM\n{}\n\n{}\n\n{}".format(self, req, answer)
        raise RuntimeError(failure)

    # look the new request up again and cross-check against McM's answer
    self.getprepid()
    expected = answer["prepid"]
    if self.prepid != expected:
        raise RuntimeError("Wrong prepid?? {} {}".format(self.prepid, expected))

    self.updaterequest()
    return "created request "+self.prepid+" on McM"
def createrequest(self, clonequeue):
    """Queue this request for cloning instead of creating it directly.

    Marks the request as needing an update and hands it to clonequeue
    together with self.pwg and self.newcampaign.

    Returns whatever clonequeue.add returns.

    The direct mcm.clone implementation that used to follow the return
    statement could never run and has been removed.
    """
    self.needsupdate = True
    return clonequeue.add(self, self.pwg, self.newcampaign)
def makegridpack(self, approvalqueue, badrequestqueue, clonequeue, setneedsupdate=False):
    """Advance this sample by one step and report what was done.

    Each call performs at most one action (create/patch the tarball, find
    the prepid, measure efficiency or size/time, or move the McM request to
    its next state) and returns a status message, or whatever the delegated
    helper returns.

    approvalqueue   -- its validate/reset/define methods are called to change the request's state
    badrequestqueue -- self is added to it whenever self.badprepid is non-empty
    clonequeue      -- passed on to self.createrequest
    setneedsupdate  -- if true, self.setneedsupdate() is called in several
                       states when self.needsupdate is not already set
    """
    if self.finished:
        return "finished!"

    # --- step 1: the tarball has to exist on cvmfs ---
    if not self.cvmfstarballexists:
        if not os.path.exists(self.eostarball):
            if not os.path.exists(self.foreostarball):
                if self.needspatch:
                    return self.patchtarball()
                return self.createtarball()
            return "gridpack exists in this folder, to be copied to eos"
        return "gridpack exists on eos, not yet copied to cvmfs"

    # The local copy is only removed if it is byte-for-byte identical to the one on cvmfs.
    if os.path.exists(self.foreostarball):
        if filecmp.cmp(self.cvmfstarball, self.foreostarball, shallow=False):
            os.remove(self.foreostarball)
            self.needsupdate = True
        else:
            return "gridpack exists on cvmfs, but it's wrong!"

    if self.badprepid:
        badrequestqueue.add(self)

    # --- step 2: the request has to exist on McM ---
    if self.prepid is None:
        self.getprepid()
        if self.prepid is None:
            #need to make the request
            return self.createrequest(clonequeue)
        else:
            return "found prepid: {}".format(self.prepid)

    # --- step 3: measurements, skipped while an update is pending ---
    if (self.matchefficiency is None or self.matchefficiencyerror is None) and not self.needsupdate:
        return self.findmatchefficiency()

    if not (self.sizeperevent and self.timeperevent) and not self.needsupdate:
        return self.getsizeandtime()

    # NOTE(review): the format string has no placeholder, so self.prepid is not shown.
    if LSB_JOBID():
        return "please run locally to check and/or advance the status".format(self.prepid)

    if self.badprepid:
        badrequestqueue.add(self)

    # --- step 4: act on the (approval, status) pair of the request ---
    if (self.approval, self.status) == ("none", "new"):
        if self.needsoptionreset:
            if not self.optionreset():
                return "need to do option reset but failed"
            return "needed option reset, sent it to McM"
        if self.needsupdateiffailed:
            self.updaterequest()
            if self.badprepid:
                badrequestqueue.add(self)
            return "needs update on McM, sending it there"
        if not self.dovalidation:
            return "not starting the validation"
        # If the last history entry is a failure, retry with half the threads.
        # NOTE(review): "/=" is integer division only under Python 2.
        if self.nthreads > 1 and self.fullinfo["history"][-1]["action"] == "failed":
            self.nthreads /= 2
            self.updaterequest()
            return "validation failed, decreasing the number of threads"
        if setneedsupdate and not self.needsupdate:
            result = self.setneedsupdate()
            if result:
                return result
        check = self.request_fragment_check()
        if check:
            return check
        approvalqueue.validate(self)
        return "starting the validation"

    if (self.approval, self.status) == ("validation", "new"):
        if setneedsupdate and not self.needsupdate:
            result = self.setneedsupdate()
            if result:
                return result
        return "validation is running"

    if (self.approval, self.status) == ("validation", "validation"):
        self.gettimepereventfromMcM()
        if setneedsupdate and not self.needsupdate:
            result = self.setneedsupdate()
            if result:
                return result
        if self.needsupdate:
            approvalqueue.reset(self)
            return "needs update on McM, resetting the request"
        self.needsupdateiffailed = False
        approvalqueue.define(self)
        return "defining the request"

    if (self.approval, self.status) == ("define", "defined"):
        if self.needsupdate:
            approvalqueue.reset(self)
            return "needs update on McM, resetting the request"
        if setneedsupdate and not self.needsupdate:
            result = self.setneedsupdate()
            if result:
                return result
        self.needsupdateiffailed = False
        return "request is defined"

    # From here on a pending update is only reported, no reset is queued.
    if (self.approval, self.status) in (("submit", "approved"), ("approve", "approved")):
        if self.needsupdate:
            return "{} is already approved, but needs update!".format(self)
        self.needsupdateiffailed = False
        return "approved"

    if (self.approval, self.status) == ("submit", "submitted"):
        if self.needsupdate:
            return "{} is already submitted, but needs update!".format(self)
        self.needsupdateiffailed = False
        return "submitted"

    if (self.approval, self.status) == ("submit", "done"):
        if self.needsupdate:
            return "{} is already finished, but needs update!".format(self)
        self.needsupdateiffailed = False
        self.gettimepereventfromMcM()
        self.finished = True
        return "finished!"

    return "Unknown approval "+self.approval+" and status "+self.status
def createtarball(self):
    """Create the gridpack tarball and move it to self.foreostarball.

    Returns None if the cvmfs, eos or for-eos tarball already exists,
    otherwise a status message.  Work happens inside self.workdir, guarded
    by a KeepWhileOpenFile at self.tmptarball + ".tmp".
    """
    if os.path.exists(self.cvmfstarball) or os.path.exists(self.eostarball) or os.path.exists(self.foreostarball):
        return

    mkdir_p(self.workdir)
    with cd(self.workdir), KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
        if not kwof:
            # The .tmp file is held by someone else; its content is read as the
            # id of the job that wrote it, to find out whether that job ended.
            with open(self.tmptarball+".tmp") as f:
                try:
                    jobid = int(f.read().strip())
                except ValueError:
                    return "try running again, probably you just got really bad timing"
            if jobended(str(jobid)):
                if self.makinggridpacksubmitsjob:
                    os.remove(self.tmptarball+".tmp")
                    return "job died at a very odd time, cleaned it up. Try running again."
                for _ in os.listdir("."):  #--> delete everything in the folder, except the tarball if that exists
                    if os.path.basename(_) != os.path.basename(self.tmptarball) and os.path.basename(_) != os.path.basename(self.tmptarball)+".tmp":
                        # os.remove fails on directories, so fall back to rmtree
                        try:
                            os.remove(_)
                        except OSError:
                            shutil.rmtree(_)
                os.remove(os.path.basename(self.tmptarball)+".tmp") #remove that last
                return "gridpack job died, cleaned it up. run makegridpacks.py again."
            else:
                return "job to make the tarball is already running"

        if self.gridpackjobsrunning:
            return "job to make the tarball is already running"

        if not os.path.exists(self.tmptarball):
            # Unless a multi-step creation is in progress, clear the directory
            # of everything except .tmp files.
            if not self.inthemiddleofmultistepgridpackcreation:
                for _ in os.listdir("."):
                    if not _.endswith(".tmp"):
                        try:
                            os.remove(_)
                        except OSError:
                            shutil.rmtree(_)
            # If the command doesn't submit a job itself and a queue is
            # configured, this has to run inside an LSF job on that queue.
            if not self.makinggridpacksubmitsjob and self.creategridpackqueue is not None:
                if not LSB_JOBID():
                    return "need to create the gridpack, submitting to LSF" if submitLSF(self.creategridpackqueue) else "need to create the gridpack, job is pending on LSF"
                if not queuematches(self.creategridpackqueue):
                    return "need to create the gridpack, but on the wrong queue"
            for filename in self.makegridpackscriptstolink:
                os.symlink(filename, os.path.basename(filename))

            # read once, before the command runs and its output is processed
            makinggridpacksubmitsjob = self.makinggridpacksubmitsjob

            # Echo the command's stdout line by line while also collecting it.
            #https://stackoverflow.com/a/17698359/5228524
            makegridpackstdout = ""
            pipe = subprocess.Popen(self.makegridpackcommand, stdout=subprocess.PIPE, bufsize=1)
            with pipe.stdout:
                for line in iter(pipe.stdout.readline, b''):
                    print line,
                    makegridpackstdout += line
            self.processmakegridpackstdout(makegridpackstdout)

            if makinggridpacksubmitsjob:
                return "submitted the gridpack creation job"
            if self.inthemiddleofmultistepgridpackcreation:
                return "ran one step of gridpack creation, run again to continue"

        mkdir_p(os.path.dirname(self.foreostarball))
        if self.patchkwargs:
            kwargs = self.patchkwargs
            # these keys are filled in below and must not be preset
            for _ in "oldfilename", "newfilename", "sample":
                assert _ not in kwargs, _
            # Patch into a temporary directory, then move the result over the original tarball.
            with cdtemp():
                kwargs["oldfilename"] = self.tmptarball
                kwargs["newfilename"] = os.path.abspath(os.path.basename(self.tmptarball))
                #kwargs["sample"] = self #???
                patches.dopatch(**kwargs)
                shutil.move(os.path.basename(self.tmptarball), self.tmptarball)

        # a stored time per event is dropped once a new tarball has been made
        if self.timeperevent is not None:
            del self.timeperevent
        shutil.move(self.tmptarball, self.foreostarball)
        shutil.rmtree(os.path.dirname(self.tmptarball))
        return "tarball is created and moved to this folder, to be copied to eos"