def createtarball(self):
    """Copy the original tarball to ``self.foreostarball``, patching it if asked.

    The work is guarded by a ``KeepWhileOpenFile`` on ``self.tmptarball + ".tmp"``;
    a falsy guard is reported as another process already doing the copy.

    Returns a human-readable status string in every case.
    """
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(self.tmptarball + ".tmp") as kwof:
        if not kwof:
            return "another process is already copying the tarball"
        if not os.path.exists(self.originaltarball):
            return "original tarball does not exist"

        # Only accept originals modified strictly after the cutoff.
        lastmodified = datetime.datetime.fromtimestamp(
            os.path.getmtime(self.originaltarball))
        if lastmodified <= self.modifiedafter:
            return "original tarball is an older version than we want"

        mkdir_p(os.path.dirname(self.foreostarball))
        if not self.patchkwargs:
            shutil.copy(self.originaltarball, self.foreostarball)
        else:
            patchargs = self.patchkwargs
            # These keys are filled in here (or reserved), so the configured
            # patch arguments must not already provide them.
            for reserved in "oldfilename", "newfilename", "sample":
                assert reserved not in patchargs, reserved
            with cdtemp():
                # The patched copy is written into the temporary directory
                # first and only then moved to its final location.
                patchargs["oldfilename"] = self.originaltarball
                patchargs["newfilename"] = os.path.abspath(
                    os.path.basename(self.originaltarball))
                patches.dopatch(**patchargs)
                shutil.move(os.path.basename(self.originaltarball),
                            self.foreostarball)
        return "gridpack is copied from " + self.originaltarball + " to this folder, to be copied to eos"
def findmatchefficiency(self):
    """Work out the filter (match) efficiency and store it in ``self.matchefficiency``.

    Without a filter the efficiency is simply set to 1.  Otherwise 100 filter
    jobs (one per numbered subdirectory of ``self.workdir``) are run or
    submitted, and once all of them have results the efficiency is computed as
    accepted/processed with a binomial uncertainty.

    Returns a status string.
    Raises ValueError if the sample has a filter but does not implement it.
    """
    if not self.hasfilter:
        self.matchefficiency = 1
        return "filter efficiency is set to 1 +/- 0"

    if not self.implementsfilter:
        raise ValueError("Can't find match efficiency for {.__name__} which doesn't implement filtering!".format(type(self)))

    mkdir_p(self.workdir)
    stillrunning = False
    ntotal = npassed = 0
    with cd(self.workdir):
        for jobindex in range(100):
            jobdir = str(jobindex)
            mkdir_p(jobdir)
            with cd(jobdir), KeepWhileOpenFile("runningfilterjob.tmp", message=LSB_JOBID(), deleteifjobdied=True) as kwof:
                if not kwof:
                    # somebody else holds this job's guard file
                    stillrunning = True
                    continue
                # NOTE(review): nesting reconstructed from whitespace-collapsed
                # source; the job is assumed to run only when no results exist.
                if not os.path.exists(self.filterresultsfile):
                    if not LSB_JOBID():
                        # not inside a batch job: hand the work to LSF
                        submitLSF(self.filterefficiencyqueue)
                        stillrunning = True
                        continue
                    if not queuematches(self.filterefficiencyqueue):
                        stillrunning = True
                        continue
                    self.dofilterjob(jobindex)
                processed, accepted = self.getfilterresults(jobindex)
                ntotal += processed
                npassed += accepted

    if stillrunning:
        return "some filter efficiency jobs are still running"

    # binomial estimate: eff = k/n, sigma = sqrt(k*(n-k)/n**3)
    efficiency = 1.0*npassed / ntotal
    uncertainty = (1.0*npassed * (ntotal-npassed) / ntotal**3) ** .5
    self.matchefficiency = uncertainties.ufloat(efficiency, uncertainty)
    return "match efficiency is measured to be {}".format(self.matchefficiency)
def patchtarball(self):
    """Produce a patched tarball at ``self.foreostarball`` from an older cvmfs version.

    Does nothing (returns None) when any of the cvmfs, eos or for-eos tarballs
    already exists.  ``self.needspatch`` is either the patch keyword dict
    itself or an int, in which case ``self.patchkwargs`` is used and the int
    is taken as the old tarball version.

    Returns a status string.
    Raises RuntimeError if the patched tarball is missing after patching.
    """
    alreadyexists = (
        os.path.exists(self.cvmfstarball)
        or os.path.exists(self.eostarball)
        or os.path.exists(self.foreostarball)
    )
    if alreadyexists:
        return

    if not self.needspatch:
        assert False

    mkdir_p(self.workdir)
    with KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
        if not kwof:
            return "job to patch the tarball is already running"

        patchargs = self.needspatch
        if isinstance(patchargs, int):
            patchargs = self.patchkwargs
            patchargs["oldtarballversion"] = self.needspatch

        # These keys are filled in below (or reserved) and must not be preset.
        if any(reserved in patchargs for reserved in ("oldfilename", "newfilename", "sample")):
            assert False, patchargs

        oldversion = patchargs.pop("oldtarballversion")
        patchargs["oldfilename"] = self.cvmfstarball_anyversion(version=oldversion)
        patchargs["newfilename"] = self.foreostarball
        mkdir_p(os.path.dirname(self.foreostarball))
        patches.dopatch(**patchargs)

        if not os.path.exists(self.foreostarball):
            raise RuntimeError("Patching failed, gridpack doesn't exist")

        # A stored time per event is discarded once the tarball has been patched.
        if self.timeperevent is not None:
            del self.timeperevent
        self.needspatch = False
        return "tarball is patched and the new version is in this directory to be copied to eos"
def download(date, simu_starttime, folder):
    """Download the GFS 0.25-degree GRIB2 files for one forecast cycle with wget.

    Files are fetched from ``rda.ucar.edu`` (dataset ds084.1) into
    ``<folder>/<date>_<simu_starttime>/grib2``.  Forecast hours 0..240 are
    requested every 3 hours and 252..384 every 12 hours.

    Args:
        date: cycle date string; its first four characters are used as the year.
        simu_starttime: cycle start time string, appended to ``date`` in the
            remote file name.
        folder: local base directory for the downloads.

    Side effects:
        Sets the module-level global ``filename`` to a progress message for
        the file currently being downloaded.  wget's exit status is ignored,
        so a failed download does not stop the loop.
    """
    global filename

    output_directory = folder + "/" + date + "_" + simu_starttime
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    origin_folder = output_directory + "/grib2"
    util.mkdir_p(origin_folder)

    year = date[0:4]
    # Same sequence the original counter produced: 0, 3, ..., 240, 252, ..., 384.
    forecast_hours = list(range(0, 240, 3)) + list(range(240, 385, 12))
    for number in forecast_hours:
        number_str = "%03d" % number
        url = ("http://rda.ucar.edu/data/ds084.1/" + year + "/" + date +
               "/gfs.0p25." + date + simu_starttime + ".f" + number_str + ".grib2")
        filename = "starting to download file " + date + " " + number_str
        # Build the argument list directly instead of splitting a command
        # string, so a download folder containing whitespace is passed to wget
        # as a single argument.
        cmd = [
            "wget", "-N", "--no-check-certificate",
            "--load-cookies", "auth.rda_ucar_edu",
            url,
            "-P", origin_folder,
        ]
        subprocess.call(cmd)
def store_remote(self, dest_file, content, manipulate):
    """Write ``content`` to ``dest_file`` under the remote root if it is missing or differs.

    When the file is (re)written, ``manipulate.execute`` is applied to the
    written path.  ``dest_file`` is always appended to the file list.
    """
    target = self.in_remote_root(dest_file)
    unchanged = exists(target) and self.all_object.get_remote(dest_file) == content
    if not unchanged:
        mkdir_p(dirname(target))
        write(target, content)
        # XXX: I suspect that the write above and immediate power off of the
        # target system led to truncation of some affected files to length 0!
        # NOTE(review): nesting reconstructed from whitespace-collapsed source;
        # manipulate.execute is assumed to run only after a write.
        manipulate.execute(target)
    self.append_to_file_list(dest_file)
def configure_logger(logging_config_file, log_file):
    """Configure logging from an ini-style config file, logging to ``log_file``.

    The directory of ``log_file`` is created if it does not exist.  The path
    is passed to the config file as the ``logfilename`` default, and the
    ``urllib3`` and ``requests_kerberos`` loggers are limited to ERROR.

    Args:
        logging_config_file: path to a ``logging.config.fileConfig`` file.
        log_file: path of the log file to write.

    On any failure a message is written to stderr and the process exits with
    status 1 (raises ``SystemExit``).
    """
    # Local import: keeps this block self-contained.
    import sys

    try:
        log_dir = os.path.dirname(os.path.abspath(log_file))
        if not os.path.exists(log_dir):
            mkdir_p(log_dir)
        logging.config.fileConfig(logging_config_file,
                                  disable_existing_loggers=False,
                                  defaults={'logfilename': log_file})
        logging.getLogger("urllib3").setLevel(logging.ERROR)
        logging.getLogger("requests_kerberos").setLevel(logging.ERROR)
    except Exception as e:
        # Logging is not usable at this point, so report on stderr.  Use
        # sys.exit rather than the builtin exit(), which is only injected by
        # the site module for interactive use.
        sys.stderr.write("Error in configuring logger %s\n" % e)
        sys.exit(1)
def checkandfixtarball(self):
    """Check whether the grid name inside the cvmfs tarball matches the dataset and repack if not.

    Runs on LSF: when not inside a batch job, the job is submitted and the
    function returns.  Inside a job, the tarball is unpacked in a temporary
    directory and ``runcmsgrid.sh`` is run; the grid name is parsed from its
    output.  If it equals ``self.datasetname + '_grid'`` an ``INTACT`` marker
    is written to ``self.workdir``; otherwise the grid file is copied under
    the internal name, the directory is repacked into ``self.tmptarball`` and
    a ``FIXED`` marker is written.

    Returns a status string, or None after the repacking branch.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm which statements belong inside the ``for`` loop.
    """
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(os.path.join(self.workdir, self.prepid + '.tmp'), message=LSB_JOBID(), deleteifjobdied=True) as kwof:
        if not kwof:
            return " check in progress"
        if not LSB_JOBID():
            # not running inside a batch job yet: submit one and stop here
            self.submitLSF()
            return "Check if the tarball needs fixing"
        with cdtemp():
            subprocess.call(['cp', self.cvmfstarball, '.'])
            subprocess.check_call(['tar', 'xzvf', self.cvmfstarball])
            # keep a pristine copy of the input card; it is restored before repacking
            subprocess.call(['cp', 'readInput.DAT', 'readInput.DAT_bak'])
            os.system('chmod 755 runcmsgrid.sh')
            try:
                output = subprocess.check_output(
                    ['bash', 'runcmsgrid.sh', '1', '31313', '12'],
                    stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError as e:
                # a failing run is expected to still print the line we need
                output = e.output
            for line in output.split('\n'):
                if not 'Reading in vegas grid from' in line:
                    continue
                else:
                    # second-to-last word of the line, with everything up to 'CMS_' removed
                    line = line.split()[-2]
                    internalgridname = line.split('CMS_')[1]
            # NOTE(review): internalgridname is unbound (NameError) if no
            # output line contained 'Reading in vegas grid from'.
            internalgridname = str(internalgridname)
            print "internal tarball name: " + internalgridname
            if self.datasetname + '_grid' == internalgridname:
                with open(os.path.join(self.workdir, 'INTACT'), 'w') as fout:
                    fout.write(LSB_JOBID())
                return str(self.identifiers) + "'s gridpack is intact"
            else:
                # make the grid available under the name the tarball looks for
                os.system('cp ' + self.datasetname + '_grid ' + internalgridname)
                os.system('mv readInput.DAT_bak readInput.DAT')
                # drop the copied tarball and the CMSSW area before repacking
                os.system('rm -r *tgz CMSSW*')
                # NOTE(review): curdirpath is never used afterwards.
                curdirpath = subprocess.check_output(['pwd'])
                os.system('tar cvaf ' + self.tmptarball + ' ./*')
                if os.path.exists(self.tmptarball):
                    with open(os.path.join(self.workdir, 'FIXED'), 'w') as fout:
                        fout.write(LSB_JOBID())
def launch_scrape(directory, method, params, timestamp, umap):
    """Scrape messages from the API and append them to per-date JSON-lines files.

    Each message is written as one JSON object per line to
    ``<directory>/<datestring_short>/<datestring_long>.json``, where the two
    date strings come from ``get_date_string(message["ts"])``.  When
    ``config.replace_user_ids`` is set, the ``user`` field and ``<@ID>``
    mentions in ``text`` are replaced using ``umap``.

    Args:
        directory: base output directory.
        method, params, timestamp: forwarded to ``api.message_generator``.
        umap: mapping from user id to the replacement name.
    """
    cur_date = None
    cur_file = None

    # Compile the mention pattern once instead of rebuilding it for every
    # message, and escape the ids so they are always matched literally.
    mention_re = re.compile(
        "<@(?P<id>" + "|".join(re.escape(uid) for uid in umap) + ")>")

    # CLOSURE
    def replace_uid(match):
        uid = match.group("id")
        if uid in umap:
            return "@%s" % umap[uid]
        return "<@%s>" % uid

    try:
        for message in api.message_generator(method, params, timestamp):
            # the format will be line separated json objects for each message
            # because large json objects are the worst

            # update current file
            datestring_short, datestring_long = get_date_string(message["ts"])
            if cur_file is None or cur_date != datestring_long:
                if cur_file is not None:
                    cur_file.close()
                    cur_file = None
                cur_date = datestring_long
                utilities.mkdir_p("%s/%s" % (directory, datestring_short))
                cur_file = open("%s/%s/%s.json" % (directory, datestring_short, datestring_long), "a")

            # clean up message object user ids
            if config.replace_user_ids:
                # replace who's talking
                if "user" in message and message["user"] in umap:
                    message["user"] = umap[message["user"]]

                # replace mentions
                if "text" in message:
                    message["text"] = mention_re.sub(replace_uid, message["text"])

            # save! potentially devastating assumption: chronological
            cur_file.write("%s\n" % json.dumps(message))
    finally:
        # Close the current file even if the generator or a write raised.
        if cur_file is not None:
            cur_file.close()
def getsizeandtime(self):
    """Run the McM test job for this request and record size and time per event.

    Must run inside an LSF job on ``self.timepereventqueue``; otherwise the
    job is submitted (or reported as pending / on the wrong queue) and the
    function returns.  The test script is downloaded from McM, executed, and
    ``<prepid>_rt.xml`` is parsed for the event count, total size and timing.

    Returns a status string.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; in particular confirm that the cleanup and the final returns sit
    outside the ``with KeepWhileOpenFile`` block.
    """
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(os.path.join(self.workdir, self.prepid+".tmp"), message=LSB_JOBID(), deleteifjobdied=True) as kwof:
        if not kwof:
            return "job to get the size and time is already running"
        if not LSB_JOBID():
            return "need to get time and size per event, submitting to LSF" if submitLSF(self.timepereventqueue) else "need to get time and size per event, job is pending on LSF"
        if not queuematches(self.timepereventqueue):
            return "need to get time and size per event, but on the wrong queue"
        with cdtemp():
            # the number of events is an optional last URL component
            wget(os.path.join("https://cms-pdmv.cern.ch/mcm/public/restapi/requests/get_test/", self.prepid, str(self.neventsfortest) if self.neventsfortest else "").rstrip("/"), output=self.prepid)
            with open(self.prepid) as f:
                testjob = f.read()
            with open(self.prepid, "w") as newf:
                # SECURITY NOTE(review): eval() executes whatever the server
                # returned; presumably the payload is a quoted string literal
                # holding the script -- confirm, and prefer a safe parser.
                newf.write(eval(testjob))
            # make the downloaded script executable and run it
            os.chmod(self.prepid, os.stat(self.prepid).st_mode | stat.S_IEXEC)
            subprocess.check_call(["./"+self.prepid], stderr=subprocess.STDOUT)
            with open(self.prepid+"_rt.xml") as f:
                nevents = totalsize = None
                for line in f:
                    line = line.strip()
                    match = re.match('<TotalEvents>([0-9]*)</TotalEvents>', line)
                    if match:
                        nevents = int(match.group(1))
                    match = re.match('<Metric Name="Timing-tstoragefile-write-totalMegabytes" Value="([0-9.]*)"/>', line)
                    if match:
                        totalsize = float(match.group(1))
                    if self.year >= 2017:
                        # from 2017 on the report gives a throughput, which is inverted
                        match = re.match('<Metric Name="EventThroughput" Value="([0-9.eE+-]*)"/>', line)
                        if match:
                            self.timeperevent = 1/float(match.group(1))
                    else:
                        match = re.match('<Metric Name="AvgEventTime" Value="([0-9.eE+-]*)"/>', line)
                        if match:
                            self.timeperevent = float(match.group(1))
                # chained comparison: both nevents and totalsize must have been found
                if nevents is not None is not totalsize:
                    # total size is in megabytes; the factor 1024 presumably converts to kB per event
                    self.sizeperevent = totalsize * 1024 / nevents

    shutil.rmtree(self.workdir)

    if not (self.sizeperevent and self.timeperevent):
        return "failed to get the size and time"
    # NOTE(review): every path reaching this point already required a truthy
    # LSB_JOBID() above, so the updaterequest() branch below looks unreachable
    # from here -- confirm.
    if LSB_JOBID():
        return "size and time per event are found to be {} and {}, run locally to send to McM".format(self.sizeperevent, self.timeperevent)

    self.updaterequest()
    return "size and time per event are found to be {} and {}, sent it to McM".format(self.sizeperevent, self.timeperevent)
def save_bookmarks(self):
    """Save bookmarks to file structure.

    Loads and processes the browser bookmarks, then for every folder in
    ``self.folders`` creates ``<output_location>/<folder>/<page name>/`` with
    a ``resources`` subdirectory, and stores the page as ``bookmark.html``
    and its CSS as ``resources/styles.css``.  Slashes and backslashes are
    removed from folder and page names.  Pages whose name ends in ``.pdf``
    are not downloaded.  A page that fails to download is logged and skipped.
    """
    data = self.load_browser_bookmarks()
    self._process_bookmarks(data)

    # Iterate over items() so the bookmarks are taken from the ORIGINAL key;
    # looking them up by the sanitised name fails for folders with slashes.
    for folder_key, items in self.folders.items():
        # strip slashes from folder names
        folder = folder_key.replace('/', '').replace('\\', '')

        # create Chrome directories
        path = self.output_location + '/' + folder + '/'
        mkdir_p(path)

        for item in items:
            # strip slashes from web page names so each page maps to exactly
            # one directory level
            name = item['name'].replace('/', '').replace('\\', '')

            # create new directory for web page
            web_page_path = path + name
            mkdir_p(web_page_path)

            # create new directory for web page resources directory
            web_page_resources = web_page_path + '/resources'
            mkdir_p(web_page_resources)

            # Skip URLs with PDF extension
            if name.endswith('.pdf'):
                continue

            # save files (best effort: a failing page must not stop the rest)
            try:
                self.logger.info('Getting URL {}'.format(item['url']))
                web_object = WebScraper(item['url'])

                # get web page css
                css = web_object.get_css()

                # get web page content
                content = web_object.get_web_page()

                # save main web page
                with open(web_page_path + '/bookmark.html', 'wb') as f:
                    f.write(content)

                # save css files
                if css is not None:
                    with open(web_page_resources + '/styles.css', 'wb') as f:
                        f.write(css)

                self.logger.info('Successfully saved URL {}'.format(item['url']))
            except Exception as e:
                self.logger.error('Web page not saved - {} - {}'.format(item['url'], e))
def setup(filename=CONFIGNAME):
    """Prepare a default GromacsWrapper global environment.

    1) Create the global config file.
    2) Create the directories in which the user can store template and
       config files.

    This function can be run repeatedly without harm.
    """
    # setup() must be separate and NOT run automatically when config
    # is loaded so that easy_install installations work
    # (otherwise we get a sandbox violation)

    # populate cfg with defaults (or existing data)
    get_configuration()

    config_exists = os.path.exists(filename)
    if not config_exists:
        with open(filename, 'w') as configfile:
            # write the default file so that user can edit
            cfg.write(configfile)
            print("NOTE: GromacsWrapper created the configuration file \n\t%r\n"
                  " for you. Edit the file to customize the package." % filename)

    # directories
    for directory in config_directories:
        utilities.mkdir_p(directory)
def setup(filename=CONFIGNAME):
    """Prepare a default BornProfiler global environment.

    1) Create the global config file.
    2) Create the directories in which the user can store template and
       config files.

    This function can be run repeatedly without harm.
    """
    # setup() must be separate and NOT run automatically when config
    # is loaded so that easy_install installations work
    # (otherwise we get a sandbox violation)

    # Note that cfg is populated with defaults when this module is imported.
    config_exists = os.path.exists(filename)
    if not config_exists:
        with open(filename, 'w') as configfile:
            # write the default file so that user can edit
            cfg.write(configfile)
            note = "NOTE: BornProfiler created the configuration file \n\t{0}\n".format(filename)
            note = note + " for you. Edit the file to customize the package."
            print(note)

    # directories
    for directory in (configdir, qscriptdir, templatesdir):
        utilities.mkdir_p(directory)
def save_bookmarks(self):
    """Save bookmarks to file structure.

    Loads and processes the browser bookmarks, then for every folder in
    ``self.folders`` creates ``<output_location>/<folder>/<page name>/`` with
    a ``resources`` subdirectory, and stores the page as ``bookmark.html``
    and its CSS as ``resources/styles.css``.  Slashes and backslashes are
    removed from folder and page names.  Pages whose name ends in ``.pdf``
    are not downloaded.  A page that fails to download is logged and skipped.
    """
    self.logger.info("Starting bookmark collection")

    data = self.load_browser_bookmarks()
    self._process_bookmarks(data)

    # Iterate over items() so the bookmarks are taken from the ORIGINAL key;
    # looking them up by the sanitised name fails for folders with slashes.
    for folder_key, items in self.folders.items():
        # strip slashes from folder names
        folder = folder_key.replace("/", "").replace("\\", "")

        # create Chrome directories
        path = self.output_location + "/" + folder + "/"
        mkdir_p(path)

        for item in items:
            # strip slashes from web page names so each page maps to exactly
            # one directory level
            name = item["name"].replace("/", "").replace("\\", "")

            # create new directory for web page
            web_page_path = path + name
            mkdir_p(web_page_path)

            # create new directory for web page resources directory
            web_page_resources = web_page_path + "/resources"
            mkdir_p(web_page_resources)

            # Skip URLs with PDF extension
            if name.endswith(".pdf"):
                continue

            # save files (best effort: a failing page must not stop the rest)
            try:
                self.logger.info("Getting URL {}".format(item["url"]))
                web_object = WebScraper(item["url"])

                # get web page css
                css = web_object.get_css()

                # get web page content
                content = web_object.get_web_page()

                # save main web page
                with open(web_page_path + "/bookmark.html", "wb") as f:
                    f.write(content)

                # save css files
                if css is not None:
                    with open(web_page_resources + "/styles.css", "wb") as f:
                        f.write(css)

                self.logger.info("Successfully saved URL {}".format(item["url"]))
            except Exception as e:
                self.logger.error("Web page not saved - {} - {}".format(item["url"], e))

    self.logger.info("Completed bookmark collection")
def save_bookmarks(self):
    """Save bookmarks to file structure.

    Loads and processes the browser bookmarks, then for every folder in
    ``self.folders`` creates ``<output_location>/<folder>/<page name>/`` with
    a ``resources`` subdirectory, and stores the page as ``bookmark.html``
    and its CSS as ``resources/styles.css``.  Slashes and backslashes are
    removed from folder and page names.  Pages whose name ends in ``.pdf``
    are not downloaded.  A page that fails to download is logged and skipped.
    """
    self.logger.info('Starting bookmark collection')

    data = self.load_browser_bookmarks()
    self._process_bookmarks(data)

    # Iterate over items() so the bookmarks are taken from the ORIGINAL key;
    # looking them up by the sanitised name fails for folders with slashes.
    for folder_key, items in self.folders.items():
        # strip slashes from folder names
        folder = folder_key.replace('/', '').replace('\\', '')

        # create Chrome directories
        path = self.output_location + '/' + folder + '/'
        mkdir_p(path)

        for item in items:
            # strip slashes from web page names so each page maps to exactly
            # one directory level
            name = item['name'].replace('/', '').replace('\\', '')

            # create new directory for web page
            web_page_path = path + name
            mkdir_p(web_page_path)

            # create new directory for web page resources directory
            web_page_resources = web_page_path + '/resources'
            mkdir_p(web_page_resources)

            # Skip URLs with PDF extension
            if name.endswith('.pdf'):
                continue

            # save files (best effort: a failing page must not stop the rest)
            try:
                self.logger.info('Getting URL {}'.format(item['url']))
                web_object = WebScraper(item['url'])

                # get web page css
                css = web_object.get_css()

                # get web page content
                content = web_object.get_web_page()

                # save main web page
                with open(web_page_path + '/bookmark.html', 'wb') as f:
                    f.write(content)

                # save css files
                if css is not None:
                    with open(web_page_resources + '/styles.css', 'wb') as f:
                        f.write(css)

                self.logger.info('Successfully saved URL {}'.format(
                    item['url']))
            except Exception as e:
                self.logger.error('Web page not saved - {} - {}'.format(
                    item['url'], e))

    self.logger.info('Completed bookmark collection')
def main(args):
    """Train a ``Judge`` model on sparse inputs for one pass, optionally validate, then test.

    Reads its configuration from ``args`` (seed, checkpoint location, batch
    sizes, learning rate, ...) and also sets ``args.use_cuda`` and
    ``args.evaluate``.  Validation runs only when ``args.val_batches > 0``;
    the checkpoint is saved inside that branch.  Testing always loads
    ``checkpoint_file + '-best.pth.tar'``.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed source.
    """
    # reproducibility
    if args.seed is not None:
        torch.manual_seed(
            args.seed)  # don't think this works with SparseMNIST right now
        np.random.seed(args.seed)

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    # NOTE(review): plain string concatenation -- args.checkpoint presumably
    # has to end with a path separator for the file to land inside it.
    if args.checkpoint_filename is None:
        # default name: current timestamp with its last 10 characters cut off
        checkpoint_file = args.checkpoint + str(datetime.now())[:-10]
    else:
        checkpoint_file = args.checkpoint + args.checkpoint_filename

    # cuda
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.use_cuda else "cpu")

    # eval?
    args.evaluate = args.val_batches > 0

    # prep sparse mnist
    if not args.evaluate:
        train_loader, _, test_loader = prepare_data(args)
    else:
        train_loader, val_loader, test_loader = prepare_data(args)

    # machinery
    model = Judge().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # setup validation metrics we want to track for tracking best model over training run
    best_val_loss = float('inf')
    best_val_acc = 0

    print('\n================== TRAINING ==================')
    model.train()  # set model to training mode

    # set up training metrics we want to track
    correct = 0
    train_num = args.batches * args.batch_size

    # timer
    time0 = time.time()

    for ix, (
            sparse, img,
            label) in enumerate(train_loader):  # iterate over training batches
        sparse, label = sparse.to(device), label.to(
            device)  # get data, send to gpu if needed

        optimizer.zero_grad(
        )  # clear parameter gradients from previous training update
        logits = model(sparse)  # forward pass
        loss = F.cross_entropy(logits, label)  # calculate network loss
        loss.backward()  # backward pass
        optimizer.step(
        )  # take an optimization step to update model's parameters

        pred = logits.max(1, keepdim=True)[1]  # get the index of the max logit
        correct += pred.eq(
            label.view_as(pred)).sum().item()  # add to running total of hits

        if ix % args.log_interval == 0:  # maybe log current metrics to terminal
            # NOTE(review): the backslash continues the string literal, so any
            # whitespace at the start of the next source line is part of the
            # printed text.
            print('Train: [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t\
 Accuracy: {:.2f}%\tTime: {:0f} min, {:.2f} s'.format(
                (ix + 1) * len(sparse), train_num,
                100. * ix / len(train_loader), loss.item(),
                100. * correct / ((ix + 1) * len(sparse)),
                (time.time() - time0) // 60, (time.time() - time0) % 60))

    print(
        'Train Accuracy: {}/{} ({:.2f}%)\tTrain Time: {:0f} minutes, {:2f} seconds\n'
        .format(correct, train_num, 100. * correct / train_num,
                (time.time() - time0) // 60, (time.time() - time0) % 60))

    if args.evaluate:
        print('\n================== VALIDATION ==================')
        model.eval()

        # set up validation metrics we want to track
        val_loss = 0.
        val_correct = 0
        val_num = args.eval_batch_size * args.val_batches

        # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
        with torch.no_grad():
            for sparse, img, label in val_loader:
                sparse, label = sparse.to(device), label.to(device)
                logits = model(sparse)

                # summed (not averaged) batch loss; divided by val_num below
                val_loss += F.cross_entropy(logits, label,
                                            size_average=False).item()

                pred = logits.max(1, keepdim=True)[1]
                val_correct += pred.eq(label.view_as(pred)).sum().item()

        # update current evaluation metrics
        val_loss /= val_num
        val_acc = 100. * val_correct / val_num
        print(
            '\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'
            .format(val_loss, val_correct, val_num, val_acc))

        is_best = val_acc > best_val_acc
        if is_best:
            best_val_acc = val_acc
            best_val_loss = val_loss  # note this is val_loss of best model w.r.t. accuracy,
            # not the best val_loss throughout training

        # create checkpoint dictionary and save it;
        # if is_best, copy the file over to the file containing best model for this run
        state = {
            'state_dict': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }
        save_checkpoint(state, is_best, checkpoint_file)

    print('\n================== TESTING ==================')
    # NOTE(review): save_checkpoint is only called in the validation branch
    # above, so with args.evaluate False this load presumably needs a
    # pre-existing '-best.pth.tar' file -- confirm.
    check = torch.load(checkpoint_file + '-best.pth.tar')
    model.load_state_dict(check['state_dict'])
    model.eval()

    test_loss = 0.
    test_correct = 0
    test_num = args.eval_batch_size * args.test_batches

    # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
    with torch.no_grad():
        for sparse, img, label in test_loader:
            sparse, label = sparse.to(device), label.to(device)
            logits = model(sparse)
            # summed (not averaged) batch loss; divided by test_num below
            test_loss += F.cross_entropy(logits, label,
                                         size_average=False).item()
            pred = logits.max(
                1, keepdim=True)[1]  # get the index of the max logit
            test_correct += pred.eq(label.view_as(pred)).sum().item()

    test_loss /= test_num
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, test_correct, test_num, 100. * test_correct / test_num))

    print('Final model stored at "{}".'.format(checkpoint_file +
                                               '-best.pth.tar'))
def createtarball(self):
    """Create the gridpack tarball in ``self.workdir`` and move it to ``self.foreostarball``.

    Does nothing (returns None) when the cvmfs, eos or for-eos tarball already
    exists.  Otherwise the function, guarded by a ``KeepWhileOpenFile`` on
    ``self.tmptarball + ".tmp"``:

    * cleans up after a previous job that has ended while holding the guard,
    * runs (or submits to LSF) ``self.makegridpackcommand``,
    * optionally patches the result, and
    * moves the tarball into place and removes its temporary directory.

    Returns a status string describing the step that was reached.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed source.
    """
    if os.path.exists(self.cvmfstarball) or os.path.exists(self.eostarball) or os.path.exists(self.foreostarball):
        return

    mkdir_p(self.workdir)
    with cd(self.workdir), KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
        if not kwof:
            # The guard file is held: read the job id stored in it to find out
            # whether its owner is still alive.
            with open(self.tmptarball+".tmp") as f:
                try:
                    jobid = int(f.read().strip())
                except ValueError:
                    return "try running again, probably you just got really bad timing"
            if jobended(str(jobid)):
                if self.makinggridpacksubmitsjob:
                    os.remove(self.tmptarball+".tmp")
                    return "job died at a very odd time, cleaned it up. Try running again."
                for _ in os.listdir("."):  #--> delete everything in the folder, except the tarball if that exists
                    if os.path.basename(_) != os.path.basename(self.tmptarball) and os.path.basename(_) != os.path.basename(self.tmptarball)+".tmp":
                        try:
                            os.remove(_)
                        except OSError:
                            # os.remove fails on directories; remove the tree instead
                            shutil.rmtree(_)
                os.remove(os.path.basename(self.tmptarball)+".tmp")  #remove that last
                return "gridpack job died, cleaned it up. run makegridpacks.py again."
            else:
                return "job to make the tarball is already running"

        if self.gridpackjobsrunning:
            return "job to make the tarball is already running"

        if not os.path.exists(self.tmptarball):
            if not self.inthemiddleofmultistepgridpackcreation:
                # start from a clean directory, keeping only the .tmp guard files
                for _ in os.listdir("."):
                    if not _.endswith(".tmp"):
                        try:
                            os.remove(_)
                        except OSError:
                            shutil.rmtree(_)
            if not self.makinggridpacksubmitsjob and self.creategridpackqueue is not None:
                # the command runs in this process, so it has to be inside an
                # LSF job on the right queue
                if not LSB_JOBID():
                    return "need to create the gridpack, submitting to LSF" if submitLSF(self.creategridpackqueue) else "need to create the gridpack, job is pending on LSF"
                if not queuematches(self.creategridpackqueue):
                    return "need to create the gridpack, but on the wrong queue"
            for filename in self.makegridpackscriptstolink:
                os.symlink(filename, os.path.basename(filename))

            # read this before running the command; the value is used for the
            # return decision below
            makinggridpacksubmitsjob = self.makinggridpacksubmitsjob

            # echo the command's stdout line by line while also collecting it
            #https://stackoverflow.com/a/17698359/5228524
            makegridpackstdout = ""
            pipe = subprocess.Popen(self.makegridpackcommand, stdout=subprocess.PIPE, bufsize=1)
            with pipe.stdout:
                for line in iter(pipe.stdout.readline, b''):
                    print line,
                    makegridpackstdout += line
            self.processmakegridpackstdout(makegridpackstdout)

            if makinggridpacksubmitsjob:
                return "submitted the gridpack creation job"
            if self.inthemiddleofmultistepgridpackcreation:
                return "ran one step of gridpack creation, run again to continue"

        mkdir_p(os.path.dirname(self.foreostarball))
        if self.patchkwargs:
            kwargs = self.patchkwargs
            # these keys are filled in here (or reserved) and must not be preset
            for _ in "oldfilename", "newfilename", "sample":
                assert _ not in kwargs, _
            with cdtemp():
                # patch into a temporary copy, then replace the tarball with it
                kwargs["oldfilename"] = self.tmptarball
                kwargs["newfilename"] = os.path.abspath(os.path.basename(self.tmptarball))
                #kwargs["sample"] = self #???
                patches.dopatch(**kwargs)
                shutil.move(os.path.basename(self.tmptarball), self.tmptarball)

        # a stored time per event is discarded when a new tarball is made
        if self.timeperevent is not None:
            del self.timeperevent
        shutil.move(self.tmptarball, self.foreostarball)
        shutil.rmtree(os.path.dirname(self.tmptarball))
        return "tarball is created and moved to this folder, to be copied to eos"
def do_file_list(self):
    """Write the sorted file list, one entry per line, to the file-list file."""
    mkdir_p(self.file_list_dir)
    target = self.file_list_file_name()
    entries = sorted(self.file_list)
    # newline-separated with a trailing newline
    write(target, '\n'.join(entries) + '\n')
cur_date = None cur_file = None user_string = "|".join(umap.keys()) def replace_uid(match): uid = match.group("id") if uid in umap: return "@%s"%umap[uid] return "<@%s>"%uid for message in api.message_generator(method, params, timestamp): datestring_short, datestring_long = get_date_string(message["ts"]) if cur_file is None or cur_date != datestring_long: if not cur_file is None: cur_file.close() cur_date = datestring_long utilities.mkdir_p("%s/%s"%(directory, datestring_short)) cur_file = open("%s/%s/%s.json"%(directory, datestring_short, datestring_long), "a") if config.replace_user_ids: if "user" in message and message["user"] in umap: message["user"] = umap[message["user"]] if "text" in message: message["text"] = re.sub("<@(?P<id>" + user_string + ")>", replace_uid, message["text"]) cur_file.write("%s\n"%json.dumps(message)) if cur_file: cur_file.close() def scrape_channels(umap): print "Getting channels..." for channel in api.channel_generator(): if not config.scrape_archived_channels and channel["is_archived"]:
def cardsurl(self):
    """Check the cards inside the cvmfs tarball against genproductions and return their URLs.

    Compares ``readInput.DAT`` from the tarball with the production card at
    ``self.genproductionscommit``, and ``src/User/mdata.f`` from the tarball
    with one regenerated from the MCFM commit named in ``run_mcfm_AC.py``.
    Mismatches are ignored when ``'BKG'`` is in ``self.identifiers``.

    Returns:
        The production card URL followed by ``#``-prefixed lines giving the
        mdata script URL and the options it was run with.

    Raises:
        ValueError: core dump in the tarball, missing card in the tarball,
            or a card that differs from the git version (both versions are
            then written into the directory ``here``).

    NOTE(review): nesting was reconstructed from a whitespace-collapsed source.
    """
    commit = self.genproductionscommit
    productioncardurl = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/", commit,
        self.productioncard.split("genproductions/")[-1])
    mdatascript = os.path.join(
        "https://raw.githubusercontent.com/cms-sw/genproductions/", commit,
        "bin/MCFM/ACmdataConfig.py")
    with cdtemp():
        with contextlib.closing(urllib.urlopen(productioncardurl)) as f:
            productiongitcard = f.read()

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        if glob.glob("core.*"):
            raise ValueError(
                "There is a core dump in the tarball\n{}".format(self))
        # (disabled) check for broken symlinks in the tarball:
        # for root, dirs, files in os.walk("."):
        #     for ifile in files:
        #         try:
        #             os.stat(ifile)
        #         except Exception as e:
        #             if e.args == 'No such file or directory': continue
        #             print ifile
        #             print e.message, e.args
        #             raise ValueError("There is a broken symlink in the tarball\n{}".format(self))
        try:
            with open("readInput.DAT") as f:
                productioncard = f.read()
        except IOError:
            raise ValueError(
                "no readInput.DAT in the tarball\n{}".format(self))
        try:
            with open("src/User/mdata.f") as f:
                mdatacard = f.read()
        except IOError:
            raise ValueError(
                "no src/User/mdata.f in the tarball\n{}".format(self))

    if differentproductioncards(
            productioncard,
            productiongitcard) and not 'BKG' in self.identifiers:
        # leave both versions behind for inspection before failing
        with cd(here):
            with open("productioncard", "w") as f:
                f.write(productioncard)
            with open("productiongitcard", "w") as f:
                f.write(productiongitcard)
        raise ValueError(
            "productioncard != productiongitcard\n{}\nSee ./productioncard and ./productiongitcard"
            .format(self))

    # Find the MCFM commit: the "git checkout <commit>" line inside the
    # downloadmcfm function of run_mcfm_AC.py.
    with contextlib.closing(
            urllib.urlopen(
                os.path.join(
                    "https://raw.githubusercontent.com/cms-sw/genproductions/"
                    + commit + "/bin/MCFM/run_mcfm_AC.py"))) as f:
        infunction = False
        for line in f:
            # any "def" ends the current function; "def downloadmcfm" starts ours
            if re.match(r"^\s*def .*", line):
                infunction = False
            if re.match(r"^\s*def downloadmcfm.*", line):
                infunction = True
            if not infunction:
                continue
            match = re.search(r"git checkout ([\w.]*)", line)
            if match:
                mcfmcommit = match.group(1)
    # NOTE(review): mcfmcommit is unbound (NameError below) if no
    # "git checkout" line was found inside downloadmcfm.
    with cdtemp():
        mkdir_p("src/User")
        with cd("src/User"):
            wget(
                os.path.join(
                    "https://raw.githubusercontent.com/usarica/MCFM-7.0_JHUGen",
                    mcfmcommit, "src/User/mdata.f"))
        wget(mdatascript)
        # the script is expected to modify src/User/mdata.f for this coupling
        subprocess.check_call([
            "python",
            os.path.basename(mdatascript), "--coupling", self.coupling,
            "--mcfmdir", ".", "--bsisigbkg", self.signalbkgbsi
        ])
        with open("src/User/mdata.f") as f:
            mdatagitcard = f.read()

    if mdatacard != mdatagitcard and not 'BKG' in self.identifiers:
        # leave both versions behind for inspection before failing
        with cd(here):
            with open("mdatacard", "w") as f:
                f.write(mdatacard)
            with open("mdatagitcard", "w") as f:
                f.write(mdatagitcard)
        raise ValueError(
            "mdatacard != mdatagitcard\n{}\nSee ./mdatacard and ./mdatagitcard"
            .format(self))

    result = (productioncardurl + "\n" + "# " + mdatascript + "\n" +
              "#    --coupling " + self.coupling + " --bsisigbkg " +
              self.signalbkgbsi)

    return result
def copy_to_remote_authorized_keys(self):
    """Ensure our RSA public key is in ``root/.ssh/authorized_keys`` under the mount point."""
    ssh_dir = join(self.mount_point, 'root/.ssh')
    authorized_keys_path = join(ssh_dir, 'authorized_keys')
    public_key = file_content(expanduser('~/.ssh/id_rsa.pub'))

    mkdir_p(ssh_dir)
    ensure_contains(authorized_keys_path, public_key)
def main(args):
    """Train, validate and test an MNIST classifier, logging metrics through ``LoggerX``.

    The model is chosen by ``args.model`` (``'linear'``, ``'neuralnet'``,
    anything else gives ``ConvNet``).  Validation and checkpoint saving run
    after each epoch only when ``args.train_split < 1``.  For testing, the
    weights are loaded from ``logger.best_path``.

    Also sets ``args.use_cuda``.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed source.
    """
    # reproducibility
    # need to seed numpy/torch random number generators
    if args.seed is not None:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
    # need directory with checkpoint files to recover previously trained models
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    # NOTE(review): plain string concatenation -- args.checkpoint presumably
    # has to end with a path separator for the file to land inside it.
    checkpoint_file = args.checkpoint + args.model + str(datetime.now())[:-10]

    # decide which device to use; assumes at most one GPU is available
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.use_cuda else "cpu")

    # decide if we're using a validation set;
    # if not, don't evaluate at end of epochs
    evaluate = args.train_split < 1.

    # prep data loaders
    if args.train_split == 1:
        train_loader, _, test_loader = prepare_data(args)
    else:
        train_loader, val_loader, test_loader = prepare_data(args)

    # build model
    if args.model == 'linear':
        model = Softmax().to(device)
    elif args.model == 'neuralnet':
        model = TwoLayer().to(device)
    else:
        model = ConvNet().to(device)

    # build optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 amsgrad=args.amsgrad)

    # setup validation metrics we want to track for tracking best model over training run
    best_val_loss = float('inf')
    best_val_acc = 0

    # set up tensorboard logger
    logger = LoggerX('test_mnist', 'mnist_data', 25)

    # loop over epochs
    for epoch in range(args.epochs):
        print('\n================== TRAINING ==================')
        model.train()  # set model to training mode
        # set up training metrics we want to track
        # NOTE(review): correct and train_num are not used below.
        correct = 0
        train_num = len(train_loader.sampler)

        # metrics from logger
        model_metrics = CalculateMetrics(batch_size=args.batch_size,
                                         batches_per_epoch=len(train_loader))

        for ix, (img, label
                 ) in enumerate(train_loader):  # iterate over training batches
            img, label = img.to(device), label.to(
                device)  # get data, send to gpu if needed

            optimizer.zero_grad(
            )  # clear parameter gradients from previous training update
            output = model(img)  # forward pass
            loss = F.cross_entropy(output, label)  # calculate network loss
            loss.backward()  # backward pass
            optimizer.step(
            )  # take an optimization step to update model's parameters

            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max logit
            # correct += pred.eq(label.view_as(pred)).sum().item() # add to running total of hits

            # convert this data to binary for the sake of testing the metrics functionality
            # (in place: values below 5 become 0, the remaining positive values become 1)
            label[label < 5] = 0
            label[label > 0] = 1

            pred[pred < 5] = 0
            pred[pred > 0] = 1
            ######

            scores_dict = model_metrics.update_scores(label, pred)

            if ix % args.log_interval == 0:
                # log the metrics to tensorboard X, track best model according to current weighted average accuracy
                logger.log(model,
                           optimizer,
                           loss.item(),
                           track_score=scores_dict['weighted_acc'] /
                           model_metrics.bn,
                           scores_dict=scores_dict,
                           epoch=epoch,
                           bn=model_metrics.bn,
                           batches_per_epoch=model_metrics.batches_per_epoch)

                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, model_metrics.bn, model_metrics.batches_per_epoch,
                    (model_metrics.bn / model_metrics.batches_per_epoch) * 100,
                    loss.item()))

        # print whole epoch's training accuracy; useful for monitoring overfitting
        print('Train Accuracy: ({:.0f}%)'.format(model_metrics.w_accuracy *
                                                 100))

        if evaluate:
            print('\n================== VALIDATION ==================')
            model.eval()  # set model to evaluate mode

            # set up validation metrics we want to track
            val_loss = 0.
            val_correct = 0
            val_num = len(val_loader.sampler)

            # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
            with torch.no_grad():
                # loop over validation batches
                for img, label in val_loader:
                    img, label = img.to(device), label.to(
                        device)  # get data, send to gpu if needed
                    output = model(img)  # forward pass

                    # sum up batch loss
                    val_loss += F.cross_entropy(output,
                                                label,
                                                size_average=False).item()

                    # monitor for accuracy
                    pred = output.max(
                        1, keepdim=True)[1]  # get the index of the max logit
                    val_correct += pred.eq(
                        label.view_as(pred)).sum().item()  # add to total hits

            # update current evaluation metrics
            val_loss /= val_num
            val_acc = 100. * val_correct / val_num
            print(
                '\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
                .format(val_loss, val_correct, val_num, val_acc))

            # check if best model according to accuracy;
            # if so, replace best metrics
            is_best = val_acc > best_val_acc
            if is_best:
                best_val_acc = val_acc
                best_val_loss = val_loss  # note this is val_loss of best model w.r.t. accuracy,
                # not the best val_loss throughout training

            # create checkpoint dictionary and save it;
            # if is_best, copy the file over to the file containing best model for this run
            state = {
                'epoch': epoch,
                'model': args.model,
                'state_dict': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'val_loss': val_loss,
                'best_val_loss': best_val_loss,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            save_checkpoint(state, is_best, checkpoint_file)

    print('\n================== TESTING ==================')
    # load best model from training run (according to validation accuracy)
    # NOTE(review): the weights come from logger.best_path, while the final
    # message below names checkpoint_file + '-best.pth.tar' -- confirm these
    # refer to the same file.
    check = torch.load(logger.best_path)
    model.load_state_dict(check['state_dict'])
    model.eval()  # set model to evaluate mode

    # set up evaluation metrics we want to track
    test_loss = 0.
    test_correct = 0
    test_num = len(test_loader.sampler)

    # NOTE(review): batches_per_epoch receives the number of test samples
    # here, but the number of batches in the training call -- confirm.
    test_metrics = CalculateMetrics(batch_size=args.batch_size,
                                    batches_per_epoch=test_num)
    # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
    with torch.no_grad():
        for img, label in test_loader:
            img, label = img.to(device), label.to(device)
            output = model(img)
            # sum up batch loss
            test_loss += F.cross_entropy(output, label,
                                         size_average=False).item()
            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max logit
            test_scores = test_metrics.update_scores(label, pred)
            # NOTE(review): test_loss is the running sum at this point, not a
            # per-batch or averaged loss.
            logger.log(model,
                       optimizer,
                       test_loss,
                       test_scores['weighted_acc'],
                       test_scores,
                       phase='test')

    test_loss /= test_num
    # NOTE(review): this subscripts the CalculateMetrics object itself, while
    # everywhere else 'weighted_acc' is read from the dict returned by
    # update_scores (test_scores) -- confirm CalculateMetrics supports this.
    print('Test set: Average loss: {:.4f}, Accuracy: ({:.0f}%)\n'.format(
        test_loss, test_metrics['weighted_acc'] * 100))

    print('Final model stored at "{}".'.format(checkpoint_file +
                                               '-best.pth.tar'))
'total_isolates': "{}{}/figures/total_isolates.png".format(save_path, drug), 'Mean_MIC_trend_with_sd': "{}{}/figures/MIC_Trend_with_SD.png".format(save_path, drug), 'error_path': "/home/rossco/Documents/web_projects/microbiology_data_portal/public/img/broken_robot.png" } if drug: if os.path.exists(save_path): if os.path.exists('{}{}/'.format(save_path, drug)): #Load previous results print(json.dumps(shred_string(data_locations))) sys.exit() else: mkdir_p("{}{}/".format(save_path, drug)) mkdir_p("{}{}/figures/".format(save_path, drug)) print( json.dumps( shred_string( create_figures(myargs, save_path, pickle_file, start_date, end_date)))) else: mkdir_p(save_path) mkdir_p("{}{}/".format(save_path, drug)) mkdir_p("{}{}/figures/".format(save_path, drug)) client = pymongo.MongoClient() extract = ExtractData(db_name=dbname, mongo_client=client) bug_data = extract.get_mic_data(organism=bug) file_name = '{}.pickle'.format(bug) extract.to_pickle(mic_data=bug_data,
def createtarball(self):
    """Create the folder that should hold the tarball and report what to do next.

    Nothing is built here: only the parent directory of
    ``self.foreostarball`` is created, and the returned message tells the
    user to produce the tarball by hand and place it at that path.
    """
    destination = self.foreostarball
    mkdir_p(os.path.dirname(destination))
    template = "making a phantom tarball is not automated, you have to make it yourself and put it in {}"
    return template.format(destination)
def cardsurl(self):
    """Check the cards inside the tarball against genproductions and return their URLs.

    The reference ("git") cards are taken from
    https://raw.githubusercontent.com/cms-sw/genproductions/ at
    ``self.genproductionscommit`` in one of two ways:

    * no card script: every entry of ``self.madgraphcards`` is fetched with
      ``wget`` and read directly;
    * ``self.madgraphcardscript`` is set: all scripts are fetched, the first
      one is made executable and run, and the cards are then read from the
      local paths given in ``self.madgraphcards``.

    ``self.cvmfstarball`` is then unpacked in a temporary directory and each
    card under ``InputCards`` is compared with its reference after
    normalization (see ``getcontents``).

    An entry of ``self.madgraphcards`` is either a single path or a pair
    ``(name in git, name in the tarball)``.

    Returns:
        str: the checked locations joined by a newline followed by ``"# "``.
        Without a card script these are the card URLs; with one, the URL of
        the first script followed by the git-side name of every card.

    Raises:
        ValueError: if the tarball contains a ``core.*`` file, lacks one of
            the expected cards, has an unexpected entry in ``InputCards``,
            or if a card differs from its reference (both versions are then
            written to ``cardcontents<suffix>`` and ``gitcardcontents<suffix>``
            in the directory ``here``).
        subprocess.CalledProcessError: if ``chmod``, the card script or
            ``tar`` exits with a non-zero status.
    """

    def getcontents(f):
        # Normalize a card so that only meaningful differences are compared:
        # strip "!" and "#" comments, surrounding whitespace and blank lines,
        # unify the spacing around "=", and ignore "define p"/"define j" lines.
        kept = []
        for line in f:
            line = line.split("!")[0]
            line = line.split("#")[0]
            line = line.strip()
            line = re.sub(" *= *", " = ", line)
            if not line:
                continue
            if line.startswith(("define p = ", "define j = ")):
                continue
            kept.append(line + "\n")
        return "".join(kept)

    # NOTE(review): pairs are recognized purely by len(...) == 2, so a
    # two-character path string would be mistaken for a pair.
    def gitname(card):
        return card[0] if len(card) == 2 else card

    def tarballname(card):
        return card[1] if len(card) == 2 else card

    baseurl = "https://raw.githubusercontent.com/cms-sw/genproductions/"
    # Decide the mode once so that fetching and the final return value can
    # never disagree (the two used to be tested differently: "is None" when
    # fetching, truthiness when returning).
    usescript = bool(self.madgraphcardscript)

    gitcardcontents = []
    if not usescript:
        cardurls = tuple(
            os.path.join(
                baseurl,
                self.genproductionscommit,
                gitname(card).replace(genproductions + "/", ""),
            )
            for card in self.madgraphcards
        )
        with cdtemp():
            for cardurl in cardurls:
                wget(cardurl)
                with open(os.path.basename(cardurl)) as f:
                    gitcardcontents.append(getcontents(f))
    else:
        scripturls = tuple(
            os.path.join(
                baseurl,
                self.genproductionscommit,
                script.replace(genproductions + "/", ""),
            )
            for script in self.madgraphcardscript
        )
        mainscript = os.path.basename(scripturls[0])
        with cdtemp():
            wget(scripturls[0])
            # The remaining scripts are placed at the same position relative
            # to the first one as they have in the repository.
            for url in scripturls[1:]:
                relpath = os.path.relpath(os.path.dirname(url), os.path.dirname(scripturls[0]))
                assert ".." not in relpath, relpath
                mkdir_p(relpath)
                with cd(relpath):
                    wget(url)
            subprocess.check_call(["chmod", "u+x", mainscript])
            try:
                subprocess.check_output(["./" + mainscript], stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError as e:
                # Show what the script printed before propagating the failure.
                print(e.output)
                raise
            for card in self.madgraphcards:
                with open(gitname(card)) as f:
                    gitcardcontents.append(getcontents(f))

    with cdtemp():
        subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
        if glob.glob("core.*"):
            raise ValueError("There is a core dump in the tarball\n{}".format(self))
        cardnamesintarball = tuple(
            os.path.join("InputCards", os.path.basename(tarballname(card)))
            for card in self.madgraphcards
        )
        cardcontents = []
        for cardnameintarball in cardnamesintarball:
            try:
                with open(cardnameintarball) as f:
                    cardcontents.append(getcontents(f))
            except IOError:
                raise ValueError("no "+cardnameintarball+" in the tarball\n{}".format(self))
        # Anything else in InputCards must be a .tar.gz or explicitly allowed.
        for thing in glob.iglob("InputCards/*"):
            if (
                thing not in cardnamesintarball
                and not thing.endswith(".tar.gz")
                and thing not in self.otherthingsininputcards
            ):
                raise ValueError("unknown thing "+thing+" in InputCards\n{}".format(self))

    # Both content lists hold one entry per card, in the order of
    # self.madgraphcards, so they can be walked in lockstep.
    for name, cc, gcc in zip(cardnamesintarball, cardcontents, gitcardcontents):
        _, suffix = os.path.splitext(os.path.basename(name))
        if cc != gcc:
            # Leave both versions behind so the user can diff them.
            with cd(here):
                with open("cardcontents"+suffix, "w") as f:
                    f.write(cc)
                with open("gitcardcontents"+suffix, "w") as f:
                    f.write(gcc)
            raise ValueError(name + " in tarball != " + name + " in git\n{}\nSee ./cardcontents{} and ./gitcardcontents{}".format(self, suffix, suffix))

    if usescript:
        # Join the git-side names: joining the raw entries raised TypeError
        # whenever a card was given as a (git name, tarball name) pair.
        return "\n# ".join((scripturls[0],) + tuple(gitname(card) for card in self.madgraphcards))
    return "\n# ".join(cardurls)