@classmethod
def __get_eigenface_file_from_image(cls, model_file, image_file, size):
    # Load the trained model and project the image into eigenface space.
    model = helpers.load(model_file)
    im = helpers.get_nparray_from_img(image_file, size)
    eigenface = model.transform([im])
    # Persist the projection to a temporary file and return its path.
    eigenface_file = helpers.get_temp_filename()
    helpers.dump(eigenface, eigenface_file, compress_level=3)
    return eigenface_file
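# Hedged, self-contained sketch (added, not from the source): a minimal
# stand-in for the load/transform round trip above, assuming the model is a
# scikit-learn PCA, which model.transform([im]) suggests. Names, shapes and
# component counts are illustrative only.
import numpy as np
from sklearn.decomposition import PCA

faces = np.random.rand(20, 320 * 243)   # 20 flattened grayscale images
pca = PCA(n_components=10).fit(faces)
projection = pca.transform(faces[:1])    # analogous to model.transform([im])
print(projection.shape)                  # (1, 10)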
def save(self, iteration):

    # record this iteration's coefficient values
    self.history[iteration] = self.values

    # dump history: rewrite the coefficients file with all iterations so far
    helpers.delete(self.coefficients_file)
    for i in range(iteration + 1):
        helpers.dump(self.history[i], '%f', 'coefficients', self.coefficients_file, i)
def main(argv): """ Command line example: python builddataset.py -i 'img/dir' -o 'out/dir' -w <img width> -h <img height> -n <number of components for the model> """ indir = '.\dataset' outdir = '.\output' w = 320 h = 243 nc = 150 try: opts, args = getopt.getopt( argv, 'i:o:w:h:n:', ['img_dir=', 'out_dir=', 'width=', 'height=', 'n_components=']) except getopt.GetoptError: usage() sys.exit(2) for opt, arg in opts: if opt in ('-h', '--height'): h = int(arg) elif opt in ('-w', '--width'): w = int(arg) elif opt in ('-o', '--out_dir'): outdir = arg elif opt in ('-i', '--img_dir'): indir = arg elif opt in ('-n', '--n_components'): nc = int(arg) else: usage() sys.exit(2) files = [join(indir, f) for f in listdir(indir) if isfile(join(indir, f))] data = list() data_labels = list() for file in files: data.append(helpers.get_nparray_from_img(file, (w, h))) data_labels.append( basename(file)) # temporary while we do not have more info model = helpers.get_model(n_components=nc, data=data) helpers.dump(model, join(outdir, MODEL_FILE), compress_level=3) with open(join(outdir, DATASET_FILE), 'wb') as f: for index, eigenface in enumerate(model.transform(data)): f.write('"{}","{}","{}"\n'.format(index, data_labels[index], ' '.join(map(str, eigenface)))) print '' print 'Created {} and {} in directory {}.'.format(MODEL_FILE, DATASET_FILE, outdir) print 'PCA Explained Variance Ratio: {}'.format( sum(model.explained_variance_ratio_)) print 'Obs.: if this number is not satisfactory try increasing the number of components' print ''
def save(self, iteration):

    origins = ['measured', 'infered']

    # initialize history
    if len(self.history) == 0:
        for origin in origins:
            self.history['mean_fine_' + origin] = {}
            self.history['mean_coarse_' + origin] = {}
            self.history['variance_fine_' + origin] = {}
            self.history['variance_coarse_' + origin] = {}
            self.history['covariance_' + origin] = {}
            self.history['correlation_' + origin] = {}
            self.history['coefficients_' + origin] = {}
            self.history['mean_diff_' + origin] = {}
            self.history['variance_diff_' + origin] = {}
            self.history['mean_diff_opt_' + origin] = {}
            self.history['variance_diff_opt_' + origin] = {}

    # append history
    for origin in origins:
        self.history['mean_fine_' + origin][iteration] = self.mean[self.FINE][origin]
        self.history['mean_coarse_' + origin][iteration] = self.mean[self.COARSE][origin]
        self.history['variance_fine_' + origin][iteration] = self.variance[self.FINE][origin]
        self.history['variance_coarse_' + origin][iteration] = self.variance[self.COARSE][origin]
        self.history['covariance_' + origin][iteration] = self.covariance[origin]
        self.history['correlation_' + origin][iteration] = self.correlation[origin]
        self.history['coefficients_' + origin][iteration] = self.coefficients.values
        self.history['mean_diff_' + origin][iteration] = self.mean_diff[origin]
        self.history['variance_diff_' + origin][iteration] = self.variance_diff[origin]
        self.history['mean_diff_opt_' + origin][iteration] = self.mean_diff_opt[origin]
        self.history['variance_diff_opt_' + origin][iteration] = self.variance_diff_opt[origin]

    # dump history
    helpers.delete(self.indicators_file)
    for variable in self.history:
        for i in range(iteration + 1):
            helpers.dump(self.history[variable][i], '%f', variable, self.indicators_file, i)
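# Hedged, self-contained sketch (added, not from the source): the
# accumulate-then-rewrite pattern shared by the save(iteration) methods in
# this section, reduced to a toy that runs anywhere. The in-memory history
# dict and the JSON output file are stand-ins for self.history and
# helpers.dump.
import json

history = {}

def save_iteration(iteration, value, path='indicators.json'):
    history[iteration] = value            # append to in-memory history
    with open(path, 'w') as f:            # rewrite the file from scratch
        json.dump({i: history[i] for i in range(iteration + 1)}, f)

for it, v in enumerate([0.5, 0.25, 0.125]):
    save_iteration(it, v)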
def main(argv): """ Command line example: python builddataset.py -i 'img/dir' -o 'out/dir' -w <img width> -h <img height> -n <number of components for the model> """ indir = '.\dataset' outdir = '.\output' w = 320 h = 243 nc = 150 try: opts, args = getopt.getopt(argv, 'i:o:w:h:n:', ['img_dir=', 'out_dir=', 'width=', 'height=', 'n_components=']) except getopt.GetoptError: usage() sys.exit(2) for opt, arg in opts: if opt in ('-h', '--height'): h = int(arg) elif opt in ('-w', '--width'): w = int(arg) elif opt in ('-o', '--out_dir'): outdir = arg elif opt in ('-i', '--img_dir'): indir = arg elif opt in ('-n', '--n_components'): nc = int(arg) else: usage() sys.exit(2) files = [join(indir, f) for f in listdir(indir) if isfile(join(indir, f))] data = list() data_labels = list() for file in files: data.append(helpers.get_nparray_from_img(file, (w, h))) data_labels.append(basename(file)) # temporary while we do not have more info model = helpers.get_model(n_components=nc, data=data) helpers.dump(model, join(outdir, MODEL_FILE), compress_level=3) with open(join(outdir, DATASET_FILE), 'wb') as f: for index, eigenface in enumerate(model.transform(data)): f.write('"{}","{}","{}"\n'.format(index, data_labels[index], ' '.join(map(str, eigenface)))) print '' print 'Created {} and {} in directory {}.'.format(MODEL_FILE, DATASET_FILE, outdir) print 'PCA Explained Variance Ratio: {}'.format(sum(model.explained_variance_ratio_)) print 'Obs.: if this number is not satisfactory try increasing the number of components' print ''
def init(rootPassword=None, username=None, password=None):
    # assumes module-level: import os, shutil, sys
    try:
        LoadRepo()
        print('Already a repository')
        sys.exit(0)
    except Exception:
        # no repository yet: (re)create the .gaea layout from scratch
        gaea_dir = os.path.join(globals.ROOT, '.gaea')
        if os.path.exists(gaea_dir):
            shutil.rmtree(gaea_dir)
        os.makedirs(os.path.join(gaea_dir, 'snaps'))
        os.makedirs(os.path.join(gaea_dir, 'peers'))
        dataMap = {
            'HEAD': '0',
            'latestId': '0',
            'author': '',
            'email': '',
            'remote': {},
        }
        helpers.dump(dataMap)
        initPeerDirec(rootPassword, username, password)
        print("new repo created")
def overview(req):
    posts = h.postings(req)
    for post in posts.latest():
        print(post.date(), post.title())
    return h.dump(req)
def SKL_gen(params, methodname, uploadfile):
    TrainDF, _ = dataset_gen(datatype=0)
    if DROP_NEGATIVE_W:
        TrainDF = TrainDF[TrainDF["weightModified"] > 0]
        print((TrainDF["weightModified"] < 0).any())  # should print False
    train_data = np.array(TrainDF.iloc[:, :11], dtype="float64")
    labels = np.array(TrainDF.iloc[:, 14], dtype="float64")
    sample_weight = np.array(TrainDF.iloc[:, 12], dtype="float64")

    if methodname == "SKL_BDT":
        classifier = GradientBoostingClassifier(n_estimators=int(params[0]),
                                                max_depth=int(params[1]),
                                                learning_rate=float(params[2]),
                                                random_state=1,
                                                verbose=1)
    elif methodname == "SKL_MLP":
        classifier = MLPClassifier(random_state=1,
                                   verbose=True,
                                   early_stopping=True,
                                   n_iter_no_change=10,
                                   tol=0.001,
                                   solver=params[0],
                                   activation=params[1],
                                   batch_size=int(params[2]),
                                   learning_rate_init=float(params[3]),
                                   alpha=float(params[4]),
                                   hidden_layer_sizes=[int(el) for el in params[5].split(":")])
    else:
        raise ValueError(f"No method name {methodname}")

    t = time.time()
    if methodname == "SKL_BDT" and USE_W:
        # only the BDT branch uses per-sample weights here
        classifier.fit(train_data, labels, sample_weight=sample_weight)
    else:
        classifier.fit(train_data, labels)
    dump(methodname, uploadfile, "time:" + str(round(time.time() - t)) + "\n")

    with open(f"models/{methodname}/{uploadfile}.pickle", "wb") as f:
        pickle.dump(classifier, f)
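# Hedged, self-contained sketch (added, not from the source): the same
# train-then-pickle flow as SKL_gen, on synthetic data so it runs without
# the repo's dataset_gen. Hyperparameter values are placeholders.
import pickle
import time
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

X = np.random.rand(200, 11)
y = (X[:, 0] > 0.5).astype(float)
clf = GradientBoostingClassifier(n_estimators=50, max_depth=3,
                                 learning_rate=0.1, random_state=1)
t = time.time()
clf.fit(X, y)
print("time:", round(time.time() - t))
with open("demo_model.pickle", "wb") as f:
    pickle.dump(clf, f)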
def save(self, iteration):

    # initialize history
    if len(self.history) == 0:
        self.history['computed'] = {}
        self.history['additional'] = {}
        self.history['combined'] = {}

    # append history
    self.history['computed'][iteration] = self.counts.computed
    self.history['additional'][iteration] = self.counts.additional
    self.history['combined'][iteration] = self.counts.combined

    # dump history
    helpers.delete(self.samples_file)
    for i in range(iteration + 1):
        for entry in self.history:
            helpers.dump(self.history[entry][i], '%d', entry, self.samples_file, i)
def process_data(data, dumpname=DATANAME):
    """ Clean and format the data """
    print("Processing data...")
    names, units, recipes = data
    db = []
    len_recipes = len(recipes)
    for i, row in enumerate(recipes):
        if i % 100 == 0:
            print(f"Processing data... {i}/{len_recipes}")
        for word in row.split(" "):
            word = word.strip().strip(string.punctuation + " ")
            if len(word) > 0:
                # replace known ingredient names with a placeholder token
                if word in names or check_contains(word, names):
                    db.append("INGREDIENT")
                else:
                    db.append(word.lower())
        db.append("\n")  # mark the end of each recipe
    hp.dump(db, dumpname)
    print(f"Processed data! Saved as {dumpname}\n")
    return db
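# Hedged, self-contained sketch (added, not from the source): the
# tokenize-and-tag idea used by process_data, on toy data. The names set and
# recipe text are made up; check_contains is replaced by a plain lookup.
import string

names = {"flour", "sugar"}
recipes = ["Mix flour, sugar and water."]
db = []
for row in recipes:
    for word in row.split(" "):
        word = word.strip().strip(string.punctuation + " ")
        if word:
            db.append("INGREDIENT" if word.lower() in names else word.lower())
    db.append("\n")
print(db)  # ['mix', 'INGREDIENT', 'INGREDIENT', 'and', 'water', '\n']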
def pullAll():
    peerInfo = globals.PEERINFO['peers']
    abort = False
    print(list(peerInfo.keys()))
    for index, ip in enumerate(peerInfo.keys()):
        # resume support: skip peers already pulled in an interrupted run
        if 'pull' in globals.REPOINFO.keys():
            if index <= globals.REPOINFO['pull']:
                print("continuing " + str(index))
                continue
        # try:
        globals.REPOINFO['pull'] = index
        helpers.dump(globals.REPOINFO)
        pull(ip, peerInfo[ip]['path'], peerInfo[ip]['username'], peerInfo[ip]['password'])
        # except Exception as e:
        #     print(e)
        #     abort = True
        #     break
    if not abort:
        print("deleting")
        del globals.REPOINFO['pull']
        helpers.dump(globals.REPOINFO)
def save(self, iteration):

    # initialize history
    if len(self.history) == 0:
        self.history['relative_error'] = {}
        self.history['total_relative_error'] = {}
        self.history['total_error'] = {}

    # append history
    self.history['relative_error'][iteration] = self.relative_error if self.available else float('NaN')
    self.history['total_relative_error'][iteration] = self.total_relative_error if self.available else float('NaN')
    self.history['total_error'][iteration] = self.total_error if self.available else float('NaN')

    # dump history
    helpers.delete(self.errors_file)
    for variable in self.history:
        for i in range(iteration + 1):
            helpers.dump(self.history[variable][i], '%f', variable, self.errors_file, i)
def do_dump(self, arg=None):
    '''Dump object.'''
    if not arg:
        logger.warning('No object given.')
        return
    if arg == 'all':
        for obj in _DUMP_OBJECTS[1:]:
            print(f'ACTIVE {obj.upper()}:')
            self.do_dump(obj)
        return
    if arg == 'selection':
        self.do_dump_selection()
        return
    # look the object up on self, then on the sdcard and session sub-objects
    if hasattr(self, arg):
        dump(getattr(self, arg))
    elif hasattr(self.sdcard, arg):
        dump(getattr(self.sdcard, arg))
    elif hasattr(self.session, arg):
        dump(getattr(self.session, arg))
    else:
        logger.warning(f'Object {arg} unknown.')
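# Hedged, self-contained sketch (added, not from the source): do_dump reads
# like a cmd.Cmd command handler; this toy shell shows the dispatch pattern.
# The Shell class and its greeting attribute are hypothetical.
import cmd

class Shell(cmd.Cmd):
    prompt = '> '
    greeting = 'hello'

    def do_dump(self, arg):
        '''Dump the named attribute.'''
        if arg and hasattr(self, arg):
            print(getattr(self, arg))
        else:
            print(f'Object {arg} unknown.')

    def do_EOF(self, arg):
        return True

# Shell().cmdloop()  # then type: dump greeting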
def setRemote(name, address):
    globals.REPOINFO['remote'].update({name: address})
    helpers.dump(globals.REPOINFO)
def setEmail(email):
    globals.REPOINFO['email'] = email
    helpers.dump(globals.REPOINFO)
def setAuthor(author):
    globals.REPOINFO['author'] = author
    helpers.dump(globals.REPOINFO)
def TMVA_gen(params, methodname, uploadfile):
    fout = root.TFile(f"TMVA_outputs/{methodname}/{uploadfile}.root", "RECREATE")
    # if methodname == "TMVA_MLP":
    #     PARAMETERS.append("Transformations=G")
    #     # PARAMETERS.append("Transformations=I;D;P;G,D")
    factory = root.TMVA.Factory(f"TMVAClassification_{methodname}_{uploadfile}",
                                fout, ":".join(PARAMETERS))
    dataloader = root.TMVA.DataLoader("models/dataloader")

    SFile = root.TFile("source/" + SFILENAME)
    STree = SFile.Get(TREENAME)
    dataloader.AddSignalTree(STree)

    # keep the background TFiles alive so their trees stay valid
    background_names = ["ZgQCD.root", "ttgamma.root", "WenuDataDriven.root",
                        "Wgam.root", "WgamEWK.root"]
    background_files = [root.TFile("source/" + name) for name in background_names]
    for bfile in background_files:
        dataloader.AddBackgroundTree(bfile.Get(TREENAME))

    for variable in ("mJJ", "deltaYJJ", "metPt", "ptBalance", "subleadJetEta",
                     "leadJetPt", "photonEta", "ptBalanceRed", "nJets",
                     "sinDeltaPhiJJOver2", "deltaYJPh"):
        dataloader.AddVariable(variable, "F")
    dataloader.AddSpectator("weightModified", "F")

    if USE_W:
        dataloader.SetSignalWeightExpression("weightModified")
        dataloader.SetBackgroundWeightExpression("weightModified")
    if DROP_NEGATIVE_W:
        cut = root.TCut("(nJets > 1)&&(nLeptons == 0)&&(weightModified > 0)")
    else:
        cut = root.TCut("(nJets > 1)&&(nLeptons == 0)")
    dataloader.PrepareTrainingAndTestTree(cut, ":".join(["nTrain_Signal=0",
                                                         "nTrain_Background=0",
                                                         "SplitMode=Random",
                                                         "NormMode=NumEvents",
                                                         "!V"]))

    model = root.TMVA.Types.kBDT if methodname == "TMVA_BDT" else root.TMVA.Types.kMLP
    if methodname == "TMVA_MLP":
        settings = ["!H", "!V", "VarTransform=G", "NCycles=400", "BPMode=batch",
                    "TestRate=5", "!UseRegulator", "NeuronType=sigmoid",
                    "ConvergenceImprove=0.0025", "ConvergenceTests=5",
                    f"LearningRate={params[0]}",
                    f"HiddenLayers={params[1]}",
                    f"BatchSize={params[2]}"]
    elif methodname == "TMVA_BDT":
        settings = ["!H", "!V", "MinNodeSize=5", "BoostType=Grad",
                    f"NTrees={params[0]}",
                    f"nCuts={params[1]}",
                    f"MaxDepth={params[2]}",
                    f"shrinkage={params[3]}"]
    else:
        raise ValueError(f"No method name {methodname}")
    method = factory.BookMethod(dataloader, model, methodname, ":".join(settings))

    t = time.time()
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    fout.Close()
    dump(methodname, uploadfile, "time:" + str(round(time.time() - t)) + "\n")
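# Hedged usage sketch (added, not from the source): illustrative parameter
# vectors for the two branches above. The hyperparameter values and the
# upload tags are placeholders, and running this requires the ROOT files
# under source/, so the calls are left commented out.
# TMVA_gen(["0.02", "N,N", "32"], "TMVA_MLP", "mlp_run1")
# TMVA_gen(["400", "40", "3", "0.1"], "TMVA_BDT", "bdt_run1")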