def all_varianceplot_T(ax, samplesize, pH, salttype='nom', Trange=[273, 400],
                       fixupdate={}, fn_preamble='', cm='Blues'):
    """
    Plot the total variance in power supply in log space given by the
    parameter space as a histogram. Do so for a given pH at variable T.
    """
    nx, nPS = Sampler.nominal_T(pH, salttype=salttype, Templims=Trange,
                                zeroed=False, fixupdate=fixupdate,
                                fn_preamble=fn_preamble)
    nPS = np.log10(nPS)
    xvals, PS = Sampler.sampleT(samplesize, pH=pH, Trange=Trange,
                                salttype=salttype, fixupdate=fixupdate,
                                fn_preamble=fn_preamble)
    _logy = np.log10(PS)
    # drop samples whose power supply was zeroed out (log10 value of -50)
    zeroes = (_logy != -50.)
    logy = _logy[zeroes]
    x = xvals[zeroes]

    # deviation of each sample from the nominal power supply at the nearest T
    logvariance = np.empty(len(logy))
    xreds = []
    vreds = []
    for ic, (xi, logyi) in enumerate(zip(x, logy)):
        close_nom = int((xi - Trange[0]) / (Trange[1] - Trange[0]) * len(nx))
        logvariance[ic] = logyi - nPS[close_nom]
        # flag extreme outliers (more than 10 orders of magnitude above nominal)
        if logvariance[ic] > 10:
            vreds.append(0)
            xreds.append(xi)

    hb = ax.hist2d(x, logvariance, bins=[45, 45], range=[Trange, [-4, 4]],
                   cmap=cm, edgecolor=None, vmin=0, vmax=samplesize / 200)
    hr = ax.hist2d(xreds, vreds, bins=[45, 45], range=[Trange, [-4, 4]],
                   cmap=cmapper.r2a(), edgecolor=None, vmin=0,
                   vmax=samplesize / 100)
    return ax, hb, hr
def all_varianceplot_pH(ax, samplesize, T, salttype='nom', pHrange=[8, 12],
                        fixupdate={}, fn_preamble='', cm='Blues'):
    """
    Plot the total variance in power supply in log space given by the
    parameter space as a histogram. Do so for a given T at variable pH.
    """
    nx, nPS = Sampler.nominal_pH(T, salttype=salttype, pHlims=pHrange,
                                 zeroed=False, fixupdate=fixupdate,
                                 fn_preamble=fn_preamble)
    nPS = np.log10(nPS)
    xvals, PS = Sampler.samplepH(samplesize, Temp=T, pHrange=pHrange,
                                 salttype=salttype, fixupdate=fixupdate,
                                 fn_preamble=fn_preamble)
    _logy = np.log10(PS)
    # drop samples whose power supply was zeroed out (log10 value of -50)
    zeroes = (_logy != -50.)
    logy = _logy[zeroes]
    x = xvals[zeroes]

    # deviation of each sample from the nominal power supply at the nearest pH
    logvariance = np.empty(len(logy))
    for ic, (xi, logyi) in enumerate(zip(x, logy)):
        close_nom = int((xi - pHrange[0]) / (pHrange[1] - pHrange[0]) * len(nx))
        logvariance[ic] = logyi - nPS[close_nom]

    ax.hist2d(x, logvariance, bins=[45, 45], range=[pHrange, [-4, 4]],
              cmap=cm, edgecolor=None)
    return ax
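# Usage sketch for the two variance-histogram helpers above: a minimal
# example, assuming the Sampler module they rely on is importable and that
# the sample size, pH, and temperature below are reasonable choices (none of
# these values come from the original source).
import matplotlib.pyplot as plt

fig, (axT, axpH) = plt.subplots(1, 2, figsize=(10, 4))
axT, hb, hr = all_varianceplot_T(axT, samplesize=10000, pH=9)
axpH = all_varianceplot_pH(axpH, samplesize=10000, T=320, cm='Purples')
axT.set_xlabel('Temperature [K]')
axpH.set_xlabel('pH')
fig.savefig('variance_histograms.png')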
def __init__(self, config):
    self.config = self._Parameters(config)
    # dispatch on the configured oversampling strategy
    if self.config.oversample_method == 'SMOTE_imlearn':
        self.sampler = Sampler.SMOTEImlearn(config)
    elif self.config.oversample_method == 'naive':
        self.sampler = Sampler.Naive(config)
    elif self.config.oversample_method == 'ADASYN':
        self.sampler = Sampler.ADASYNImlearn(config)
    elif self.config.oversample_method == 'pySMOTE':
        self.sampler = Sampler.PySmote(config)
    elif self.config.oversample_method == 'RUS':
        self.sampler = Sampler.RUSImlearn(config)
    else:
        # fail loudly instead of leaving self.sampler unset
        raise ValueError('Unknown oversample_method: %s'
                         % self.config.oversample_method)
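# Minimal usage sketch for the dispatcher above, assuming only that the
# enclosing class takes a config exposing an `oversample_method` attribute.
# The class name `Resampler` and the fit_resample call are hypothetical
# stand-ins for whatever the surrounding project actually provides.
from types import SimpleNamespace

config = SimpleNamespace(oversample_method='SMOTE_imlearn')
# resampler = Resampler(config)                       # hypothetical class name
# X_res, y_res = resampler.sampler.fit_resample(X, y) # imblearn-style API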
def setup():
    glEnable(GL_BLEND)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    glEnable(GL_MULTISAMPLE)
    samp = Sampler()
    samp.bind(0)
    glClearColor(0.2, 0.4, 0.6, 0)
    prog = Program("vs.txt", "fs.txt")
    prog.use()
    globs.car = Car()
def model_eval(nspins, alpha, op, nsweeps, model_file, state_file=None,
               opname='energy'):
    # load a trained NQS wavefunction and estimate the operator's
    # expectation value by Monte Carlo sampling
    wf = Nqs.Nqs(nspins, alpha)
    wf.load_parameters(model_file)
    samp = Sampler.Sampler(wf, op, opname=opname)
    estav = samp.run(nsweeps, state_file)
    return estav
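# Example call, assuming a Hamiltonian operator object and a parameter file
# saved by the surrounding project; every name and value here is a
# placeholder, not taken from the source.
# ham = Heisenberg1d(40)  # hypothetical operator constructor
# energy = model_eval(nspins=40, alpha=4, op=ham, nsweeps=10000,
#                     model_file='wf_40_4.npz', opname='energy')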
def update_vector(self, wf, init_state, batch_size, gamma, step, therm=False):
    self.nvar = wf.N + wf.M + wf.N * wf.M
    wf.init_theta(init_state)
    samp = Sampler.Sampler(wf, self.h, mag0=self.m)
    # if init_state = np.array([]), Sampler.run() will do init_random_state()
    samp.state = np.copy(init_state)
    if therm:
        samp.thermalize(batch_size)  # thermalizing mutates samp.state

    results = Parallel(n_jobs=self.parallel_cores)(
        delayed(get_sampler)(samp, self) for i in range(batch_size))
    # each result carries (elocal, deriv_vector, state); stack them over the
    # batch dimension for statistical averaging
    elocals = np.array([i[0] for i in results])
    deriv_vectors = np.array([i[1] for i in results])
    states = np.array([i[2] for i in results])

    # v = S^(-1)*F; W(p+1) = W(p) - gamma(p)*v, where W are the wf variables.
    # Rather than inverting S, solve S*v = F with conjugate gradients;
    # self.cov_operator supplies the action of S on v.
    # (an earlier version built this explicitly as a LinearOperator:
    #  LinearOperator((wf.N, wf.N), dtype=complex,
    #                 matvec=lambda v: self.cov_operator(v, deriv_vectors, step)))
    forces = self.get_forces(elocals, deriv_vectors)
    vec, info = cg(self.cov_operator(deriv_vectors, step), forces)
    updates = -gamma * vec
    self.step_count += batch_size
    return updates, samp.state, np.mean(elocals) / self.nspins
def drawStuff(self, background=False):
    Samp = Sampler()
    if background:
        Points = [-2, -1, 4, -1, 4, 1, -2, 1]
    else:
        Points = [-1, -1, 1, -1, 1, 1, -1, 1]
    Indicies = [0, 1, 2, 0, 2, 3]
    for i in range(len(Points)):
        Points[i] = Points[i] * self.scale
    Array = array.array("f", Points)
    ArrayBuffer = Buffer(Array)
    IndicieArray = array.array("I", Indicies)
    IndicieBuffer = Buffer(IndicieArray)
    TextureBuffer = Buffer(array.array("f", [0, 0, 1, 0, 1, 1, 0, 1]))
    tmp = array.array("I", [0])
    glGenVertexArrays(1, tmp)
    vao = tmp[0]
    glBindVertexArray(vao)
    ArrayBuffer.bind(GL_ARRAY_BUFFER)
    IndicieBuffer.bind(GL_ELEMENT_ARRAY_BUFFER)
    glEnableVertexAttribArray(0)
    glVertexAttribPointer(0, 2, GL_FLOAT, False, 2 * 4, 0)
    TextureBuffer.bind(GL_ARRAY_BUFFER)
    glEnableVertexAttribArray(1)
    glVertexAttribPointer(1, 2, GL_FLOAT, False, 2 * 4, 0)
    glBindVertexArray(0)
    self.samp = Samp
    self.vao = vao
def check2(checkpoint_path):
    args = {'dataset': 'mcpas',
            'tcr_encoding_model': 'LSTM',
            'use_alpha': False,
            'embedding_dim': 10,
            'lstm_dim': 500,
            'encoding_dim': 'none',
            'dropout': 0.1}
    hparams = Namespace(**args)
    model = load_model(hparams, checkpoint_path)
    train_pickle = model.dataset + '_train_samples.pickle'
    test_pickle = model.dataset + '_test_samples.pickle'
    datafiles = train_pickle, test_pickle
    for pep in ['LPRRSGAAGA', 'GILGFVFTL', 'NLVPMVATV', 'GLCTLVAML',
                'SSYRRPVGI']:
        spb(model, datafiles, peptide=pep)
    d_peps = list(Sampler.get_diabetes_peptides('data/McPAS-TCR.csv'))
    print(d_peps)
    for pep in d_peps:
        try:
            print(pep)
            spb(model, datafiles, peptide=pep)
        except ValueError:
            pass
def addNode(self, node, neighbors='ball', pr=None, anchor=False, anchors=None):
    # extend all neighbor nodes within a boundary ball
    # note: the boundary is set to the l2-norm while the real case is the
    # l1-norm in the subspace
    if pr is None:
        pr = BOUNDARY
    if neighbors == 'ball':
        for v in self.nodes:
            # anchor test
            isAnchor = not (anchor and
                            not Sampler.isRejected((node.getVal() + v.getVal()) / 2))
            if (np.linalg.norm(v.getVal() - node.getVal()) < pr) and isAnchor:
                if anchor:
                    anchors[node] = True
                    anchors[v] = True
                    anchors[node.trans_pair] = True
                    anchors[v.trans_pair] = True
                node.extendNeighbors(v)
                v.extendNeighbors(node)
    # add all nodes as neighbors
    if neighbors == 'all':
        for v in self.nodes:
            node.extendNeighbors(v)
            v.extendNeighbors(node)
    # append node to graph
    self.nodes.append(node)
def tpot_optimization_clf(count, train_path, test_path, verbose=False):
    """
    Optimize algorithms and parameters using TPOT for Classification trees.

    :param count: int, number of samples to be generated.
    :param train_path: string, path to the dataset used for training.
    :param test_path: string, path to the dataset used for testing.
    :param verbose: bool, representing if information regarding the process should be displayed.
    """
    # Generate samples.
    if verbose:
        print("Get train samples.")
    X_train, Y_train = Sampler.generate_samples(dataset=train_path, count=count)
    if verbose:
        print("Get test samples.")
    X_test, Y_test = Sampler.generate_samples(dataset=test_path, count=count)

    tpot_config = {
        'xgboost.XGBClassifier': {
            'max_depth': [2, 3, 4, 5],
            'learning_rate': [0.02, 0.05, 0.1, 0.15, 0.2],
            'n_estimators': [10, 20, 30, 40, 50, 100, 500],
            'objective': ["reg:linear", "multi:softmax", "multi:softprob"],
            'booster': ["gbtree", "gblinear", "dart"],
            'n_jobs': [-1]
        },
        'sklearn.ensemble.RandomForestClassifier': {
            'n_estimators': [10, 20, 30, 40, 50, 100, 500],
            'criterion': ["gini", "entropy"],
            'max_features': ["auto", "sqrt", "log2"],
            'max_depth': [2, 3, 4, 5],
            'n_jobs': [-1]
        }
    }

    if verbose:
        print("Start TPOT optimization.")
    tpot = TPOTClassifier(generations=10, population_size=30, verbosity=2,
                          config_dict=tpot_config)
    tpot.fit(np.array(X_train), np.array(Y_train))
    print(tpot.score(np.array(X_test, dtype=np.float64),
                     np.array(Y_test, dtype=np.float64)))
    tpot.export('tpot_pipeline_clf.py')
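# Example invocation with placeholder dataset paths (the real paths are
# project-specific and not given in the source).
# tpot_optimization_clf(count=1000,
#                       train_path='data/train_set.csv',
#                       test_path='data/test_set.csv',
#                       verbose=True)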
def nominalline_T(ax, pH, c, Trange=[273, 400], salttype='nom', zeroed=True,
                  ls='-', fixupdate={}, fn_preamble=''):
    """
    Plot a line of nominal power supplies with temperature at a given pH
    """
    nT, nTPS = Sampler.nominal_T(pH, salttype=salttype, Templims=Trange,
                                 zeroed=zeroed, fixupdate=fixupdate,
                                 fn_preamble=fn_preamble)
    ax.plot(nT, nTPS, c=c, ls=ls)
    return ax
def nominalline_pH(ax, T, c, pHrange=[7, 12], salttype='nom', zeroed=True,
                   ls='-', fixupdate={}, fn_preamble=''):
    """
    Plot a line of nominal power supplies with pH at a given temperature
    """
    npH, npHPS = Sampler.nominal_pH(T, salttype=salttype, pHlims=pHrange,
                                    zeroed=zeroed, fixupdate=fixupdate,
                                    fn_preamble=fn_preamble)
    ax.plot(npH, npHPS, c=c, ls=ls)
    return ax
def kfold_cv_unique_datasets(clf, train_path, test_path, count=100, k_fold=5,
                             verbose=False):
    """
    Custom K-Fold validation using different datasets.

    :param clf: classifier in sklearn format.
    :param train_path: path to the dataset used for training.
    :param test_path: path to the dataset used for testing.
    :param count: int representing training count.
    :param k_fold: int representing number of k-folds.
    :param verbose: bool representing whether information should be shown.
    :return: list containing the cross validation scores.
    """
    cv_scores = []
    for i in range(k_fold):
        # Generate samples.
        if verbose:
            print("Generate training samples")
        X_train, Y_train = Sampler.generate_samples(dataset=train_path,
                                                    count=count)
        if verbose:
            print("Generate testing samples")
        X_test, Y_test = Sampler.generate_samples(dataset=test_path,
                                                  count=count)
        score = __cv_fit_predict(clf=clf, X_train=X_train, Y_train=Y_train,
                                 X_test=X_test, Y_test=Y_test, verbose=verbose)
        if verbose:
            print("(" + str(i + 1) + ") - cv: " + str(score))
        cv_scores.append(score)
    if verbose:
        print("Avg cv score = " + str(sum(cv_scores) / len(cv_scores)))
    return cv_scores
def train_model(clf, count, verbose=False):
    """
    Train a model.

    :param clf: classifier in sklearn format.
    :param count: int representing training count.
    :param verbose: bool representing whether information should be shown.
    :return: the trained classifier object.
    """
    if verbose:
        print("Generate training samples")
    X_train, Y_train = Sampler.generate_samples(dataset=Dataset.data_background,
                                                count=count)
    if verbose:
        print("Fit")
    clf.fit(X_train, Y_train)
    return clf
def n_way_one_shot_learning(clf, count, dataset, n, verbose=False,
                            interpretability=False):
    """
    Implementation of N-way one-shot learning evaluation.

    :param clf: classifier.
    :param count: int representing the number of test trials.
    :param dataset: string of the dataset.
    :param n: number of other images in the n-way one-shot test.
    :param verbose: bool representing whether information should be shown.
    :param interpretability: bool indicating if images should be saved to
        disk for interpretability reasons.
    :return: float representing the final score of the test.
    """
    prefix = str(n) + "-way one-shot learning: "
    if verbose:
        show_progressbar(i=0, max_i=count, prefix=prefix)
    if interpretability:
        __remove_interpret_folder()
    correct_count = 0
    for i in range(count):
        image_main, X, Y, alphabet = Sampler.n_way_one_shot_learning(
            dataset=dataset, n=n)
        prediction = predict(clf=clf, image_main=image_main, X=X)
        prediction_single = transform_to_signle_prediction(prediction)
        # Save images for interpretability reasons.
        __save_image_interpretability(image_main=image_main,
                                      prediction=prediction,
                                      prediction_single=prediction_single,
                                      X=X, Y=Y, i=i,
                                      correct_count=correct_count,
                                      alphabet=alphabet,
                                      interpretability=interpretability)
        if prediction_single == Y:
            correct_count += 1
        if verbose:
            show_progressbar(i=i + 1, max_i=count, prefix=prefix)
    if verbose:
        show_progressbar(i=count, max_i=count, prefix=prefix, finish=True)
    return (correct_count * 100) / count
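# End-to-end sketch tying train_model and n_way_one_shot_learning together.
# Assumptions: an sklearn-style classifier works with the predict() helper
# used above, and the Dataset module referenced by train_model also exposes
# an evaluation split; the names and counts below are illustrative only.
# from sklearn.ensemble import RandomForestClassifier
# clf = train_model(RandomForestClassifier(n_estimators=100), count=5000,
#                   verbose=True)
# score = n_way_one_shot_learning(clf, count=400,
#                                 dataset=Dataset.data_evaluation,  # hypothetical split
#                                 n=20, verbose=True)
# print("20-way one-shot score: %.1f%%" % score)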
def callback(self, realTimeFFT, rmsMethod=False):
    # slide the newest FFT frame into the rolling audio window
    self.realAudio = np.concatenate((self.realAudio[1:], [realTimeFFT]))
    accuracy = 0.5
    for phrase, sig in self.sigs.items():
        diff = Sampler.rms(self.realAudio, sig)
        # print(diff)
        if rmsMethod:
            # if diff < self.rmsAccuracy:
            #     print(phrase)  # do something
            # if phrase == 'Down' and diff < 0.8:
            #     print("RMS Down")
            if phrase == 'Left' and diff < accuracy:
                print("RMS Left")
def samplebins_pH(ax, T, samplesize, cmap, pHrange=[7, 12], salttype='nom',
                  ylims=[-30, -5], bins=[45, 45], fixupdate={}, fn_preamble=''):
    """
    Plot sampling results as a histogram with pH at a given T
    (for plotting over a nominal line)
    """
    pHvals, PS = Sampler.samplepH(samplesize, Temp=T, pHrange=pHrange,
                                  salttype=salttype, fixupdate=fixupdate,
                                  fn_preamble=fn_preamble)
    # drop samples whose power supply was zeroed out (log10 value of -50)
    zeroes = (np.log10(PS) != -50.)
    y = np.log10(PS)[zeroes]
    x = pHvals[zeroes]
    # ax[0].hexbin(x, y, gridsize=(int(pHnum/2), int(pHnum/8)), cmap=cmaps[i])
    ax.hist2d(x, y, bins=bins, range=[pHrange, ylims], cmap=cmap,
              edgecolor=None)
    return ax
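# Sketch of the intended layering, per the docstring above: the sampled power
# supplies as a 2D histogram with the nominal line drawn on top. The
# temperature, sample size, and colormap are illustrative assumptions.
# import matplotlib.pyplot as plt
# fig, ax = plt.subplots()
# ax = samplebins_pH(ax, T=300, samplesize=20000, cmap='Blues')
# ax = nominalline_pH(ax, T=300, c='k')
# ax.set_xlabel('pH')
# ax.set_ylabel('log10(power supply)')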
def run(self):
    path = self.path
    ori_g = Reader.single_readG(path)
    _, samp_g = Sampler.single_sampling(path, self.s_p)
    p = link_pred.Prediction()
    v_set = p.create_vertex(ori_g.edges())
    matrix_ori = p.create_adjmatrix(ori_g.edges(), v_set)
    matrix_samp = p.create_adjmatrix(samp_g.edges(), v_set)

    cn = link_pred.CommonNeighbors()
    score_cn = cn.fit(matrix_ori)
    auc_cn = p.auc_score(score_cn, matrix_ori, matrix_samp, 'cc')
    print("*** CommonNeighbors AUC:", auc_cn)

    ja = link_pred.Jaccard()
    score_ja = ja.fit(matrix_ori)
    auc_ja = p.auc_score(score_ja, matrix_ori, matrix_samp, 'cc')
    print("*** Jaccard AUC:", auc_ja)
def __init__(self, fontname, size):
    if Text.prog is None:
        Text.prog = Program("TextVertexShader.txt", "TextFragmentShader.txt")
    self.txt = "temp"
    self.samp = Sampler()
    self.font = TTF_OpenFont(os.path.join("assets", fontname).encode(), size)
    # presumably here to raise a Python exception if the font file is missing
    open(os.path.join("assets", fontname))
    vbuff = Buffer(array.array("f", [0, 0, 1, 0, 1, 1, 0, 1]))
    ibuff = Buffer(array.array("I", [0, 1, 2, 0, 2, 3]))
    tmp = array.array("I", [0])
    glGenVertexArrays(1, tmp)
    self.vao = tmp[0]
    glBindVertexArray(self.vao)
    ibuff.bind(GL_ELEMENT_ARRAY_BUFFER)
    vbuff.bind(GL_ARRAY_BUFFER)
    glEnableVertexAttribArray(0)
    glVertexAttribPointer(0, 2, GL_FLOAT, False, 2 * 4, 0)
    glBindVertexArray(0)
    self.tex = DataTexture2DArray(1, 1, 1, array.array("B", [0, 0, 0, 0]))
    self.textQuadSize = vec2(0, 0)
    self.pos = vec2(0, 0)
    self.dirty = False
    # render the initial text to an SDL surface and upload it as a texture
    surf1p = TTF_RenderUTF8_Blended(self.font, self.txt.encode(),
                                    SDL_Color(255, 255, 255, 255))
    surf2p = SDL_ConvertSurfaceFormat(surf1p, SDL_PIXELFORMAT_ABGR8888, 0)
    w = surf2p.contents.w
    h = surf2p.contents.h
    pitch = surf2p.contents.pitch
    if pitch != w * 4:
        print("Uh Oh!", pitch, w)
    pix = surf2p.contents.pixels
    B = string_at(pix, pitch * h)
    self.tex.setData(w, h, 1, B)
    SDL_FreeSurface(surf2p)
    SDL_FreeSurface(surf1p)
    self.textQuadSize = vec2(w, h)
    self.dirty = False
    Program.setUniform("textPosInPixels", self.pos)
    Program.setUniform("textQuadSizeInPixels", self.textQuadSize)
    Program.updateUniforms()
def run(self):
    path = self.path
    nx_graphs, total_edges = Reader.multi_readG(path)
    r_list, nx_graphs_sampled = Sampler.multi_sampling(path, self.s_p)
    print('%d edges sampled, graph length is %d'
          % (len(r_list), len(nx_graphs_sampled)))

    MK_G = Node2Vec_LayerSelect.Graph(nx_graphs_sampled, self.p, self.q)
    MK_G.preprocess_transition_probs()
    MK_walks = MK_G.simulate_walks(self.num_walks, self.walk_length)
    MK_words = []
    for walk in MK_walks:
        MK_words.extend([str(step) for step in walk])

    M_L = Word2Vec.Learn(MK_words)
    M_matrix, M_mapping = M_L.train()

    r_set = set([node for edge in r_list for node in edge])
    eval_p = Evaluator.Precision_Eval(M_matrix, M_mapping, nx_graphs, r_set,
                                      self.e_p)
    M_precision = eval_p.eval()
    print("*** Merged graph precision: ", M_precision)
def varianceexample_extend(startfrom, totsize, stepsize):
    # extend variance chain samples for each salt level and pH of interest
    for salttype in ['nom', 'high', 'low']:
        for pH in [8, 9, 10]:
            Sampler.variance_chain_sample(None, pH, stepsize=stepsize,
                                          totsize=totsize,
                                          startfrom=startfrom,
                                          salttype=salttype, ones=['all'])
from TestModel import *
from Sampler import *

options = Options()
options.load("OPTIONS")

sampler = Sampler(TestModel, options=options)
sampler.run()
def main():
    parser = argparse.ArgumentParser(description="Quasar proper motions code")
    parser.add_argument("parameter_file", metavar="Parameter file", type=str,
                        help=".par file")
    args = parser.parse_args()

    params = C.set_params(args.parameter_file)
    U.assert_config_params(params)
    C.check_output_dir(params['General']['output_dir'])
    C.record_config_params(params)

    data = AD.AstrometricDataframe()
    AD.load_astrometric_data(data, params=params['Data'])

    astrometric_model = S.model(data,
                                logL_method=params['MCMC']['logL_method'],
                                prior_bounds=params['MCMC']['prior_bounds'])
    nest = cpnest.CPNest(astrometric_model,
                         output=params['General']['output_dir'],
                         nthreads=params['MCMC']['nthreads'],
                         nlive=params['MCMC']['nlive'],
                         maxmcmc=params['MCMC']['maxmcmc'],
                         resume=True,
                         verbose=params['General']['verbose'])
    nest.run()

    nested_samples = nest.get_nested_samples(filename=None)
    np.savetxt(os.path.join(params['General']['output_dir'],
                            'nested_samples.dat'),
               nested_samples.ravel(),
               header=' '.join(nested_samples.dtype.names),
               newline='\n', delimiter=' ')
    posterior_samples = nest.get_posterior_samples(filename=None)
    np.savetxt(os.path.join(params['General']['output_dir'],
                            'posterior_samples.dat'),
               posterior_samples.ravel(),
               header=' '.join(posterior_samples.dtype.names),
               newline='\n', delimiter=' ')

    A_limit = PP.post_process_results(
        posterior_file=os.path.join(params['General']['output_dir'],
                                    'posterior_samples.dat'),
        which_basis=astrometric_model.which_basis,
        Lmax=params['Data']['Lmax'],
        L=astrometric_model.overlap_matrix_Cholesky,
        pol=params['Post_processing']['pol'],
        limit=params['Post_processing']['limit'])
    U.export_data(data, A_limit, output=params['General']['output_dir'])

    if params['General']['plotting'] == True:
        P.plot(data, output=params['General']['output_dir'])
raw_input("Press enter to exit...") # ================================================= # PHASE I: scan for transition configurations # ================================================= with env: # instantitate multi-modal sampler query = [np.array([-5, 0, 20.3]), np.array(final_task[0])] n = float(robots.instance_number - 1) pr = (float(RADIUS * 2) * (n - 2)) / 100 pair0 = (UNLOCK, LOCK0) pair1 = (UNLOCK, LOCKN) sampler01 = Sampler(mode=3, is_trans=True, pair=pair0, pair_range=pr) sampler02 = Sampler(mode=4, is_trans=True, pair=pair1, pair_range=pr) trans_samplers = [sampler01] #trans_samplers = [sampler01, sampler02] # multiModalPlanning init_node, goal_node, _ = effMultiModalPlanner(query, robots, trans_samplers, CLIFF, pr) # heuristic search frontier = [(0, init_node)] parent = {init_node: (0, None)} while True: cost, node = hq.heappop(frontier) if node == goal_node:
keys = ['event_info', 'jet1_PFCands', 'jet1_extraInfo', 'jet2_PFCands',
        'jet2_extraInfo', 'jet_kinematics', 'truth_label']

base_dir = "/eos/user/t/tloesche/CASE_data/CASE_pancakes/"
out_dir = "/eos/cms/store/group/phys_b2g/CASE/h5_files/2017/BB_norepeats_v3_2500/"

qcd1_name = base_dir + "QCD/qcd_1000to1500_merge.h5"
qcd2_name = base_dir + "QCD/qcd_1500to2000_merge.h5"
qcd3_name = base_dir + "QCD/qcd_2000toInf_merge.h5"
sig1_name = base_dir + "signal/grav_merge.h5"
sig2_name = base_dir + "signal/wprime_0.h5"
sig3_name = base_dir + "signal/wkk_0.h5"
sig4_name = base_dir + "signal/bstar_merge.h5"

qcd1 = Sampler(qcd1_name, pbTofb * 1088., lumi, holdout_frac=bkg_holdout_frac)
qcd2 = Sampler(qcd2_name, pbTofb * 99.11, lumi, holdout_frac=bkg_holdout_frac)
qcd3 = Sampler(qcd3_name, pbTofb * 20.23, lumi, holdout_frac=bkg_holdout_frac)
sig1 = Sampler(sig1_name, n_sig, 1., isSignal=True, holdout_frac=sig_holdout_frac)
sig2 = Sampler(sig2_name, n_sig, 1., isSignal=True, holdout_frac=sig_holdout_frac)
sig3 = Sampler(sig3_name, n_sig, 1., isSignal=True, holdout_frac=sig_holdout_frac)
sig4 = Sampler(sig4_name, n_sig, 1., isSignal=True, holdout_frac=sig_holdout_frac)

ws = [qcd1, qcd2, qcd3, sig1, sig2, sig3, sig4]
BB = BlackBox(ws, keys, nBatches=options.nBatch)
os.system("mkdir %s" % out_dir)
f_out_name = out_dir + 'BB'
BB.writeOut(f_out_name)
#h_name = out_dir + "BB_testset"
#BB.writeHoldOut(h_name + ".h5")
from Sampler import *

np.random.seed(123)

pbTofb = 1000.
bkg_holdout_frac = 0.05
sig_holdout_frac = 0.2
nBatches = 2
lumi = 1.

qcd1 = Sampler("../H5_maker/test_files/QCD_HT1000to1500_test.h5",
               pbTofb * 1088., lumi, holdout_frac=bkg_holdout_frac)
qcd2 = Sampler("../H5_maker/test_files/QCD_HT1500to2000_test.h5",
               pbTofb * 99.11, lumi, holdout_frac=bkg_holdout_frac)
qcd3 = Sampler("../H5_maker/test_files/QCD_HT2000toInf_test.h5",
               pbTofb * 20.23, lumi, holdout_frac=bkg_holdout_frac)
sig = Sampler("../H5_maker/test_files/WprimeToWZToWhadZhad_narrow_M-3500_TuneCP5_13TeV-madgraph_test.h5",
              20000., lumi, isSignal=True, holdout_frac=sig_holdout_frac)

ws = [qcd1, qcd2, qcd3, sig]
#ws = [qcd1]
keys = [
from Sampler import *

np.random.seed(123)

pbTofb = 1000.

qcd1 = Sampler("../H5_maker/test_files/QCD_HT1000to1500_test.h5",
               pbTofb * 1088., 1.)
qcd2 = Sampler("../H5_maker/test_files/QCD_HT1500to2000_test.h5",
               pbTofb * 99.11, 1.)
qcd3 = Sampler("../H5_maker/test_files/QCD_HT2000toInf_test.h5",
               pbTofb * 20.23, 1.)
sig = Sampler("../H5_maker/test_files/WprimeToWZToWhadZhad_narrow_M-3500_TuneCP5_13TeV-madgraph_test.h5",
              30000., 1.)

ws = [qcd1, qcd2, qcd3, sig]
#keys= ['event_info', 'jet_kinematics', 'truth_label', 'jet1_extraInfo', 'jet1_PFCands']
keys = []
BB = BlackBox(ws, keys)
#print(BB['truth_label'].shape)
#print(BB['jet_kinematics'].shape)
BB.writeOut('BB_test_Wprime.h5')
def _mkii_eval(self, walk_graphs, eval_graphs, sampled_graphs, r_set,
               w_dict, weight_mode, r, label):
    # helper consolidating the repeated "embed then evaluate" MKII pipeline:
    # embed with Node2Vec_LayerSelect, then report precision/recall/F and AUC
    MK_G = Node2Vec_LayerSelect.Graph(walk_graphs, self.p, self.q, r)
    MK_G.preprocess_transition_probs(w_dict, weight_mode)
    MK_walks = MK_G.simulate_walks(self.num_walks, self.walk_length)
    MK_words = []
    for walk in MK_walks:
        MK_words.extend([str(step) for step in walk])
    M_L = Word2Vec.Learn(MK_words)
    M_matrix, M_mapping = M_L.train()

    eval_p = Evaluator.Precision_Eval(M_matrix, M_mapping, eval_graphs,
                                      r_set, self.e_p)
    precision, recall, F = eval_p.eval()
    print("*** %s: precision %f, recall %f, F %f"
          % (label, precision, recall, F))
    eval_a = Evaluator.AUC_Eval(M_matrix, M_mapping, eval_graphs,
                                sampled_graphs)
    M_auc = eval_a.eval_auc(1)
    print("@@@ %s AUC:" % label, M_auc)
    print("-----------------------DONE--------------------------------")

def run(self):
    path = self.path

    #### Step 1: reading and sampling graphs
    m_graph, nx_graphs, total_edges = Reader.multi_readG_with_Merg(path)
    print("%d total nodes" % len(m_graph.nodes()))
    r_list, m_graph_sampled, nx_graphs_sampled = \
        Sampler.multi_sampling_with_Merg(path, self.s_p)
    print("%d edges before sampling, %d edges after sampling. sampled %d "
          % (len(m_graph.edges()), len(m_graph_sampled.edges()), len(r_list)))
    r_set = set([node for edge in r_list for node in edge])

    #### Step 2: Aggregated graph
    if self.flag == 0 or self.flag == 1:
        M_G = Node2Vec.Graph(m_graph_sampled, self.p, self.q)
        M_G.preprocess_transition_probs()
        M_walks = M_G.simulate_walks(self.num_walks, self.walk_length)
        M_words = []
        for walk in M_walks:
            M_words.extend([str(step) for step in walk])
        M_L = Word2Vec.Learn(M_words)
        M_matrix, M_mapping = M_L.train()
        eval_p = Evaluator.Precision_Eval(M_matrix, M_mapping, m_graph,
                                          r_set, self.e_p)
        precision, recall, F = eval_p.eval()
        print("*** Aggregated graph: precision %f, recall %f, F %f"
              % (precision, recall, F))
        eval_a = Evaluator.AUC_Eval(M_matrix, M_mapping, m_graph,
                                    m_graph_sampled)
        M_auc = eval_a.eval_auc(1)
        print("@@@ Merged graph AUC:", M_auc)
        print("-----------------------DONE--------------------------------")

    #### Step 3: Aggregated result (one embedding per layer)
    if self.flag == 0 or self.flag == 2:
        T_matrix = {}
        T_mapping = {}
        for g in nx_graphs_sampled:
            G = Node2Vec.Graph(g, self.p, self.q)
            G.preprocess_transition_probs()
            walks = G.simulate_walks(self.num_walks, self.walk_length)
            words = []
            for walk in walks:
                words.extend([str(step) for step in walk])
            L = Word2Vec.Learn(words)
            matrix, mapping = L.train()
            T_matrix[g] = matrix
            T_mapping[g] = mapping
        eval_p_s = Evaluator.combining_Precision_Eval(T_matrix, T_mapping,
                                                      nx_graphs, r_set,
                                                      self.e_p)
        precision, recall, F = eval_p_s.eval()
        print("*** Aggregated result: precision %f, recall %f, F %f"
              % (precision, recall, F))
        eval_a = Evaluator.combining_AUC_Eval(T_matrix, T_mapping, nx_graphs,
                                              nx_graphs_sampled)
        S_auc = eval_a.eval_auc(1)
        print('@@@ Separated graph AUC:', S_auc)
        print("-----------------------DONE--------------------------------")

    #### Step 4: MKII verification on the merged graph alone
    if self.flag == 0 or self.flag == 3:
        self._mkii_eval([m_graph_sampled], m_graph, m_graph_sampled, r_set,
                        {}, 1, self.r, "MKII verification")

    #### Steps 5-8: MKII with random / weighted / biased layer selection
    if self.flag == 0 or self.flag == 4:
        w_dict = Reader.weight(self.path)
        for weight_mode, label in [(1, "MKII Random"), (2, "MKII Weighted"),
                                   (0, "MKII Biased"), (3, "MKII Biased_ii")]:
            self._mkii_eval(nx_graphs_sampled, nx_graphs, nx_graphs_sampled,
                            r_set, w_dict, weight_mode, self.r, label)

    # sweep the layer-selection parameter r from 0.0 to 1.0
    if self.flag == 4:
        w_dict = Reader.weight(self.path)
        for r in range(11):
            r_t = r / 10.0
            if r_t == 0:
                self._mkii_eval(nx_graphs_sampled, nx_graphs,
                                nx_graphs_sampled, r_set, w_dict, 1, 0.1,
                                "MKII Random")
            else:
                self._mkii_eval(nx_graphs_sampled, nx_graphs,
                                nx_graphs_sampled, r_set, w_dict, 3, r_t,
                                "MKII Biased_ii with %f" % r_t)

    #### Step 9: CommonNeighbors and Jaccard baselines
    if self.flag == 0 or self.flag == 5:
        p = link_pred.Prediction()
        v_set = p.create_vertex(m_graph.edges())
        matrix_perm = p.create_adjmatrix(
            [edge for edge in itertools.combinations(r_set, 2)], v_set)
        matrix_ori = p.create_adjmatrix(m_graph.edges(), v_set)
        matrix_samp = p.create_adjmatrix(m_graph_sampled.edges(), v_set)

        cn = link_pred.CommonNeighbors()
        score_cn = cn.fit(matrix_ori)
        C_precision, C_recall, C_F = p.acc(score_cn, matrix_ori, matrix_perm,
                                           self.e_p)
        print("*** CommonNeighbors: precision %f, recall %f, F %f"
              % (C_precision, C_recall, C_F))
        C_auc = p.auc_score(score_cn, matrix_ori, matrix_samp, "cc")
        print("@@@ CommonNeighbors: AUC %f" % C_auc)

        ja = link_pred.Jaccard()
        score_ja = ja.fit(matrix_ori)
        J_precision, J_recall, J_F = p.acc(score_ja, matrix_ori, matrix_perm,
                                           self.e_p)
        print("*** Jaccard: precision %f, recall %f, F %f"
              % (J_precision, J_recall, J_F))
        J_auc = p.auc_score(score_ja, matrix_ori, matrix_samp, "cc")
        print("@@@ Jaccard: AUC %f" % J_auc)
        print("-----------------------DONE--------------------------------")
Vector.writeVector(credentials.hd_normals, rho)  # Save high-dim normals (normalised).
print("Stage 2/4-- High-Dim Normals Computed. Total (nC2)=", len(comb),
      ", Projective Dim= ", hd_dim)

'''
Generate a sample of points on the hypersphere in d+1 and project to high
dimensional space. Use these points to find the largest cells in the high
dimensional arrangement and return the best points.
'''
num_smart_samples = beta * hd_dim
points = Vector.generateNPoints(num_smart_samples, d + 1)  # Generate points in the input space
Matrix.writeMatrix(credentials.temp, points)
hd_points = np.zeros((points.shape[0], hd_dim))
for i in range(points.shape[0]):
    hd_points[i, :] = Transform.qComp(points[i, :])
best_idx = Sampler.getSignVectors(rho_normals, hd_points, k, alpha,
                                  credentials.plot)
best_points = points[best_idx, :]
Matrix.writeMatrix(credentials.candidates, best_points)
print("Stage 3/4-- ", num_smart_samples, "Samples taken / sign vectors generated.")

'''
For every input, measure the distance to the k candidates and assign the
closest point as its cluster.
'''
comb = list(combinations(range(len(best_idx)), k))
min_score = 100000
best_comb = []
iii = 0
for col in range(len(comb)):
    scores = np.zeros((n, k))
    selected_comb = best_points[comb[col], :]
    for i in range(n):
#!python3.6
# coding:utf-8
import time

import Player
import Sampler
import BaseWaveMaker
import MusicTheory.EqualTemperament
import MusicTheory.Scale
import MusicTheory.tempo
import WaveFile

if __name__ == "__main__":
    wm = BaseWaveMaker.BaseWaveMaker()
    sampler = Sampler.Sampler()
    et = MusicTheory.EqualTemperament.EqualTemperament()
    scale = MusicTheory.Scale.Scale()
    timebase = MusicTheory.tempo.TimeBase()
    timebase.BPM = 120
    timebase.Metre = (4, 4)
    nv = MusicTheory.tempo.NoteValue(timebase)
    p = Player.Player()
    p.Open()
    print('---------- Major scales ----------')
    for key in ['C', 'C+', 'D', 'D+', 'E', 'F', 'F+', 'G', 'G+', 'A',
                'A+', 'B']:
        print(key, 'major scale')
        scale.Major(key=key)
        # print(','.join([MusicTheory.Key.Key.ValueToName(k) for k in scale.Scales]))
        # print(','.join([str(f0) for f0 in scale.Frequencies]))
def tpot_optimization_reg(count, train_path, test_path, verbose=False):
    """
    Optimize algorithms and parameters using TPOT for Regression trees.

    :param count: int, number of samples to be generated.
    :param train_path: string, path to the dataset used for training.
    :param test_path: string, path to the dataset used for testing.
    :param verbose: bool, representing if information regarding the process should be displayed.
    """
    # Generate samples.
    formater = PredictionModelSymmetricXGBoost()
    if verbose:
        print("Get train samples.")
    X_train, Y_train = Sampler.generate_samples(dataset=train_path, count=count)
    X_train, Y_train = formater._format_fit_inputs(X=X_train, Y=Y_train)
    if verbose:
        print("Get test samples.")
    X_test, Y_test = Sampler.generate_samples(dataset=test_path, count=count)
    X_test, Y_test = formater._format_fit_inputs(X=X_test, Y=Y_test)

    tpot_config = {
        'sklearn.ensemble.RandomForestRegressor': {
            'n_estimators': [10, 25, 50, 75, 100, 300],
            'max_features': ["auto", "sqrt", "log2"],
            'max_depth': [2, 3, 4, 5, 6, 8, 10],
            'n_jobs': [-1]
        },
        'sklearn.ensemble.ExtraTreesRegressor': {
            'n_estimators': [10, 25, 50, 75, 100, 300],
            'max_features': ["auto", "sqrt", "log2"],
            'max_depth': [2, 3, 4, 5, 6, 8, 10],
            'n_jobs': [-1]
        },
        'sklearn.ensemble.GradientBoostingRegressor': {
            'n_estimators': [10, 25, 50, 75, 100, 300],
            'learning_rate': [0.02, 0.05, 0.1, 0.15, 0.2],
            'loss': ["ls", "lad", "huber", "quantile"],
            'max_depth': [2, 3, 4, 5, 6, 8, 10]
        },
        # NOTE: this entry looks misspecified as written: AdaBoostRegressor
        # has no max_depth parameter of its own, and base_estimator is given
        # as one string rather than estimator objects; it is kept verbatim
        # from the original and would need fixing before TPOT can use it.
        'sklearn.ensemble.AdaBoostRegressor': {
            'base_estimator': [
                "DecisionTreeRegressor, GradientBoostingRegressor, RandomForestRegressor"
            ],
            'n_estimators': [10, 25, 50, 75, 100, 300],
            'learning_rate': [0.6, 0.7, 0.8, 0.9, 1.0],
            'loss': ["linear", "square", "exponential"],
            'max_depth': [2, 3, 4, 5, 6, 8, 10]
        },
        'xgboost.XGBRegressor': {
            'n_estimators': [10, 25, 50, 75, 100, 300],
            'booster': ["gbtree", "gblinear", "dart"],
            'learning_rate': [0.02, 0.05, 0.1, 0.15, 0.2],
            'max_depth': [2, 3, 4, 5, 6, 8, 10],
            'n_jobs': [-1],
            'objective': ["reg:linear"]
        }
    }

    tpot = TPOTRegressor(generations=10, population_size=30, verbosity=2,
                         config_dict=tpot_config, cv=5)
    tpot.fit(np.array(X_train), np.array(Y_train))
    print(tpot.score(np.array(X_test, dtype=np.float64),
                     np.array(Y_test, dtype=np.float64)))
    tpot.export('tpot_pipeline_reg.py')
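# Example invocation mirroring the classifier version above; the dataset
# paths are placeholders, not from the source.
# tpot_optimization_reg(count=1000,
#                       train_path='data/train_set.csv',
#                       test_path='data/test_set.csv',
#                       verbose=True)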