def degrees(G):
    """Takes an nx graph and saves its in- and out-degree distribution plots."""
    # in degrees
    in_data = Counter(dict(G.in_degree).values())
    in_meta = {
        "title": f"{G.name}, in degree distribution",
        "folder": "-".join(G.name.split("-")[::-1]),
        "file": f"in-deg-dist-{G.name}",
        "xlab": "k",
        "ylab": "P(k)"
    }
    plotting.scatter(G, in_data, in_meta)

    # out degrees
    out_data = Counter(dict(G.out_degree).values())
    out_meta = {
        "title": f"{G.name}, out degree distribution",
        "folder": "-".join(G.name.split("-")[::-1]),
        "file": f"out-deg-dist-{G.name}",
        "xlab": "k",
        "ylab": "P(k)"
    }
    plotting.scatter(G, out_data, out_meta)
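# A minimal sketch of how the raw degree counts above map onto the plotted
# P(k): the Counter holds absolute frequencies, so dividing by the number of
# nodes gives the empirical probability mass function. `normalized_degree_dist`
# is a hypothetical helper, not part of the code above.
from collections import Counter

def normalized_degree_dist(G):
    counts = Counter(dict(G.in_degree).values())
    n = G.number_of_nodes()
    return {k: c / n for k, c in counts.items()}  # k -> P(k)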
def features_pca_classified(fscaled, labels_true, labels_predict, axes=None,
                            algorithm="pca"):
    if algorithm == "pca":
        pc = PCA(n_components=2)
        fscaled_trans = pc.fit(fscaled).transform(fscaled)
    elif algorithm == "tsne":
        fscaled_trans = TSNE(n_components=2).fit_transform(fscaled)
    else:
        raise AlgorithmUnrecognizedException("Unrecognized dimensionality "
                                             "reduction method.")

    sns.set_style("whitegrid")
    plt.rc("font", size=24, family="serif", serif="Computer Sans")
    plt.rc("axes", titlesize=20, labelsize=20)
    plt.rc("text", usetex=True)
    plt.rc("xtick", labelsize=20)
    plt.rc("ytick", labelsize=20)

    # make a Figure object
    if axes is None:
        fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

    ax1, ax2 = axes[0], axes[1]

    # first panel: true labels
    ax1 = plotting.scatter(fscaled_trans, labels_true, ax=ax1)

    # second panel: predicted labels
    ax2 = plotting.scatter(fscaled_trans, labels_predict, ax=ax2)

    plt.tight_layout()

    return ax1, ax2
def features_pca(fscaled, labels, axes=None, alpha=0.8, palette="Set3",
                 algorithm="pca"):
    if algorithm == "pca":
        pc = PCA(n_components=2)
        fscaled_trans = pc.fit(fscaled).transform(fscaled)
    elif algorithm == "tsne":
        fscaled_trans = TSNE(n_components=2).fit_transform(fscaled)
    else:
        raise AlgorithmUnrecognizedException("Unrecognized dimensionality "
                                             "reduction method.")

    sns.set_style("whitegrid")
    plt.rc("font", size=24, family="serif", serif="Computer Sans")
    plt.rc("axes", titlesize=20, labelsize=20)
    plt.rc("text", usetex=True)
    plt.rc("xtick", labelsize=20)
    plt.rc("ytick", labelsize=20)

    # make a Figure object
    if axes is None:
        fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

    ax1, ax2 = axes[0], axes[1]

    # first panel: original labels
    labels_all = np.hstack([labels["train"], labels["val"], labels["test"]])
    ax1 = plotting.scatter(fscaled_trans, labels_all, ax=ax1)

    # second panel: physical labels
    labels_phys = feature_engineering.convert_labels_to_physical(labels)
    labels_all_phys = np.hstack([labels_phys["train"], labels_phys["val"],
                                 labels_phys["test"]])
    ax2 = plotting.scatter(fscaled_trans, labels_all_phys, ax=ax2)

    plt.tight_layout()

    return ax1, ax2
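# Both functions above share the same reduction step; a standalone sketch of
# just that step, using scikit-learn's documented API (`reduce_to_2d` is a
# hypothetical helper, not part of this module):
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

def reduce_to_2d(fscaled, algorithm="pca"):
    if algorithm == "pca":
        return PCA(n_components=2).fit_transform(fscaled)
    if algorithm == "tsne":
        return TSNE(n_components=2).fit_transform(fscaled)
    raise ValueError("unrecognized dimensionality reduction: %r" % algorithm)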
def centrality(G):
    # betweenness (approximated on a sample of k pivot nodes)
    betw = nx.betweenness_centrality(G, k=1000)
    betw_meta = {"title": f"{G.name}, betweenness centrality",
                 "folder": "-".join(G.name.split("-")[::-1]),
                 "file": f"betw-cent-{G.name}",
                 "xlab": "C_b", "ylab": "P(C_b)"}
    plotting.scatter(betw, betw_meta)

    # ideg = nx.in_degree_centrality(G)
    # odeg = nx.out_degree_centrality(G)

    # closeness (computed on a node sample of the component)
    clos = {n: nx.closeness_centrality(component(G), u=n)
            for n in sampler(component(G))}
    clos_meta = {"title": f"{G.name}, closeness centrality",
                 "folder": "-".join(G.name.split("-")[::-1]),
                 "file": f"clos-cent-{G.name}",
                 "xlab": "C_c", "ylab": "P(C_c)"}
    plotting.scatter(clos, clos_meta)

    # eigenvector
    eige = nx.eigenvector_centrality(G, max_iter=200)
    eige_meta = {"title": f"{G.name}, eigenvector centrality",
                 "folder": "-".join(G.name.split("-")[::-1]),
                 "file": f"eigen-cent-{G.name}",
                 "xlab": "C_e", "ylab": "P(C_e)"}
    plotting.scatter(eige, eige_meta)
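# nx.betweenness_centrality samples the k pivot nodes via random.sample, which
# raises ValueError whenever k exceeds the number of nodes. A small guard
# avoids that on graphs smaller than the k=1000 used above (a sketch;
# `safe_betweenness` is a hypothetical helper):
import networkx as nx

def safe_betweenness(G, k=1000):
    return nx.betweenness_centrality(G, k=min(k, G.number_of_nodes()))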
# Make a vector of outputs
comp_preds_bkg = []
comp_true_bkg = []
for (batchX, batchY) in next_batch(test_bkg, probs_test_bkg, batchSize):
    if batchY.shape[0] < batchSize:
        print('Batch size insufficient (%s), continuing...' % batchY.shape[0])
        continue
    output = model.evaluate_total(batchX, debug=False)
    comp_preds_bkg.extend(output.T)
    comp_true_bkg.extend(batchY)

# plot the comparison to the truth
scatter(comp_preds, comp_true, [0.0, 1.0], [0.0, 1.0],
        "Prediction", "Truth", "Approximation comparison",
        "plots/approx_vs_truth_deep_fromLoad.pdf")

comp_preds = [d.item(0) for d in comp_preds]
difflist = [(p - t) for p, t in zip(comp_preds, comp_true)
            if math.fabs(p - t) < 0.0001]

comp_preds_bkg = [d.item(0) for d in comp_preds_bkg]
difflist_bkg = [(p - t) for p, t in zip(comp_preds_bkg, comp_true_bkg)
                if math.fabs(p - t) < 0.0001]

hd_hist([difflist, difflist_bkg], 'plots/approx_vs_truth_diff.pdf',
        [-0.00005, 0.00005], [0.0, 1100.0],
        "Approx. difference", "Events",
        np.arange(-0.00005, 0.00005, 0.0001 / 350),
        ['signal', 'background'])

# Training analysis
f_in = open('training.pkl', 'rb')
training = pickle.load(f_in)
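# `next_batch` is not shown in this file; a plausible minimal sketch, assuming
# it simply yields aligned fixed-size slices of the inputs and their target
# values (the body below is an assumption inferred from the loop above, not
# the project's actual implementation):
def next_batch(data, targets, batch_size):
    for i in range(0, len(data), batch_size):
        yield data[i:i + batch_size], targets[i:i + batch_size]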
model.add(Dense(1, activation='sigmoid'))

# compile model
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(np.expand_dims(setTrain, axis=2), labels,
                    batch_size=batchSize, epochs=epochNum,
                    validation_data=(np.expand_dims(setTest, axis=2), labels))

n_epochs = len(history.history['loss'])
scatter(range(n_epochs), history.history['loss'],
        [0, n_epochs],
        [min(history.history['loss']), max(history.history['loss'])],
        'Epoch', 'Loss', 'Training Loss', 'training_loss.pdf')

joblib.dump(history.history, open('./keras_hist.pkl', 'wb'))
model.save('./keras_locallyconnected1d_for_drone.h5')

if not model:
    print('ERROR: Could not load or create Keras model. Exiting...')
    sys.exit(1)

# get full keras response space on data
refs = []
flattened = []
for point in all_data:
    conv_point = np.expand_dims(np.expand_dims(point, axis=2), axis=0)
for n in names:
    sigPreds, bkgPreds = predsFromModel(n)
    predsSig.append(sigPreds)
    predsBkg.append(bkgPreds)

    # plot the comparison to the truth
    totalpreds = sigPreds + bkgPreds
    totaltrue = comp_true + comp_true_bkg
    totalpreds = totalpreds[:400]
    gen = list(range(len(totalpreds)))
    np.random.shuffle(gen)
    totalpredsNew = [totalpreds[x] for x in gen]
    totaltrue = totaltrue[:400]
    totaltrueNew = [totaltrue[x] for x in gen]
    scatter(totalpredsNew, totaltrueNew, [0.0, 1.0], [0.0, 1.0],
            "Prediction", "Truth", "Approximation comparison",
            "plots_gpd/approx_vs_truth_deep_fromLoad_%s.pdf"
            % n.replace('.pkl', ''))

# make ROC curves
xvals_orig = []
xvals_drone = []
yvals_orig = []
yvals_drone = []
scanpoints = np.linspace(0.0, 1.0, 500)
for p in range(len(predsSig)):
    xvals_drone_inner = []
    yvals_drone_inner = []
    for s in scanpoints:
        es, rb, nSig, nBKG = scanPoint(s, predsSig[p], predsBkg[p])
        xvals_drone_inner.append(rb)
        yvals_drone_inner.append(es)
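# `scanPoint` is defined elsewhere; from the way its return values feed the
# ROC axes above (es -> y, rb -> x), a plausible sketch is a simple threshold
# scan. The names es (signal efficiency) and rb (background rejection) are
# inferred, not confirmed by this file:
def scanPoint(threshold, preds_sig, preds_bkg):
    nSig = sum(1 for p in preds_sig if p >= threshold)
    nBKG = sum(1 for p in preds_bkg if p >= threshold)
    es = nSig / len(preds_sig)        # fraction of signal kept
    rb = 1.0 - nBKG / len(preds_bkg)  # fraction of background rejected
    return es, rb, nSig, nBKG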
def scatter(self, points, **kwargs):
    ax = self.get_axes()
    permutation = self._permutation
    plot_ = plotting.scatter(points, ax=ax, permutation=permutation, **kwargs)
    return plot_
def main(argv):
    # defaults
    window_length = 50
    overlap = window_length // 2
    featdim = 10

    # data_115818,sgmdata_115818 = load_dataset(window_length,overlap)
    training_data, training_sgmdata = load_dataset(window_length, overlap)
    training_featdata, header = build_dataset_features(training_sgmdata)
    cl.rnn_test(training_featdata)
    return

    data_120250, sgmdata_120250 = load_dataset(
        window_length, overlap, median_filter=True,
        alldatafile='../../acquisizione20062014/acquisizione_20062014/Data_120250.txt')
    # these data are completely different from the other three
    # data_120611,sgmdata_120611 = load_dataset(window_length,overlap,median_filter=True,alldatafile='../../acquisizione20062014/acquisizione_20062014/Data_120611.txt')
    """
    data_120922,sgmdata_120922 = load_dataset(window_length,overlap,median_filter=True,alldatafile='../../acquisizione20062014/acquisizione_20062014/Data_120922.txt')
    all_data = [(data_115818,"115818"),(data_120250,"120250"),(data_120611,"120611"),(data_120922,"120922")]
    sgm_data = [sgmdata_115818,sgmdata_120250,sgmdata_120611,sgmdata_120922]
    cols = ['b','r','g','m']
    for (data,title),c in zip(all_data,cols):
        print("Acquisizione", title)
        plt.plot_in_subplots(data,0,1,c)
    return
    """
    return

    training_data, training_sgmdata = load_dataset(window_length, overlap)
    training_featdata, header = build_dataset_features(training_sgmdata)
    training_targets = fm.assign_target(training_featdata)
    """
    data1,sgmdata1 = load_dataset(window_length,overlap,alldatafile='/home/ilaria/Scrivania/marsupio/acquisizione20062014/acquisizione_20062014/Data_120250.txt')
    featdata1,_ = build_dataset_features(sgmdata1)
    targets1 = fm.assign_target(featdata1)
    """
    # write_feature_data_to_file(featdata,header)
    # print(featdata[0,idxs])
    # plt.plot_in_subplots(featdata,idxs)
    # plt.plot_all(featdata1[:,idxs])
    # X_r = preprocessing.scale(featdata)
    # pca = PCA(n_components=featdim)
    # kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=0.1)
    # X_r = kpca.fit_transform(X_r)
    # X_r = pca.fit(X_r).transform(X_r)
    X_r = training_featdata
    targets = training_targets
    pca = PCA(n_components=2)
    X_r = preprocessing.scale(X_r)
    X_r = pca.fit(X_r).transform(X_r)
    kmeans = KMeans(n_clusters=10)
    kmeans.fit(X_r)
    plt.plot_clustering_and_targets(X_r, kmeans, 0, 1, targets)
    return

    pars = [{
        'clf__kernel': ['rbf'],
        'clf__gamma': [1e-3, 1e-5, 1e-2, 1e-1, 1e-4],
        'clf__C': [0.001, 0.01, 0.1, 1, 10, 100],
        'pca__n_components': [5, 10, 20, 50, 80]
    }, {
        'clf__kernel': ['linear'],
        'clf__C': [0.001, 0.01, 0.1, 0.5, 1, 10, 100],
        'pca__n_components': [5, 10, 20, 50, 80]
    }]
    # evaluation set
    cl.cross_model_selection(X_r, targets, pars, save=True)
    c = cl.load_model('model.pkl')
    print(c)
    return

    # print(X_train.shape, X_test.shape)
    clf = svm.SVC(kernel='rbf', gamma=0.7, C=0.8)
    pca = PCA(n_components=featdim)
    pca_svm = Pipeline([
        ('pca', pca),
        ('svm', clf),
    ])
    scores = cross_validation.cross_val_score(clf, X_r, targets, cv=5,
                                              scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # pca_svm.fit(X_train, y_train)
    # print(pca_svm.score(X_test,y_test))
    return

    # X_r = pca.fit(sint).transform(sint)
    # X_r = preprocessing
    pca = PCA(n_components=featdim)
    # kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=0.1)
    # X_r = kpca.fit_transform(X_r)
    X_r = pca.fit(X_r).transform(X_r)
    ncluster = 10
    """
    from sklearn.cluster import DBSCAN
    dbscan = DBSCAN()
    plt.plot_DBSCAN_clustering_result(X_r,dbscan,0,1)
    return
    """
    # X_r = preprocessing.scale(X_r)
    kmeans = KMeans(n_clusters=ncluster)
    # print(X_r)
    kmeans.fit(X_r)
    plt.plot_clustering_and_targets(X_r, kmeans, 0, 1, targets)
    return

    """
    test = open('./test.csv','w')
    for dt in sint:
        for ft in dt:
            test.write(str(ft)+',')
        test.write('\n')
    """
    # colors = np.array([x for x in 'bgrcmykbgrcmykbgrcmykbgrcmyk'])
    # colors = np.hstack([colors] * 20)
    featdim = 10
    Y = randomtargets(sint)
    clf = svm.SVC(kernel='rbf', gamma=0.7)
    pca = PCA(n_components=featdim)
    pca_svm = Pipeline([
        ('pca', pca),
        ('svm', clf),
    ])
    pca_svm.fit(sint, Y)
    X_r = pca.fit(sint).transform(sint)
    cX_r = pca.fit(sint).transform(cint)
    # th1 = [l[1] for l in sint]
    # accx1 = [l[2] for l in sint]
    # print(th1)
    # plt.scatter(th1, accx1, 50, c=Y)
    # plt.show()
    features = []
    for i in range(0, featdim):
        features.append([l[i] for l in cX_r])
    Yp = [int(i) for i in pca_svm.predict(cint)]
    print(Yp)
    s = 411
    for f in features[1:5]:
        # plt.subplot(s)
        # plt.scatter(features[0], f, 50, c=Yp)
        i += 1
        s += 1
    # plt.show()
    s = 511
    for f in features[5:10]:
        # plt.subplot(s)
        # plt.scatter(features[0], f, color=colors[Yp].tolist())
        i += 1
        s += 1
    # plt.show()
    print(clf.support_vectors_)
    # plt.scatter(clf.support_vectors_, range(0,3), color=colors[range(0,3)].tolist())

    # create a mesh to plot in
    sint = np.array(sint)
    Y = np.array(Y)
    x_min, x_max = sint[:, 2].min() - 1, sint[:, 2].max() + 1
    y_min, y_max = Y.min() - 1, Y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
                         np.arange(y_min, y_max, .02))
    # print(len(Y), yy.shape)
    # Z = Y.reshape(yy.shape)
    pl.contourf(xx, yy, Y, cmap=pl.cm.Paired)
    pl.axis('off')

    # Plot also the training points
    pl.scatter(sint[:, 1], sint[:, 2], c=Y, cmap=pl.cm.Paired)
    pl.show()
    return

    # intervalslist = scale(intervalslist)
    # print(intervalslist)
    featdim = 5
    ncluster = 8
    clusters = range(1, ncluster + 1)
    pca = PCA(n_components=featdim)
    X_r = pca.fit(intervalslist).transform(intervalslist)
    features = []
    for i in range(0, featdim):
        features.append([l[i] for l in X_r])
    # return
    kmeans = KMeans()
    # print(X_r)
    pca_clustering = Pipeline([('pca', pca),
                               ('minmaxnorm', preprocessing.Normalizer()),
                               ('kmeans', kmeans)])
    clustering = Pipeline([('kmeans', kmeans)])
    print(pca_clustering.fit(intervalslist))
    # return
    pca_clusters = pca_clustering.predict(intervalslist)
    clustering.fit(intervalslist)
    nopca_clusters = clustering.predict(intervalslist)
    clustered = []
    i = 0
    s = 411
    for f in features[1:]:
        plt.subplot(s)
        plt.scatter(features[0], f, color=colors[pca_clusters].tolist())
        i += 1
        s += 1
    plt.show()
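# `cl.cross_model_selection` is defined elsewhere; the parameter grids above
# follow scikit-learn's step-name__param convention for pipelines, so a
# minimal sketch of what that call presumably wraps (an assumption, not the
# project's actual implementation) is a GridSearchCV over a pca+clf pipeline:
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

def cross_model_selection_sketch(X, y, param_grid):
    pipe = Pipeline([('pca', PCA()), ('clf', svm.SVC())])
    search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy')
    search.fit(X, y)
    return search.best_estimator_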
def scatter(self, points, **kwargs):
    plot_ = plotting.scatter(points, ax=self.get_axes(), **kwargs)
    return plot_