Example #1
from collections import Counter

# `plotting` is a project-local helper module.

def degrees(G):
    """
    Plot the in- and out-degree distributions of a directed NetworkX graph.
    """
    # in degrees: count how many nodes share each in-degree value
    in_data = Counter(dict(G.in_degree).values())
    in_meta = {
        "title": f"{G.name}, in degree distribution",
        "folder": "-".join(G.name.split("-")[::-1]),
        "file": f"in-deg-dist-{G.name}",
        "xlab": "k",
        "ylab": "P(k)"
    }
    plotting.scatter(G, in_data, in_meta)

    # out degrees
    out_data = Counter(dict(G.out_degree).values())
    out_meta = {
        "title": f"{G.name}, out degree distribution",
        "folder": "-".join(G.name.split("-")[::-1]),
        "file": f"out-deg-dist-{G.name}",
        "xlab": "k",
        "ylab": "P(k"
    }
    plotting.scatter(G, out_data, out_meta)
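
A minimal usage sketch for the function above, assuming `plotting` is the project-local module whose `scatter(G, data, meta)` helper the example calls:

import networkx as nx

# toy directed graph; the graph's name feeds the plot titles and file names
G = nx.gnp_random_graph(100, 0.05, directed=True, seed=1)
G.name = "demo-graph"

degrees(G)  # produces the in- and out-degree distribution plots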
Example #2
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns

# `plotting` and `AlgorithmUnrecognizedException` are project-local.

def features_pca_classified(fscaled, labels_true, labels_predict, axes=None,
                            algorithm="pca"):
    # reduce the scaled feature matrix to two dimensions
    if algorithm == "pca":
        pc = PCA(n_components=2)
        fscaled_trans = pc.fit(fscaled).transform(fscaled)
    elif algorithm == "tsne":
        fscaled_trans = TSNE(n_components=2).fit_transform(fscaled)
    else:
        raise AlgorithmUnrecognizedException(
            "Unrecognized dimensionality reduction algorithm: %s" % algorithm)

    sns.set_style("whitegrid")
    plt.rc("font", size=24, family="serif", serif="Computer Sans")
    plt.rc("axes", titlesize=20, labelsize=20)
    plt.rc("text", usetex=True)
    plt.rc('xtick', labelsize=20)
    plt.rc('ytick', labelsize=20)

    # make a Figure object
    if axes is None:
        fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

    ax1, ax2 = axes[0], axes[1]

    # first panel: ground-truth labels
    ax1 = plotting.scatter(fscaled_trans, labels_true, ax=ax1)

    # second panel: predicted labels
    ax2 = plotting.scatter(fscaled_trans, labels_predict, ax=ax2)

    plt.tight_layout()

    return ax1, ax2
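
A hedged usage sketch with synthetic data (the exact signature of the project-local `plotting.scatter` is an assumption):

import numpy as np

rng = np.random.RandomState(42)
fscaled = rng.randn(200, 10)             # 200 samples, 10 scaled features
labels_true = rng.randint(0, 3, 200)     # ground-truth class labels
labels_predict = rng.randint(0, 3, 200)  # classifier output

ax1, ax2 = features_pca_classified(fscaled, labels_true, labels_predict,
                                   algorithm="pca")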
Example #3
# (imports as in Example #2, plus `import numpy as np`;
# `feature_engineering` below is another project-local module)

def features_pca(fscaled, labels, axes=None,
                 alpha=0.8, palette="Set3", algorithm="pca"):
    # reduce the scaled feature matrix to two dimensions
    if algorithm == "pca":
        pc = PCA(n_components=2)
        fscaled_trans = pc.fit(fscaled).transform(fscaled)
    elif algorithm == "tsne":
        fscaled_trans = TSNE(n_components=2).fit_transform(fscaled)
    else:
        raise AlgorithmUnrecognizedException(
            "Unrecognized dimensionality reduction algorithm: %s" % algorithm)

    sns.set_style("whitegrid")
    plt.rc("font", size=24, family="serif", serif="Computer Sans")
    plt.rc("axes", titlesize=20, labelsize=20)
    plt.rc("text", usetex=True)
    plt.rc('xtick', labelsize=20)
    plt.rc('ytick', labelsize=20)

    # make a Figure object
    if axes is None:
        fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

    ax1, ax2 = axes[0], axes[1]

    labels_all = np.hstack([labels["train"], labels["val"], labels["test"]])

    ax1 = plotting.scatter(fscaled_trans, labels_all, ax=ax1)

    # second panel: physical labels:
    labels_phys = feature_engineering.convert_labels_to_physical(labels)

    labels_all_phys = np.hstack([labels_phys["train"], labels_phys["val"],
                                 labels_phys["test"]])

    ax2 = plotting.scatter(fscaled_trans, labels_all_phys, ax=ax2)

    plt.tight_layout()

    return ax1, ax2
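
Unlike the previous example, `features_pca` takes `labels` as a dict of train/validation/test splits whose concatenation must line up row-for-row with `fscaled`. A sketch of that layout (split sizes are illustrative, and the project-local `plotting` and `feature_engineering` modules must be importable):

import numpy as np

rng = np.random.RandomState(0)
fscaled = rng.randn(300, 8)  # rows ordered as train, then val, then test
labels = {
    "train": rng.randint(0, 3, 200),
    "val": rng.randint(0, 3, 50),
    "test": rng.randint(0, 3, 50),
}

ax1, ax2 = features_pca(fscaled, labels, algorithm="pca")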
Example #4
import networkx as nx

# `plotting`, `component`, and `sampler` are project-local helpers;
# a sketch of the latter two follows this example.

def centrality(G):
    # betweenness, approximated from a sample of k=1000 source nodes
    betw = nx.betweenness_centrality(G, k=1000)
    in_meta = {"title": f"{G.name}, betweenness centrality",
               "folder": "-".join(G.name.split("-")[::-1]),
               "file": f"betw-cent-{G.name}",
               "xlab": "C_b", "ylab": "P(C_b)"}
    plotting.scatter(betw, in_meta)

    #ideg = nx.in_degree_centrality(G)

    #odeg = nx.out_degree_centrality(G)
    # closeness, sampled on the largest component (exact closeness for
    # every node is expensive)
    clos = {nx.closeness_centrality(component(G), u=n)
            for n in sampler(component(G))}
    in_meta = {"title": f"{G.name}, closeness centrality",
               "folder": "-".join(G.name.split("-")[::-1]),
               "file": f"clos-cent-{G.name}",
               "xlab": "C_b", "ylab": "P(C_b)"}
    plotting.scatter(clos, in_meta)

    # eigenvector
    eige = nx.eigenvector_centrality(G, max_iter=200)
    in_meta = {"title": f"{G.name}, eigenvector centrality",
               "folder": "-".join(G.name.split("-")[::-1]),
               "file": f"eigen-cent-{G.name}",
               "xlab": "C_b", "ylab": "P(C_b)"}
    plotting.scatter(eige, in_meta)
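
The example relies on two project-local helpers, `component` and `sampler`, that restrict the expensive per-node centralities to a sample of the largest component. Plausible minimal versions (assumptions, not the project's actual code):

import random

import networkx as nx

def component(G):
    # largest weakly connected component of a directed graph, as a subgraph
    nodes = max(nx.weakly_connected_components(G), key=len)
    return G.subgraph(nodes)

def sampler(G, k=1000):
    # sample up to k nodes so the centrality computations stay affordable
    nodes = list(G)
    return random.sample(nodes, min(k, len(nodes)))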
Example #5
# Collect model outputs for the background sample (comp_preds and
# comp_true for the signal sample come from an analogous loop that is
# not part of this snippet)
comp_preds_bkg = []
comp_true_bkg = []
for (batchX, batchY) in next_batch(test_bkg, probs_test_bkg, batchSize):
    if batchY.shape[0] < batchSize:
        print('Batch size insufficient (%s), continuing...' % batchY.shape[0])
        continue

    output = model.evaluate_total(batchX, debug=False)

    comp_preds_bkg.extend(output.T)
    comp_true_bkg.extend(batchY)

# plot the comparison to the truth
scatter(comp_preds, comp_true, [0.0, 1.00], [0.0, 1.00], "Prediction", "Truth",
        "Approximation comparison", "plots/approx_vs_truth_deep_fromLoad.pdf")

comp_preds = [d.item(0) for d in comp_preds]
difflist = [(p - t) for p, t in zip(comp_preds, comp_true)
            if (math.fabs(p - t) < 0.0001)]
comp_preds_bkg = [d.item(0) for d in comp_preds_bkg]
difflist_bkg = [(p - t) for p, t in zip(comp_preds_bkg, comp_true_bkg)
                if (math.fabs(p - t) < 0.0001)]

hd_hist([difflist, difflist_bkg], 'plots/approx_vs_truth_diff.pdf',
        [-0.00005, 0.00005], [0.0, 1100.0], "Approx. difference", "Events",
        np.arange(-0.00005, 0.00005, 0.0001 / 350), ['signal', 'background'])

# Training analysis
f_in = open('training.pkl', 'rb')
training = pickle.load(f_in)
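
`next_batch` is not shown in the snippet; a minimal generator consistent with how the loop consumes it (an assumption about the project's code):

def next_batch(data, targets, batch_size):
    # yield successive (X, y) slices; the final slice may be short, which
    # is why the loop above skips undersized batches
    for i in range(0, len(data), batch_size):
        yield data[i:i + batch_size], targets[i:i + batch_size]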
Example #6
    model.add(Dense(1, activation='sigmoid'))

    # compile model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(np.expand_dims(setTrain, axis=2),
                        labels,
                        batch_size=batchSize,
                        epochs=epochNum,
                        validation_data=(np.expand_dims(setTest,
                                                        axis=2), labels))

    # plot the training loss per epoch (the hard-coded 300 assumes
    # epochNum == 300)
    scatter(range(0, 300), history.history['loss'], [0, 300],
            [min(history.history['loss']),
             max(history.history['loss'])], 'Epoch', 'Loss', 'Training Loss',
            'training_loss.pdf')

    joblib.dump(history.history, open('./keras_hist.pkl', 'wb'))

    model.save('./keras_locallyconnected1d_for_drone.h5')

if not model:
    print('ERROR: Could not load or create Keras model. Exiting...')
    sys.exit(1)

# get full keras response space on data
refs = []
flattened = []
for point in all_data:
    conv_point = np.expand_dims(np.expand_dims(point, axis=2), axis=0)
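
Here `np.expand_dims(..., axis=2)` turns a (samples, features) matrix into the (samples, features, 1) tensor that Keras' 1-D convolutional and locally-connected layers expect:

import numpy as np

setTrain = np.zeros((500, 20))                 # (samples, features)
print(np.expand_dims(setTrain, axis=2).shape)  # -> (500, 20, 1)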
Example #7
for n in names:
    sigPreds, bkgPreds = predsFromModel(n)
    predsSig.append(sigPreds)
    predsBkg.append(bkgPreds)

    # plot the comparison to the truth: interleave signal and background
    # with one shared random permutation, keeping the first 400 points
    totalpreds = sigPreds + bkgPreds
    totaltrue = comp_true + comp_true_bkg
    totalpreds = totalpreds[:400]
    gen = list(range(len(totalpreds)))
    np.random.shuffle(gen)
    totalpredsNew = [totalpreds[x] for x in gen]
    totaltrue = totaltrue[:400]
    totaltrueNew = [totaltrue[x] for x in gen]
    scatter(
        totalpredsNew, totaltrueNew, [0.0, 1.00], [0.0, 1.00], "Prediction",
        "Truth", "Approximation comparison",
        "plots_gpd/approx_vs_truth_deep_fromLoad_%s.pdf" % n.replace('.pkl', ''))

# make ROC curves
xvals_orig = []
xvals_drone = []
yvals_orig = []
yvals_drone = []
scanpoints = np.linspace(0.0, 1.0, 500)
for p in range(len(predsSig)):
    xvals_drone_inner = []
    yvals_drone_inner = []
    for s in scanpoints:
        es, rb, nSig, nBKG = scanPoint(s, predsSig[p], predsBkg[p])
        xvals_drone_inner.append(rb)
        yvals_drone_inner.append(es)
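
`scanPoint` is external to the snippet; a sketch of a threshold scan that would yield these ROC inputs, under assumed conventions (signal efficiency `es`, background rejection `rb`):

def scanPoint(threshold, preds_sig, preds_bkg):
    # count predictions passing the cut in each sample
    nSig = sum(1 for p in preds_sig if p > threshold)
    nBKG = sum(1 for p in preds_bkg if p > threshold)
    es = nSig / float(len(preds_sig))        # signal efficiency
    rb = 1.0 - nBKG / float(len(preds_bkg))  # background rejection
    return es, rb, nSig, nBKG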
Example #8
def scatter(self, points, **kwargs):
    ax = self.get_axes()
    permutation = self._permutation
    plot_ = plotting.scatter(points, ax=ax, permutation=permutation,
                             **kwargs)
    return plot_
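
The method reads like a thin wrapper on a plotting-axes class (the permutation handling suggests a ternary-plot axes object). A minimal sketch of the enclosing context it assumes, with invented names:

class AxesWrapper:
    def __init__(self, ax, permutation=None):
        self._ax = ax
        self._permutation = permutation

    def get_axes(self):
        return self._ax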
Example #9
def main(argv):
    # defaults
    window_length = 50
    overlap = window_length // 2  # integer half-window overlap
    featdim = 10

    training_data, training_sgmdata = load_dataset(window_length, overlap)

    training_featdata, header = build_dataset_features(training_sgmdata)
    cl.rnn_test(training_featdata)
    # NOTE: everything past this return is unreachable exploratory
    # scratch code
    return
    data_120250, sgmdata_120250 = load_dataset(
        window_length,
        overlap,
        median_filter=True,
        alldatafile=
        '../../acquisizione20062014/acquisizione_20062014/Data_120250.txt')

    # these data are completely different from the other three
    # data_120611,sgmdata_120611 = load_dataset(window_length,overlap,median_filter=True,alldatafile='../../acquisizione20062014/acquisizione_20062014/Data_120611.txt')
    """
	data_120922,sgmdata_120922 = load_dataset(window_length,overlap,median_filter=True,alldatafile='../../acquisizione20062014/acquisizione_20062014/Data_120922.txt')

	all_data = [(data_115818,"115818"),(data_120250,"120250"),(data_120611,"120611"),(data_120922,"120922")]
	sgm_data = [sgmdata_115818,sgmdata_120250,sgmdata_120611,sgmdata_120922]
	cols = ['b','r','g','m']
	for (data,title),c in zip(all_data,cols):
		print "Acquisizione", title
		plt.plot_in_subplots(data,0,1,c)
		return
	"""
    return

    training_data, training_sgmdata = load_dataset(window_length, overlap)

    training_featdata, header = build_dataset_features(training_sgmdata)
    training_targets = fm.assign_target(training_featdata)
    """
	data1,sgmdata1 = load_dataset(window_length,overlap,alldatafile='/home/ilaria/Scrivania/marsupio/acquisizione20062014/acquisizione_20062014/Data_120250.txt')
	featdata1,_ = build_dataset_features(sgmdata1)
	targets1 = fm.assign_target(featdata1)
	"""

    #write_feature_data_to_file(featdata,header)
    #print featdata[0,idxs]
    #plt.plot_in_subplots(featdata,idxs)
    #plt.plot_all(featdata1[:,idxs])

    #X_r=preprocessing.scale(featdata)
    #pca = PCA(n_components=featdim)

    #kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=0.1)
    #X_r = kpca.fit_transform(X_r)
    #X_r = pca.fit(X_r).transform(X_r)

    X_r = training_featdata
    targets = training_targets
    pca = PCA(n_components=2)
    X_r = preprocessing.scale(X_r)
    X_r = pca.fit(X_r).transform(X_r)
    kmeans = KMeans(n_clusters=10)
    kmeans.fit(X_r)
    plt.plot_clustering_and_targets(X_r, kmeans, 0, 1, targets)
    return
    pars = [{
        'clf__kernel': ['rbf'],
        'clf__gamma': [1e-3, 1e-5, 1e-2, 1e-1, 1e-4],
        'clf__C': [0.001, 0.01, 0.1, 1, 10, 100],
        'pca__n_components': [5, 10, 20, 50, 80]
    }, {
        'clf__kernel': ['linear'],
        'clf__C': [0.001, 0.01, 0.1, 0.5, 1, 10, 100],
        'pca__n_components': [5, 10, 20, 50, 80]
    }]

    #evaluation set
    cl.cross_model_selection(X_r, targets, pars, save=True)
    c = cl.load_model('model.pkl')
    print(c)
    return

    #print X_train.shape, X_test.shape
    clf = svm.SVC(kernel='rbf', gamma=0.7, C=0.8)
    pca = PCA(n_components=featdim)
    pca_svm = Pipeline([
        ('pca', pca),
        ('svm', clf),
    ])
    scores = cross_validation.cross_val_score(clf,
                                              X_r,
                                              targets,
                                              cv=5,
                                              scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    #pca_svm.fit(X_train, y_train)
    #print pca_svm.score(X_test,y_test)
    return
    #X_r = pca.fit(sint).transform(sint)

    #X_r = preprocessing
    pca = PCA(n_components=featdim)

    #kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=0.1)
    #X_r = kpca.fit_transform(X_r)
    X_r = pca.fit(X_r).transform(X_r)
    ncluster = 10
    """
	from sklearn.cluster import DBSCAN
	dbscan = DBSCAN()
	
	plt.plot_DBSCAN_clustering_result(X_r,dbscan,0,1)
	return
	"""
    #X_r = preprocessing.scale(X_r)
    kmeans = KMeans(n_clusters=ncluster)
    #print X_r
    kmeans.fit(X_r)
    plt.plot_clustering_and_targets(X_r, kmeans, 0, 1, targets)

    return
    """
	test = open('./test.csv','w')
	for dt in sint:
		for ft in dt:
			test.write(str(ft)+',')
		
		test.write('\n')
	"""
    #colors = np.array([x for x in 'bgrcmykbgrcmykbgrcmykbgrcmyk'])
    #colors = np.hstack([colors] * 20)

    featdim = 10

    Y = randomtargets(sint)
    clf = svm.SVC(kernel='rbf', gamma=0.7)
    pca = PCA(n_components=featdim)
    pca_svm = Pipeline([
        ('pca', pca),
        ('svm', clf),
    ])

    pca_svm.fit(sint, Y)
    X_r = pca.fit(sint).transform(sint)
    cX_r = pca.fit(sint).transform(cint)
    #th1 = [l[1] for l in sint]
    #accx1 = [l[2] for l in sint]
    #print(th1)
    #plt.scatter(th1, accx1, 50,c=Y)
    #plt.show()

    features = []
    for i in range(0, featdim):
        features.append([l[i] for l in cX_r])
    Yp = [int(i) for i in pca_svm.predict(cint)]
    print(Yp)
    s = 411
    for f in features[1:5]:
        #	plt.subplot(s)
        #	plt.scatter(features[0], f, 50,c=Yp)
        i += 1
        s += 1

    #plt.show()
    s = 511
    for f in features[5:10]:
        #	plt.subplot(s)
        #	plt.scatter(features[0], f, color=colors[Yp].tolist())
        i += 1
        s += 1

    #plt.show()
    print(clf.support_vectors_)
    #	plt.scatter(clf.support_vectors_,range(0,3), color=colors[range(0,3)].tolist())
    # create a mesh to plot in
    sint = np.array(sint)
    Y = (np.array(Y))

    x_min, x_max = sint[:, 2].min() - 1, sint[:, 2].max() + 1
    y_min, y_max = Y.min() - 1, Y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
                         np.arange(y_min, y_max, .02))
    #print len(Y), yy.shape
    #Z = Y.reshape(yy.shape)
    pl.contourf(xx, yy, Y, cmap=pl.cm.Paired)
    pl.axis('off')

    # Plot also the training points
    pl.scatter(sint[:, 1], sint[:, 2], c=Y, cmap=pl.cm.Paired)
    pl.show()
    return
    #intervalslist=scale(intervalslist)
    #print intervalslist
    featdim = 5
    ncluster = 8
    clusters = range(1, ncluster + 1)

    pca = PCA(n_components=featdim)
    X_r = pca.fit(intervalslist).transform(intervalslist)
    features = []
    for i in range(0, featdim):
        features.append([l[i] for l in X_r])

    #return
    kmeans = KMeans()
    #print X_r
    pca_clustering = Pipeline([('pca', pca),
                               ('minmaxnorm', preprocessing.Normalizer()),
                               ('kmeans', kmeans)])
    clustering = Pipeline([('kmeans', kmeans)])
    print(pca_clustering.fit(intervalslist))
    #return
    pca_clusters = pca_clustering.predict(intervalslist)

    clustering.fit(intervalslist)
    nopca_clusters = clustering.predict(intervalslist)
    clustered = []
    i = 0
    s = 411
    for f in features[1:]:
        plt.subplot(s)
        plt.scatter(features[0], f, color=colors[pca_clusters].tolist())
        i += 1
        s += 1

    plt.show()
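
One of the unreachable exploratory blocks above (scale, project with 2-component PCA, cluster with KMeans) distilled into a self-contained sketch with synthetic stand-in data:

import numpy as np
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
featdata = rng.randn(200, 10)  # stand-in for training_featdata

X_r = preprocessing.scale(featdata)
X_r = PCA(n_components=2).fit_transform(X_r)
kmeans = KMeans(n_clusters=10, n_init=10).fit(X_r)
print(np.bincount(kmeans.labels_))  # cluster sizes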
    """
def scatter(self, points, **kwargs):
    plot_ = plotting.scatter(points, ax=self.get_axes(), **kwargs)
    return plot_