def test_cwba_inv():
    """Plot SPD/DSD/RD accuracy on CWBA graphs as a function of 1/rho.

    ``rho_inv`` is swept from 0.05 up to (but not including) 1 in steps of
    0.05.  For every value a graph is built with
    ``cwba.cwba_graph(n, m, 1 / rho_inv)`` and ``sim.runsim`` is called once
    per distance matrix (``metrics.spd_mat``, ``metrics.dsd_mat``,
    ``metrics.rd_mat``) using ``voting.scipy_weighted_knn``.  Each call's
    result is unpacked as ``(correct, total)`` and the ratio is stored.  The
    three accuracy series are finally handed to
    ``plotting.plot_params_vs_accuracy``.
    """
    n = 1000
    m = 300
    censorP = 0.7
    avgRuns = 10
    # Step with Decimal so the loop variable does not accumulate binary
    # floating-point error.
    increment = Decimal(str(0.05))
    # Start one step above zero: 1 / rho_inv is undefined at rho_inv == 0.
    rho_inv = Decimal(str(0.0)) + increment

    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    while float(rho_inv) < 1:
        A, truth = cwba.cwba_graph(n, m, 1 / float(rho_inv))
        spdcorr, spdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        # Record the value this graph was built with *before* stepping, so
        # every accuracy is plotted against its own x-coordinate.
        params.append(float(rho_inv))
        rho_inv += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs], "1/rho", ["SPD", "DSD", "RD"])
def test_ncch_q():
    """Sweep the NCCH edge-deletion probability q and plot accuracy per metric.

    With p fixed at 0.5, q runs from 0.0 while ``float(q) <= 1``, advancing by
    the module-level ``increment``.  Each step builds a graph with
    ``ncch.construct_adj`` and runs ``sim.runsim`` once for each of the SPD,
    DSD and RD distance matrices; the ``correct / total`` ratios are collected
    and passed to ``plotting.plot_params_vs_accuracy``.
    """
    # Experiment settings shared across the drivers are read from module scope.
    global n,r,rp,censorP,avgRuns,increment,vote

    add_prob = Decimal("0.5")
    del_prob = Decimal("0.0")
    curves = {"spd": [], "dsd": [], "rd": []}
    swept = []

    while float(del_prob) <= 1:
        A, truth = ncch.construct_adj(n, float(add_prob), float(del_prob), r, rp)
        # Same evaluation order as always: SPD, then DSD, then RD.
        for key, to_matrix in (("spd", metrics.spd_mat),
                               ("dsd", metrics.dsd_mat),
                               ("rd", metrics.rd_mat)):
            hits, attempts = sim.runsim(truth, censorP, vote, to_matrix(A), avgRuns)
            curves[key].append(hits / attempts)
        print(del_prob)
        swept.append(float(del_prob))
        del_prob += increment

    plotting.plot_params_vs_accuracy(
        swept,
        [curves["spd"], curves["dsd"], curves["rd"]],
        "Edge deletion probability (q)",
        ["SPD","DSD","RD"],
        "NCCH (p=0.5)")
def test_cwba():
    """Plot SPD/DSD/RD accuracy on CWBA graphs as a function of m.

    With rho fixed at 2.0, ``m`` takes the integer values 1 through 19.  For
    each value a graph is built with ``cwba.cwba_graph(n, m, rho)`` and
    ``sim.runsim`` is called once per distance matrix (SPD, DSD, RD) using
    ``voting.scipy_weighted_knn``.  Each result is unpacked as
    ``(correct, total)`` and the ratio is stored, then all three series are
    passed to ``plotting.plot_params_vs_accuracy``.
    """
    n = 1000
    rho = 2.0
    censorP = 0.7
    avgRuns = 10

    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    for m in range(1, 20):
        A, truth = cwba.cwba_graph(n, m, rho)
        spdcorr, spdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        # Store the m that was actually used for this graph so the x-axis
        # lines up with the measured accuracies.
        params.append(m)
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs], "Minimum vertex degree (m)", ["SPD", "DSD", "RD"], "CWBA (" + u"\u03C1" + "=2)")
def test_ncch_r():
    """Sweep the NCCH hub count r and plot accuracy for SPD, DSD and RD.

    p is 0.2 and q is 0.5; r takes the values 100, 150, ..., 400.  For each r
    a graph is built with ``ncch.construct_adj`` and ``sim.runsim`` is run on
    each of the three distance matrices.  The ``correct / total`` ratios are
    plotted against r via ``plotting.plot_params_vs_accuracy``.
    """
    # Remaining settings come from module scope; r is deliberately local here.
    global n,rp,censorP,avgRuns,vote

    p = Decimal("0.2")  # p=0.2,0.8
    q = Decimal("0.5")  # q=0.5
    spd_curve, dsd_curve, rd_curve = [], [], []
    hub_counts = []

    # r=(0,20,1), r=(100,400,50)
    for r in range(100, 401, 50):
        A, truth = ncch.construct_adj(n, float(p), float(q), r, rp)

        ok, seen = sim.runsim(truth, censorP, vote, metrics.spd_mat(A), avgRuns)
        spd_curve.append(ok / seen)

        ok, seen = sim.runsim(truth, censorP, vote, metrics.dsd_mat(A), avgRuns)
        dsd_curve.append(ok / seen)

        ok, seen = sim.runsim(truth, censorP, vote, metrics.rd_mat(A), avgRuns)
        rd_curve.append(ok / seen)

        print(r)
        hub_counts.append(r)

    title = "NCCH (p={},q={})".format(p, q)
    plotting.plot_params_vs_accuracy(
        hub_counts,
        [spd_curve, dsd_curve, rd_curve],
        "Number of hubs",
        ["SPD","DSD","RD"],
        title)
def test_ncch_censor():
    """Sweep the label-censoring proportion on NCCH graphs and plot accuracy.

    p is 0.8 and q is 0.5.  The censoring proportion starts at 0.1 and grows
    by 0.1 while its float value is below 1.  A fresh graph is built with
    ``ncch.construct_adj`` at every step and ``sim.runsim`` is evaluated for
    the SPD, DSD and RD distance matrices; the resulting ``correct / total``
    ratios are plotted with ``plotting.plot_params_vs_accuracy``.
    """
    # Graph size, hub settings, run count and voting function are module-level.
    global n,r,rp,avgRuns,vote

    p = Decimal("0.8")  # p=0.8
    q = Decimal("0.5")  # q=0.5
    step = Decimal("0.1")
    hidden = Decimal("0.1")
    series = [[], [], []]  # SPD, DSD, RD accuracies in that order
    xs = []

    while float(hidden) < 1:
        A, truth = ncch.construct_adj(n, float(p), float(q), r, rp)
        builders = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
        for accs, build in zip(series, builders):
            right, count = sim.runsim(truth, float(hidden), vote, build(A), avgRuns)
            accs.append(right / count)
        print(hidden)
        xs.append(float(hidden))
        hidden += step

    plotting.plot_params_vs_accuracy(
        xs,
        series,
        "Vertex label censor proportion",
        ["SPD","DSD","RD"],
        "NCCH (p=" + str(p) + ",q=" + str(q) + ")")
def test_cwba_rhoinv():
    """Sweep 1/rho for CWBA graphs (m = 300) and plot accuracy per metric.

    The inverse of rho starts at 0.05 and advances by 0.05 while its float
    value is at most 1.  Each step builds ``cwba.cwba_graph(n, 300, rho)`` with
    ``rho = 1 / rho_inv`` and runs ``sim.runsim`` on the SPD, DSD and RD
    distance matrices.  The ``correct / total`` ratios are plotted with
    ``plotting.plot_params_vs_accuracy``.
    """
    # Graph size, censoring level, run count and voter are module-level.
    global n, censorP, avgRuns, vote

    degree = int(Decimal("300.0"))
    step = Decimal("0.05")
    inv = Decimal("0.05")
    spd_acc, dsd_acc, rd_acc = [], [], []
    xs = []

    while float(inv) <= 1:
        A, truth = cwba.cwba_graph(n, degree, 1 / float(inv))

        good, total = sim.runsim(truth, censorP, vote, metrics.spd_mat(A), avgRuns)
        spd_acc.append(good / total)
        good, total = sim.runsim(truth, censorP, vote, metrics.dsd_mat(A), avgRuns)
        dsd_acc.append(good / total)
        good, total = sim.runsim(truth, censorP, vote, metrics.rd_mat(A), avgRuns)
        rd_acc.append(good / total)

        print(inv)
        xs.append(float(inv))
        inv += step

    # Greek small letter rho for the axis label.
    rho_str = u"\u03C1"
    x_label = "Inverse likeliness of clusters (1/" + rho_str + ")"
    plotting.plot_params_vs_accuracy(
        xs, [spd_acc, dsd_acc, rd_acc], x_label, ["SPD", "DSD", "RD"], "CWBA (m=300)")
def test_cwba_m():
    """Sweep the CWBA parameter m (rho = 2) and plot accuracy per metric.

    m starts at 1 and grows by 1 while ``int(m) < 20``.  Each step builds
    ``cwba.cwba_graph(n, m, 2.0)`` and runs ``sim.runsim`` on the SPD, DSD and
    RD distance matrices; the ``correct / total`` ratios are plotted with
    ``plotting.plot_params_vs_accuracy``.
    """
    # Graph size, censoring level, run count and voter are module-level.
    global n, censorP, avgRuns, vote

    rho_value = float(Decimal("2.0"))
    one = Decimal("1.0")
    m = Decimal("1.0")
    results = {"SPD": [], "DSD": [], "RD": []}
    degrees = []

    while int(m) < 20:
        A, truth = cwba.cwba_graph(n, int(m), rho_value)
        for label, make in (("SPD", metrics.spd_mat),
                            ("DSD", metrics.dsd_mat),
                            ("RD", metrics.rd_mat)):
            n_right, n_all = sim.runsim(truth, censorP, vote, make(A), avgRuns)
            results[label].append(n_right / n_all)
        # m is kept as a Decimal so the progress output reads e.g. "1.0".
        print(m)
        degrees.append(int(m))
        m += one

    rho_str = u"\u03C1"
    plotting.plot_params_vs_accuracy(
        degrees,
        [results["SPD"], results["DSD"], results["RD"]],
        "New vertex degree (m)",
        ["SPD", "DSD", "RD"],
        "CWBA (" + rho_str + "=2)")
def class_dists(G, truth_tab):
    """Tabulate SPD, DSD and class agreement for every unordered vertex pair.

    One row is produced per pair ``(u, v)`` with ``u < v``, in the order
    generated by ``itertools.combinations``.  A vertex is therefore never
    paired with itself, so zero-length paths do not appear in the table.

    Parameters
    ----------
    G : networkx.Graph or numpy.ndarray
        Graph; it is converted to an adjacency matrix with ``as_adj``.
    truth_tab : dict
        Labels keyed by vertex index (``0 .. A.shape[0] - 1``).

    Returns
    -------
    df : DataFrame
        Columns ``spd`` and ``dsd`` hold the two distances for the pair;
        ``class_diff`` is 0 when both vertices share a label and 1 otherwise.
        All columns are float64.
    """
    A = as_adj(G)
    n_vertices = A.shape[0]

    # Number of unordered pairs; n * (n - 1) is always even, so integer
    # division is exact and avoids a round trip through float.
    n_rows = n_vertices * (n_vertices - 1) // 2

    # numpy table to hold the result before we stick it in the data frame
    tab = np.empty((n_rows, 3), dtype=np.float64)

    dsds = dsd_mat(A)
    spds = spd_mat(A)

    for row, (u, v) in enumerate(combinations(range(n_vertices), r=2)):
        tab[row, 0] = spds[u, v]
        tab[row, 1] = dsds[u, v]
        tab[row, 2] = 0 if truth_tab[u] == truth_tab[v] else 1

    return pd.DataFrame(tab, columns=['spd', 'dsd', 'class_diff'])
def spdists(G):
    """Return the shortest-path distance of every vertex pair u < v as a list.

    ``G`` is converted with ``as_adj`` and the distance table comes from
    ``spd_mat``.  Pairs are emitted row by row: (0, 1), (0, 2), ..., (1, 2),
    and so on.
    """
    adj = as_adj(G)
    dist = spd_mat(adj)
    return [
        dist[src][dst]
        for src in range(0, adj.shape[0])
        for dst in range(src + 1, adj.shape[0])
    ]
def test_censor():
    """Plot SPD/DSD/RD accuracy on NCC graphs against the censoring level.

    With p = 0.3 and q = 0.5, ``censorP`` is swept from 0.05 up to (but not
    including) 1 in steps of 0.05.  Every step builds a new graph with
    ``cg.construct_adj`` and calls ``sim.runsim`` once per distance matrix
    (SPD, DSD, RD) using ``voting.scipy_weighted_knn``.  Each result is
    unpacked as ``(correct, total)`` and the ratio is stored; the three
    series are then passed to ``plotting.plot_params_vs_accuracy``.
    """
    n = 250
    p = 0.3
    q = 0.5
    avgRuns = 10
    # Step with Decimal so the sweep variable does not drift through repeated
    # binary floating-point addition.
    increment = Decimal(str(0.05))
    censorP = Decimal(str(0.05))

    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    while float(censorP) < 1:
        censor_frac = float(censorP)
        A, truth = cg.construct_adj(n, p, q)
        spdcorr, spdtotal = sim.runsim(truth, censor_frac, voting.scipy_weighted_knn, metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censor_frac, voting.scipy_weighted_knn, metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censor_frac, voting.scipy_weighted_knn, metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        print(censorP)
        # Record the level that was just simulated *before* stepping, so each
        # accuracy is plotted against its own censoring probability.
        params.append(censor_frac)
        censorP += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs], "Censoring probability (censorP)", ["SPD", "DSD", "RD"], "NCC (q=0.5)")
    return
def test_cg():
    """Run two parameter sweeps on ``cg.construct_adj`` graphs and plot each.

    Sweep 1 fixes q = 0.5 and varies p from 0.0 while ``float(p) <= 1`` in
    steps of 0.05, voting with ``voting.scipy_weighted_knn``.

    Sweep 2 fixes p = 0.5 and varies q from 0.05 while ``float(q) <= 1`` in
    steps of 0.05, voting with ``voting.knn_weighted_majority_vote``.

    At every step ``sim.runsim`` is called once per distance matrix (SPD, DSD,
    RD); each result is unpacked as ``(correct, total)`` and the ratio is
    stored.  Each sweep ends with a call to
    ``plotting.plot_params_vs_accuracy``.
    """
    n = 250
    censorP = 0.6
    avgRuns = 10
    # Step with Decimal so the sweep variables do not drift through repeated
    # binary floating-point addition.
    increment = Decimal(str(0.05))

    def accuracies(p, q, vote):
        """Build one graph and return its [SPD, DSD, RD] accuracy ratios."""
        A, truth = cg.construct_adj(n, float(p), float(q))
        ratios = []
        for dist_mat in (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat):
            correct, total = sim.runsim(truth, censorP, vote, dist_mat(A), avgRuns)
            ratios.append(correct / total)
        return ratios

    # ---- Sweep 1: edge addition probability p, q held at 0.5 ----
    q = Decimal(str(0.5))
    p = Decimal(str(0.0))
    spdaccs, dsdaccs, rdaccs, params = [], [], [], []
    while float(p) <= 1:
        spd, dsd, rd = accuracies(p, q, voting.scipy_weighted_knn)
        spdaccs.append(spd)
        dsdaccs.append(dsd)
        rdaccs.append(rd)
        print(p)
        # Record the value just simulated *before* stepping, so the x-axis
        # matches the graphs that produced each accuracy.
        params.append(float(p))
        p += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs], "Edge addition probability (p)", ["SPD", "DSD", "RD"], "NCC (q=0.5)")

    # ---- Sweep 2: edge deletion probability q, p held at 0.5 ----
    p = Decimal(str(0.5))
    # This sweep starts one step above zero.
    q = Decimal(str(0.0)) + increment
    spdaccs, dsdaccs, rdaccs, params = [], [], [], []
    while float(q) <= 1:
        spd, dsd, rd = accuracies(p, q, voting.knn_weighted_majority_vote)
        spdaccs.append(spd)
        dsdaccs.append(dsd)
        rdaccs.append(rd)
        print(q)
        params.append(float(q))
        q += increment
    # NOTE(review): the title mentions NCCH and 100 hubs, but the graphs above
    # come from cg.construct_adj, which is given no hub count -- confirm
    # whether the title or the constructor is the intended one.
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs], "Edge deletion probability (q)", ["SPD", "DSD", "RD"], "NCCH (p=0.5, number of hubs=100)")
    return
def test_cg_h():
    """Plot SPD/DSD/RD accuracy against the hub count for two ranges of r.

    With p = q = 0.5, graphs are built with ``cg.constructWithHubs`` for
    r = 1, 2, ..., 20 and then for r = 100, 150, ..., 400.  At every r,
    ``sim.runsim`` is called once per distance matrix (SPD, DSD, RD) using
    ``voting.scipy_weighted_knn``; each result is unpacked as
    ``(correct, total)`` and the ratio is stored.  Each range produces its own
    call to ``plotting.plot_params_vs_accuracy``.
    """
    n = 250
    p = 0.5
    q = 0.5
    censorP = 0.7
    avgRuns = 10

    def sweep(hub_counts):
        """Simulate every hub count in *hub_counts* and plot the results."""
        spdaccs, dsdaccs, rdaccs, params = [], [], [], []
        for r in hub_counts:
            A, truth = cg.constructWithHubs(n, p, q, r)
            spdcorr, spdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.spd_mat(A), avgRuns)
            dsdcorr, dsdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.dsd_mat(A), avgRuns)
            rdcorr, rdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn, metrics.rd_mat(A), avgRuns)
            spdaccs.append(spdcorr / spdtotal)
            dsdaccs.append(dsdcorr / dsdtotal)
            rdaccs.append(rdcorr / rdtotal)
            # Store the hub count that was actually simulated so the x-axis
            # lines up with the measured accuracies.
            params.append(r)
        plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs], "Number of hubs", ["SPD", "DSD", "RD"])

    # Fine-grained range: 1..20 in steps of 1.
    sweep(range(1, 21))
    # Coarse range: 100..400 in steps of 50.
    sweep(range(100, 401, 50))
    return