def test_ncpplots():
    """Smoke-test the NCPPlots drawing methods on the example graph.

    First builds an NCP with ``mqi`` and calls each plotting method on it,
    then builds a second NCP with ``approxPageRank`` and plots from its
    data frame, and finally constructs plotters filtered by method name
    after also running ``crd`` and ``l1reg``. Nothing is asserted; the
    test passes when no call raises.
    """
    graph = load_example_graph()
    mqi_ncp = lgc.NCPData(graph)
    mqi_ncp.mqi()

    plotter = lgc.NCPPlots(mqi_ncp)
    plotter.cond_by_vol()
    plotter.cond_by_size()
    plotter.isop_by_size()
    plotter.mqi_input_output_cond_plot()
    plotter.cond_by_vol_itrv(alpha=0.2)
    plotter.cond_by_size_itrv(alpha=0.2)
    plotter.isop_by_size_itrv(alpha=0.2)
    lgc.NCPPlots(mqi_ncp, method_name="mqi")

    # Second pass: plot from a data frame instead of an NCPData object.
    graph = load_example_graph()
    apr_ncp = lgc.NCPData(graph)
    apr_ncp.approxPageRank()
    frame = apr_ncp.as_data_frame()
    lgc.NCPPlots(frame).cond_by_vol()

    # Add two more methods' results, then build one plotter per method name.
    apr_ncp.crd()
    apr_ncp.l1reg()
    for selected_method in ("crd", "l1reg", "ncpapr"):
        lgc.NCPPlots(apr_ncp, method_name=selected_method)
def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
    """Build an ACL NCP for ``g``, refine its clusters, then plot and pickle.

    Parameters
    ----------
    g : graph object exposing ``_num_vertices``; passed to ``lgc.NCPData``.
    gname : str used in the plot title and in the output file names.
    method : str forwarded to ``NCPData.refine`` and used in file names.
    methodname : str forwarded to ``NCPData.refine`` and used in the title.
    delta : value forwarded to ``NCPData.refine`` as ``delta``; it is
        converted with ``str`` when building the result file names.
    nthreads : thread count forwarded to ``approxPageRank`` and ``refine``.
    timeout : timeout forwarded to ``refine``.

    Side effects: prints progress, writes a PDF under ``figures/``, shows
    the figure, and writes two pickle files under ``results/``.
    """
    # Sample a smaller fraction of the graph as the vertex count grows.
    ratio = 1.0
    if g._num_vertices > 1000000:
        ratio = 0.05
    elif g._num_vertices > 100000:
        ratio = 0.1
    elif g._num_vertices > 10000:
        ratio = 0.4
    elif g._num_vertices > 7500:
        ratio = 0.6
    elif g._num_vertices > 5000:
        ratio = 0.8
    print("ratio: ", ratio)

    ncp = lgc.NCPData(g, store_output_clusters=True)
    ncp.approxPageRank(
        ratio=ratio,
        nthreads=nthreads,
        localmins=False,
        neighborhoods=False,
        random_neighborhoods=False,
    )
    sets = [st["output_cluster"] for st in ncp.results]

    print("Make an NCP object for Improve Algo")
    ncp2 = lgc.NCPData(g)
    print("Going into improve mode")
    ncp2.refine(
        sets,
        method=method,
        methodname=methodname,
        nthreads=nthreads,
        timeout=timeout,
        delta=delta,
    )

    fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
    fig.axes[0].set_title(gname + " " + methodname + "-NCP")
    # NOTE(review): figsize is forwarded exactly as in the original call;
    # confirm the installed matplotlib accepts it as a savefig keyword.
    fig.savefig(
        "figures/" + method + "-ncp-" + gname + ".pdf",
        bbox_inches="tight",
        figsize=(100, 100),
    )
    plt.show()

    # str(delta) is required: concatenating a numeric delta to a str raises
    # TypeError. For a str delta the resulting file name is unchanged.
    result_stem = 'results/' + method + "delta" + str(delta)
    with open(result_stem + "-ncp-" + gname + '.pickle', 'wb') as handle:
        pickle.dump(ncp, handle)
    with open(result_stem + "-ncp2-" + gname + '.pickle', 'wb') as handle:
        pickle.dump(ncp2, handle)
def test_ncp():
    """Check NCPData row counts before and after ``mqi`` on the example graph.

    A fresh NCPData must produce an empty data frame; after ``mqi`` with
    ``ratio=1.0`` the frame must have one row per vertex. A second NCPData
    is then fed a single explicit set through ``add_set_samples`` using a
    lambda wrapper around ``flow_clustering``.
    """
    graph = load_example_graph()
    ncp = localgraphclustering.NCPData(graph)

    empty_frame = ncp.as_data_frame()
    assert len(empty_frame) == 0

    ncp.mqi(nthreads=1, ratio=1.0)
    filled_frame = ncp.as_data_frame()
    assert len(filled_frame) == graph._num_vertices

    # Keep only the first element of flow_clustering's result.
    func = lambda G, R: localgraphclustering.flow_clustering(G, R, method="mqi")[0]
    fresh_ncp = localgraphclustering.NCPData(graph)
    fresh_ncp.add_set_samples([[1]], nthreads=1, method=func, methodname="mqi")
def test_ncp():
    """Check NCPData row counts and custom-method sampling on the example graph.

    A fresh NCPData must produce an empty data frame; after ``mqi`` with
    ``ratio=1.0`` the frame must have one row per vertex. A second NCPData
    is then sampled with an ``lgc.partialfunc`` wrapper around
    ``flow_clustering``, first on one explicit set and then on random
    neighborhoods.
    """
    graph = load_example_graph()
    ncp = lgc.NCPData(graph)

    empty_frame = ncp.as_data_frame()
    assert len(empty_frame) == 0

    ncp.mqi(nthreads=1, ratio=1.0)
    filled_frame = ncp.as_data_frame()
    assert len(filled_frame) == graph._num_vertices

    # Earlier spelling, kept for reference:
    #   lambda G, R: lgc.flow_clustering(G, R, method="mqi")[0]
    mqi_method = lgc.partialfunc(lgc.flow_clustering, method="mqi")
    fresh_ncp = lgc.NCPData(graph)
    fresh_ncp.add_set_samples(
        [[1]], nthreads=1, method=mqi_method, methodname="mqi")
    fresh_ncp.add_random_neighborhood_samples(
        ratio=2, nthreads=1, method=mqi_method, methodname="mqi")
def test_ncpplots():
    """Smoke-test NCPPlots on the dolphins edge list.

    Plots are drawn once from the value returned by ``NCPData.mqi()`` and
    once from the data frame of an ``approxPageRank`` run. Nothing is
    asserted; the test passes when no call raises.
    """
    dolphins = localgraphclustering.graph_class_local(
        "localgraphclustering/tests/data/dolphins.edges",
        separator=" ",
    )

    mqi_result = localgraphclustering.NCPData(dolphins).mqi()
    plotter = localgraphclustering.NCPPlots(mqi_result)
    plotter.mqi_input_output_cond_plot()
    plotter.cond_by_vol()
    plotter.cond_by_size()
    plotter.isop_by_size()

    apr_result = localgraphclustering.NCPData(dolphins).approxPageRank()
    frame = apr_result.as_data_frame()
    localgraphclustering.NCPPlots(frame).cond_by_vol()
def test_ncp_l1reg_big():
    """Run ``l1reg`` with four threads on the neuro-fmri-01 edge list.

    NOTE(review): the largest component is computed and its adjacency data
    printed, but the NCP is built on the full graph -- confirm intended.
    """
    graph = lgc.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ", header=True)

    largest = graph.largest_component()
    print(largest.adjacency_matrix.data)

    lgc.NCPData(graph).l1reg(ratio=0.5, nthreads=4)
def test_custom_ncp():
    """Sample random neighborhoods with the custom ``_second`` method.

    Passes when ``add_random_neighborhood_samples`` completes without
    raising; nothing is asserted.
    """
    graph = load_example_graph()
    collector = lgc.NCPData(graph)
    collector.add_random_neighborhood_samples(
        ratio=1.0,
        method=_second,
        methodname="neighborhoods",
        nthreads=16,
    )
def test_apr_only_node_samples():
    """Run ``approxPageRank`` with random-neighborhood and local-min sampling off.

    Nothing is asserted; the test passes when the call does not raise.
    """
    graph = load_example_graph()
    apr_options = dict(
        ratio=1,
        gamma=0.1,
        rholist=[1e-2, 1e-3],
        random_neighborhoods=False,
        localmins=False,
    )
    lgc.NCPData(graph).approxPageRank(**apr_options)
def test_ncp_l1reg_big():
    """Run ``l1reg`` on the neuro-fmri-01 edge list.

    NOTE(review): the largest component is computed and its adjacency data
    printed, but the NCP is built on the full graph -- confirm intended.
    """
    graph = localgraphclustering.GraphLocal()
    graph.read_graph("notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ")

    largest = graph.largest_component()
    print(largest.adjacency_matrix.data)

    localgraphclustering.NCPData(graph).l1reg(ratio=0.5)
def test_ncp_clique():
    """Every ``output_sizeeff`` from approxPageRank on a 10-clique is positive."""
    import networkx as nx

    clique = nx.complete_graph(10)
    graph = lgc.GraphLocal().from_networkx(clique)

    frame = lgc.NCPData(graph).approxPageRank().as_data_frame()
    smallest = min(frame["output_sizeeff"])
    assert smallest > 0
def test_ncp_grid():
    """Every ``output_sizeeff`` from approxPageRank on a 10x10 grid is positive."""
    import networkx as nx

    grid = nx.grid_graph(dim=[10, 10])
    graph = lgc.GraphLocal().from_networkx(grid)

    frame = lgc.NCPData(graph).approxPageRank().as_data_frame()
    smallest = min(frame["output_sizeeff"])
    assert smallest > 0
def test_ncp_localmin():
    """Sample local minima using an ACL spectral-clustering default method.

    The default method is a lambda that turns the first element of
    ``spectral_clustering``'s result into a list. The resulting data frame
    is printed; nothing is asserted.
    """
    graph = load_example_graph()
    collector = localgraphclustering.NCPData(graph)

    collector.default_method = lambda G, R: list(
        spectral_clustering(G, R, alpha=0.01, rho=1.0e-4, method="acl")[0])
    collector.add_localmin_samples(ratio=1)

    print(collector.as_data_frame())
def test_ncp_localmin():
    """Sample local minima on the example graph and on a two-vertex graph.

    Both runs use an ``lgc.partialfunc`` wrapper around ACL spectral
    clustering as the default method. The first run's data frame is
    printed; nothing is asserted.
    """
    example = load_example_graph()
    collector = lgc.NCPData(example)
    collector.default_method = lgc.partialfunc(
        lgc.spectral_clustering, alpha=0.01, rho=1.0e-4, method="acl")
    collector.add_localmin_samples(ratio=1)
    print(collector.as_data_frame())

    # Repeat on a graph built from the edge lists [0, 1] / [1, 0], weights 1.
    tiny = lgc.GraphLocal()
    tiny.list_to_gl([0, 1], [1, 0], [1, 1])
    tiny_collector = lgc.NCPData(tiny)
    tiny_collector.default_method = lgc.partialfunc(
        lgc.spectral_clustering, alpha=0.01, rho=1.0e-4, method="acl")
    tiny_collector.add_localmin_samples(ratio=1)
def test_apr_refine():
    """Run ``approxPageRank`` with an MQI ``refine`` entry in ``spectral_args``.

    Random-neighborhood and local-min sampling are switched off. Nothing
    is asserted; the test passes when the call does not raise.
    """
    graph = load_example_graph()
    mqi_refiner = lgc.partialfunc(lgc.flow_clustering, method="mqi")
    lgc.NCPData(graph).approxPageRank(
        ratio=1,
        gamma=0.1,
        rholist=[1e-2, 1e-3],
        random_neighborhoods=False,
        localmins=False,
        spectral_args={'refine': mqi_refiner},
    )
def test_ncp_crd_big():
    """Run ``crd`` on the neuro-fmri-01 edge list and draw the three NCP plots.

    Nothing is asserted; the test passes when no call raises.
    """
    graph = localgraphclustering.GraphLocal()
    graph.read_graph("notebooks/datasets/neuro-fmri-01.edges", "edgelist", " ")

    collector = localgraphclustering.NCPData(graph)
    crd_result = collector.crd(ratio=0.5, w=10, U=10, h=1000)
    plotter = localgraphclustering.ncpplots.NCPPlots(crd_result)

    plotter.cond_by_size()   # conductance vs size
    plotter.cond_by_vol()    # conductance vs volume
    plotter.isop_by_size()   # isoperimetry vs size
def test_ncp_read_write():
    """Input and output sets survive an NCPData write / from_file round trip.

    The first two input and output sets are captured before writing, the
    NCP is written to ``myncp`` and reloaded from ``myncp.pickle``, and the
    reloaded object must return the same sets.
    """
    G = load_example_graph()
    ncp = lgc.NCPData(G).approxPageRank(ratio=2)
    R1 = ncp.input_set(0)
    S1 = ncp.output_set(0)
    R2 = ncp.input_set(1)
    S2 = ncp.output_set(1)

    ncp.write("myncp")
    ncp2 = lgc.NCPData.from_file("myncp.pickle", G)

    # Compare against the reloaded object; comparing against `ncp` itself
    # would pass even if the file round trip lost or corrupted the sets.
    assert R1 == ncp2.input_set(0)
    assert R2 == ncp2.input_set(1)
    assert S1 == ncp2.output_set(0)
    assert S2 == ncp2.output_set(1)
def test_ncp_sets():
    """``input_set`` and ``output_set`` can be fetched for every result index.

    Nothing is asserted; the test passes when no lookup raises.
    """
    graph = load_example_graph()
    ncp = lgc.NCPData(graph).approxPageRank()
    for index, _ in enumerate(ncp.results):
        ncp.input_set(index)
        ncp.output_set(index)
def test_ncp_l1reg():
    """Run ``l1reg`` with ``ratio=1`` on the example graph and print the result."""
    graph = load_example_graph()
    collector = localgraphclustering.NCPData(graph)
    result = collector.l1reg(ratio=1)
    print(result)
def test_ncp_crd():
    """Run ``crd`` with ``ratio=1`` on the example graph; passes if it does not raise."""
    graph = load_example_graph()
    collector = lgc.NCPData(graph)
    collector.crd(ratio=1)
def test_ncp_fiedler():
    """Call the neighborhood, Fiedler, and Fiedler+MQI samplers in sequence.

    Nothing is asserted; the test passes when no call raises.
    """
    collector = lgc.NCPData(load_example_graph())
    collector.add_neighborhoods()
    collector.add_fiedler()
    collector.add_fiedler_mqi()
def test_ncp_one_thread():
    """Run ``mqi`` with ``ratio=2`` on a single thread; passes if it does not raise."""
    graph = load_example_graph()
    collector = lgc.NCPData(graph)
    collector.mqi(ratio=2, nthreads=1)
def test_ncp_mqi():
    """Run ``mqi`` with ``ratio=1`` on the example graph; passes if it does not raise."""
    graph = load_example_graph()
    collector = lgc.NCPData(graph)
    collector.mqi(ratio=1)
def test_ncp_apr():
    """Run ``approxPageRank`` with ``ratio=1``; passes if it does not raise."""
    graph = load_example_graph()
    collector = localgraphclustering.NCPData(graph)
    collector.approxPageRank(ratio=1)
def test_ncp_apr():
    """Run ``approxPageRank`` twice, the second time with an empty name prefix.

    Each run uses its own NCPData. Nothing is asserted; the test passes
    when neither call raises.
    """
    graph = load_example_graph()

    default_prefix_run = lgc.NCPData(graph)
    default_prefix_run.approxPageRank(ratio=1)

    empty_prefix_run = lgc.NCPData(graph)
    empty_prefix_run.approxPageRank(ratio=2, methodname_prefix="")
def test_ncp_l1reg():
    """Run ``l1reg`` with ``ratio=1`` on the example graph and print the result."""
    graph = load_example_graph()
    collector = lgc.NCPData(graph)
    result = collector.l1reg(ratio=1)
    print(result)
def _run_improve_stage(g, gname, ncp, sets, method, methodname, delta,
                       nthreads, timeout, file_stem):
    """Refine ``sets`` on a fresh NCPData, then time, plot and save the result.

    ``file_stem`` is the part of every output file name that sits between
    the directory and the ``-ncp...`` suffix. The already-computed ``ncp``
    is saved again under this stage's file names alongside the refined one.
    """
    print("Make an NCP object for Improve Algo")
    start2 = time.time()
    ncp2 = lgc.NCPData(g)
    print("Going into improve mode")
    ncp2.refine(
        sets,
        method=method,
        methodname=methodname,
        nthreads=nthreads,
        timeout=timeout,
        delta=delta,
    )
    end2 = time.time()
    print("Elapsed time for improve-ncp for dataset ", gname, " is ",
          end2 - start2, " the method is ", methodname, " delta is ", delta)

    fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
    fig.axes[0].set_title(gname + " " + methodname + "-NCP")
    # NOTE(review): figsize is forwarded exactly as in the original calls;
    # confirm the installed matplotlib accepts it as a savefig keyword.
    fig.savefig(
        "figures/" + file_stem + "-ncp-" + gname + ".pdf",
        bbox_inches="tight",
        figsize=(100, 100),
    )
    plt.show()

    # Context managers close the pickle files even if dumping fails.
    with open('results/' + file_stem + "-ncp-" + gname + '.pickle', 'wb') as handle:
        pickle.dump(ncp, handle)
    ncp.write('results/' + file_stem + "-ncp-csv-" + gname, writepython=False)
    with open('results/' + file_stem + "-ncp2-" + gname + '.pickle', 'wb') as handle:
        pickle.dump(ncp2, handle)
    ncp2.write('results/' + file_stem + "-ncp2-csv-" + gname, writepython=False)


def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
    """Build an ACL NCP for ``g`` and refine it with several configurations.

    The ACL NCP is computed once and timed. Its output clusters are then
    refined four times: once with the caller's ``method`` / ``methodname``
    / ``delta``, and then with method ``"sl"`` ("SimpleLocal") at delta
    0.3, 0.6 and 0.9. Each stage prints its elapsed time, saves a PDF under
    ``figures/``, shows the figure, and writes pickle and CSV results under
    ``results/``.

    Parameters
    ----------
    g : graph object exposing ``_num_vertices``; passed to ``lgc.NCPData``.
    gname : str used in log lines, plot titles and output file names.
    method, methodname, delta : configuration of the first refine stage.
    nthreads : thread count forwarded to ``approxPageRank`` and ``refine``.
    timeout : timeout forwarded to ``refine``.
    """
    # Sample a smaller fraction of the graph as the vertex count grows.
    ratio = 1.0
    if g._num_vertices > 1000000:
        ratio = 0.05
    elif g._num_vertices > 100000:
        ratio = 0.1
    elif g._num_vertices > 10000:
        ratio = 0.4
    elif g._num_vertices > 7500:
        ratio = 0.6
    elif g._num_vertices > 5000:
        ratio = 0.8
    print("ratio: ", ratio)

    start = time.time()
    ncp = lgc.NCPData(g, store_output_clusters=True)
    ncp.approxPageRank(
        ratio=ratio,
        nthreads=nthreads,
        localmins=False,
        neighborhoods=False,
        random_neighborhoods=False,
    )
    end = time.time()
    print("Elapsed time for acl-ncp for dataset ", gname, " is ", end - start,
          " the method is ", methodname, " delta is ", delta)
    sets = [st["output_cluster"] for st in ncp.results]

    # First stage: caller-supplied configuration; file names carry no delta.
    _run_improve_stage(g, gname, ncp, sets, method, methodname, delta,
                       nthreads, timeout, file_stem=method)

    # Remaining stages: SimpleLocal at fixed deltas; file names embed delta.
    for sl_delta in (0.3, 0.6, 0.9):
        _run_improve_stage(g, gname, ncp, sets, "sl", "SimpleLocal", sl_delta,
                           nthreads, timeout,
                           file_stem="sl" + "delta" + str(sl_delta))
def test_apr_deep():
    """Run ``approxPageRank`` with ``deep=True``; passes if it does not raise."""
    graph = load_example_graph()
    collector = lgc.NCPData(graph)
    collector.approxPageRank(
        ratio=1,
        gamma=0.1,
        rholist=[1e-2, 1e-3],
        deep=True,
    )
def test_ncp_crd():
    """Run ``crd`` with ``ratio=1`` on the example graph; passes if it does not raise."""
    graph = load_example_graph()
    collector = localgraphclustering.NCPData(graph)
    collector.crd(ratio=1)
def test_ncp_crd_big():
    """Run ``crd`` on the minnesota edge list and build a plotter from the result.

    Nothing is asserted; the test passes when no call raises.
    """
    graph = lgc.GraphLocal()
    graph.read_graph(
        "notebooks/datasets/minnesota.edgelist",
        "edgelist",
        remove_whitespace=True,
    )

    collector = lgc.NCPData(graph)
    crd_result = collector.crd(ratio=0.5, w=10, U=10, h=1000, nthreads=4)
    lgc.ncpplots.NCPPlots(crd_result)
# For every graph in `mygraphs`, compute an ACL NCP and save three plots
# (conductance vs size, conductance vs volume, isoperimetry vs size).
# A `mygraphs` value is either a file name or a tuple whose first element
# is the file name and whose second element is the field separator.
for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    sep = ' '
    if isinstance(gfile, tuple):
        sep = gfile[1]
        gfile = gfile[0]
    print("Running " + gname)
    # Pass the per-graph separator; a hard-coded " " here would silently
    # ignore the separator supplied in a tuple entry.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()

    start = time.time()  # not read in this loop; kept for any later code
    ncp_instance = lgc.NCPData(g)
    ncp_instance.approxPageRank(ratio=0.1, timeout=5000000, nthreads=24)
    ncp_plots = lgc.NCPPlots(ncp_instance, method_name="acl")

    # plot conductance vs size
    fig, ax, min_tuples = ncp_plots.cond_by_size()
    plt.savefig('figures/cond_card_' + gname + '.png', bbox_inches='tight')
    plt.show()

    # plot conductance vs volume
    fig, ax, min_tuples = ncp_plots.cond_by_vol()
    plt.savefig('figures/cond_vol_' + gname + '.png', bbox_inches='tight')
    plt.show()

    # plot isoperimetry vs size
    fig, ax, min_tuples = ncp_plots.isop_by_size()
    plt.savefig('figures/expand_card_' + gname + '.png', bbox_inches='tight')
    plt.show()