def run():
    """Compare DiSim and Hermitian clustering on DSBM-PA samples.

    A random complete meta-graph ``F`` (k=3, η=0, random_state=28) is
    converted to preferential-attachment parameters once.  For every seed in
    ``seeds`` two graphs are sampled with ``a=10``: one with ``Herm=False``
    that is clustered with ``disim.cluster`` and one with ``Herm=True`` that
    is clustered with ``herm.cluster``.  The adjusted Rand index of each
    clustering is printed, followed by the mean ARI of both algorithms.

    Returns:
        None.  All results are printed.
    """
    n = 4000
    k = 3
    p = 0.01
    q = 0.01

    F = DSBM.random_complete(k, η=0.0, random_state=28)
    print(F.shape)
    PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

    aris_DSBM_disim = []
    aris_DSBM_herm = []
    # NOTE(review): `seeds` is not defined inside this function; it has to be
    # provided at module level -- confirm it exists where this is called.
    for seed in seeds:
        # Non-Hermitian sample for DiSim.
        GDSBM, comms_DSBM = DSBM_PA.sample(
            random_state=seed, Herm=False, a=10, **PA_kwargs)
        print([len(comm) for comm in comms_DSBM])
        clusters = disim.cluster(GDSBM, k, k, mode='R')
        print([len(cluster) for cluster in clusters])
        ari = evaluate.ari(comms_DSBM, clusters)
        aris_DSBM_disim.append(ari)
        print('Disim', ari)

        # Same seed, Hermitian representation for the Herm algorithm.
        GDSBM, comms_DSBM = DSBM_PA.sample(
            random_state=seed, Herm=True, a=10, **PA_kwargs)
        clusters = herm.cluster(GDSBM, k, 0.1)
        ari = evaluate.ari(comms_DSBM, clusters)
        print('Herm', ari)
        aris_DSBM_herm.append(ari)

    print('Mean Disim', np.mean(aris_DSBM_disim))
    print('Mean Herm', np.mean(aris_DSBM_herm))
def spectral_gap():
    """Sample DSBM-PA graphs on a circle meta-graph over a parameter grid.

    Iterates over the Cartesian product of noise levels, numbers of
    communities, community sizes and edge probabilities (with ``q = p``),
    builds the circle meta-graph for each combination and draws one DSBM-PA
    sample per seed, printing the shape of ``F`` and of every sample.

    Returns:
        None.  Only shapes are printed.
    """
    # circle metagraph
    noises = np.linspace(0, 0.5, 15)
    ks = np.array([3, 5, 7])
    ps = np.array([0.003, 0.0035, 0.004, 0.0045, 0.005, 0.0055])
    seeds = np.array([6, 28, 496, 8128, 33550336])
    ns = [3000, 5000]

    for η, k, n, p in itertools.product(noises, ks, ns, ps):
        q = p
        F = DSBM.circle(k, η)
        print(F.shape)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        for seed in seeds:
            # Seed every replicate so the draws are reproducible and differ
            # from one another.
            # NOTE(review): the sampler is unpacked as (graph, communities),
            # matching the other DSBM_PA.sample call sites -- confirm against
            # the sampler's signature.
            GPA, _ = DSBM_PA.sample(random_state=seed, **PA_kwargs)
            print(GPA.shape)
def run_test(seed, N, k, p, q, η):
    """Cluster one DSBM and one DSBM-PA sample with DiSim and Herm.

    Args:
        seed: ``random_state`` passed to both samplers.
        N: Total number of vertices; each community gets ``N // k``.
        k: Number of communities (also used as the number of clusters).
        p, q: Edge probabilities forwarded to ``DSBM.sample`` and
            ``convert.DSBM_to_PA``.
        η: Noise level of the circle meta-graph.

    Returns:
        dict with the misclustered-vertex scores (``*_M`` keys) and adjusted
        Rand indices (``*_A`` keys) for every combination of algorithm
        (DiSim, Herm) and model (DSBM, PA).
    """
    n = N // k
    F = DSBM.circle(k, η)
    PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

    GDSBM, comms_DSBM = DSBM.sample(
        n=n, k=k, p=p, q=q, F=F, random_state=seed, Herm=False)
    GPA, comms_pa = DSBM_PA.sample(a=10, **PA_kwargs, random_state=seed)
    print([len(comm) for comm in comms_DSBM])

    # NOTE(review): herm.cluster receives k as its third positional argument
    # here -- confirm that is the intended parameter.
    clustersds_dsbm = disim.cluster(GDSBM, k, k, mode='R')
    clustershm_dsbm = herm.cluster(hermify.to_herm(GDSBM), k, k)
    clustersds_pa = disim.cluster(GPA, k, k, mode='R')
    clustershm_pa = herm.cluster(hermify.to_herm(GPA), k, k)

    ari_dsbm_disim = evaluate.ari(comms_DSBM, clustersds_dsbm)
    ari_dsbm_herm = evaluate.ari(comms_DSBM, clustershm_dsbm)
    ari_pa_disim = evaluate.ari(comms_pa, clustersds_pa)
    ari_pa_herm = evaluate.ari(comms_pa, clustershm_pa)

    miscl_vs_disim_dsbm = evaluate.misclustered_vertices(
        comms_DSBM, clustersds_dsbm)
    miscl_vs_herm_dsbm = evaluate.misclustered_vertices(
        comms_DSBM, clustershm_dsbm)
    miscl_vs_disim_pa = evaluate.misclustered_vertices(comms_pa, clustersds_pa)
    miscl_vs_herm_pa = evaluate.misclustered_vertices(comms_pa, clustershm_pa)
    print()

    return {
        'DiSim_DSBM_M': miscl_vs_disim_dsbm,
        'DiSim_PA_M': miscl_vs_disim_pa,
        'Herm_DSBM_M': miscl_vs_herm_dsbm,
        'Herm_PA_M': miscl_vs_herm_pa,
        'DiSim_DSBM_A': ari_dsbm_disim,
        'DiSim_PA_A': ari_pa_disim,
        'Herm_DSBM_A': ari_dsbm_herm,
        'Herm_PA_A': ari_pa_herm,
    }
def spectral_gap():
    """Plot the leading singular values of regularised DSBM-PA graphs.

    For every grid point (noise, k, n, p with ``q = p``) a random complete
    meta-graph is drawn, one DSBM-PA graph is sampled per seed, and the ten
    largest singular values of

        L = (O + τI)^(-1/2) · A · (P + τI)^(-1/2)

    are computed, where ``O`` / ``P`` are the diagonal matrices of row sums
    (``out_degrees``) / column sums (``in_degrees``) of the sample ``A`` and
    ``τ`` is the mean row sum.  One scatter plot per grid point, with one
    series per seed, is written to
    ``complete_meta_spectral_gap_k{k}_N{k*n}_noise{η}_p{p}.png``.

    Returns:
        None.  Progress is printed and figures are saved to the working
        directory.
    """
    # Function-scope imports: imported once instead of on every iteration.
    import matplotlib.pyplot as plt
    from scipy.sparse import linalg as la

    # complete metagraph
    noises = np.array([0.0, 0.1, 0.2])
    ks = np.array([3, 5, 7])
    ps = np.array([0.0035, 0.0045, 0.005])
    seeds = np.array([6, 28, 496, 8128])
    ns = np.array([1500, 2500, 5000])
    n_singular = 10  # rank requested from svds and length of the x-axis

    for η, k, n, p in itertools.product(noises, ks, ns, ps):
        q = p
        F = DSBM.random_complete(
            random_state=np.random.choice(seeds), k=k, η=η)
        print(F.shape)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

        spectra = []
        for seed in seeds:
            # Seed every replicate so the draws are reproducible and differ
            # from one another.
            GPA, comms = DSBM_PA.sample(
                a=10, **PA_kwargs, Herm=False, random_state=seed)
            print(GPA.shape)

            out_degrees = GPA.sum(axis=1)
            in_degrees = GPA.sum(axis=0)
            τ = out_degrees.mean()
            _, m = GPA.shape

            Pvals = in_degrees + τ
            Pτ = scipy.sparse.spdiags(Pvals, 0, m, m)
            print('computed P')

            Ovals = out_degrees + τ
            # Row sums come back as a column; spdiags wants one row of data.
            Oτ = scipy.sparse.spdiags(np.reshape(Ovals, (1, -1)), 0, m, m)
            print('computed O')

            tmp1 = Oτ.power(-0.5)
            print('generated tmp1')
            tmp2 = Pτ.power(-0.5)
            print('generated tmp2')
            print(tmp1.shape, GPA.shape, tmp2.shape)

            L = tmp1 @ GPA @ tmp2
            _, Σ, _ = la.svds(L, k=n_singular)
            # Store the singular values in descending order.
            spectra.append(np.sort(Σ)[::-1])

        fig, ax = plt.subplots()
        for Σ in spectra:
            ax.scatter(np.arange(1, n_singular + 1), Σ, s=1.5)
        fig.savefig(
            f'complete_meta_spectral_gap_k{k}_N{k*n}_noise{η}_p{p}.png')
        # Release the figure; otherwise one figure per grid point stays open.
        plt.close(fig)
def run_test(seed, N, k, p, q, η, interpolation):
    """Cluster a DSBM-PA sample whose community matrix is interpolated.

    ``C2`` is the matrix ``convert.DSBM_to_PA`` returns under key ``'C'``;
    ``C1`` is ``C2`` with a zeroed diagonal and rows rescaled to sum to one.
    The sampler is run with ``interpolation * C1 + (1 - interpolation) * C2``.

    Args:
        seed: ``random_state`` for the sampler; also drives the meta-graph
            retries through ``seed**itr``.
        N: Total number of vertices; each community gets ``N // k``.
        k: Number of communities / clusters.
        p, q: Edge probabilities forwarded to ``convert.DSBM_to_PA``.
        η: Noise level of the random complete meta-graph.
        interpolation: Weight of ``C1`` in the mixture.

    Returns:
        dict with keys ``'interpolation'``, ``'ari_disim'`` and
        ``'ari_herm'``.
    """
    n = N // k
    C1 = np.zeros((k, k))
    itr = 0
    # Redraw the meta-graph until every row of C1 keeps some off-diagonal
    # mass; a zero row could not be normalised below.
    while np.any(C1.sum(axis=1) == 0):
        # NOTE(review): seed**0 == 1, so the first attempt uses
        # random_state=1 for every seed, and seeds 0 and 1 never change the
        # state between retries -- confirm this is intended.
        F = DSBM.random_complete(k, η, random_state=seed**itr)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        C2 = PA_kwargs['C']
        C1[:, :] = C2
        np.fill_diagonal(C1, 0)
        # Advance the retry counter so that a rejected draw is not repeated
        # with the same random_state forever.
        itr += 1

    C1 = C1 / C1.sum(axis=1).reshape(-1, 1)
    print(C2)
    print(C1)
    PA_kwargs['C'] = interpolation * C1 + (1 - interpolation) * C2

    GPA, comms = DSBM_PA.sample(
        a=10, **PA_kwargs, Herm=False, random_state=seed)
    GPA_Herm = hermify.to_herm(GPA)

    clusters_disim = disim.cluster(GPA, k, k, mode='R')
    clusters_herm = herm.cluster(GPA_Herm, k, ϵ=-1, RW=True)

    return {
        'interpolation': interpolation,
        'ari_disim': evaluate.ari(comms, clusters_disim),
        'ari_herm': evaluate.ari(comms, clusters_herm),
    }
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the ROCMG experiments with an interpolated community matrix.

    A random complete meta-graph is drawn (retrying with ``seed + its`` while
    ``convert.DSBM_to_PA`` returns the string ``'total=0'``), the resulting
    matrix ``'C'`` is mixed with the identity using weight ``noise``, and the
    parameters are converted back with ``convert.PA_to_DSBM``.

    * ``norm is None``: DiSim and Herm are evaluated on a DSBM sample and on
      a DSBM-PA sample (four rows).
    * ``norm == 'interpolation'``: Herm is evaluated for ``r = 1..10`` on a
      DSBM sample and on a DSBM-PA sample (twenty rows).
    * Any other ``norm``: only the setup runs; nothing is written.

    Each result is appended as one header-less row to ``results/{tag}.csv``.

    Args:
        seed: ``random_state`` for the meta-graph and both samplers.
        noise: Weight of the identity in the mixed community matrix; logged
            as ``percentage_intra_edges``.
        tag: Base name of the CSV file inside ``results/``.
        norm: Normalisation mode forwarded to the clustering routines.

    Returns:
        None.
    """
    η = noise
    print("Running ROCMG-experiments for seed", seed)
    k = 5
    n = 4000
    p = 0
    q = 0.0045

    def _append_row(model, column, value, ari):
        """Append one result row; `column` is 'algorithm' or 'r'."""
        row = {
            'model': [model],
            'average_edge_probability': [q],
            'percentage_intra_edges': [η],
            column: [value],
            'ari': [ari],
        }
        pd.DataFrame(data=row).to_csv(
            f"results/{tag}.csv", mode='a', header=False)

    # Retry with a shifted random_state while the conversion reports
    # 'total=0'.
    # NOTE(review): presumably that marks a degenerate meta-graph -- confirm
    # in convert.DSBM_to_PA.
    its = 0
    dic = 'total=0'
    while dic == 'total=0':
        F = DSBM.random_complete(k=k, η=0, random_state=seed + its)
        dic = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
        its += 1

    dic['C'] = η * np.diag(np.ones(k)) + (1 - η) * dic['C']
    dic2 = convert.PA_to_DSBM(**dic)

    if norm is None:
        A, comms = DSBM.sample(n=n, k=k, p=dic2['p'], q=dic2['q'], F=F,
                               random_state=seed, Herm=False)
        print("finished sampling dsbm. moving to clustering.")
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        _append_row('DSBM', 'algorithm', 'DiSim', evaluate.ari(comms, cls))
        print('finished disim-dsbm, moving on to herm-dsbm')

        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        _append_row('DSBM', 'algorithm', 'Herm', evaluate.ari(comms, cls))
        print('finished herm-dsbm, movnig on to disim-pa')

        A, comms = DSBM_PA.sample(random_state=seed, a=dic['c'], Herm=False,
                                  **dic)
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        ari = evaluate.ari(comms, cls)
        print("finished disim-pa, moving on to herm-pa")
        _append_row('DSBM_PA', 'algorithm', 'DiSim', ari)

        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        _append_row('DSBM_PA', 'algorithm', 'Herm', evaluate.ari(comms, cls))
        print(f"Concluded experiments for p={p}")

    elif norm == 'interpolation':
        A, comms = DSBM.sample(n=n, k=k, p=dic2['p'], q=dic2['q'], F=F,
                               random_state=seed, Herm=False)
        print("finished sampling dsbm. moving to clustering.")
        A = hermify.to_herm(A)
        for r in np.arange(1, 11):
            cls = herm.cluster(A=A, k=k, norm=norm, r=r)
            _append_row('DSBM', 'r', r, evaluate.ari(comms, cls))
        print('finished herm-dsbm, movnig on to disim-pa')

        A, comms = DSBM_PA.sample(random_state=seed, a=dic['c'], Herm=False,
                                  **dic)
        A = hermify.to_herm(A)
        for r in np.arange(1, 11):
            # r must be forwarded here as well; without it every row of this
            # sweep would hold the same clustering under a different r.
            cls = herm.cluster(A=A, k=k, norm=norm, r=r)
            _append_row('DSBM_PA', 'r', r, evaluate.ari(comms, cls))
        print(f"Concluded experiments for p={p}")
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the ROCMG experiments for one seed and normalisation mode.

    A random complete meta-graph is drawn with ``random_state=seed``.

    * ``norm is None``: for ``p`` in 0.002..0.008 (``q = p``) DiSim and Herm
      are evaluated on a DSBM sample and on a DSBM-PA sample.
    * ``norm == 'self-loops'``: for ``p`` in {0.003, 0.006} both algorithms
      are evaluated on a DSBM-PA sample for a range of ``τ_self_loops``.
    * ``norm == 'densify'``: same sweep over ``ω`` with community size 2000
      instead of 4000.
    * Any other ``norm``: nothing is sampled or written.

    Each result is appended as one header-less row to ``results/{tag}.csv``.

    Args:
        seed: ``random_state`` for the meta-graph and the samplers.
        noise: Noise level ``η`` of the meta-graph; logged with each row.
        tag: Base name of the CSV file inside ``results/``.
        norm: Normalisation mode forwarded to the clustering routines.

    Returns:
        None.
    """
    print("Running ROCMG-experiments for seed", seed)
    k = 5
    n = 2000 if norm == 'densify' else 4000
    F = DSBM.random_complete(k=k, η=noise, random_state=seed)

    def _append_row(row):
        """Append one result row (mapping column -> scalar) to the CSV."""
        frame = pd.DataFrame(
            data={column: [value] for column, value in row.items()})
        frame.to_csv(f"results/{tag}.csv", mode='a', header=False)

    def _baseline(p):
        """Evaluate DiSim and Herm on a DSBM and a DSBM-PA sample."""
        q = p
        PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)

        A, comms = DSBM.sample(n=n, k=k, p=p, q=q, F=F, random_state=seed,
                               Herm=False)
        print("finished sampling dsbm. moving to clustering.")
        row = {'model': 'DSBM', 'p': p, 'noise': noise}
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        _append_row(
            {**row, 'algorithm': 'DiSim', 'ari': evaluate.ari(comms, cls)})
        print('finished disim-dsbm, moving on to herm-dsbm')

        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        _append_row(
            {**row, 'algorithm': 'Herm', 'ari': evaluate.ari(comms, cls)})
        print('finished herm-dsbm, movnig on to disim-pa')

        A, comms = DSBM_PA.sample(random_state=seed, a=PA_kwargs['c'],
                                  Herm=False, **PA_kwargs)
        row = {'model': 'DSBM_PA', 'p': p, 'noise': noise}
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        ari = evaluate.ari(comms, cls)
        print("finished disim-pa, moving on to herm-pa")
        _append_row({**row, 'algorithm': 'DiSim', 'ari': ari})

        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        _append_row(
            {**row, 'algorithm': 'Herm', 'ari': evaluate.ari(comms, cls)})
        print(f"Concluded experiments for p={p}")

    def _sweep(column, values, extra_kwargs):
        """Evaluate both algorithms on DSBM-PA for each parameter value.

        `column` names the CSV column of the swept parameter and
        `extra_kwargs(value)` builds the keyword argument passed to both
        clustering routines.
        """
        for p in [0.003, 0.006]:
            q = p
            PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
            A, comms = DSBM_PA.sample(a=PA_kwargs['c'], random_state=seed,
                                      Herm=False, **PA_kwargs)
            AH = hermify.to_herm(A)
            for value in values:
                kwargs = extra_kwargs(value)
                print('clustering using disim')
                cls = disim.cluster(A=A, ky=k, kz=k, norm=norm, **kwargs)
                row = {
                    'p': p,
                    'noise': noise,
                    column: value,
                    'algorithm': 'DiSim',
                    'ari': evaluate.ari(comms, cls),
                }
                _append_row(row)
                print('finished with DiSim, moving to Herm')
                cls = herm.cluster(A=AH, k=k, norm=norm, **kwargs)
                _append_row({**row, 'algorithm': 'Herm',
                             'ari': evaluate.ari(comms, cls)})
                print('done with herm')

    if norm is None:
        for p in [0.002, 0.004, 0.006, 0.008]:
            _baseline(p)
    elif norm == 'self-loops':
        _sweep('tau',
               [0.1, 0.4, 0.75, 0.85, 1, 1.15, 1.25, 1.6, 2, 3],
               lambda τ: dict(τ_self_loops=τ))
    elif norm == 'densify':
        _sweep('omega', np.linspace(0, 0.002, 9), lambda ω: dict(ω=ω))
def eigenvectors():
    """Plot the top left singular vectors of regularised DSBM-PA graphs.

    For every grid point (noise, k, n, p with ``q = p``) a random complete
    meta-graph is drawn, one DSBM-PA graph is sampled per seed, and the
    singular value decomposition of

        L = (O + τI)^(-1/2) · A · (P + τI)^(-1/2)

    is computed, where ``O`` / ``P`` are the diagonal matrices of row sums
    (``out_degrees``) / column sums (``in_degrees``) of the sample ``A`` and
    ``τ`` is the mean row sum.  For one randomly chosen replicate, the three
    left singular vectors with the largest singular values are drawn as bar
    charts, one bar group per community, and saved to
    ``DSBM_PA_thicker_tail_k{k}_N{k*n}_noise{η}.pdf``.

    Returns:
        None.  Progress is printed and figures are saved to the working
        directory.
    """
    # Function-scope imports: imported once instead of on every iteration.
    import matplotlib.pyplot as plt
    from scipy.sparse import linalg as la

    # complete metagraph
    noises = np.array([0.0, 0.1, 0.2])
    ks = np.array([3, 5, 7])
    ps = np.array([0.0045])  # wider grid: [0.0035, 0.0045, 0.005]
    seeds = np.array([6])  # wider grid: [6, 28, 496, 8128]
    ns = np.array([2500])  # wider grid: [1500, 2500, 5000]

    for η, k, n, p in itertools.product(noises, ks, ns, ps):
        q = p
        F = DSBM.random_complete(
            random_state=np.random.choice(seeds), k=k, η=η)
        print(F.shape)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

        spectra = []
        for seed in seeds:
            # Seed every replicate so the draws are reproducible and differ
            # from one another.
            GPA, comms = DSBM_PA.sample(
                a=PA_kwargs['c'], **PA_kwargs, Herm=False, random_state=seed)
            print(GPA.shape)

            out_degrees = GPA.sum(axis=1)
            in_degrees = GPA.sum(axis=0)
            τ = out_degrees.mean()
            _, m = GPA.shape

            Pvals = in_degrees + τ
            Pτ = scipy.sparse.spdiags(Pvals, 0, m, m)
            print('computed P')

            Ovals = out_degrees + τ
            # Row sums come back as a column; spdiags wants one row of data.
            Oτ = scipy.sparse.spdiags(np.reshape(Ovals, (1, -1)), 0, m, m)
            print('computed O')

            tmp1 = Oτ.power(-0.5)
            print('generated tmp1')
            tmp2 = Pτ.power(-0.5)
            print('generated tmp2')
            print(tmp1.shape, GPA.shape, tmp2.shape)

            L = tmp1 @ GPA @ tmp2
            U, Σ, Vt = la.svds(L, k=10)
            # Indices of the three largest singular values, ascending.
            top = Σ.argsort()[-3:]
            spectrum = {
                'L': U[:, top],
                # svds returns the right singular vectors as ROWS of Vt, so
                # select rows and transpose to get one vector per column.
                'R': Vt[top, :].T,
                'comms': comms,
            }
            print(spectrum['L'].shape)
            spectra.append(spectrum)

        figP, axesP = plt.subplots(1, 3, figsize=(12, 3), sharey='row')
        figP.suptitle('top left singular vectors')
        spectrum = spectra[np.random.choice(np.arange(len(spectra)))]
        for j in range(3, 0, -1):
            # Column -1 belongs to the largest singular value and goes on
            # the first axis.
            column = spectrum['L'][:, -j]
            gL = np.array(column).reshape(-1)
            num = 0
            for comm in spectrum['comms']:
                print(column.shape)
                print(gL[comm].shape, gL[comm])
                # Lay the communities side by side along the x-axis.
                axesP[j - 1].bar(x=num + np.arange(len(comm)),
                                 height=gL[comm])
                num += len(comm)
        figP.savefig(f'DSBM_PA_thicker_tail_k{k}_N{k*n}_noise{η}.pdf')
        # Release the figure; otherwise one figure per grid point stays open.
        plt.close(figP)
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the NCyMG (cycle meta-graph) experiments for one seed.

    When ``norm is None``, for ``p`` in 0.002..0.008 (``q = p``) DiSim and
    Herm are evaluated on a DSBM sample and on a DSBM-PA sample built from
    ``DSBM.cycle(k=5, η=noise)`` with 4000 vertices per community.  Each
    adjusted Rand index is appended as one header-less row to
    ``results/{tag}.csv``.  For any other ``norm`` nothing is sampled or
    written.

    Args:
        seed: ``random_state`` for both samplers.
        noise: Noise level ``η`` of the meta-graph; logged with each row.
        tag: Base name of the CSV file inside ``results/``.
        norm: Normalisation mode forwarded to the clustering routines.

    Returns:
        None.
    """
    print("Running NCyMG-experiments for seed", seed)
    k = 5
    n = 4000
    F = DSBM.cycle(k=k, η=noise)

    if norm is not None:
        return

    def _append_row(model, p, algorithm, ari):
        """Append one result row to the CSV."""
        row = {
            'model': [model],
            'p': [p],
            'noise': [noise],
            'algorithm': [algorithm],
            'ari': [ari],
        }
        pd.DataFrame(data=row).to_csv(
            f"results/{tag}.csv", mode='a', header=False)

    for p in [0.002, 0.004, 0.006, 0.008]:
        q = p
        PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)

        A, comms = DSBM.sample(n=n, k=k, p=p, q=q, F=F, random_state=seed,
                               Herm=False)
        print("finished sampling dsbm. moving to clustering.")
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        _append_row('DSBM', p, 'DiSim', evaluate.ari(comms, cls))
        print('finished disim-dsbm, moving on to herm-dsbm')

        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        _append_row('DSBM', p, 'Herm', evaluate.ari(comms, cls))
        print('finished herm-dsbm, movnig on to disim-pa')

        A, comms = DSBM_PA.sample(random_state=seed, a=PA_kwargs['c'],
                                  Herm=False, **PA_kwargs)
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        ari = evaluate.ari(comms, cls)
        print("finished disim-pa, moving on to herm-pa")
        _append_row('DSBM_PA', p, 'DiSim', ari)

        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        _append_row('DSBM_PA', p, 'Herm', evaluate.ari(comms, cls))
        print(f"Concluded experiments for p={p}")