Exemplo n.º 1
0
def run():
	"""Benchmark DiSim against Hermitian clustering on DSBM-PA samples.

	A random complete meta-graph ``F`` is drawn once (fixed
	``random_state=28``) and converted into DSBM-PA keyword arguments via
	``convert.DSBM_to_PA``.  For every seed in ``seeds`` one graph is sampled
	with ``Herm=False`` and clustered with ``disim.cluster``; a second graph
	is then sampled with ``Herm=True`` and clustered with ``herm.cluster``.
	The score returned by ``evaluate.ari`` is printed per seed and the mean
	over all seeds is printed at the end.

	Returns:
		None.  All results are written to stdout.
	"""
	n = 4000
	k = 3
	p = 0.01
	q = 0.01
	F = DSBM.random_complete(k, η=0.0, random_state=28)
	print(F.shape)
	PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

	aris_DSBM_disim = []
	aris_DSBM_herm = []

	# NOTE: `seeds` is not defined in this function; it must be supplied by
	# the enclosing module.
	for seed in seeds:
		# DiSim on the non-Hermitian sample.
		GDSBM, comms_DSBM = DSBM_PA.sample(random_state=seed, Herm=False, a=10, **PA_kwargs)
		print([len(l) for l in comms_DSBM])
		clusters = disim.cluster(GDSBM, k, k, mode='R')
		print([len(l) for l in clusters])
		ari = evaluate.ari(comms_DSBM, clusters)
		aris_DSBM_disim.append(ari)
		print('Disim', ari)

		# Hermitian clustering on a fresh sample drawn with the same seed.
		GDSBM, comms_DSBM = DSBM_PA.sample(random_state=seed, Herm=True, a=10, **PA_kwargs)
		clusters = herm.cluster(GDSBM, k, 0.1)
		ari = evaluate.ari(comms_DSBM, clusters)
		print('Herm', ari)
		aris_DSBM_herm.append(ari)

	print('Mean Disim', np.mean(aris_DSBM_disim))
	print('Mean Herm', np.mean(aris_DSBM_herm))
Exemplo n.º 2
0
def run_test(seed, N, k, p, q, η):
    """Compare DiSim and Hermitian clustering on a DSBM and a DSBM-PA sample.

    A meta-graph is built with ``DSBM.circle(k, η)``.  One graph is sampled
    from ``DSBM.sample`` and one from ``DSBM_PA.sample`` (using the keyword
    arguments produced by ``convert.DSBM_to_PA``), both with the same seed.
    Each graph is clustered with ``disim.cluster`` and, after
    ``hermify.to_herm``, with ``herm.cluster``.

    Args:
        seed: ``random_state`` forwarded to both samplers.
        N: Total size; each of the ``k`` groups gets ``N // k``.
        k: Number of communities / clusters.
        p, q: Forwarded to ``DSBM.sample`` and ``convert.DSBM_to_PA``.
        η: Forwarded to ``DSBM.circle``.

    Returns:
        dict with eight entries keyed ``{DiSim,Herm}_{DSBM,PA}_{M,A}``:
        ``_M`` holds the value of ``evaluate.misclustered_vertices`` and
        ``_A`` the value of ``evaluate.ari``.
    """
    n = N // k
    F = DSBM.circle(k, η)
    PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)

    GDSBM, comms_DSBM = DSBM.sample(n=n,
                                    k=k,
                                    p=p,
                                    q=q,
                                    F=F,
                                    random_state=seed,
                                    Herm=False)
    GPA, comms_pa = DSBM_PA.sample(a=10, **PA_kwargs, random_state=seed)
    print([len(l) for l in comms_DSBM])

    # Clustering calls are kept in their original order in case the
    # clustering routines consume shared random state.
    # NOTE(review): herm.cluster receives `k` as its third positional
    # argument here, whereas other call sites in this file pass 0.1 or a
    # keyword ϵ in that position -- confirm that `k` is intended.
    clustersds_dsbm = disim.cluster(GDSBM, k, k, mode='R')
    clustershm_dsbm = herm.cluster(hermify.to_herm(GDSBM), k, k)
    clustersds_pa = disim.cluster(GPA, k, k, mode='R')
    clustershm_pa = herm.cluster(hermify.to_herm(GPA), k, k)

    ari_dsbm_disim = evaluate.ari(comms_DSBM, clustersds_dsbm)
    ari_dsbm_herm = evaluate.ari(comms_DSBM, clustershm_dsbm)
    ari_pa_disim = evaluate.ari(comms_pa, clustersds_pa)
    ari_pa_herm = evaluate.ari(comms_pa, clustershm_pa)

    miscl_vs_disim_dsbm = evaluate.misclustered_vertices(
        comms_DSBM, clustersds_dsbm)
    miscl_vs_herm_dsbm = evaluate.misclustered_vertices(
        comms_DSBM, clustershm_dsbm)
    miscl_vs_disim_pa = evaluate.misclustered_vertices(comms_pa, clustersds_pa)
    miscl_vs_herm_pa = evaluate.misclustered_vertices(comms_pa, clustershm_pa)

    print()

    return {
        'DiSim_DSBM_M': miscl_vs_disim_dsbm,
        'DiSim_PA_M': miscl_vs_disim_pa,
        'Herm_DSBM_M': miscl_vs_herm_dsbm,
        'Herm_PA_M': miscl_vs_herm_pa,
        'DiSim_DSBM_A': ari_dsbm_disim,
        'DiSim_PA_A': ari_pa_disim,
        'Herm_DSBM_A': ari_dsbm_herm,
        'Herm_PA_A': ari_pa_herm,
    }
Exemplo n.º 3
0
def run_test(seed, N, k, p, q, η, interpolation):
    """Cluster a DSBM-PA graph whose matrix ``C`` is interpolated.

    ``C2`` is the ``'C'`` entry returned by ``convert.DSBM_to_PA`` for a
    random complete meta-graph; ``C1`` is ``C2`` with its diagonal zeroed and
    rows renormalised to sum to one.  The graph is sampled with
    ``C = interpolation * C1 + (1 - interpolation) * C2`` and clustered with
    both ``disim.cluster`` and (after ``hermify.to_herm``) ``herm.cluster``.

    Args:
        seed: Used for the meta-graph draws and as ``random_state`` of
            ``DSBM_PA.sample``.
        N: Total size; each of the ``k`` groups gets ``N // k``.
        k: Number of communities / clusters.
        p, q: Forwarded to ``convert.DSBM_to_PA``.
        η: Forwarded to ``DSBM.random_complete``.
        interpolation: Weight of ``C1`` in the interpolated matrix.

    Returns:
        dict with keys ``'interpolation'``, ``'ari_disim'`` and ``'ari_herm'``.
    """
    n = N // k
    C1 = np.zeros((k, k))
    itr = 0
    # Redraw the meta-graph until every row of the off-diagonal part of C has
    # non-zero mass; otherwise the row normalisation below divides by zero.
    while any(C1.sum(axis=1) == 0):
        # itr == 0 yields random_state 1 for every seed (seed**0 == 1), which
        # preserves the first draw of the original code.
        # NOTE(review): for seed 0 or 1 the sequence seed**itr is constant
        # after the first retry, so a persistently rejected draw would still
        # repeat -- consider `seed + itr` if retries are expected in practice.
        F = DSBM.random_complete(k, η, random_state=seed**itr)
        PA_kwargs = convert.DSBM_to_PA(n=n, k=k, p=p, q=q, F=F)
        C2 = PA_kwargs['C']
        C1[:, :] = C2
        np.fill_diagonal(C1, 0)
        # Fix: the counter was never advanced, so a rejected draw was
        # regenerated identically forever.
        itr += 1

    C1 = C1 / C1.sum(axis=1).reshape(-1, 1)
    print(C2)
    print(C1)

    PA_kwargs['C'] = interpolation * C1 + (1 - interpolation) * C2

    GPA, comms = DSBM_PA.sample(a=10,
                                **PA_kwargs,
                                Herm=False,
                                random_state=seed)
    GPA_Herm = hermify.to_herm(GPA)

    clusters_disim = disim.cluster(GPA, k, k, mode='R')
    clusters_herm = herm.cluster(GPA_Herm, k, ϵ=-1, RW=True)

    return {
        'interpolation': interpolation,
        'ari_disim': evaluate.ari(comms, clusters_disim),
        'ari_herm': evaluate.ari(comms, clusters_herm),
    }
Exemplo n.º 4
0
def run_experiments(seed=0, noise=0, tag="", norm=None):
	"""Run the ROCMG clustering experiments on hard-cycle DSBM-PA graphs.

	For each ``(k, c)`` configuration a graph is sampled with
	``DSBM_PA.sample`` using ``DSBM_PA.hard_cycle(k, η=noise)`` and uniform
	``P``.  The graph is clustered with ``disim.cluster`` and, after
	``hermify.to_herm``, with ``herm.cluster``.  One row per algorithm is
	appended to ``results/{tag}.csv``.

	Args:
		seed: ``random_state`` forwarded to ``DSBM_PA.sample``.
		noise: Forwarded as ``η`` to ``DSBM_PA.hard_cycle`` and stored in
			every row.
		tag: Base name of the output file ``results/{tag}.csv``.
		norm: Selects the sweep and is forwarded to both clustering
			routines.  ``None`` runs the plain comparison, ``'self-loops'``
			sweeps ``τ_self_loops`` and ``'densify'`` sweeps ``ω``.  Any
			other value runs no experiment.

	Returns:
		None.  Results are appended to the CSV file.
	"""
	print("Running ROCMG-experiments for seed", seed)
	n = 2000 if norm == 'densify' else 4000
	csv_path = f"results/{tag}.csv"

	def append_row(row):
		# Appended without a header so successive calls accumulate in one file.
		pd.DataFrame(data=row).to_csv(csv_path, mode='a', header=False)

	def sample_graph(k, c):
		# Same sampling call for every sweep; N is n * k.
		C = DSBM_PA.hard_cycle(k=k, η=noise)
		P = np.ones(k) / k
		return DSBM_PA.sample(k=k, c=c, C=C, P=P, N=n * k, random_state=seed, a=c, Herm=False)

	if norm is None:
		for k, c in product([3, 5], [20, 40, 60]):
			A, comms = sample_graph(k, c)
			cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
			row = {
				'c': [c],
				'k': [k],
				'noise': [noise],
				'algorithm': ['DiSim'],
				'ari': [evaluate.ari(comms, cls)]
			}
			print("finished disim-pa, moving on to herm-pa")
			append_row(row)
			A = hermify.to_herm(A)
			cls = herm.cluster(A=A, k=k, norm=norm)
			row = {
				'c': [c],
				'k': [k],
				'noise': [noise],
				'algorithm': ['Herm'],
				'ari': [evaluate.ari(comms, cls)]
			}
			append_row(row)
			print(f"Concluded experiments for c={c}")
	elif norm == 'self-loops':
		for k, c in product([3, 5], [20, 60]):
			A, comms = sample_graph(k, c)
			AH = hermify.to_herm(A)
			for τ in [0.1, 0.4, 0.75, 0.85, 1, 1.15, 1.25, 1.6, 2, 3]:
				print('clustering using disim')
				cls = disim.cluster(A=A, ky=k, kz=k, norm=norm, τ_self_loops=τ)
				row = {
					'c': [c],
					'k': [k],
					'noise': [noise],
					'tau': [τ],
					'algorithm': ['DiSim'],
					'ari': [evaluate.ari(comms, cls)]
				}
				append_row(row)
				print('finished with DiSim, moving to Herm')
				cls = herm.cluster(A=AH, k=k, norm=norm, τ_self_loops=τ)
				# Reuse the row; only the algorithm label and score change.
				row['algorithm'] = ['Herm']
				row['ari'] = [evaluate.ari(comms, cls)]
				append_row(row)
				print('done with herm')
	elif norm == 'densify':
		for k, c in product([5], [20, 60]):
			A, comms = sample_graph(k, c)
			AH = hermify.to_herm(A)
			for ω in np.linspace(0, 0.002, 9):
				print('clustering using disim')
				cls = disim.cluster(A=A, ky=k, kz=k, norm=norm, ω=ω)
				row = {
					'c': [c],
					'k': [k],
					'noise': [noise],
					'omega': [ω],
					'algorithm': ['DiSim'],
					'ari': [evaluate.ari(comms, cls)]
				}
				append_row(row)
				print('finished with DiSim, moving to Herm')
				cls = herm.cluster(A=AH, k=k, norm=norm, ω=ω)
				# Reuse the row; only the algorithm label and score change.
				row['algorithm'] = ['Herm']
				row['ari'] = [evaluate.ari(comms, cls)]
				append_row(row)
				print('done with herm')
Exemplo n.º 5
0
def run_experiments(seed=0, noise=0, tag="", norm=None):
    """Run the ROCMG experiments with a controlled share of intra-edges.

    A random complete meta-graph is converted to DSBM-PA parameters with
    ``convert.DSBM_to_PA``; its ``C`` matrix is then mixed with the identity
    (``η * I + (1 - η) * C``, ``η = noise``) and converted back with
    ``convert.PA_to_DSBM`` to obtain matching DSBM ``p``/``q``.  Graphs are
    sampled from both models, clustered, and one row per result is appended
    to ``results/{tag}.csv``.

    Args:
        seed: Base random state for the meta-graph draw and both samplers.
        noise: Mixing weight ``η`` of the identity in ``C``; stored in each
            row as ``percentage_intra_edges``.
        tag: Base name of the output file ``results/{tag}.csv``.
        norm: ``None`` compares DiSim and Herm on both models;
            ``'interpolation'`` sweeps ``r`` in 1..10 for ``herm.cluster``
            on both models.  Any other value runs no experiment.

    Returns:
        None.  Results are appended to the CSV file.
    """
    η = noise
    print("Running ROCMG-experiments for seed", seed)
    k = 5
    n = 4000
    p = 0
    q = 0.0045
    csv_path = f"results/{tag}.csv"

    # The loop treats the string 'total=0' as a failure sentinel from
    # convert.DSBM_to_PA and redraws F with the next random state.
    its = 0
    dic = 'total=0'
    while dic == 'total=0':
        F = DSBM.random_complete(k=k, η=0, random_state=seed + its)
        dic = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
        its += 1
    dic['C'] = η * np.diag(np.ones(k)) + (1 - η) * dic['C']
    dic2 = convert.PA_to_DSBM(**dic)

    def append_row(model, extra):
        # Appended without a header so successive calls accumulate in one file.
        row = {
            'model': [model],
            'average_edge_probability': [q],
            'percentage_intra_edges': [η],
            **extra,
        }
        pd.DataFrame(data=row).to_csv(csv_path, mode='a', header=False)

    def sample_dsbm():
        return DSBM.sample(n=n,
                           k=k,
                           p=dic2['p'],
                           q=dic2['q'],
                           F=F,
                           random_state=seed,
                           Herm=False)

    def sample_pa():
        return DSBM_PA.sample(random_state=seed,
                              a=dic['c'],
                              Herm=False,
                              **dic)

    if norm is None:
        A, comms = sample_dsbm()
        print("finished sampling dsbm. moving to clustering.")
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        ari_ds = evaluate.ari(comms, cls)
        append_row('DSBM', {'algorithm': ['DiSim'], 'ari': [ari_ds]})
        print('finished disim-dsbm, moving on to herm-dsbm')
        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        ari_herm = evaluate.ari(comms, cls)
        append_row('DSBM', {'algorithm': ['Herm'], 'ari': [ari_herm]})
        print('finished herm-dsbm, movnig on to disim-pa')
        A, comms = sample_pa()
        cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
        ari = evaluate.ari(comms, cls)
        print("finished disim-pa, moving on to herm-pa")
        append_row('DSBM_PA', {'algorithm': ['DiSim'], 'ari': [ari]})
        A = hermify.to_herm(A)
        cls = herm.cluster(A=A, k=k, norm=norm)
        ari = evaluate.ari(comms, cls)
        append_row('DSBM_PA', {'algorithm': ['Herm'], 'ari': [ari]})
        print(f"Concluded experiments for p={p}")
    elif norm == 'interpolation':
        A, comms = sample_dsbm()
        print("finished sampling dsbm. moving to clustering.")
        A = hermify.to_herm(A)
        for r in np.arange(1, 11):
            cls = herm.cluster(A=A, k=k, norm=norm, r=r)
            ari_herm = evaluate.ari(comms, cls)
            append_row('DSBM', {'r': [r], 'ari': [ari_herm]})
        print('finished herm-dsbm, movnig on to disim-pa')
        A, comms = sample_pa()
        A = hermify.to_herm(A)
        for r in np.arange(1, 11):
            # Fix: `r` was not forwarded here, so every row of the DSBM_PA
            # sweep repeated the same clustering under a different `r` label.
            cls = herm.cluster(A=A, k=k, norm=norm, r=r)
            ari = evaluate.ari(comms, cls)
            append_row('DSBM_PA', {'r': [r], 'ari': [ari]})
        print(f"Concluded experiments for p={p}")
Exemplo n.º 6
0
def run_experiments(seed=0, noise=0, tag="", norm=None):
	"""Run the ROCMG experiments over a sweep of edge probabilities.

	A random complete meta-graph ``F`` is drawn once with
	``DSBM.random_complete(k=5, η=noise, random_state=seed)``.  Depending on
	``norm`` the function compares DiSim and Herm on DSBM and DSBM-PA samples
	(``None``), or sweeps a regularisation parameter on DSBM-PA samples
	(``'self-loops'`` sweeps ``τ_self_loops``, ``'densify'`` sweeps ``ω``).
	One row per result is appended to ``results/{tag}.csv``.

	Args:
		seed: Random state for the meta-graph and both samplers.
		noise: Forwarded as ``η`` to ``DSBM.random_complete`` and stored in
			every row.
		tag: Base name of the output file ``results/{tag}.csv``.
		norm: ``None``, ``'self-loops'`` or ``'densify'``; also forwarded
			to both clustering routines.  Any other value runs no
			experiment.

	Returns:
		None.  Results are appended to the CSV file.
	"""
	print("Running ROCMG-experiments for seed", seed)
	k = 5
	n = 2000 if norm == 'densify' else 4000
	F = DSBM.random_complete(k=k, η=noise, random_state=seed)
	csv_path = f"results/{tag}.csv"

	def append_row(row):
		# Appended without a header so successive calls accumulate in one file.
		pd.DataFrame(data=row).to_csv(csv_path, mode='a', header=False)

	def comparison_row(model, p, algorithm, ari):
		return {
			'model': [model],
			'p': [p],
			'noise': [noise],
			'algorithm': [algorithm],
			'ari': [ari]
		}

	def sample_pa(PA_kwargs):
		return DSBM_PA.sample(a=PA_kwargs['c'], random_state=seed, Herm=False, **PA_kwargs)

	if norm is None:
		for p in [0.002, 0.004, 0.006, 0.008]:
			q = p
			PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
			A, comms = DSBM.sample(n=n, k=k, p=p, q=q, F=F, random_state=seed, Herm=False)
			print("finished sampling dsbm. moving to clustering.")
			cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
			append_row(comparison_row('DSBM', p, 'DiSim', evaluate.ari(comms, cls)))
			print('finished disim-dsbm, moving on to herm-dsbm')
			A = hermify.to_herm(A)
			cls = herm.cluster(A=A, k=k, norm=norm)
			append_row(comparison_row('DSBM', p, 'Herm', evaluate.ari(comms, cls)))
			print('finished herm-dsbm, movnig on to disim-pa')
			A, comms = sample_pa(PA_kwargs)
			cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
			ari = evaluate.ari(comms, cls)
			print("finished disim-pa, moving on to herm-pa")
			append_row(comparison_row('DSBM_PA', p, 'DiSim', ari))
			A = hermify.to_herm(A)
			cls = herm.cluster(A=A, k=k, norm=norm)
			append_row(comparison_row('DSBM_PA', p, 'Herm', evaluate.ari(comms, cls)))
			print(f"Concluded experiments for p={p}")
	elif norm == 'self-loops':
		for p in [0.003, 0.006]:
			q = p
			PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
			A, comms = sample_pa(PA_kwargs)
			AH = hermify.to_herm(A)
			for τ in [0.1, 0.4, 0.75, 0.85, 1, 1.15, 1.25, 1.6, 2, 3]:
				print('clustering using disim')
				cls = disim.cluster(A=A, ky=k, kz=k, norm=norm, τ_self_loops=τ)
				row = {
					'p': [p],
					'noise': [noise],
					'tau': [τ],
					'algorithm': ['DiSim'],
					'ari': [evaluate.ari(comms, cls)]
				}
				append_row(row)
				print('finished with DiSim, moving to Herm')
				cls = herm.cluster(A=AH, k=k, norm=norm, τ_self_loops=τ)
				# Reuse the row; only the algorithm label and score change.
				row['algorithm'] = ['Herm']
				row['ari'] = [evaluate.ari(comms, cls)]
				append_row(row)
				print('done with herm')
	elif norm == 'densify':
		for p in [0.003, 0.006]:
			q = p
			PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
			A, comms = sample_pa(PA_kwargs)
			AH = hermify.to_herm(A)
			for ω in np.linspace(0, 0.002, 9):
				print('clustering using disim')
				cls = disim.cluster(A=A, ky=k, kz=k, norm=norm, ω=ω)
				row = {
					'p': [p],
					'noise': [noise],
					'omega': [ω],
					'algorithm': ['DiSim'],
					'ari': [evaluate.ari(comms, cls)]
				}
				append_row(row)
				print('finished with DiSim, moving to Herm')
				cls = herm.cluster(A=AH, k=k, norm=norm, ω=ω)
				# Reuse the row; only the algorithm label and score change.
				row['algorithm'] = ['Herm']
				row['ari'] = [evaluate.ari(comms, cls)]
				append_row(row)
				print('done with herm')
Exemplo n.º 7
0
def run_experiments(seed=0, noise=0, tag="", norm=None):
	"""Run the NCyMG experiments over a sweep of edge probabilities.

	The meta-graph is ``DSBM.cycle(k=5, η=noise)``.  When ``norm`` is
	``None``, for each ``p`` (with ``q = p``) a DSBM graph and a DSBM-PA graph
	are sampled and each is clustered with ``disim.cluster`` and, after
	``hermify.to_herm``, with ``herm.cluster``.  One row per result is
	appended to ``results/{tag}.csv``.

	Args:
		seed: ``random_state`` forwarded to both samplers.
		noise: Forwarded as ``η`` to ``DSBM.cycle`` and stored in every row.
		tag: Base name of the output file ``results/{tag}.csv``.
		norm: Forwarded to both clustering routines.  Only ``None`` runs
			any experiment; other values return after building ``F``.

	Returns:
		None.  Results are appended to the CSV file.
	"""
	print("Running NCyMG-experiments for seed", seed)
	k = 5
	n = 4000
	F = DSBM.cycle(k=k, η=noise)
	csv_path = f"results/{tag}.csv"

	def append_row(model, p, algorithm, ari):
		# Appended without a header so successive calls accumulate in one file.
		row = {
			'model': [model],
			'p': [p],
			'noise': [noise],
			'algorithm': [algorithm],
			'ari': [ari]
		}
		pd.DataFrame(data=row).to_csv(csv_path, mode='a', header=False)

	if norm is None:
		for p in [0.002, 0.004, 0.006, 0.008]:
			q = p
			PA_kwargs = convert.DSBM_to_PA(k=k, n=n, p=p, q=q, F=F)
			A, comms = DSBM.sample(n=n, k=k, p=p, q=q, F=F, random_state=seed, Herm=False)
			print("finished sampling dsbm. moving to clustering.")
			cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
			append_row('DSBM', p, 'DiSim', evaluate.ari(comms, cls))
			print('finished disim-dsbm, moving on to herm-dsbm')
			A = hermify.to_herm(A)
			cls = herm.cluster(A=A, k=k, norm=norm)
			append_row('DSBM', p, 'Herm', evaluate.ari(comms, cls))
			print('finished herm-dsbm, movnig on to disim-pa')
			A, comms = DSBM_PA.sample(random_state=seed, a=PA_kwargs['c'], Herm=False, **PA_kwargs)
			cls = disim.cluster(A=A, kz=k, ky=k, norm=norm)
			ari = evaluate.ari(comms, cls)
			print("finished disim-pa, moving on to herm-pa")
			append_row('DSBM_PA', p, 'DiSim', ari)
			A = hermify.to_herm(A)
			cls = herm.cluster(A=A, k=k, norm=norm)
			append_row('DSBM_PA', p, 'Herm', evaluate.ari(comms, cls))
			print(f"Concluded experiments for p={p}")