Exemplo n.º 1
0
from hcluster import pdist, linkage, dendrogram, centroid, weighted
import numpy
from numpy.random import rand
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Demo: draw five random 2-D points, cluster them and plot the dendrogram.
X = rand(5, 2)
print(X)

# Scale the first three rows by 50 so they sit far away from the last two.
X[:3] *= 50
print(X)

# Pairwise distances between the rows of X, as computed by hcluster's pdist.
Y = pdist(X)
print(Y)

# Linkage computed by hcluster's ``weighted`` from those distances.
Z = weighted(Y)
print(Z)

# dendrogram() draws on the current matplotlib figure; its return value is
# printed for inspection.
hh = dendrogram(Z)
print(hh)


plt.show()
Exemplo n.º 2
0
def _merged_similarities(champs_liste, champs_dist, dessous):
    """Return the symmetric similarity table for *champs_liste* after a merge.

    Args:
        champs_liste: current fields, the freshly merged field included.
        champs_dist: similarity table valid before the merge, keyed by
            ``(field, field)`` tuples.
        dessous: maps every current field to the list of fields it stands
            for (``[field]`` itself, or the merged-away fields for the new
            one).

    Returns:
        A new dict holding both ``(a, b)`` and ``(b, a)`` for every pair of
        distinct fields in *champs_liste*.
    """
    new_champs_dist = {}
    for pos, champ1 in enumerate(champs_liste[:-1]):
        for champ2 in champs_liste[pos + 1:]:
            if champ1 != champ2:
                if (champ1, champ2) in champs_dist:
                    # Known pair: carry it over. The reverse entry falls back
                    # on the forward one, because the incoming table is read
                    # with .get() elsewhere and may be one-directional.
                    forward = champs_dist[(champ1, champ2)]
                    new_champs_dist[(champ1, champ2)] = forward
                    new_champs_dist[(champ2, champ1)] = champs_dist.get(
                        (champ2, champ1), forward)
                else:
                    # Put the merged field (the one covering several fields)
                    # in second position. Local names are used so the loop
                    # variables are never rebound mid-iteration.
                    if len(dessous[champ1]) > 1:
                        seul, groupe = champ2, champ1
                    else:
                        seul, groupe = champ1, champ2
                    # Average of the old similarities towards each underlying
                    # field, weighted by len(field.niveau).
                    stre = 0.
                    nivel = 0
                    for y in dessous[groupe]:
                        poids = len(y.niveau)
                        stre += champs_dist.get((seul, y), 0.) * float(poids)
                        nivel += poids
                    # With a zero total weight every term above is 0 as well;
                    # report 0. instead of dividing by zero.
                    valeur = stre / float(nivel) if nivel else 0.
                    new_champs_dist[(seul, groupe)] = valeur
                    new_champs_dist[(groupe, seul)] = valeur
    return new_champs_dist


def hierarchical_clustering(nets, dico, dist_mat):
    """Build, per period, the networks obtained by successively merging fields.

    For every ``(inter, net)`` pair of *nets*, the similarities found in
    ``net.champs_dist`` are turned into distances (``1 - similarity``, a
    missing pair counting as similarity 0) and handed to ``weighted``. Each
    row of the resulting linkage is then replayed: the fields whose ``index``
    appears in the row are removed, replaced by a field built with
    ``addnfields``, and the similarity table is recomputed for the new list
    of fields.

    A progress line is printed for every period.

    Args:
        nets: mapping from a period identifier to an object exposing
            ``champs_liste`` (list of fields) and ``champs_dist`` (dict keyed
            by ``(field, field)`` tuples).
        dico: passed through unchanged to ``addnfields``.
        dist_mat: passed through unchanged to ``addnfields``.

    Returns:
        dict mapping ``(inter, 0)`` to the initial ``Network`` and
        ``(inter, fusion_level)`` to the ``Network`` built after each merge,
        ``fusion_level`` being the third column of the linkage row.
        NOTE(review): two merges of the same period sharing the same
        ``fusion_level`` end up under the same key; only the later is kept.
    """
    multi_level_dyn_net = {}
    for inter, net in nets.items():
        # Work on a copy: the merges below remove and append fields, which
        # must not empty the list owned by the caller's net object.
        champs_liste = list(net.champs_liste)
        champs_dist = net.champs_dist

        # Turn the similarities between fields into one distance per
        # unordered pair, in list order.
        dist_tot = [
            float(1. - champs_dist.get((champ1, champ2), 0.))
            for pos, champ1 in enumerate(champs_liste[:-1])
            for champ2 in champs_liste[pos + 1:]
        ]
        Z = weighted(dist_tot)
        print('***periode : ' + str(inter) + 'on calcule le dendrogramme associe')

        # N is the id handed to addnfields for each merged field; it continues
        # after the ids 0..len-1 of the initial fields.
        N = len(champs_liste) - 1
        multi_level_dyn_net[(inter, 0)] = Network(champs_liste[:], champs_dist)

        # Z looks like:
        #   [  2.           6.           0.36216925   2.        ]
        #   [ 10.          15.           0.42559153   2.        ]
        #   [  8.          13.           0.43538316   2.        ]
        #   [  0.          19.           0.43583879   2.        ]
        for ev_fusion in Z:
            N += 1
            # Pair of field ids merged by this row, and the merge height.
            fusion_couple = (int(ev_fusion[0]), int(ev_fusion[1]))
            fusion_level = ev_fusion[2]

            dessous = {}
            fields_rm_liste = []
            for champ in champs_liste[:]:
                if champ.index in fusion_couple:
                    champs_liste.remove(champ)
                    fields_rm_liste.append(champ)
                else:
                    dessous[champ] = [champ]

            new_champ = addnfields(fields_rm_liste, N, inter, dico, dist_mat)
            dessous[new_champ] = fields_rm_liste
            champs_liste.append(new_champ)

            champs_dist = _merged_similarities(champs_liste, champs_dist, dessous)
            multi_level_dyn_net[(inter, fusion_level)] = Network(
                champs_liste[:], champs_dist)
    return multi_level_dyn_net
Exemplo n.º 3
0
def _freeze_level(level, clusters_level, champs_courants, decalage, above, below, m):
    """Record the current cluster ids as the snapshot for *level*.

    Every id of the previous snapshot that is still current (it has not been
    merged since) is first replaced by a fresh alias id, so that each level
    uses its own ids; the alias is recorded in *decalage*, *above* and
    *below*. All container arguments are updated in place.

    Returns:
        The last alias id handed out (the updated value of *m*).
    """
    if level > 0:
        for x in clusters_level[level - 1]:
            if x in champs_courants:
                m += 1
                champs_courants.remove(x)
                champs_courants.append(m)
                decalage[x] = m
                above[x] = m
                below.setdefault(m, []).append(x)
    clusters_level[level] = champs_courants[:]
    return m


def h_clustering(clusters, nb_level=6):
    """Cluster the entries of *clusters* hierarchically, snapshotting levels.

    The keys of *clusters* are renumbered ``0..n-1`` following
    ``clusters.keys()``; the ``'syn'`` and ``'dia'`` entries of each cluster,
    when present, are re-keyed through ``reindex``. NOTE: the cluster dicts
    are modified in place and are the very objects returned in
    ``clusters_index``.

    The distance between two clusters is ``1 -`` the larger of their two
    directed ``'syn'`` strengths (0 when absent); the distances are passed to
    ``weighted``. The linkage rows are then replayed in order, except the
    last ``max_periode`` ones, ``max_periode`` being the largest ``'periode'``
    value found in *clusters* (0 when none is set). While replaying, the set
    of current cluster ids is snapshotted each time the row counter reaches
    ``len(Z) * level // nb_level - 1``, and once more after the last replayed
    row.

    Two progress lines plus one line per snapshot are printed.

    Args:
        clusters: mapping from a cluster key to a dict that may contain
            ``'periode'``, ``'syn'`` and ``'dia'``.
        nb_level: divisor used to space the snapshots (expected to be an
            integer).

    Returns:
        A tuple ``(clusters_index, clusters_level, below, above)``:
        clusters re-keyed by integer id, ``level -> list of ids`` snapshots,
        ``id -> list of ids directly under it`` and ``id -> id directly
        above it``.
    """
    # Renumber the cluster keys for convenience.
    univ2index = {}
    max_periode = 0
    for i, univ in enumerate(clusters.keys()):
        univ2index[univ] = i
        max_periode = max(max_periode, clusters[univ].get('periode', 0))

    clusters_index = {}
    for univ, clu in clusters.items():
        clusters_index[univ2index[univ]] = clu
        for cle in ('syn', 'dia'):
            if cle in clu:
                clu[cle] = reindex(univ2index, clu[cle])

    # Directed synchronous link strengths, keyed by (id, neighbour id).
    # 'syn' is treated as optional, as in the renumbering step above.
    sync_strenghts = {}
    for index, clu in clusters_index.items():
        for voisin, stre in clu.get('syn', {}).items():
            sync_strenghts[(index, voisin)] = stre

    # One distance per unordered pair. The pairs are enumerated in ascending
    # id order because the ids in the linkage rows are matched against these
    # cluster ids further down.
    indices = sorted(clusters_index)
    dist_tot = [
        float(1. - max(sync_strenghts.get((champ2, champ1), 0.),
                       sync_strenghts.get((champ1, champ2), 0.)))
        for pos, champ1 in enumerate(indices[:-1])
        for champ2 in indices[pos + 1:]
    ]

    Z = weighted(dist_tot)
    print('***on calcule le dendrogramme associe')

    # Z looks like:
    #   [  2.           6.           0.36216925   2.        ]
    #   [ 10.          15.           0.42559153   2.        ]
    #   [  8.          13.           0.43538316   2.        ]
    #   [  0.          19.           0.43583879   2.        ]

    level = 0
    champs_courants = list(indices)
    # Merged clusters get the ids following those of the initial clusters.
    N = len(champs_courants) - 1
    clusters_level = {}
    below, above = {}, {}
    print(len(Z))
    decalage = {}
    # Alias ids created when a level is frozen start above this value.
    # NOTE(review): assumes merged ids (N) stay below 10000 - confirm.
    m = 10000

    # Leave the last max_periode merges out. An explicit stop index is needed:
    # Z[:-max_periode] is Z[:0], i.e. empty, when max_periode is 0.
    nb_fusions = max(0, len(Z) - max_periode)
    for i, ev_fusion in enumerate(Z[:nb_fusions]):
        # Integer division on purpose: the threshold is a row count.
        if i >= len(Z) * level // nb_level - 1:
            print('%s %s' % (i, level))
            # Freeze the network in its current state.
            m = _freeze_level(level, clusters_level, champs_courants,
                              decalage, above, below, m)
            level += 1

        N += 1
        # rec_retrieval is defined elsewhere; presumably it follows the
        # aliases stored in decalage to the id currently in use.
        ch1 = rec_retrieval(decalage, int(ev_fusion[0]))
        ch2 = rec_retrieval(decalage, int(ev_fusion[1]))

        champs_courants.remove(ch1)
        champs_courants.remove(ch2)
        champs_courants.append(N)
        below.setdefault(N, []).extend([ch1, ch2])
        above[ch1] = N
        above[ch2] = N

        if i == nb_fusions - 1:
            print('%s %s' % (i, level))
            # Freeze the final state as well.
            m = _freeze_level(level, clusters_level, champs_courants,
                              decalage, above, below, m)

    return clusters_index, clusters_level, below, above