wordList.append(tag)

# Encode each entry of `keywords` as one row of (word, flag) pairs, one pair
# per word in wordList: flag is 1 when `word in user` holds, otherwise 0.
# Rows are appended in the same order as `keywords`.
keywords2 = []
i = 0
for user in keywords:
	keywords2.append([(word, 1 if word in user else 0) for word in wordList])
	# i tracks how many rows have been built so far.
	i += 1

	
# Cluster the rows in keywords2 (labelled by `palestrantes`) with the project's
# kmeans module. NOTE(review): `g` is presumably a handle to a Ubigraph
# visualisation server and k=8 the requested number of clusters -- confirm
# against kmeans.kcluster.
g = kmeans.open_ubigraph_server()
result, clusters = kmeans.kcluster(g, palestrantes, keywords2, k=8)

dataClusters = []
i = 0
# Each element of `result` is iterated as a collection of row indices into
# keywords2. For every cluster, add up the per-word flags of its rows and
# print the (word, total) pairs with the highest totals first.
for cluster in result:
	apCount = {}
	for indice in cluster:
		for word, count in keywords2[indice]:
			apCount[word] = apCount.get(word, 0) + count
	# Ascending stable sort followed by a full reversal, so ties come out in
	# the reverse of the dict's item order.
	words = sorted(apCount.items(), key=operator.itemgetter(1))[::-1]
	print(words)
	print('====')
# Encode each entry of `keywords_class` as one row of (word, flag) pairs, one
# pair per word in wordList: flag is 1 when `word in tags` holds, otherwise 0.
# Rows are appended in the same order as `keywords_class`.
keywords2 = []
i = 0
for tags in keywords_class:
	keywords2.append([(word, 1 if word in tags else 0) for word in wordList])
	# i tracks how many rows have been built so far.
	i += 1

	
# Cluster the rows in keywords2 (labelled by `lectures`) with the project's
# kmeans module. NOTE(review): `g` is presumably a handle to a Ubigraph
# visualisation server and k=8 the requested number of clusters -- confirm
# against kmeans.kcluster.
g = kmeans.open_ubigraph_server()
result, clusters = kmeans.kcluster(g, lectures, keywords2, k=8)

dataClusters = []
i = 0
# Each element of `result` is iterated as a collection of row indices into
# keywords2. For every cluster, add up the per-word flags of its rows and
# print the (word, total) pairs with the highest totals first.
for cluster in result:
	apCount = {}
	for indice in cluster:
		for word, count in keywords2[indice]:
			apCount[word] = apCount.get(word, 0) + count
	# Ascending stable sort followed by a full reversal, so ties come out in
	# the reverse of the dict's item order.
	words = sorted(apCount.items(), key=operator.itemgetter(1))[::-1]
	print(words)
	print('====')
# Example #3
# 0
# Give every user an entry for every word in `wordlist`: the value from that
# user's own counts when the word is present there, otherwise 0.
for user, wc in wordCounts.items():
	counts = dict.fromkeys(wordlist, 0)
	for word in wordlist:
		if word in wc:
			counts[word] = wc[word]
	socialNetworking[user] = counts


# Split the per-user table into two parallel lists: the user keys, and each
# user's counts as a list of (word, count) pairs. Both come from the same
# `items` snapshot, so position n in `users` matches position n in `data`.
items = socialNetworking.items()
users = [name for name, _ in items]
data = [counts.items() for _, counts in items]

# Step 05: Run a cluster algorithm (k-means)
# NOTE(review): `g` is presumably a handle to a Ubigraph visualisation server
# and k=15 the requested number of clusters -- confirm against kmeans.kcluster.
g = kmeans.open_ubigraph_server()
# Alternative with an explicit server URL:
#g = kmeans.open_ubigraph_server('http://IP:20738/RPC2')
result, clusters = kmeans.kcluster(g, users, data, k=15)

# Step 06: Presenting the results
# Translate every cluster from positions in `users` back to the user keys.
usersResult = []
for i in range(len(result)):
	usersResult.append([users[v] for v in result[i]])

dataClusters = []
for cluster in usersResult:
	apCount = {}
	for user in cluster:
		data = socialNetworking[user]
		for word,wc in data.items():
			apCount.setdefault(word,0)
			apCount[word]+= wc
	
	words = apCount.items()
	words.sort(key=operator.itemgetter(1))