Exemplo n.º 1
0
@author: Yannis Mentekidis
"""

""" Read data """
import data_read as dtrd
import numpy as np
# Load the dataset through the project-local reader. The call returns three
# values, bound here to `data`, `features` and `projects_true`.
# NOTE(review): presumably the sample matrix, its feature names and the
# ground-truth project labels -- confirm against data_read.All.
data, features, projects_true = dtrd.All(sparse=False)

""" Processing """
import mypreprocessing as prp

# Project-local trimming step, run with low_thresh=1 and hig_thresh=70.
# NOTE(review): presumably filters features by how often they are present;
# the exact threshold semantics live in mypreprocessing -- confirm there.
data = prp.TrimmingPresence(data, low_thresh=1, hig_thresh=70)
# Alternative preprocessing steps, currently disabled:
#data = prp.LDAProjection(data, features=features, LDA_topics=12,verbose=True)
#data = prp.RowWiseNorm(data)

# Apply the project's tf-idf transform, then convert the result to a plain
# dense ndarray. The result must expose .todense(), so it is presumably a
# scipy sparse matrix -- confirm against mypreprocessing.tfidf.
data = prp.tfidf(data)
data = np.array(data.todense())



""" Clustering """
from sklearn.cluster import AgglomerativeClustering
from sklearn import metrics

# Settings and result accumulators for the sweep over candidate cluster counts.
dist_metric = "cosine"
# One accumulator list per clustering score.
# NOTE(review): the names suggest silhouette, homogeneity, completeness and
# V-measure -- confirm against the loop body that fills them.
silh = []
# Restored from the garbled spelling `h**o`, which is not a valid assignment
# target and made the script fail to parse.
# NOTE(review): any later use spelled `h**o` needs the same correction.
homo = []
comp = []
vmea = []
# Candidate numbers of clusters: 2 through 10 inclusive.
choices = range(2, 11)
for c in choices:
Exemplo n.º 2
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jan  6 20:43:07 2016

@author: Yannis Mentekidis, Themis Papavasileiou, Panos Siatos
"""
""" Load Data """
import data_read as dtrd
# Load the dataset through the project-local reader. The call returns three
# values, bound here to `data`, `features` and `projects_true`.
# NOTE(review): presumably the sample matrix, its feature names and the
# ground-truth project labels -- confirm against data_read.All.
data, features, projects_true = dtrd.All(sparse=False)

""" Process Data """
import mypreprocessing as prp
import numpy as np
# Project-local trimming step, run with low_thresh=1 and hig_thresh=70.
# NOTE(review): threshold semantics are defined in mypreprocessing -- confirm.
data = prp.TrimmingPresence(data, low_thresh=1, hig_thresh=70)
# Keep the trimmed `data` untouched and store the tf-idf-transformed copy
# under a separate name, `data_p`.
data_p = prp.tfidf(data)
# Convert to a plain dense ndarray. The tf-idf result must expose .todense(),
# so it is presumably a scipy sparse matrix -- confirm against prp.tfidf.
data_p = np.array(data_p.todense())

""" Clustering with Hierarchical Algorithm """
from sklearn.cluster import AgglomerativeClustering
# Agglomerative clustering setup: a fixed number of clusters, cosine as the
# distance between rows, and average linkage when merging.
# NOTE(review): newer scikit-learn releases rename `affinity` to `metric` --
# confirm against the installed version before upgrading.
c = 7
dist_metric = 'cosine'
clu = AgglomerativeClustering(
    n_clusters=c,
    affinity=dist_metric,
    linkage="average",
)
# Fit on the dense tf-idf matrix; the per-row labels are read from
# clu.labels_ afterwards.
clu.fit(data_p)

# Report the members of every cluster: for each label in 0..c-1 print a banner
# line followed by the list of row positions assigned to that label.
# print is called with a single parenthesised argument, so each statement is
# valid on both Python 2 and Python 3 and emits the same text on either.
for cluster in range(c):
    print("-=-=-=-=-= Cluster %d -=-=-=-=-=" % (cluster))

    # Iterate clu.labels_ directly; copying it into a list first is not needed
    # just to enumerate it.
    indices = [i for i, label in enumerate(clu.labels_) if label == cluster]

    print(indices)