def testNormal(self):
    """Check that scaledown keeps the distance between two input rows.

    Two 4-element rows are passed to ``clusters.scaledown`` with
    ``clusters.euclid_dist`` as the distance function and ``rate=0.1``.
    The result must contain two items, and the Euclidean distance between
    them must (almost) equal the distance between the original rows.
    """
    m = [[1, 0, 2, 0], [2, 0, 3, 0]]
    r = clusters.scaledown(m, distance=clusters.euclid_dist, rate=0.1)

    # assertEqual / assertAlmostEqual are the supported spellings; the
    # "...Equals" aliases are deprecated and were removed in Python 3.12.
    self.assertEqual(2, len(r))
    self.assertAlmostEqual(
        clusters.euclid_dist(m[0], m[1]),
        clusters.euclid_dist(r[0], r[1])
    )
def visualize(tweets, jpeg="tweets.jpg"):
    """Compute a distance matrix and scaled-down coordinates for tweets.

    Each item of ``tweets`` is indexed with the keys ``"id_str"`` and
    ``"text"``. The texts are handed to ``distances`` and its result to
    ``scaledown``.

    ``jpeg`` is currently unused because the drawing step is commented out.

    Returns a 3-tuple ``(ids, matrix, coords)``.
    """
    ids = [t["id_str"] for t in tweets]
    texts = [t["text"] for t in tweets]

    dist_matrix = distances(texts)
    layout = scaledown(dist_matrix)

    # Rendering is disabled for now:
    # draw2d(coords, ids, jpeg=jpeg)
    return ids, dist_matrix, layout
def prefer2d():
    """Reload ``clusters``, scale down 'blogdata.txt' and draw 'blogs2d.jpg'.

    The module is reloaded first, then the file is read with
    ``clusters.readfile``, its data passed to ``clusters.scaledown`` and the
    result drawn with ``clusters.draw2d`` using the first readfile value as
    labels.
    """
    # NOTE(review): bare ``reload`` is a builtin only on Python 2; on
    # Python 3 it must come from importlib -- confirm the file's imports.
    reload(clusters)

    labels, _words, matrix = clusters.readfile('blogdata.txt')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, labels, jpeg='blogs2d.jpg')
import clusters

# readfile's result is unpacked into three values; only the first (used as
# labels) and the third (the data handed to scaledown) are needed below.
row_labels, _columns, matrix = clusters.readfile('blogdata.txt')

# This scaledown variant yields a pair: the coordinates and an iteration count.
layout, n_iterations = clusters.scaledown(matrix)

clusters.draw2d(layout, labels=row_labels, jpeg='mds.jpg')
print('Iteration count: %d' % n_iterations)
#!/usr/bin/env python
import clusters

# Input file read by clusters.readfile.
datafile = '../data/word_data_tfidf.tsv'

blognames, words, data = clusters.readfile(datafile)

# This scaledown variant yields a pair, unpacked here as
# (iterations, coords).
iterations, coords = clusters.scaledown(data)

clusters.draw2d(coords, blognames, jpeg='../question5c.jpg')

# Parenthesised single-argument print produces the same output on
# Python 2 and Python 3 (the bare print statement is Python 2 only).
print("iterations: {}".format(iterations))
def multidim():
    """Scale down the 'job_projects' data and draw it to 'job_multidim.jpg'.

    Reads the file with ``clusters.readfile``, passes the data to
    ``clusters.scaledown`` and draws the result with ``clusters.draw2d``,
    using the first value returned by readfile as labels.
    """
    labels, _projects, matrix = clusters.readfile('job_projects')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, labels, jpeg='job_multidim.jpg')
def createMDS():
    """Scale down 'blogdata.txt' and draw the result to 'blogs2d.jpg'.

    The file is read with ``clusters.readfile``; its data goes through
    ``clusters.scaledown`` and the output is drawn by ``clusters.draw2d``
    with the first readfile value as labels.
    """
    labels, _words, matrix = clusters.readfile('blogdata.txt')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, labels, jpeg='blogs2d.jpg')
#!/usr/local/bin/python
import clusters

# Read the input; the first value is used as labels, the third as the data.
blognames, _words, matrix = clusters.readfile('blogdata.txt')

# Scale the data down and draw the result to MDS.jpg.
layout = clusters.scaledown(matrix)
clusters.draw2d(layout, blognames, jpeg='MDS.jpg')
for i in range(len(v1)): d += (v1[i] - v2[i])**2 return math.sqrt(d) #getBlogs() #main() blognames, words, data = clusters.readfile('similarblogdata.txt') print(blognames) print(words) print(data) for i in range(len(data[1:])): if len(data[i + 1]) != len(data[i]): print(blognames[i + 1]) print(len(data[i + 1])) print(blognames[i]) print(len(data[i])) clust = clusters.hcluster(data) clusters.printclust(clust, labels=blognames) clusters.drawdendrogram(clust, blognames, jpeg='sblogclust.jpg') kclust = clusters.kcluster(data, k=5) printkclustValues(kclust) kclust = clusters.kcluster(data, k=10) printkclustValues(kclust) kclust = clusters.kcluster(data, k=20) printkclustValues(kclust) coords = clusters.scaledown(data) clusters.draw2d(coords, blognames, jpeg='sblogs2d.jpg')
import clusters

# Only the disabled experiments below use this first dataset; the live code
# further down overwrites `data` with the zebo.txt contents.
blognames, words, data = clusters.readfile("blogdata.txt")

# hierarchy clustering
# clust=clusters.hcluster(data)
##clusters.printclust(clust, labels=blognames)
# clusters.drawdendrogram(clust, blognames, jpeg='blogclust.jpg')

# column clustering
# rdata = clusters.rotatematrix(data)
# clust=clusters.hcluster(rdata)
# clusters.drawdendrogram(clust, words, jpeg='wordclust.jpg')

# k-means clustering
# kclust=clusters.kcluster(data, k=10)
# print [blognames[r] for r in kclust[0]]

# zebo.txt
# wants, people, data=clusters.readfile('zebo.txt')
# clust = clusters.hcluster(data, distance = clusters.tanimoto)
# clusters.drawdendrogram(clust, wants, jpeg='zebo_wants_clust.jpg')

# mds
wants, people, data = clusters.readfile("zebo.txt")

# NOTE(review): `wants` is passed as scaledown's second positional argument.
# If that parameter is the distance function, this call fails -- confirm
# against the scaledown signature in clusters.
loc = clusters.scaledown(data, wants)
clusters.draw2d(loc, wants)

# Parenthesised single-argument print behaves identically on Python 2 and
# Python 3 (the bare print statement is Python 2 only).
print("hello world")
def createMDS():
    """Scale down 'blogVector.txt', draw 'blogs2d.jpg', print the iteration count.

    The file is read with ``clusters.readfile``; ``clusters.scaledown`` is
    expected to yield a pair, unpacked as ``(coords, iterationCount)``. The
    coordinates are drawn with ``clusters.draw2d`` using the first readfile
    value as labels, and the count is printed to stdout.
    """
    blognames, words, data = clusters.readfile('blogVector.txt')
    coords, iterationCount = clusters.scaledown(data)
    clusters.draw2d(coords, blognames, jpeg='blogs2d.jpg')

    # A single pre-formatted string prints the same text ("iterationCount <n>")
    # on both Python 2 and Python 3; the bare print statement is Python 2 only.
    print('iterationCount %s' % (iterationCount,))
# The string literal below holds disabled experiment code; it is an inert
# expression and is kept unchanged.
''' clust = cl.hcluster(data) cl.printclust(clust,labels=blognames) cl.drawdendrogram(clust,blognames,jpeg='blogclust.jpg') rdata = cl.rotatematrix(data) wordclust = cl.hcluster(rdata) cl.printclust(wordclust,labels=words) cl.drawdendrogram(wordclust,words,jpeg='wordclust.jpg') k = 4 kclust = cl.kcluster(data,k=k) l = [[blognames[r] for r in kclust[i]] for i in range(k)] for ll in l: print len(ll),ll kclust = cl.kcluster_np(data,k=k) l = [[blognames[r] for r in kclust[i]] for i in range(k)] for ll in l: print len(ll),ll wants,people,data = cl.readfile('zebo') clust = cl.hcluster(data,distance=cl.tanimoto) cl.drawdendrogram(clust,wants) '''

# Live code: scale `data` down and draw it, labelled with `blognames`
# (both names are defined outside this fragment).
layout = cl.scaledown(data)
cl.draw2d(layout, blognames, jpeg='blogs2d.jpg')
import clusters

# Read the input; the first value is used as labels, the third as the data.
labels, _word, matrix = clusters.readfile('blogdata1 (copy).txt')

# Scale the data down and draw the result to mds.jpg.
layout = clusters.scaledown(matrix)
clusters.draw2d(layout, labels, jpeg='mds.jpg')
def text():
    """Run scaledown and a k=4 kcluster over 'Outputs/blogdata.txt'.

    The file is read with ``clusters.readfile``; its data is passed first to
    ``clusters.scaledown`` and then to ``clusters.kcluster`` with ``k=4``.
    Neither result is returned or used further in the visible body.
    """
    _names, _words, matrix = clusters.readfile('Outputs/blogdata.txt')
    layout = clusters.scaledown(matrix)
    groups = clusters.kcluster(matrix, k=4)
#!/usr/local/bin/python
import clusters

# Read the input; the first value is used as labels, the third as the data.
labels, _words, matrix = clusters.readfile('blogdata.txt')

# Scale the data down and draw the result to blogs.jpg.
layout = clusters.scaledown(matrix)
clusters.draw2d(layout, labels, jpeg='blogs.jpg')
#!/usr/local/bin/python
import clusters

# Read the input file; readfile's result unpacks into three values, of which
# the second is not used by this script.
blog_labels, _unused_words, rows = clusters.readfile('blogdata.txt')

# Pass the data through scaledown, then draw it labelled to blogs.jpg.
positions = clusters.scaledown(rows)
clusters.draw2d(positions, blog_labels, jpeg='blogs.jpg')
def mds():
    """Scale down 'blogdata.txt', draw 'mds.jpg', print the iteration count.

    The file is read with ``clusters.readfile``; ``clusters.scaledown`` is
    expected to yield a pair, unpacked as coordinates and an iteration
    count. The coordinates are drawn with ``clusters.draw2d`` (labels are
    the first readfile value) and the count is printed to stdout.
    """
    row_labels, _words, matrix = clusters.readfile('blogdata.txt')
    layout, n_iterations = clusters.scaledown(matrix)
    clusters.draw2d(layout, labels=row_labels, jpeg='mds.jpg')
    print('Iteration count: %d' % n_iterations)
#!/usr/local/bin/python
# all code here stolen shamelessly from
# "Programming Collective Intelligence, Chapter 3"
import sys

# Must run before the next import: puts ../libs at the front of the module
# search path.
sys.path.insert(0, '../libs')
import clusters

# Read the input; the first value is used as labels, the third as the data.
labels, _words, matrix = clusters.readfile('../q1/blogdata500.txt')

# Scale the data down and draw the result to blogs2d.jpg.
layout = clusters.scaledown(matrix)
clusters.draw2d(layout, labels, jpeg='blogs2d.jpg')
def main():
    """Scale down 'blogdata.txt' and draw the result to 'blogs2d.jpg'.

    Reads the file with ``clusters.readfile``, passes its data to
    ``clusters.scaledown`` and draws the output with ``clusters.draw2d``,
    labelled with the first value returned by readfile.
    """
    labels, _words, matrix = clusters.readfile('blogdata.txt')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, labels, jpeg='blogs2d.jpg')