示例#1
0
    def testNormal(self):
        """Check the result of ``clusters.scaledown`` on two input rows.

        The two rows are passed to ``clusters.scaledown`` with
        ``clusters.euclid_dist`` as the distance and a rate of 0.1.  The
        result must contain two entries, and the ``euclid_dist`` between
        those entries must almost equal the ``euclid_dist`` between the
        original rows.
        """
        m = [[1, 0, 2, 0], [2, 0, 3, 0]]

        r = clusters.scaledown(m, distance=clusters.euclid_dist, rate=0.1)
        # assertEquals / assertAlmostEquals are deprecated aliases that were
        # removed in Python 3.12; the canonical names work everywhere.
        self.assertEqual(2, len(r))
        self.assertAlmostEqual(clusters.euclid_dist(m[0], m[1]),
                               clusters.euclid_dist(r[0], r[1]))
  def testNormal(self):
    """Check the result of ``clusters.scaledown`` on two input rows.

    The two rows are passed to ``clusters.scaledown`` with
    ``clusters.euclid_dist`` as the distance and a rate of 0.1.  The result
    must contain two entries, and the ``euclid_dist`` between those entries
    must almost equal the ``euclid_dist`` between the original rows.
    """
    m = [[1, 0, 2, 0],
         [2, 0, 3, 0]]

    r = clusters.scaledown(m, distance=clusters.euclid_dist, rate=0.1)
    # assertEquals / assertAlmostEquals are deprecated aliases that were
    # removed in Python 3.12; the canonical names work everywhere.
    self.assertEqual(2, len(r))
    self.assertAlmostEqual(clusters.euclid_dist(m[0], m[1]),
                           clusters.euclid_dist(r[0], r[1]))
示例#3
0
def visualize(tweets, jpeg="tweets.jpg"):
    """Build ids, a distance matrix and scaled-down coordinates for tweets.

    Args:
        tweets: iterable of mappings, each providing ``"id_str"`` and
            ``"text"`` keys.  It is traversed twice.
        jpeg: currently unused, because the drawing call below is disabled.

    Returns:
        A tuple ``(ids, matrix, coords)`` where ``matrix`` is the result of
        ``distances(texts)`` and ``coords`` is ``scaledown(matrix)``.
    """
    ids = []
    for tweet in tweets:
        ids.append(tweet["id_str"])

    texts = []
    for tweet in tweets:
        texts.append(tweet["text"])

    matrix = distances(texts)
    coords = scaledown(matrix)

    # Drawing is disabled for now:
    # draw2d(coords, ids, jpeg=jpeg)

    return ids, matrix, coords
示例#4
0
文件: run.py 项目: wz125/courses
def prefer2d():
    """Reload ``clusters`` and draw the scaled-down 'blogdata.txt' data.

    The rows read from 'blogdata.txt' go through ``clusters.scaledown`` and
    the result is handed to ``clusters.draw2d`` together with the row names
    and ``jpeg='blogs2d.jpg'``.
    """
    reload(clusters)
    names, _, matrix = clusters.readfile('blogdata.txt')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, names, jpeg='blogs2d.jpg')
示例#5
0
import clusters    


# Read the blog matrix, scale it down and hand the layout to draw2d.
row_names, _col_names, matrix = clusters.readfile('blogdata.txt')
layout, n_iterations = clusters.scaledown(matrix)
clusters.draw2d(layout, labels=row_names, jpeg='mds.jpg')
print('Iteration count: %d' % n_iterations)
示例#6
0
#!/usr/bin/env python

import clusters

# Input file handed to clusters.readfile.
datafile = '../data/word_data_tfidf.tsv'

blognames, words, data = clusters.readfile(datafile)

# NOTE(review): the result is unpacked as (iterations, coords); confirm this
# order against the clusters module used by this script.
iterations, coords = clusters.scaledown(data)

clusters.draw2d(coords, blognames, jpeg='../question5c.jpg')

# A parenthesised single argument prints identically under the Python 2
# print statement and the Python 3 print function.
print("iterations: {}".format(iterations))
def multidim():
    """Draw the scaled-down 'job_projects' data via ``clusters.draw2d``.

    The rows read from 'job_projects' go through ``clusters.scaledown``; the
    result is passed to ``clusters.draw2d`` with the row names and
    ``jpeg='job_multidim.jpg'``.
    """
    names, _, matrix = clusters.readfile('job_projects')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, names, jpeg='job_multidim.jpg')
示例#8
0
def createMDS():
    """Draw the scaled-down 'blogdata.txt' data via ``clusters.draw2d``.

    The rows read from 'blogdata.txt' go through ``clusters.scaledown``; the
    result is passed to ``clusters.draw2d`` with the row names and
    ``jpeg='blogs2d.jpg'``.
    """
    names, _, matrix = clusters.readfile('blogdata.txt')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, names, jpeg='blogs2d.jpg')
示例#9
0
#!/usr/local/bin/python

import clusters

# Read the blog matrix, scale it down and pass the result to draw2d.
blognames, words, data = clusters.readfile('blogdata.txt')

coords = clusters.scaledown(data)

clusters.draw2d(coords, blognames, jpeg='MDS.jpg')
示例#10
0
    for i in range(len(v1)):
        d += (v1[i] - v2[i])**2
    return math.sqrt(d)


#getBlogs()
#main()

# Load the data set and dump everything that was read.
blognames, words, data = clusters.readfile('similarblogdata.txt')
print(blognames)
print(words)
print(data)

# Report every pair of adjacent rows whose lengths disagree.
for idx in range(1, len(data)):
    current_row = data[idx]
    previous_row = data[idx - 1]
    if len(current_row) != len(previous_row):
        print(blognames[idx])
        print(len(current_row))
        print(blognames[idx - 1])
        print(len(previous_row))

# Run hcluster, then print and draw the result.
clust = clusters.hcluster(data)
clusters.printclust(clust, labels=blognames)
clusters.drawdendrogram(clust, blognames, jpeg='sblogclust.jpg')

# Run kcluster for several values of k and print each result.
for k in (5, 10, 20):
    kclust = clusters.kcluster(data, k=k)
    printkclustValues(kclust)

# Scale the data down and hand the coordinates to draw2d.
coords = clusters.scaledown(data)
clusters.draw2d(coords, blognames, jpeg='sblogs2d.jpg')
示例#11
0
import clusters

# NOTE: `data` read here is overwritten by the zebo.txt read further down.
blognames, words, data = clusters.readfile("blogdata.txt")

# hierarchical clustering
# clust=clusters.hcluster(data)
##clusters.printclust(clust, labels=blognames)
# clusters.drawdendrogram(clust, blognames, jpeg='blogclust.jpg')

# column clustering
# rdata = clusters.rotatematrix(data)
# clust=clusters.hcluster(rdata)
# clusters.drawdendrogram(clust, words, jpeg='wordclust.jpg')

# k-means clustering
# kclust=clusters.kcluster(data, k=10)
# print [blognames[r] for r in kclust[0]]

# zebo.txt
# wants, people, data=clusters.readfile('zebo.txt')
# clust = clusters.hcluster(data, distance = clusters.tanimoto)
# clusters.drawdendrogram(clust, wants, jpeg='zebo_wants_clust.jpg')

# mds
wants, people, data = clusters.readfile("zebo.txt")
# NOTE(review): `wants` is passed as scaledown's second positional argument.
# If that parameter is the distance function, this call will fail; confirm
# against the signature of clusters.scaledown.
loc = clusters.scaledown(data, wants)
clusters.draw2d(loc, wants)

# A parenthesised single argument prints identically under the Python 2
# print statement and the Python 3 print function.
print("hello world")
示例#12
0
def createMDS():
    """Draw the scaled-down 'blogVector.txt' data and print the iteration count.

    The rows read from 'blogVector.txt' go through ``clusters.scaledown``,
    whose result is unpacked as ``(coords, iterationCount)``.  The
    coordinates are passed to ``clusters.draw2d`` with the row names and
    ``jpeg='blogs2d.jpg'``; the count is then printed.
    """
    blognames, words, data = clusters.readfile('blogVector.txt')
    coords, iterationCount = clusters.scaledown(data)
    clusters.draw2d(coords, blognames, jpeg='blogs2d.jpg')

    # Format into a single string so the output is the same under the
    # Python 2 print statement and the Python 3 print function.
    print('iterationCount %s' % (iterationCount,))
# The triple-quoted string below is an unused string literal that holds
# disabled experiments (hcluster / kcluster / kcluster_np / tanimoto runs).
'''
clust = cl.hcluster(data)
cl.printclust(clust,labels=blognames)
cl.drawdendrogram(clust,blognames,jpeg='blogclust.jpg')

rdata = cl.rotatematrix(data)
wordclust = cl.hcluster(rdata)
cl.printclust(wordclust,labels=words)
cl.drawdendrogram(wordclust,words,jpeg='wordclust.jpg')

k = 4
kclust = cl.kcluster(data,k=k)
l = [[blognames[r] for r in kclust[i]] for i in range(k)]
for ll in l:
    print len(ll),ll


kclust = cl.kcluster_np(data,k=k)
l = [[blognames[r] for r in kclust[i]] for i in range(k)]
for ll in l:
    print len(ll),ll


wants,people,data = cl.readfile('zebo')
clust = cl.hcluster(data,distance=cl.tanimoto)
cl.drawdendrogram(clust,wants)
'''

# Scale `data` down and pass the coordinates to draw2d.
# NOTE(review): `cl`, `data` and `blognames` are defined outside this excerpt;
# presumably `cl` is the clusters module and the other two come from
# cl.readfile. Verify against the top of the script.
coords = cl.scaledown(data)
cl.draw2d(coords, blognames, jpeg='blogs2d.jpg')
示例#14
0
import clusters

# Read the data file, scale it down and pass the result to draw2d.
row_names, col_names, matrix = clusters.readfile('blogdata1 (copy).txt')

coords = clusters.scaledown(matrix)

clusters.draw2d(coords, row_names, jpeg='mds.jpg')
示例#15
0
def text():
    """Run ``scaledown`` and a ``k=4`` ``kcluster`` on 'Outputs/blogdata.txt'.

    Both results are computed and then discarded; nothing is returned.
    """
    _rows, _cols, matrix = clusters.readfile('Outputs/blogdata.txt')
    clusters.scaledown(matrix)
    clusters.kcluster(matrix, k=4)
示例#16
0
#!/usr/local/bin/python

import clusters

# Read the blog matrix, scale it down and pass the result to draw2d.
row_names, col_names, matrix = clusters.readfile('blogdata.txt')

layout = clusters.scaledown(matrix)

clusters.draw2d(layout, row_names, jpeg='blogs.jpg')
示例#17
0
#!/usr/local/bin/python

import clusters

# Read the blog matrix, scale it down and pass the result to draw2d.
row_names, col_names, matrix = clusters.readfile('blogdata.txt')

layout = clusters.scaledown(matrix)

clusters.draw2d(layout, row_names, jpeg='blogs.jpg')
示例#18
0
def mds():
    """Draw the scaled-down 'blogdata.txt' data and print the iteration count.

    The result of ``clusters.scaledown`` is unpacked as a pair: the first
    item is passed to ``clusters.draw2d`` with the row names as labels and
    ``jpeg='mds.jpg'``; the second is printed as the iteration count.
    """
    names, _, matrix = clusters.readfile('blogdata.txt')
    layout, n_iterations = clusters.scaledown(matrix)
    clusters.draw2d(layout, labels=names, jpeg='mds.jpg')
    print('Iteration count: %d' % n_iterations)
示例#19
0
#!/usr/local/bin/python

# all code here stolen shamelessly from 
# "Programming Collective Intelligence, Chapter 3"

import sys

sys.path.insert(0, '../libs')

import clusters

# Read the 500-blog matrix, scale it down and pass the result to draw2d.
row_names, col_names, matrix = clusters.readfile('../q1/blogdata500.txt')

layout = clusters.scaledown(matrix)

clusters.draw2d(layout, row_names, jpeg='blogs2d.jpg')
示例#20
0
def main():
    """Draw the scaled-down 'blogdata.txt' data via ``clusters.draw2d``.

    The rows read from 'blogdata.txt' go through ``clusters.scaledown``; the
    result is passed to ``clusters.draw2d`` with the row names and
    ``jpeg='blogs2d.jpg'``.
    """
    names, _, matrix = clusters.readfile('blogdata.txt')
    layout = clusters.scaledown(matrix)
    clusters.draw2d(layout, names, jpeg='blogs2d.jpg')