示例#1
0
#!/usr/bin/python

import random
from textcluster2 import cluster, dump_clusters

# Demo script: cluster a small, shuffled list of water-body names and dump
# the result.

# Requested number of clusters; rebound below to the first value that
# cluster() returns.
nclusters = 3
data = [
    "pacific ocean", "pacific", "the pacific ocean", "pacific ocean",
    "atlantic", "atlantic ocean", "ocean", "sea", "indian ocean",
    "pacific ocean", "indian", "atlantic", "southern ocean", "lake",
    "lake baikal", "crater lake",
]

# Shuffle in place so every run feeds the strings in a different order.
random.shuffle(data)
# The parenthesised single-argument form prints the same text under the
# Python 2 print statement and the Python 3 print function.
print("data: " + str(data))

nclusters, centroids, cids = cluster(data, nclusters)
# Blank separator line. A bare `print` is a silent no-op on Python 3, so an
# explicit empty string is printed instead.
print("")
dump_clusters(nclusters, centroids, data, cids)
示例#2
0
import sys
import json
import io
from textcluster2 import cluster, dump_clusters

# def fetch_tweets():
#     tweets=[]
#     uhandle=urllib2.urlopen('http://api.twitter.com/1/statuses/public_timeline.json')
#     jstr=uhandle.read()
#     jarray=json.loads(jstr)
#     for jobj in jarray:
#         tweets.append(jobj['text'])
#     return tweets


def read_tweets(fnames):
    """Collect the ``text`` field of every tweet stored in the given files.

    Each file is parsed as JSON and must contain an array of objects, each
    of which has a ``text`` key.

    Args:
        fnames: Iterable of paths to JSON files.

    Returns:
        A list of the ``text`` values, ordered by file and then by position
        inside each file's array. Empty when ``fnames`` is empty.

    Raises:
        ValueError: If a file does not contain valid JSON.
        KeyError: If an object in the array has no ``text`` key.
    """
    tweets = []
    for fname in fnames:
        # Decode explicitly as UTF-8: without an encoding, io.open falls back
        # to the locale's preferred encoding, which breaks on non-ASCII tweet
        # text under a C/ASCII or otherwise non-UTF-8 locale.
        with io.open(fname, 'r', encoding='utf-8') as f:
            jarray = json.load(f)
        # The file is already closed here; only the parsed data is needed.
        tweets.extend(jobj['text'] for jobj in jarray)
    return tweets


# Driver: every command-line argument is treated as a path to a JSON tweet
# file; the collected texts are clustered with a requested cluster count of 8.
tweets = read_tweets(sys.argv[1:])
nclusters, centroids, cids = cluster(tweets, 8)
# Blank separator line. A bare `print` is a silent no-op on Python 3, while
# print("") emits the same empty line on both Python 2 and Python 3.
print("")
dump_clusters(nclusters, centroids, tweets, cids)
示例#3
0
import sys
import json
import io
from textcluster2 import cluster, dump_clusters

# def fetch_tweets():
#     tweets=[]
#     uhandle=urllib2.urlopen('http://api.twitter.com/1/statuses/public_timeline.json')
#     jstr=uhandle.read()
#     jarray=json.loads(jstr)
#     for jobj in jarray:
#         tweets.append(jobj['text'])
#     return tweets

def read_tweets(fnames):
    """Return the ``text`` of every tweet found in the given JSON files.

    Every file is expected to hold a JSON array whose elements are objects
    carrying a ``text`` key.

    Args:
        fnames: Iterable of file paths to read, in order.

    Returns:
        A flat list of ``text`` values: all entries of the first file in
        array order, then those of the second file, and so on. An empty
        ``fnames`` yields an empty list.

    Raises:
        ValueError: If a file's content is not valid JSON.
        KeyError: If an array element lacks the ``text`` key.
    """
    tweets = []
    for fname in fnames:
        # Pin the encoding to UTF-8 rather than inheriting the locale's
        # preferred encoding, which fails or mis-decodes on non-ASCII tweet
        # text under non-UTF-8 locales.
        with io.open(fname, 'r', encoding='utf-8') as f:
            jarray = json.load(f)
        for jobj in jarray:
            tweets.append(jobj['text'])
    return tweets

# Driver: load tweet texts from the JSON files named on the command line and
# cluster them, requesting 8 clusters.
tweets = read_tweets(sys.argv[1:])
nclusters, centroids, cids = cluster(tweets, 8)
# Emit an empty separator line. print("") behaves the same on Python 2 and
# Python 3, whereas a bare `print` prints nothing on Python 3.
print("")
dump_clusters(nclusters, centroids, tweets, cids)

示例#4
0
import random
from textcluster2 import cluster, dump_clusters

# Demo script: cluster a shuffled list of water-body names and dump the
# resulting clusters.

# Requested cluster count; overwritten below by the first value cluster()
# returns.
nclusters = 3
data = [
    "pacific ocean",
    "pacific",
    "the pacific ocean",
    "pacific ocean",
    "atlantic",
    "atlantic ocean",
    "ocean",
    "sea",
    "indian ocean",
    "pacific ocean",
    "indian",
    "atlantic",
    "southern ocean",
    "lake",
    "lake baikal",
    "crater lake",
]

# In-place shuffle: the input order differs from run to run.
random.shuffle(data)
# Single parenthesised argument keeps the output identical on Python 2 while
# also being valid on Python 3, where the print statement is a SyntaxError.
print("data: " + str(data))

nclusters, centroids, cids = cluster(data, nclusters)
# Blank separator line; print("") works on both Python 2 and Python 3.
print("")
dump_clusters(nclusters, centroids, data, cids)