Example #1
0
def __main__():
    """Hierarchically cluster the blog dataset with Euclidean distance
    and write the resulting dendrogram to ex3dendrogram.jpg."""
    row_labels, _word_labels, matrix = clusters.read_file("blogdata.txt")
    tree = clusters.hcluster(matrix, distance=euclidean_distance)
    clusters.draw_dendogram(tree, row_labels, jpeg="ex3dendrogram.jpg")


# I think this weights against groupings that have similar word use rates but different word use counts.
def __main__():
    """Hierarchically cluster the entry dataset (default distance metric)
    and write the dendrogram to ex2dend.jpg."""
    row_labels, _word_labels, matrix = clusters.read_file('entrydata.txt')
    tree = clusters.hcluster(matrix)
    clusters.draw_dendogram(tree, row_labels, jpeg="ex2dend.jpg")
def __main__():
    """Hierarchically cluster the Zebo wants dataset using Manhattan
    distance and write the dendrogram to ex4dend.jpg."""
    item_labels, _person_labels, matrix = clusters.read_file('zebo.txt')
    tree = clusters.hcluster(matrix, distance=manhattan_distance)
    clusters.draw_dendogram(tree, item_labels, jpeg="ex4dend.jpg")
Example #4
0
def __main__():
    """Cluster the Zebo wants data hierarchically (Manhattan distance);
    save the dendrogram as ex4dend.jpg."""
    wish_labels, _people_labels, grid = clusters.read_file('zebo.txt')
    hierarchy = clusters.hcluster(grid, distance=manhattan_distance)
    clusters.draw_dendogram(hierarchy, wish_labels, jpeg="ex4dend.jpg")
Example #5
0
def __main__():
    """Cluster the entry data hierarchically with the default metric;
    save the dendrogram as ex2dend.jpg."""
    entry_labels, _word_labels, grid = clusters.read_file('entrydata.txt')
    hierarchy = clusters.hcluster(grid)
    clusters.draw_dendogram(hierarchy, entry_labels, jpeg="ex2dend.jpg")
Example #6
0
"""
Exercise 6

After completing exercise five, create a function that runs K-means
clustering over different values of k. How does the total distance
change as the number of clusters increases? At what point does the
improvement from having more clusters become very small?
"""
import clusters
from matplotlib import pyplot
# Load the blog dataset once at import time. Only the data matrix (DATA)
# is used by the experiments below; the row/column label lists are
# deliberately discarded (hence the throwaway names).
trash, other_trash, DATA = clusters.read_file("blogdata.txt")


def run_experiment(k):
    """Run K-means on the module-level DATA matrix for the given *k*.

    Returns the first element of ``clusters.k_cluster``'s result
    (presumably the total error — TODO confirm against the clusters module).
    """
    result = clusters.k_cluster(DATA, k=k)
    return result[0]


def run_experiments():
    """Plot K-means total error against k for k = 2..24.

    Intended to show where adding more clusters stops meaningfully
    reducing error (the "elbow").

    NOTE(review): this function is truncated in this file — the body of
    the ``if i == 2:`` branch at the end is missing (a pasted ``def``
    collides with it), so the module does not parse past this point.
    Recover the original body before using this code.
    """
    ks = []
    errors = []
    # Configure the axes/labels up front, before any points are plotted.
    pyplot.xlabel("K Value")
    pyplot.ylabel("Error")
    pyplot.xlim([0, 25])
    pyplot.ylim([0, 100])
    pyplot.title("Plot of K Value and Error")
    pyplot.ion()  # interactive mode: redraw incrementally as points arrive
    for i in range(2, 25):
        ks.append(i)
        errors.append(run_experiment(i))
        pyplot.plot(ks, errors, "b.")  # blue dot per (k, error) pair
        if i == 2:  # NOTE(review): branch body missing — source truncated here
def __main__():
    """Build a hierarchical clustering of the blog data (Euclidean
    distance) and render it to ex3dendrogram.jpg."""
    names, _words, rows = clusters.read_file("blogdata.txt")
    cluster_tree = clusters.hcluster(rows, distance=euclidean_distance)
    clusters.draw_dendogram(cluster_tree, names, jpeg="ex3dendrogram.jpg")

# I think this weights against groupings that have similar word use rates but different word use counts.