def kmeans_euc(k, news, max_iter=10):
    """Run Euclidean k-means over ``news`` and emit the final clustering.

    The documents are vectorised with ``tfidf`` and the initial centroids
    come from ``start``. Each round assigns points with ``kernel_euc``,
    prints the value returned by ``rss`` for that assignment, and then
    recomputes the centroids with ``new``. After the last round the
    clusters are passed to ``output`` together with the labels produced by
    ``token__.labelize``.

    Args:
        k: Forwarded to ``start`` and ``kernel_euc`` (presumably the number
            of clusters -- confirm against those helpers).
        news: Document collection; forwarded to ``tfidf`` and
            ``token__.labelize``.
        max_iter: Number of assign/update rounds to run. Defaults to 10,
            the count that used to be hard-coded.

    Raises:
        ValueError: If ``max_iter`` is smaller than 1, because no
            assignment would exist to report.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    matrix = tfidf(news)
    centroid = start(k, matrix)

    for _ in range(max_iter):
        cluster = kernel_euc(centroid, matrix, k)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(rss(cluster, centroid))
        # Centroids are refreshed after every round, including the last one,
        # which keeps the sequence of helper calls as it was before.
        centroid = new(cluster)

    # Only the assignment from the final round is reported.
    out = token__.labelize(news)
    output(cluster, matrix, out)
def kernel(centroid, matrix, news):
    """Assign every entry of ``matrix`` to its closest centroid and report.

    Each cluster starts with its own centroid as the only member. The
    entries of ``matrix`` are visited one by one; after every insertion the
    centroids are recomputed with ``new``, so later entries are measured
    against the updated centroids. When finished, the value returned by
    ``rss`` and the labels from ``token__.labelize`` are printed, and the
    clusters are handed to ``output``.

    Args:
        centroid: Sequence of initial centroids.
        matrix: Container that is iterated for its keys and indexed by
            them to obtain each point (e.g. a dict -- confirm against
            ``tfidf``).
        news: Document collection; forwarded to ``token__.labelize``.
    """
    # Seed one cluster per centroid, containing just that centroid.
    cluster = [[seed] for seed in centroid]

    for key in matrix:
        point = matrix[key]
        distance = [euclidean(point, center) for center in centroid]

        if sum(distance) == 0:
            # Distances sum to zero, so no centroid is preferred: choose a
            # cluster at random.
            pick = random.randint(0, len(centroid) - 1)
            cluster[pick].append(point)
            # Drop duplicates via a set round-trip (member order is not kept).
            cluster[pick] = list(set(cluster[pick]))
            centroid = new(cluster)
            continue

        # The distance list is not modified below, so the minimum is fixed.
        nearest = min(distance)
        for idx, dist in enumerate(distance):
            # Every centroid tied for the minimum receives the point, unless
            # that cluster already holds it.
            if dist == nearest and point not in cluster[idx]:
                cluster[idx].append(point)
                centroid = new(cluster)

    print(rss(cluster, centroid))
    out = token__.labelize(news)
    print(out)
    output(cluster, matrix, out)