Exemplo n.º 1
0
def cluster_docs():
    """Run the K-means document clustering job and render its result page.

    The timed section covers importing ``K_means``, instantiating it and
    calling ``clusterDocs()``. The ``clustering_result.html`` template
    receives the formatted elapsed time (``duration``) and the length of
    the clusterer's ``centroidList`` (``numclusters``).
    """
    stopwatch = Timer()
    stopwatch.start()

    # Imported inside the view, after the timer has started, so the import
    # cost is part of the reported duration.
    from clustering.K_means import K_means

    kmeans = K_means()
    kmeans.clusterDocs()
    stopwatch.end()

    context = {
        'duration': stopwatch.get_time_taken_pretty(),
        'numclusters': len(kmeans.centroidList),
    }
    return render_template('clustering_result.html', **context)
Exemplo n.º 2
0
def page_rank():
    """Run the PageRank job and render a page reporting how long it took.

    The timed section covers importing ``PageRank``, instantiating it and
    calling its ``pageRank()`` method. Only the formatted elapsed time is
    handed to the ``pagerank_result.html`` template.
    """
    stopwatch = Timer()
    stopwatch.start()

    # Imported inside the view, after the timer has started, so the import
    # cost is part of the reported duration.
    from pageRank.PageRank import PageRank

    ranker = PageRank()
    ranker.pageRank()

    stopwatch.end()

    elapsed = stopwatch.get_time_taken_pretty()
    return render_template('pagerank_result.html', duration=elapsed)
Exemplo n.º 3
0
def _generic_index(retrieved_path):
    """Bulk-index the documents under ``retrieved_path`` and render the outcome.

    Sends the directory to the indexing API in one bulk call, decodes the
    JSON reply, and passes the following to ``indexing_result.html``:

    * ``duration`` - formatted time spent on the request and its decoding,
    * ``elastic_response`` - the reply re-serialised as indented JSON,
    * ``success`` - ``True`` when the reply's ``'errors'`` entry is falsy,
    * ``numdocs`` - number of entries in the reply's ``'items'`` list.

    Args:
        retrieved_path: Directory handed both to the ``IndexingAPI``
            constructor and to its bulk-add call.
    """
    stopwatch = Timer()
    stopwatch.start()

    indexer = IndexingAPI(ELASTIC_URL, retrieved_path)
    http_reply = indexer.bulk_add_documents_in_directory(
        retrieved_path, INDEX_NAME, DOCUMENT_TYPE
    )
    payload = http_reply.json()

    succeeded = not payload['errors']
    item_count = len(payload['items'])
    # indent=True is kept as-is; json treats it like an indent of 1.
    rendered_payload = json.dumps(payload, indent=True)
    stopwatch.end()

    context = {
        'duration': stopwatch.get_time_taken_pretty(),
        'elastic_response': rendered_payload,
        'success': succeeded,
        'numdocs': item_count,
    }
    return render_template('indexing_result.html', **context)
Exemplo n.º 4
0
def author_cluster_admin():
    """Cluster authors by similarity, persist the mapping, and render a summary.

    Steps:

    1. Load every ``*.json`` file listed in ``AUTHOR_CLUSTER_SOURCE_DIRECTORY``
       and wrap each decoded document in an ``Author``.
    2. Build a ``Dendogram`` over those authors and run its ``cluster()``.
    3. Take the clusters returned for a minimum similarity of 0.375 and
       reduce each one to the list of its authors' names.
    4. Write an ``author name -> list of names in its cluster`` mapping to
       ``AUTHOR_CLUSTER_FILE`` as JSON.

    The result is shown through the ``indexing_result.html`` template, with
    the cluster list in the ``elastic_response`` slot and the number of
    clusters as ``numdocs``.
    """
    stopwatch = Timer()
    stopwatch.start()

    authors = []
    for filename in list_files(AUTHOR_CLUSTER_SOURCE_DIRECTORY, '*.json'):
        source_path = os.path.join(AUTHOR_CLUSTER_SOURCE_DIRECTORY, filename)
        with open(source_path, 'r') as handle:
            authors.append(Author(json.load(handle)))

    # Imported only once the author data has been loaded, as in the
    # original flow.
    from clustering.authors_cluster import Dendogram

    dendrogram = Dendogram(authors)
    dendrogram.cluster()

    min_similarity = 0.375
    cluster_list = []
    for group in dendrogram.get_clusters(min_similarity):
        members = list(group.authors)
        cluster_list.append([member.name for member in members])

    # If a name appears in more than one cluster, the later cluster wins.
    cluster_dict = {
        name: names
        for names in cluster_list
        for name in names
    }

    with open(AUTHOR_CLUSTER_FILE, 'w') as handle:
        json.dump(cluster_dict, handle)

    stopwatch.end()

    context = {
        'duration': stopwatch.get_time_taken_pretty(),
        'elastic_response': json.dumps(cluster_list, indent=True),
        'success': True,
        'numdocs': len(cluster_list),
    }
    return render_template('indexing_result.html', **context)