Exemplo n.º 1
0
        for i in range(m):
            for j in range(i+1,m):
                sim = similarity(clusters[i], clusters[j], adj)
                if sim > best_sim:
                    best_sim = sim
                    best_i = i
                    best_j = j

        cluster = clusters[best_i].union(clusters[best_j])
        del clusters[best_j]
        del clusters[best_i]

        clusters.append(cluster)
    ans = []
    for item in clusters:
        s = set()
        for ind in item:
            s.add(ind2id[ind])
        ans.append(s)
    return ans

if __name__ == '__main__':
    # Group publication ids by author: each author uid (converted to str)
    # maps to the set of ids of the publications that list that author.
    publications_by_author = {}
    for publication in es._get_all_publications():
        for writer in publication['authors']:
            writer_key = str(writer['uid'])
            publications_by_author.setdefault(writer_key, set()).add(publication['id'])

    # Cluster the authors from that mapping and print the result.
    print(cluster_writers(publications_by_author))
Exemplo n.º 2
0
            for j in range(i + 1, m):
                sim = similarity(clusters[i], clusters[j], adj)
                if sim > best_sim:
                    best_sim = sim
                    best_i = i
                    best_j = j

        cluster = clusters[best_i].union(clusters[best_j])
        del clusters[best_j]
        del clusters[best_i]

        clusters.append(cluster)
    ans = []
    for item in clusters:
        s = set()
        for ind in item:
            s.add(ind2id[ind])
        ans.append(s)
    return ans


if __name__ == '__main__':
    # Build {author uid as str: set of publication ids} from every
    # publication returned by the es helper, then print the clustering.
    author_to_pub_ids = {}
    all_pubs = es._get_all_publications()
    for entry in all_pubs:
        for person in entry['authors']:
            key = str(person['uid'])
            if key not in author_to_pub_ids:
                author_to_pub_ids[key] = set()
            author_to_pub_ids[key].add(entry['id'])
    print(cluster_writers(author_to_pub_ids))
Exemplo n.º 3
0
def _get_rank(cites):
    """
    Compute PageRank-style scores from a citation matrix.

    Every row of ``cites`` is scaled by ``1 - alpha``, shifted by ``alpha``
    (``alpha = 0.1``) and rescaled so that it sums to 1.  The resulting matrix
    is then multiplied by itself 50 times in a row; after each multiplication
    a progress percentage (2%, 4%, ..., 100%) is written to
    ``CACHE_DIR / 'pagerank.progress'``.

    :param cites: np.array presenting citation of i -> j; expected to be
        square (n, n), indices are assumed 0..N
    :return: (n, n) float array, the product of an all-ones matrix with the
        powered matrix, so every row holds the same column sums (presumably
        the rank of each index -- confirm how callers read it)
    """
    size = cites.shape[0]
    damping = 0.1
    transition = np.array(cites, dtype=np.float64)

    # Uniform (1, size) row added to every damped row of the matrix.
    teleport = np.full((1, size), damping)

    for row in range(size):
        transition[row, :] = transition[row, :] * (1 - damping) + teleport
        transition[row, :] /= np.sum(transition[row, :])

    # NOTE(review): 50 successive squarings amount to the 2**50-th power of
    # the matrix; confirm that floating-point drift is acceptable here.
    for step in range(50):
        transition = transition.dot(transition)
        progress = '{}%'.format(2 * step + 2)
        (CACHE_DIR / 'pagerank.progress').write_text(progress)

    # Left-multiplying by all ones sums the rows of the powered matrix.
    return np.dot(np.ones((size, size)), transition)


if __name__ == '__main__':
    # Load every publication, compute the ranks with get_rank, hand both to
    # es.update_ranks and finish with es.refresh().
    publications = es._get_all_publications()
    computed_ranks = get_rank(publications)
    es.update_ranks(publications, computed_ranks)
    es.refresh()

# NOTE(review): this statement is at module level, outside the __main__
# guard, so it also runs when the module is imported -- confirm it is intended.
# It prints the 'rank' entry of the publication at index 20.
print(es._get_all_publications()[20]['rank'])
Exemplo n.º 4
0
def _get_rank(cites):
    """
    Derive rank scores from a citation matrix by repeated squaring.

    Rows of ``cites`` are first damped (``row * 0.9 + 0.1``) and divided by
    their sum.  The matrix is then squared 50 times; each squaring is followed
    by writing a percentage string (from "2%" up to "100%") to the file
    ``CACHE_DIR / "pagerank.progress"``.

    :param cites: np.array presenting citation of i -> j; expected to be
        square (n, n), indices are assumed 0..N
    :return: (n, n) float array equal to an all-ones matrix times the squared
        matrix; all of its rows are identical (presumably one rank per
        column index -- confirm against the caller)
    """
    n = cites.shape[0]
    alpha = 0.1
    keep = 1 - alpha
    jump = np.ones((1, n)) * alpha
    matrix = np.array(cites, dtype=np.float64)

    # Damp each row, then rescale it so that it sums to 1.
    for r in range(n):
        matrix[r, :] = matrix[r, :] * keep + jump
        matrix[r, :] /= np.sum(matrix[r, :])

    # percent takes the values 2, 4, ..., 100: one value per squaring.
    for percent in range(2, 102, 2):
        matrix = np.dot(matrix, matrix)
        (CACHE_DIR / "pagerank.progress").write_text("{}%".format(percent))

    ones = np.ones((n, n))
    return np.dot(ones, matrix)


if __name__ == "__main__":
    # Script entry: fetch all publications, rank them via get_rank, pass the
    # publications and their ranks to es.update_ranks, then call es.refresh().
    all_pubs = es._get_all_publications()
    new_ranks = get_rank(all_pubs)
    es.update_ranks(all_pubs, new_ranks)
    es.refresh()

# NOTE(review): not indented under the __main__ guard, so this line executes
# on import as well -- verify that is deliberate. It fetches the publications
# again and prints the "rank" value of the one at index 20.
print(es._get_all_publications()[20]["rank"])