for i in range(m): for j in range(i+1,m): sim = similarity(clusters[i], clusters[j], adj) if sim > best_sim: best_sim = sim best_i = i best_j = j cluster = clusters[best_i].union(clusters[best_j]) del clusters[best_j] del clusters[best_i] clusters.append(cluster) ans = [] for item in clusters: s = set() for ind in item: s.add(ind2id[ind]) ans.append(s) return ans if __name__ == '__main__': pubs = es._get_all_publications() authors = {} for pub in pubs: for author in pub['authors']: uid = str(author['uid']) authors.setdefault(uid, set()) authors[uid].add(pub['id']) print(cluster_writers(authors))
for j in range(i + 1, m): sim = similarity(clusters[i], clusters[j], adj) if sim > best_sim: best_sim = sim best_i = i best_j = j cluster = clusters[best_i].union(clusters[best_j]) del clusters[best_j] del clusters[best_i] clusters.append(cluster) ans = [] for item in clusters: s = set() for ind in item: s.add(ind2id[ind]) ans.append(s) return ans if __name__ == '__main__': pubs = es._get_all_publications() authors = {} for pub in pubs: for author in pub['authors']: uid = str(author['uid']) authors.setdefault(uid, set()) authors[uid].add(pub['id']) print(cluster_writers(authors))
def _get_rank(cites, *, alpha=0.1, iterations=50):
    """
    Approximate page ranks by repeatedly squaring the row-normalised
    transition matrix built from ``cites``.

    :param cites: square np.array presenting citation of i -> j;
        indices are assumed 0..N
    :param alpha: teleport weight added to every entry before each row is
        normalised; must be positive if any row of ``cites`` is all zeros,
        otherwise that row cannot be normalised
    :param iterations: number of squarings; after ``k`` squarings the matrix
        is (up to rounding) the initial transition matrix raised to the
        power ``2 ** k``
    :return: ``(n, n)`` np.array of page ranks; the all-ones accumulator
        makes every row equal to the column sums of the final matrix
    """
    # Damp the raw citation weights, add the uniform teleport term, then
    # rescale every row so it sums to 1 (vectorised over all rows at once).
    p = np.asarray(cites, dtype=np.float64) * (1 - alpha) + alpha
    n = p.shape[0]
    p /= p.sum(axis=1, keepdims=True)

    progress_file = CACHE_DIR / 'pagerank.progress'
    for step in range(1, iterations + 1):
        p = np.dot(p, p)
        # Rounding makes the row sums drift away from 1 and every squaring
        # roughly doubles that drift, so rescale to keep the rows summing
        # to 1 instead of letting the scale error compound.
        p /= p.sum(axis=1, keepdims=True)
        # With the default 50 iterations this writes 2%, 4%, ..., 100%.
        progress_file.write_text('{}%'.format(step * 100 // iterations))

    # NOTE(review): the original line structure was lost; this assumes the
    # projection onto the all-ones matrix runs once, after the loop — confirm.
    return np.dot(np.ones((n, n)), p)


if __name__ == '__main__':
    # Manual run: recompute the ranks of all publications, store them, and
    # print the stored rank of one publication as a spot check.
    # NOTE(review): `get_rank` is not defined in this block; presumably it is
    # the public wrapper defined elsewhere in this module — confirm.
    pubs = es._get_all_publications()
    ranks = get_rank(pubs)
    es.update_ranks(pubs, ranks)
    es.refresh()
    print(es._get_all_publications()[20]['rank'])
def _get_rank(cites, *, alpha=0.1, iterations=50):
    """
    Approximate page ranks by repeatedly squaring the row-normalised
    transition matrix built from ``cites``.

    :param cites: square np.array presenting citation of i -> j;
        indices are assumed 0..N
    :param alpha: teleport weight added to every entry before each row is
        normalised; must be positive if any row of ``cites`` is all zeros,
        otherwise that row cannot be normalised
    :param iterations: number of squarings; after ``k`` squarings the matrix
        is (up to rounding) the initial transition matrix raised to the
        power ``2 ** k``
    :return: ``(n, n)`` np.array of page ranks; the all-ones accumulator
        makes every row equal to the column sums of the final matrix
    """
    # Build the damped transition matrix in one vectorised step: scale the
    # citation weights, add the teleport term, then make each row sum to 1.
    p = np.asarray(cites, dtype=np.float64) * (1 - alpha) + alpha
    n = p.shape[0]
    p /= p.sum(axis=1, keepdims=True)

    progress_file = CACHE_DIR / "pagerank.progress"
    for step in range(1, iterations + 1):
        p = np.dot(p, p)
        # Each squaring roughly doubles any rounding error in the row sums;
        # rescaling here keeps the rows summing to 1 so the error cannot
        # compound across iterations.
        p /= p.sum(axis=1, keepdims=True)
        # With the default 50 iterations this writes 2%, 4%, ..., 100%.
        progress_file.write_text("{}%".format(step * 100 // iterations))

    # NOTE(review): the original line structure was lost; this assumes the
    # projection onto the all-ones matrix runs once, after the loop — confirm.
    return np.dot(np.ones((n, n)), p)


if __name__ == "__main__":
    # Manual run: recompute the ranks of all publications, store them, and
    # print the stored rank of one publication as a spot check.
    # NOTE(review): `get_rank` is not defined in this block; presumably it is
    # the public wrapper defined elsewhere in this module — confirm.
    pubs = es._get_all_publications()
    ranks = get_rank(pubs)
    es.update_ranks(pubs, ranks)
    es.refresh()
    print(es._get_all_publications()[20]["rank"])