示例#1
0
class ScikitNetworkScorer:
    """Uniform wrapper around a scikit-network node-ranking algorithm.

    ``algo`` selects the scorer ('diffusion', 'closeness', 'harmonic');
    any other value — including None — falls back to PageRank.
    ``score`` accepts a graph described as ``{'node': [...], 'edge': [...]}``
    dicts and returns a ``{node_name: score}`` mapping.
    """

    def __init__(self,
                 algo=None,
                 damping_factor=0.85,
                 solver='naive',
                 n_iter=10,
                 tol=0,
                 undirected=False,
                 method='exact'):
        # Map the algorithm label to a configured scikit-network scorer.
        if algo == 'diffusion':
            scorer = Diffusion(n_iter=n_iter)
        elif algo == 'closeness':
            scorer = Closeness(method=method, tol=tol)
        elif algo == 'harmonic':
            scorer = Harmonic()
        else:
            # Default: PageRank with the supplied hyper-parameters.
            scorer = PageRank(damping_factor=damping_factor,
                              solver=solver,
                              n_iter=n_iter,
                              tol=tol)
        self.scorer = scorer
        self.undirected = undirected

    def score(self, data):
        """Score every node of ``data`` and return ``{name: score}``."""
        index_of = {}
        for position, node in enumerate(data['node']):
            index_of[node['name']] = position
        edge_rows = [(index_of[e['node'][0]],
                      index_of[e['node'][1]],
                      e['weight'])
                     for e in data['edge']]
        adjacency = edgelist2adjacency(np.array(edge_rows),
                                       undirected=self.undirected)
        # If the first node carries a weight, node weights act as
        # personalization seeds for the scorer.
        if data['node'][0].get('weight'):
            seeds = np.array([n['weight'] for n in data['node']])
            scores = self.scorer.fit_transform(adjacency, seeds)
        else:
            scores = self.scorer.fit_transform(adjacency)
        return {name: scores[idx] for name, idx in index_of.items()}
示例#2
0
def pagerank_scikit(G, sim_mat, user_idx, alpha, beta):
    """Personalized PageRank for every node listed in ``user_idx``.

    Builds a row-stochastic transition matrix from ``G``, blends it with
    ``sim_mat`` (weights ``beta`` and ``1 - beta``), then runs one
    personalized PageRank (damping factor ``alpha``) per user and stacks
    the resulting score vectors.

    Returns a ``(len(user_idx), n_nodes)`` numpy array.
    """
    nodelist = G.nodes()
    # NOTE(review): nx.to_scipy_sparse_matrix was removed in networkx >= 3;
    # switch to nx.to_scipy_sparse_array when upgrading networkx.
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight='weight',
                                  dtype=float)
    # Row-normalize; zero-degree rows are left as all-zero rows.
    # `scipy.array` was removed from SciPy — use numpy directly.
    S = np.asarray(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    M = beta * M + (1 - beta) * sim_mat

    pagerank = PageRank(damping_factor=alpha)

    ppr_mat = []
    # max(1, ...) guards len(user_idx) < 3, which previously made
    # print_every == 0 and raised ZeroDivisionError in the modulo below.
    print_every = max(1, len(user_idx) // 3)
    start = time.time()
    for done, i in enumerate(user_idx, start=1):
        # Personalization: all restart mass on node i.
        pr = pagerank.fit_transform(M, {i: 1})
        ppr_mat.append(pr)
        if done % print_every == 0:
            # Progress is based on the iteration count, not on the node id,
            # so it stays correct for arbitrary user_idx contents.
            print('{}% {}sec'.format(done / len(user_idx) * 100,
                                     time.time() - start))

    return np.array(ppr_mat)
示例#3
0
    def compute_rank(self, file_name):
        """Rank destinations with PageRank, render an SVG of the graph and
        a Paris dendrogram, and write both images to disk.
        """
        n = len(self.destin_idx)
        x = csr_matrix((self.v, (self.b, self.a)), shape=(n, n), dtype=float)
        print(x)
        # Elementwise product with the transpose keeps only reciprocal edges.
        adjacency = x.multiply(x.transpose())
        scores = PageRank().fit_transform(adjacency)
        graph_svg = svg_graph(adjacency,
                              names=self.destin_names,
                              scores=scores,
                              display_node_weight=True,
                              node_order=np.argsort(scores))
        with open(file_name, "w") as out:
            print(file_name)
            print(scores)
            out.write(graph_svg)

        print(self.v)
        print(self.destin_names)

        # Hierarchical clustering of the same adjacency.
        dendrogram = Paris().fit_transform(adjacency)
        dendro_svg = svg_dendrogram(dendrogram,
                                    self.destin_names,
                                    n_clusters=5,
                                    rotate=True)
        with open("dento_" + file_name, "w") as out:
            out.write(dendro_svg)
示例#4
0
class RelavanceScore(TorchModel):
    '''
    Calculate relavance score between countries and concepts
    Return a N*M matrix
    N: country num
    M: concept num
    '''
    def __init__(self):
        super(RelavanceScore, self).__init__()
        # Shared scikit-network PageRank instance, reused for every run() call.
        self.pagerank = PageRank()
        
    def run(self, adj, seeds, paper_id, country_id, concept_id, paper_country, paper_concept):
        # Personalized PageRank over the full graph.
        # NOTE(review): the offset arithmetic below assumes node ids in `adj`
        # are laid out as [papers | concepts | countries] — confirm against
        # the code that builds `adj` and the id dicts.
        pr_scores = self.pagerank.fit_transform(adj, seeds) # pagerank scores
        w_paper = pr_scores[0:len(paper_id)]
        w_paper /= w_paper.sum() # normalize the paper weight
        
        # calculate the paper-county relavance score
        paper_country_edge = []
        country = set()
        for p in paper_country:
            for c in paper_country[p]:
                country.add(c)
                # Edge weight 1/deg(p): each paper splits its mass evenly
                # over its countries.
                paper_country_edge.append((paper_id[p], \
                                           country_id[c] - (len(paper_id) + len(concept_id)), 
                                           1 / len(paper_country[p])))
        # add the countries without papers and set their edges to 0 
        for c in country_id:
            if c not in country:
                # Zero-weight edge to paper 0 just to make the biadjacency
                # matrix cover every country column.
                paper_country_edge.append((paper_id[0], \
                                           country_id[c] - (len(paper_id) + len(concept_id)), 0))
        country_paper_mat = sknetwork.utils.edgelist2biadjacency(paper_country_edge).transpose()
        
        # calculate the paper-concept relavance score
        paper_concept_edge = []
        concept = set()
        # NOTE(review): this iterates papers from `paper_country`, so papers
        # present only in `paper_concept` are skipped — confirm intended.
        for p in paper_country:
            # add the paper without concepts and set their edges to 0
            if p not in paper_concept:
                # NOTE(review): `concept_id[72]` is a magic key used only as a
                # placeholder column for the zero edge — verify it always exists.
                paper_concept_edge.append((paper_id[p], concept_id[72] - (len(paper_id)), 0))
                continue
            for c in paper_concept[p]:
                # Weight: even split over the paper's concepts, scaled by the
                # paper's normalized PageRank mass.
                paper_concept_edge.append((paper_id[p], concept_id[c] - (len(paper_id)), \
                                           1 / len(paper_concept[p]) * w_paper[paper_id[p]]))
                concept.add(c)
        # add the concepts not belonging to any papers and set their edges to 0
        for c in concept_id:
            if c not in concept:
                paper_concept_edge.append((paper_id[0], concept_id[c] - (len(paper_id)), 0))
        paper_concept_mat = sknetwork.utils.edgelist2biadjacency(paper_concept_edge)
        # (country x paper) . (paper x concept) -> country x concept scores.
        country_concept = country_paper_mat.dot(paper_concept_mat)
        return country_concept
示例#5
0
    def get_triples_of_event(self, seed_vertex, topN=10):
        """Run personalized PageRank from ``seed_vertex`` and return the
        ``topN`` most relevant triples.
        """
        # Never ask for more triples than we actually have.
        topN = min(topN, len(self.unique_triples))
        # All restart probability is placed on the seed vertex.
        scores = PageRank().fit_transform(self.edge_weight, {seed_vertex: 1})
        ranked = np.argsort(-scores)
        triples_of_event = [self.unique_triples[idx] for idx in ranked[:topN]]

        debug_logger.debug("seed vertex: {}".format(seed_vertex))
        for idx in ranked:
            debug_logger.debug("weight: {}, triple: {}".format(
                scores[idx], self.unique_triples[idx].to_string()))

        return triples_of_event
示例#6
0
from sknetwork.ranking import PageRank
from sknetwork.data import load_edge_list, house

# Toy demo: PageRank scores for the 5-node "house" graph.
graph = house()

ranker = PageRank(solver='push')
ranking = ranker.fit_transform(graph)
print(ranking)
"""
the result should be like: [0.17301832 0.22442742 0.1823948  0.18926552 0.23089394]
"""