示例#1
0
    def page_rank(self):
        """Compute PageRank scores for ``self.graph`` by in-place iteration.

        Nodes without outgoing edges are first given an edge to every node
        (themselves included). Scores start at ``1/N`` and are refined for at
        most ``self.maxCycles`` passes; a pass that changes the scores by
        less than ``self.min_delta`` in total ends the iteration early. The
        pass number and the score table are printed after every pass.

        Returns:
            dict mapping node -> score, or the string
            ``'nodes set is empty!'`` when the graph has no nodes.
        """
        graph = self.graph

        # Dangling nodes get an edge to every node of the graph.
        for source in graph.nodes():
            if len(graph.neighbors(source)) > 0:
                continue
            for target in graph.nodes():
                digraph.add_edge(graph, (source, target))

        all_nodes = graph.nodes()
        total = len(all_nodes)
        if total == 0:
            return 'nodes set is empty!'

        scores = dict.fromkeys(all_nodes, 1.0 / total)
        teleport = (1.0 - self.alpha) / total  # (1 - alpha) / N

        for cycle in range(self.maxCycles):
            delta = 0
            for target in all_nodes:
                inflow = 0
                for source in graph.incidents(target):
                    # A source splits its score evenly over its out-links.
                    share = scores[source] / len(graph.neighbors(source))
                    inflow += self.alpha * share
                inflow += teleport
                delta += abs(scores[target] - inflow)
                # Written back immediately: later nodes of this pass see it.
                scores[target] = inflow

            print("NO.%s iteration" % (cycle + 1))
            print(scores)

            if delta < self.min_delta:
                break
        return scores
示例#2
0
    def page_rank(self):
        """Rank the nodes of ``self.graph`` with an iterative PageRank.

        Every node that has no outgoing edge is first connected to all nodes
        (itself included). Ranks start at ``1/N`` and are updated in place
        for up to ``self.max_iterations`` rounds, stopping as soon as the
        summed absolute change of a round is below ``self.min_delta``. The
        round number and the rank table are printed after each round.

        Returns:
            dict mapping node -> rank; ``{}`` when the graph has no nodes.
        """
        # Dangling nodes (no outgoing edge) are connected to every node.
        for dangling in self.graph.nodes():
            if len(self.graph.neighbors(dangling)) != 0:
                continue
            for other in self.graph.nodes():
                digraph.add_edge(self.graph, (dangling, other))

        node_list = self.graph.nodes()
        node_count = len(node_list)
        if node_count == 0:
            return {}

        ranks = dict.fromkeys(node_list, 1.0 / node_count)
        base_rank = (1.0 - self.damping_factor) / node_count  # (1 - alpha) / N

        for round_no in range(1, self.max_iterations + 1):
            total_change = 0
            for current in node_list:
                received = 0
                for linker in self.graph.incidents(current):
                    linker_rank = ranks[linker]
                    fan_out = len(self.graph.neighbors(linker))
                    received += self.damping_factor * (linker_rank / fan_out)
                received += base_rank
                total_change += abs(ranks[current] - received)
                ranks[current] = received

            print("Iteration number %s" % round_no)
            print(ranks)

            if total_change < self.min_delta:
                break
        return ranks
示例#3
0
    def page_rank(self):
        """Edge-weighted PageRank seeded at ``core_node``.

        Each node without outgoing edges receives two edges of weight 0.5:
        one to itself and one to ``core_node`` (a name resolved outside this
        method). The number of such nodes is printed. All scores start at
        0.0 except ``core_node``, which starts at 1.0. On every pass a node
        collects ``damping_factor * score(source) * edge_weight`` from each
        incoming edge plus the constant ``(1 - damping_factor) / N``; scores
        are written back in place. Iteration stops once the summed absolute
        change of a pass is below ``self.min_delta``; if that never happens
        within ``self.max_iterations`` passes, a notice is printed.

        Returns:
            dict mapping node -> score; ``{}`` when the graph has no nodes.
        """
        print('******')

        # Patch dangling nodes and count how many were patched.
        dangling_total = 0
        for candidate in self.graph.nodes():
            if len(self.graph.neighbors(candidate)) != 0:
                continue
            dangling_total += 1
            digraph.add_edge(self.graph, (candidate, candidate), wt=0.5)
            digraph.add_edge(self.graph, (candidate, core_node), wt=0.5)
        print(dangling_total)

        members = self.graph.nodes()
        member_count = len(members)
        if member_count == 0:
            return {}

        # All of the initial score mass sits on the core node.
        scores = dict.fromkeys(members, 0.0)
        scores[core_node] = 1.0

        base_score = (1.0 - self.damping_factor) / member_count  # (1 - alpha) / N
        print('start iterating...')

        for _ in range(self.max_iterations):
            moved = 0
            for target in members:
                gathered = 0
                for source in self.graph.incidents(target):
                    weight = float(self.graph.edge_weight((source, target)))
                    gathered += self.damping_factor * scores[source] * weight
                gathered += base_score
                moved += abs(scores[target] - gathered)
                scores[target] = gathered

            if moved < self.min_delta:
                break
        else:
            # Reached only when the loop ran out without converging.
            print("finished out of %s iterations!" % self.max_iterations)
        return scores
示例#4
0
    def construct(self):
        """Build the PageRank table for ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included); any exception raised while doing so propagates to the
        caller. Values start at ``1/N`` and are updated in place for at most
        ``self.max_iterations`` passes, ending early when the summed absolute
        change of a pass is below ``self.min_delta``.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        graph = self.graph

        # Give every dangling node an edge to all nodes.
        for lonely in graph.nodes():
            if len(graph.neighbors(lonely)) > 0:
                continue
            for everyone in graph.nodes():
                digraph.add_edge(graph, (lonely, everyone))

        vertices = graph.nodes()
        count = len(vertices)
        if count == 0:
            return {}

        pr_table = dict.fromkeys(vertices, 1.0 / count)  # initial PR value
        constant_part = (1.0 - self.damping_factor) / count  # (1 - alpha) / N

        for _ in range(self.max_iterations):
            pass_change = 0  # summed PR change of this pass
            for vertex in vertices:
                fresh = 0  # PR value this vertex gets in this pass
                for origin in graph.incidents(vertex):
                    portion = pr_table[origin] / len(graph.neighbors(origin))
                    fresh += self.damping_factor * portion
                fresh += constant_part
                pass_change += abs(fresh - pr_table[vertex])
                pr_table[vertex] = fresh

            if pass_change < self.min_delta:
                break

        return pr_table
示例#5
0
    def page_rank(self):
        """
        Compute the PageRank value of every node in ``self.graph``.

        Steps:
          1. Every node without outgoing edges gets an edge to every node
             (itself included), so its value is spread over the whole graph.
          2. All values start equal, at ``1/N``.
          3. Values are updated in place, pass after pass, until the summed
             absolute change of one pass is below ``self.min_delta`` or
             ``self.max_iterations`` passes have run.

        The value table is printed after each pass, followed by a completion
        message that reports the number of passes performed.

        :return: dict mapping each node to its PR value; ``{}`` for an
                 empty graph
        """
        graph = self.graph

        for node in graph.nodes():
            if len(graph.neighbors(node)) == 0:
                for node2 in graph.nodes():
                    digraph.add_edge(graph, (node, node2))

        nodes = graph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}
        # First-round values are uniform: 1 / N.
        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)
        # The (1 - alpha) / N term of the formula.
        damping_value = (1.0 - self.damping_factor) / graph_size
        # Out-degrees do not change while iterating, so look them up once.
        out_degree = dict((node, len(graph.neighbors(node))) for node in nodes)

        flag = False
        passes = 0
        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                # Walk over every node that links to this one.
                for incident_page in graph.incidents(node):
                    # A linking node splits its value evenly over its
                    # out-links and passes one share on.
                    rank += self.damping_factor * (
                        page_rank[incident_page] / out_degree[incident_page])
                # Add the random-jump part.
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            passes = i + 1
            print(page_rank)
            if change < self.min_delta:
                flag = True
                break
        if flag:
            # Report the pass count, not the last node visited.
            print("finished in %s iterations!" % passes)
        else:
            print("finished out of %s iterations!" % self.max_iterations)
        return page_rank
示例#6
0
def load_prop_from_file(filename, HOME_DIR):
    """Read a proposition file and return the graphs it describes.

    The file is a sequence of records. A record is one sentence line,
    followed by one line per node, and is closed by an empty line (the
    empty line after the last record may be missing). A node line has six
    tab-separated fields::

        uid  words  pos  isPredicate  isAsserted  parents

    * ``uid``, ``isPredicate`` and ``isAsserted`` are integers; the latter
      two are converted to booleans.
    * ``words`` is a ``;``-separated list of ``index,word`` entries.
    * ``parents`` is a ``;``-separated list of ``rel,index`` entries and may
      be empty; each entry becomes an edge ``(index, uid)`` labelled ``rel``.
    * ``pos`` is read but not used.

    Edges are added to a graph only after all node lines of its record have
    been processed.

    Args:
        filename: path of the file to read.
        HOME_DIR: forwarded unchanged to ``GraphWrapper``.

    Returns:
        list of ``GraphWrapper`` objects, one per record, in file order.

    Raises:
        ValueError: if a node line does not have exactly six fields or a
            numeric field cannot be parsed.
    """
    def _close_record(graph, labelled_edges, graphs):
        # Attach the collected edges to the graph and store the graph.
        for edge, rel in labelled_edges:
            digraph.add_edge(graph, edge=edge, label=rel)
        graphs.append(graph)

    ret = []
    expecting_sentence = True  # True when the next line opens a new record
    curGraph = None
    parentsList = []

    # ``with`` closes the file even when a line fails to parse.
    with open(filename) as fin:
        for line in fin:
            line = line.strip("\n")

            if expecting_sentence:
                curGraph = GraphWrapper(line, HOME_DIR)
                parentsList = []
                expecting_sentence = False
                continue

            if not line:
                _close_record(curGraph, parentsList, ret)
                expecting_sentence = True
                continue

            uid, words, _pos, isPredicate, isAsserted, parents = line.split("\t")
            uid = int(uid)
            isAsserted = bool(int(isAsserted))
            # Split on the first comma only, so a word that itself contains
            # a comma stays intact.
            text = [
                Word(int(index), word) for index, word in
                [ent.split(",", 1) for ent in words.split(";")]
            ]
            feats = {"top": isAsserted} if isAsserted else {}
            if parents:
                # The index is the last field, so split on the last comma.
                parentsList.extend([
                    ((int(index), uid), rel) for rel, index in
                    [ent.rsplit(",", 1) for ent in parents.split(";")]
                ])

            # The node object is not kept here; it is handed curGraph,
            # presumably so it can register itself there -- TODO confirm.
            newNode.Node(text,
                         bool(int(isPredicate)),
                         feats,
                         curGraph,
                         uid=uid)

    # A record not followed by an empty line would otherwise be lost.
    if not expecting_sentence:
        _close_record(curGraph, parentsList, ret)

    return ret
示例#7
0
    def page_rank(self):
        """Compute unnormalised PageRank values for ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included). Every value starts at 1.0 and the constant term is
        ``1 - damping_factor`` (not divided by the node count). Values are
        updated in place for at most ``self.max_iterations`` passes, ending
        early when the summed absolute change of a pass is below
        ``self.min_delta``.

        A debug trace is printed for every node and incoming edge, the value
        table after each pass, and finally a completion message with the
        number of passes performed.

        Returns:
            dict mapping node -> value; ``{}`` when the graph has no nodes.
        """
        for node in self.graph.nodes():
            if len(self.graph.neighbors(node)) == 0:
                for node2 in self.graph.nodes():
                    digraph.add_edge(self.graph, (node, node2))

        nodes = self.graph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}
        page_rank = dict.fromkeys(nodes, 1.0)
        damping_value = 1.0 - self.damping_factor

        flag = False
        passes = 0
        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                incoming = self.graph.incidents(node)
                print(incoming)
                print('begin')
                for incident_page in incoming:
                    out_links = len(self.graph.neighbors(incident_page))
                    print(incident_page)
                    rank += self.damping_factor * (
                        page_rank[incident_page] / out_links)
                    print((page_rank[incident_page]))
                    print(out_links)
                    print("_______________________________")
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            passes = i + 1
            print("This is NO.%s iteration" % passes)
            print(page_rank)

            if change < self.min_delta:
                flag = True
                break
        if flag:
            # Report the pass count; formatting the last node here was wrong
            # and raised TypeError for tuple nodes.
            print("finished in %s iterations!" % passes)
        else:
            print("finished out of %s iterations!" % self.max_iterations)
        return page_rank
示例#8
0
    def page_rank(self):
        """Compute the PageRank value of every node in ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included). Values start at ``1/N`` and are updated in place. After
        each pass the summed absolute change is compared with
        ``self.min_delta``; the iteration stops as soon as it is smaller, or
        after ``self.max_iterations`` passes.

        The pass number and value table are printed after each pass, then a
        completion message with the number of passes performed.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        graph = self.graph

        # Give every node without out-links an edge to all nodes.
        for node in graph.nodes():
            if len(graph.neighbors(node)) == 0:
                for node2 in graph.nodes():
                    digraph.add_edge(graph, (node, node2))

        nodes = graph.nodes()
        graph_size = len(nodes)

        # Nothing to rank in an empty graph.
        if graph_size == 0:
            return {}

        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)  # initial PR value
        damping_value = (1.0 - self.damping_factor) / graph_size  # (1 - alpha) / N
        # Out-degrees do not change while iterating, so look them up once.
        out_degree = dict((node, len(graph.neighbors(node))) for node in nodes)

        flag = False  # becomes True once a pass changes less than min_delta
        passes = 0

        for i in range(self.max_iterations):
            change = 0  # distance to the previous pass
            for node in nodes:
                rank = 0
                for incident_page in graph.incidents(node):
                    rank += self.damping_factor * (
                        page_rank[incident_page] / out_degree[incident_page])
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            passes = i + 1
            print("This is NO.%s iteration" % passes)
            print(page_rank)

            # The check belongs inside the loop: placed after it, the loop
            # never stops early and `change` is undefined for zero passes.
            if change < self.min_delta:
                flag = True
                break

        if flag:
            print("finished in %s iterations!" % passes)
        else:
            print("finished out of %s iterations!" % self.max_iterations)
        return page_rank
示例#9
0
    def page_rank(self):
        """Compute the PageRank value of every node in ``self.graph``.

        Nodes without outgoing edges are first linked to every *other* node
        (no self-loop is added). Values start at ``1/N``; the initial table
        is printed. Values are then updated in place for at most
        ``self.max_iterations`` passes, ending early when the summed absolute
        change of a pass is below ``self.min_delta``.

        The pass number and value table are printed after each pass, then a
        completion message with the number of passes performed.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        graph = self.graph

        for node in graph.nodes():
            if len(graph.neighbors(node)) == 0:
                # A node without out-links gets an edge to each other node.
                for node2 in graph.nodes():
                    if node2 != node:
                        digraph.add_edge(graph, (node, node2))

        nodes = graph.nodes()
        graph_size = len(nodes)
        if graph_size == 0:
            return {}

        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)  # initial PR value
        damping_value = (1.0 - self.damping_factor) / graph_size  # (1 - d) / N

        print("初始值", page_rank)
        flag = False  # becomes True once a pass changes less than min_delta
        passes = 0

        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                # Every node that links to this one hands over one share.
                for incident_page in graph.incidents(node):
                    rank += self.damping_factor * (
                        page_rank[incident_page] /
                        len(graph.neighbors(incident_page)))
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            passes = i + 1
            print("This is NO.%s iteration" % passes)
            print(page_rank)
            if change < self.min_delta:
                flag = True
                break
        if flag:
            # Report the pass count; formatting the last node here was wrong
            # and raised TypeError for tuple nodes.
            print("finished in %s iterations!" % passes)
        else:
            print("finished out of %s iterations!" % self.max_iterations)
        return page_rank
示例#10
0
    def page_rank(self):
        '''Compute the PR value of every node of the directed graph.

        Works on ``self.digraph``. Nodes without outgoing edges are first
        linked to every node (itself included). Values start at ``1/N`` and
        are updated in place for at most ``self.max_iterations`` passes,
        ending early when the summed absolute change of a pass is at most
        ``self.min_delta``. A completion message is printed at the end.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        '''
        # Set up the working values.
        digraph = self.digraph
        nodes = digraph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}

        # Link each node without out-links to all nodes (itself included).
        for node in nodes:
            if len(digraph.neighbors(node)) == 0:
                for linkNode in nodes:
                    digraph.add_edge((node, linkNode))

        # Float literals keep the division exact on Python 2 as well.
        PR = dict.fromkeys(nodes, 1.0 / graph_size)
        damping_value = (1.0 - self.damping_factor) / graph_size
        # Out-degrees do not change while iterating, so look them up once.
        out_degree = dict((node, len(digraph.neighbors(node))) for node in nodes)

        # Must exist even when the loop never converges or never runs.
        flag = False
        passes = 0
        for i in range(self.max_iterations):
            change = 0
            # Recompute the PR value of every node.
            for node in nodes:
                rank = 0
                for inciNode in digraph.incidents(node):
                    rank += self.damping_factor * PR[inciNode] / out_degree[inciNode]
                rank += damping_value
                change += abs(PR[node] - rank)
                PR[node] = rank

            passes = i + 1
            # Stop once the values have settled.
            if change <= self.min_delta:
                flag = True
                break
        if flag:
            print("finished in %d iterations." % passes)
        else:
            print("finished out of iterations")

        return PR
示例#11
0
    def page_rank(self):
        """Compute the PageRank value of every node in ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included). Values start at ``1/N`` and are updated in place for at
        most ``self.max_iterations`` passes, ending early when the summed
        absolute change of a pass is below ``self.min_delta``.

        The pass number and value table are printed after each pass; at the
        end an empty line and a completion message with the number of passes
        performed are printed.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        graph = self.graph

        # Give every node without out-links an edge to all nodes.
        for node in graph.nodes():
            if len(graph.neighbors(node)) == 0:
                for node2 in graph.nodes():
                    digraph.add_edge(graph, (node, node2))

        nodes = graph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}
        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)  # initial PR value
        damping_value = (1.0 - self.damping_factor) / graph_size  # (1 - alpha) / N
        # Out-degrees do not change while iterating, so look them up once.
        out_degree = dict((node, len(graph.neighbors(node))) for node in nodes)

        flag = False
        count = 0  # number of passes performed so far
        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                for incident_page in graph.incidents(node):
                    rank += self.damping_factor * (
                        page_rank[incident_page] / out_degree[incident_page])
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            # i is zero-based; the pass just completed is number i + 1.
            count = i + 1
            print("This is NO.%s iteration" % count)
            print(page_rank)

            if change < self.min_delta:
                flag = True
                break
        print()
        if flag:
            print("finished in %s iterations!" % count)
        else:
            print("finished out of %s iterations!" % self.max_iterations)
        return page_rank
    def page_rank(self):
        """Compute the PageRank value of every node in ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included). Values start at ``1/N`` and are updated in place for at
        most ``self.max_iterations`` passes, ending early when the summed
        absolute change of a pass is below ``self.min_delta``.

        The pass number and value table are printed after each pass, then a
        completion message with the number of passes performed.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        # Nodes without outgoing links get a link to every node.
        for node in self.graph.nodes():
            if len(self.graph.neighbors(node)) == 0:
                for node2 in self.graph.nodes():
                    digraph.add_edge(self.graph, (node, node2))

        nodes = self.graph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}
        # Give each node an initial PR value.
        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)
        # (1 - alpha) / N part of the formula.
        damping_value = (1.0 - self.damping_factor) / graph_size

        flag = False
        passes = 0
        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                # Traverse all pages that link to this node.
                for incident_page in self.graph.incidents(node):
                    rank += self.damping_factor * (
                        page_rank[incident_page] /
                        len(self.graph.neighbors(incident_page)))
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            passes = i + 1
            print("This is NO.%s iteration" % passes)
            print(page_rank)

            if change < self.min_delta:
                flag = True
                break
        if flag:
            # Report the pass count; formatting the last node here was wrong
            # and raised TypeError for tuple nodes.
            print("finished in %s iterations!" % passes)
        else:
            print("finished out of %s iterations!" % self.max_iterations)
        return page_rank
示例#13
0
    def page_rank(self):
        """Compute the PageRank value of every node in ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included). Values start at ``1/N`` and are updated in place for at
        most ``self.max_iterations`` passes, ending early when the summed
        absolute change of a pass is below ``self.min_delta``.

        The pass number and value table are printed after each pass; a
        notice is printed when the pass limit is reached without converging.
        Output goes through single-argument ``print(...)`` calls, which
        behave the same as a Python 2 print statement and as the Python 3
        function.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        graph = self.graph

        # Give every node without out-links an edge to all nodes.
        for node in graph.nodes():
            if len(graph.neighbors(node)) == 0:
                for node2 in graph.nodes():
                    digraph.add_edge(graph, (node, node2))

        nodes = graph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}
        # Initial PR value of every node.
        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)
        # The (1 - alpha) / N part of the formula.
        damping_value = (1.0 - self.damping_factor) / graph_size
        # Out-degrees do not change while iterating, so look them up once.
        out_degree = dict((node, len(graph.neighbors(node))) for node in nodes)

        flag = False
        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                # Walk over every node that links to this one.
                for incident_page in graph.incidents(node):
                    rank += self.damping_factor * (
                        page_rank[incident_page] / out_degree[incident_page])
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            print('This is NO.%d iteration' % (i + 1))
            print(page_rank)

            if change < self.min_delta:
                flag = True
                break

        if not flag:
            print('finished out of %d iterations' % self.max_iterations)

        return page_rank
示例#14
0
def load_prop_from_file(filename,HOME_DIR):
    """Read a proposition file and return the graphs it describes.

    The file is a sequence of records. A record is one sentence line,
    followed by one line per node, and is closed by an empty line (the
    empty line after the last record may be missing). A node line has six
    tab-separated fields::

        uid  words  pos  isPredicate  isAsserted  parents

    * ``uid``, ``isPredicate`` and ``isAsserted`` are integers; the latter
      two are converted to booleans.
    * ``words`` is a ``;``-separated list of ``index,word`` entries.
    * ``parents`` is a ``;``-separated list of ``rel,index`` entries and may
      be empty; each entry becomes an edge ``(index, uid)`` labelled ``rel``.
    * ``pos`` is read but not used.

    Edges are added to a graph only after all node lines of its record have
    been processed.

    Args:
        filename: path of the file to read.
        HOME_DIR: forwarded unchanged to ``GraphWrapper``.

    Returns:
        list of ``GraphWrapper`` objects, one per record, in file order.

    Raises:
        ValueError: if a node line does not have exactly six fields or a
            numeric field cannot be parsed.
    """
    def _close_record(graph, labelled_edges, graphs):
        # Attach the collected edges to the graph and store the graph.
        for edge, rel in labelled_edges:
            digraph.add_edge(graph, edge=edge, label=rel)
        graphs.append(graph)

    ret = []
    expecting_sentence = True  # True when the next line opens a new record
    curGraph = None
    parentsList = []

    # ``with`` closes the file even when a line fails to parse.
    with open(filename) as fin:
        for line in fin:
            line = line.strip("\n")

            if expecting_sentence:
                curGraph = GraphWrapper(line, HOME_DIR)
                parentsList = []
                expecting_sentence = False
                continue

            if not line:
                _close_record(curGraph, parentsList, ret)
                expecting_sentence = True
                continue

            uid, words, _pos, isPredicate, isAsserted, parents = line.split("\t")
            uid = int(uid)
            isAsserted = bool(int(isAsserted))
            # Split on the first comma only, so a word that itself contains
            # a comma stays intact.
            text = [
                Word(int(index), word) for index, word in
                [ent.split(",", 1) for ent in words.split(";")]
            ]
            feats = {"top": isAsserted} if isAsserted else {}
            if parents:
                # The index is the last field, so split on the last comma.
                parentsList.extend([
                    ((int(index), uid), rel) for rel, index in
                    [ent.rsplit(",", 1) for ent in parents.split(";")]
                ])

            # The node object is not kept here; it is handed curGraph,
            # presumably so it can register itself there -- TODO confirm.
            newNode.Node(text,
                         bool(int(isPredicate)),
                         feats,
                         curGraph,
                         uid=uid)

    # A record not followed by an empty line would otherwise be lost.
    if not expecting_sentence:
        _close_record(curGraph, parentsList, ret)

    return ret
示例#15
0
    def page_rank(self):
        """Compute the PageRank value of every node in ``self.graph``.

        Nodes without outgoing edges are first linked to every node (itself
        included). Values start at ``1/N`` and are updated in place for at
        most ``self.max_iterations`` passes, ending early when the summed
        absolute change of a pass is below ``self.min_delta``.

        The pass number and value table are printed after each pass, then a
        completion message with the number of passes performed.

        Returns:
            dict mapping node -> PR value; ``{}`` when the graph has no nodes.
        """
        graph = self.graph

        # Give every node without out-links an edge to all nodes.
        for node in graph.nodes():
            if len(graph.neighbors(node)) == 0:
                for node2 in graph.nodes():
                    digraph.add_edge(graph, (node, node2))

        nodes = graph.nodes()
        graph_size = len(nodes)

        if graph_size == 0:
            return {}
        page_rank = dict.fromkeys(nodes, 1.0 / graph_size)
        damping_value = (1.0 - self.damping_factor) / graph_size  # (1 - alpha) / N
        # Out-degrees do not change while iterating, so look them up once.
        out_degree = dict((node, len(graph.neighbors(node))) for node in nodes)

        flag = False
        passes = 0
        for i in range(self.max_iterations):
            change = 0
            for node in nodes:
                rank = 0
                for incident_page in graph.incidents(node):
                    rank += self.damping_factor * (
                        page_rank[incident_page] / out_degree[incident_page])
                rank += damping_value
                change += abs(page_rank[node] - rank)
                page_rank[node] = rank

            passes = i + 1
            print("This is No.%s iteration" % passes)
            print(page_rank)

            if change < self.min_delta:
                flag = True
                break
        if flag:
            # Report the pass count; formatting the last node here was wrong
            # and raised TypeError for tuple nodes.
            print('Finished in %s iterations!' % passes)
        else:
            print('Finished out of %s iterations!' % self.max_iterations)
        return page_rank
示例#16
0
    def add_edge(self, edge, label=''):
        """
        Add an edge given as a pair of Node objects.

        Overrides the add_edge function of digraph: the pair is translated
        to (node1.uid, node2.uid) and that uid pair is what gets passed on
        to digraph.add_edge, together with the label.

        @type  edge: (node1,node2)
        @param edge: the edge to add

        @type  node1: Node
        @param node1: origin of new edge

        @type  node2: Node
        @param node2: destination of new edge

        @param label: label forwarded to digraph.add_edge

        @return: the value returned by digraph.add_edge
        """
        origin, destination = edge
        uid_pair = (origin.uid, destination.uid)
        return digraph.add_edge(self, edge=uid_pair, label=label)
示例#17
0
 def add_edge(self, edge, label=''):
     """
     Add an edge given as a pair of Node objects.

     Overrides the add_edge function of digraph: the pair is translated
     to (node1.uid, node2.uid) and that uid pair is what gets passed on
     to digraph.add_edge, together with the label.

     @type  edge: (node1,node2)
     @param edge: the edge to add

     @type  node1: Node
     @param node1: origin of new edge

     @type  node2: Node
     @param node2: destination of new edge

     @param label: label forwarded to digraph.add_edge

     @return: the value returned by digraph.add_edge
     """
     origin, destination = edge
     uid_pair = (origin.uid, destination.uid)
     return digraph.add_edge(self, edge=uid_pair, label=label)