Example #1
    def rank(self, nodes, edges) -> None:
        d = 0.85  # damping coefficient, usually 0.85
        min_diff = 1e-2  # convergence threshold
        steps = 1000  # maximum number of iterations
        self.weight_default = 1.0 / (len(nodes) or 1.0)
        self.score_dict = {k: self.weight_default for k in nodes}
        self.indegree_dict = {
            k: self.indegree_nodes(k, edges) for k in nodes
        }
        self.outdegree_dict = {
            k: len(self.outdegree_nodes(k, edges)) for k in nodes
        }
        step_tuple = (1000, 0)  # (previous, current) sum of scores

        for step in range(steps):
            for node in nodes:
                self.score(node, d)
            step_tuple = (step_tuple[1], sum(self.score_dict.values()))
            if abs(step_tuple[1] - step_tuple[0]) <= min_diff:
                break

        # Standardize, then normalize the scores
        if self.score_dict:
            self.score_dict = utils.standardize_dict(self.score_dict)
            self.score_dict = utils.normalize_dict(self.score_dict)

            assert np.min(list(self.score_dict.values())) >= 0

        for node in nodes.values():
            node.weight = self.score_dict[node.name]
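
The snippet above delegates the final scaling to project-local helpers utils.standardize_dict and utils.normalize_dict, whose definitions are not shown here. Below is a minimal sketch of what they plausibly do, assuming z-score standardization followed by min-max scaling into [0, 1] (consistent with the assert that the minimum score is non-negative); the bodies are illustrative assumptions, not the project's actual code.

import numpy as np

def standardize_dict(d):
    # Assumed behavior: z-score each value (zero mean, unit variance).
    values = np.array(list(d.values()), dtype=float)
    mean, std = values.mean(), values.std() or 1.0  # guard constant input
    return {k: (v - mean) / std for k, v in d.items()}

def normalize_dict(d):
    # Assumed behavior: min-max scale values into [0, 1].
    lo, hi = min(d.values()), max(d.values())
    span = (hi - lo) or 1.0  # guard against constant input
    return {k: (v - lo) / span for k, v in d.items()}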
Example #2
def equalize_term_and_entities(graph):
    """Normalize NE weights and tf-idf weights to obtain comparable weight."""
    term_weights = {'dummy0000': 0.0}
    entity_weights = {'dummy0000': 0.0}

    for node in graph.nodes.values():
        if node.node_type == 'term':
            term_weights[node] = node.weight
        else:
            entity_weights[node] = node.weight

    normalized_term_weights = utils.normalize_dict(term_weights)
    normalized_entity_weights = utils.normalize_dict(entity_weights)

    del normalized_term_weights['dummy0000']
    del normalized_entity_weights['dummy0000']

    for node in graph.nodes.values():
        if node.node_type == 'term':
            node.weight = normalized_term_weights[node]
        else:
            node.weight = normalized_entity_weights[node]
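
The 'dummy0000' entries pin the minimum of each weight dictionary at 0.0, so that under min-max normalization the real weights are scaled against zero rather than having the smallest real weight collapse to 0; the dummies are dropped once normalization is done. A small worked illustration, assuming the normalize_dict sketch above:

weights = {'dummy0000': 0.0, 'cat': 0.2, 'dog': 0.4}
normalized = normalize_dict(weights)  # {'dummy0000': 0.0, 'cat': 0.5, 'dog': 1.0}
del normalized['dummy0000']
# Without the dummy entry, 'cat' (the minimum) would have mapped to 0.0.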
Example #3
    def rank(self, nodes, edges) -> None:
        d = 0.85  # damping coefficient, usually 0.85
        min_diff = 1e-5  # convergence threshold
        steps = 1000  # maximum number of iterations
        weight_default = 1.0 / (len(nodes) or 1.0)

        nodeweight_dict = defaultdict(float)  # weight of each node
        outsum_node_dict = defaultdict(float)  # summed edge weight per node

        for node in nodes.values():  # initialize node weights uniformly
            nodeweight_dict[node.name] = weight_default
            # Sum of the weights of all edges incident to this node
            outsum_node_dict[node.name] = sum(
                edges[edge_key] for edge_key in edges
                if node.name in edge_key)

        # Save node names as a sorted list for deterministic iteration
        sorted_keys = sorted(nodes.keys())
        step_sums = [0]  # running sum of node weights per iteration
        for step in range(1, steps):
            new_weights = defaultdict(float)
            for edge, weight in edges.items():
                node_a, node_b = edge
                new_weights[node_a] += (
                    weight / outsum_node_dict[node_b] * nodeweight_dict[node_b])
                new_weights[node_b] += (
                    weight / outsum_node_dict[node_a] * nodeweight_dict[node_a])

            for node in sorted_keys:
                nodeweight_dict[node] = (1 - d) + d * new_weights[node]

            step_sums.append(sum(nodeweight_dict.values()))

            if abs(step_sums[step] - step_sums[step - 1]) <= min_diff:
                break

        # Standardize, then normalize the weights
        if nodeweight_dict:
            nodeweight_dict = utils.standardize_dict(nodeweight_dict)
            nodeweight_dict = utils.normalize_dict(nodeweight_dict)

            assert np.min(list(nodeweight_dict.values())) >= 0, nodeweight_dict

        for node in nodes.values():
            node.weight = nodeweight_dict[node.name]
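
For context, here is a self-contained sketch of the data shapes the method above appears to expect, together with one step of its edge-weighted update. The Node class and the toy graph are invented for illustration; the real project types are not shown in these examples.

from collections import defaultdict

class Node:
    # Hypothetical stand-in for the project's node type.
    def __init__(self, name):
        self.name = name
        self.weight = 0.0

nodes = {name: Node(name) for name in ('a', 'b', 'c')}
edges = {('a', 'b'): 1.0, ('b', 'c'): 2.0, ('a', 'c'): 1.0}

# One update step of the edge-weighted TextRank recurrence used above.
d = 0.85
weight = {name: 1.0 / len(nodes) for name in nodes}
outsum = {name: sum(w for key, w in edges.items() if name in key)
          for name in nodes}
new_weights = defaultdict(float)
for (node_a, node_b), w in edges.items():
    new_weights[node_a] += w / outsum[node_b] * weight[node_b]
    new_weights[node_b] += w / outsum[node_a] * weight[node_a]
for name in nodes:
    nodes[name].weight = (1 - d) + d * new_weights[name]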
Example #4
        # Build (initialize) graph nodes and edges.
        candidate_graph.build(**build_arguments)
        candidate_graph.trim(d_core_k, d_core_l)
        # Recalculate node weights using TextRank.
        if args.textrank:
            candidate_graph.rank()
        relevance, diversity_type = candidate_graph.compare(
            query_graph, args.novelty, args.node_edge_l)
        ranking[docid] = relevance
        addition_types[docid] = diversity_type

    # Sort retrieved documents according to new similarity score.
    sorted_ranking = utils.normalize_dict({
        k: v
        for k, v in sorted(
            ranking.items(), key=lambda item: item[1], reverse=True)
    })

    # Diversify
    if args.diversify:
        nr_types = len(
            np.unique([
                item for sublist in addition_types.values() for item in sublist
            ]))
        present_types = []
        to_delete_docids = []
        for key in sorted_ranking.keys():
            if len(present_types) == nr_types:
                break
            if len(addition_types[key]) > 1: