Пример #1
0
 def _node_factory(self, cui, description, weight, original_line=None):
     """Build a Node, register it in the node cache, and return it.

     If TF*IDF scores are available (``self._tf_idf_scores`` is not None),
     the node's weight is multiplied by the score stored under ``cui``
     before the node is cached.
     """
     node = Node(cui, description, weight, original_line)
     scores = self._tf_idf_scores
     if scores is not None:
         # Scale the weight by this concept's TF*IDF score.
         node.weight = node.weight * scores[cui]
     # Every node created here is also recorded in the internal cache.
     self._node_cache.add(node)
     return node
Пример #2
0
 def _node_factory(self, cui, description, weight, original_line=None):
     """Create a Node, optionally re-weighted by TF*IDF, and cache it.

     The node is built from the given arguments. When
     ``self._tf_idf_scores`` is set, its weight is replaced by
     ``weight * self._tf_idf_scores[cui]``. The node is then added to
     ``self._node_cache`` and returned.
     """
     created = Node(cui, description, weight, original_line)
     tf_idf = self._tf_idf_scores
     if tf_idf is not None:
         created.weight = created.weight * tf_idf[cui]
     # Keep the internal node list up to date.
     self._node_cache.add(created)
     return created
Пример #3
0
 def testNodeLinks(self):
     # Expectation under test: a link a->b with weight -1 compares (and
     # hashes) equal to b->a with weight 1, and both differ from a->b with
     # weight 1.
     first = Node("c1234", "Fake node", 0.987)
     second = Node("c45678", "Another fake node", 0.123)
     forward = Link(first, second, 1)
     negated = Link(first, second, -1)
     backward = Link(second, first, 1)

     # Equality
     self.assertNotEqual(forward, negated)  # The direction is the opposite
     self.assertEqual(negated, backward)
     self.assertNotEqual(forward, backward)

     # The hashes must match too
     forward_hash = hash(forward)
     negated_hash = hash(negated)
     self.assertNotEqual(forward_hash, negated_hash)  # Opposite direction
     self.assertEqual(hash(negated), hash(backward))
     self.assertNotEqual(hash(forward), hash(backward))
Пример #4
0
 def testForHashClashesNumberOfRandomNodes(self):
     # One million rounds: build two links that share their first node and
     # check that their hashes differ unless the second nodes happen to
     # have the same id.
     def random_cui():
         return "C%07d" % random.randint(0, 9999999)

     for _ in xrange(1000000):
         # Tuple elements are evaluated left to right, so the ids are drawn
         # in the order: shared, first target, second target.
         shared_id, target_id, other_target_id = (random_cui(),
                                                  random_cui(),
                                                  random_cui())
         shared = Node(shared_id, "Fake node", 1)
         target = Node(target_id, "Fake node 2", 1)
         other_target = Node(other_target_id, "Yet Another Fake Node", 1)
         first_link = Link(shared, target, 1)
         second_link = Link(shared, other_target, 1)
         if target_id != other_target_id:
             self.assertNotEqual(hash(first_link), hash(second_link))
         else:
             # Very infrequent!
             self.assertEqual(hash(first_link), hash(second_link))
Пример #5
0
 def testLinkMatrixConversion(self):
     # Converts the filled-in test graph to a mapped link matrix and checks
     # a few cells, looked up through the positions of equivalent nodes.
     self.fill_in_graph(self.test_graph)
     a_matrix = self.test_graph.as_mapped_link_matrix()
     # assertTrue replaces the deprecated assert_ alias; the exact-type
     # check itself is unchanged.
     self.assertTrue(type(a_matrix) is MappedLinkMatrix)

     def link_weight(source, target):
         # Matrix cell addressed by (position of source, position of target).
         return a_matrix[a_matrix.get_term_position(source),
                         a_matrix.get_term_position(target)]

     from_node = Node('2', 'Node2', 1)
     to_node = Node('4', 'Node4', 1)
     self.assertEqual(link_weight(from_node, to_node), 0.0)

     from_node = Node('3', 'Node3', 1)
     # There's no relation from 3 to 4 - it's from 4 to 3 (it was negative)
     self.assertEqual(link_weight(from_node, to_node), 0.0)
     self.assertEqual(link_weight(to_node, from_node), 1.0)
Пример #6
0
 def graph_and_rank(self, article):
     """Simulate graphing and ranking from the lines of ``article``.

     Each entry of ``article.lines`` becomes a ``(Node, score)`` pair. The
     node is built from the line's CUI, description and confidence, and
     the line's confidence is used as the score. Returns the pairs as a
     list, in line order, for a RankedConverter to consume.
     """
     return [
         (Node(line.CUI, line.description, line.confidence),
          line.confidence)
         for line in article.lines
     ]
Пример #7
0
 def testConvertingAdirectionalGraph(self):
     # Same checks as for the directional graph, but the graph is built
     # with AdirectionalLink, so the 3/4 relation is expected both ways.
     self.fill_in_graph(self.test_graph, AdirectionalLink)
     a_matrix = self.test_graph.as_mapped_link_matrix()
     # assertTrue replaces the deprecated assert_ alias; the exact-type
     # check itself is unchanged.
     self.assertTrue(type(a_matrix) is MappedLinkMatrix)

     def link_weight(source, target):
         # Matrix cell addressed by (position of source, position of target).
         return a_matrix[a_matrix.get_term_position(source),
                         a_matrix.get_term_position(target)]

     from_node = Node('2', 'Node2', 1)
     to_node = Node('4', 'Node4', 1)
     self.assertEqual(link_weight(from_node, to_node), 0.0)

     from_node = Node('3', 'Node3', 1)
     # There's no relation from 3 to 4 in the directional matrix
     # but there should be one here
     self.assertEqual(link_weight(from_node, to_node), 1.0)
     self.assertEqual(link_weight(to_node, from_node), 1.0)
Пример #8
0
def main():
    """Print an extractive summary of the text file named in sys.argv[1].

    The text is split into sentences, each sentence becomes a graph node,
    every sentence pair is linked with its similarity, and the graph is
    ranked with a TextRanker. The top-ranked sentences are printed, in
    original order, as a single space-joined paragraph.

    sys.argv[2], when present and parseable as a float, is the fraction of
    sentences to keep; otherwise 20% of the sentences are kept.
    """
    # Read the whole input; the context manager guarantees the file is closed.
    with open(sys.argv[1], 'rU') as source:
        text = source.read().strip()

    # Split into sentences, strip surrounding whitespace, drop empty ones.
    stripped = [s.strip() for s in sentence_detector.tokenize(text)]
    sentences = [s for s in stripped if s]

    # Create one Node per sentence, with a unique ID based on sequential
    # numbering, the contents of the sentence, and an initial node weight of 1.0
    sentnodes = [Node(position, sentence, 1.0)
                 for position, sentence in enumerate(sentences)]

    # Create an empty graph
    sentgraph = Graph()

    # Compute the similarity between every pair of sentences and add a link to
    # the graph connecting those nodes. The link is adirectional.
    for p in sentence_pairs(sentences):
        first, second = p[0], p[1]
        similarity = sentence_similarity(sentences[first], sentences[second])
        sentgraph.add_relationship(
            AdirectionalLink(sentnodes[first], sentnodes[second], similarity))

    # Create a default TextRanker (that implements TextRank as described) and
    # wrap it in the MappedRanker class, which returns (node, score) pairings
    # instead of just scores
    ranker = MappedRanker(TextRanker())
    # Convert the graph to a link matrix and run the ranker on it
    matrix = sentgraph.as_mapped_link_matrix()
    results = ranker.evaluate(matrix)

    # The ranker returns a RankedResultSet that behaves like a list of
    # (node, score) pairings. By default these are sorted in reverse order,
    # i.e., the highest scores at the beginning. To obtain the desired sentences
    # we just trim the list to size.
    try:
        # Try to get a float from the command line
        desired_length = int(round(float(sys.argv[2]) * len(sentences)))
    except (IndexError, ValueError, OverflowError):
        # Missing argument, unparseable text, or a NaN/infinite fraction:
        # fall back to keeping 20% of the sentences.
        desired_length = int(round(float(len(sentences)) * 0.2))

    # For the final output we only need the node, not its score, trimmed to
    # the desired length.
    shortened_results = [pair[0] for pair in results][:desired_length]
    # Now, for presentation purposes, we reorder the truncated list in its
    # original order.
    shortened_results.sort(cmp=cmp_two_nodes_by_id)

    # Output the summary as a paragraph. With a single parenthesised argument
    # this prints the same text whether print is a statement or a function.
    print(' '.join([node.name for node in shortened_results]))
Пример #9
0
    def from_graphml_file(self, file_object, default_link=Link):
        """Load nodes and edges from a GraphML document into this graph.

        ``file_object`` is handed to ``xml.etree.ElementTree.iterparse``.
        ``<key>`` elements are read to learn which data key ids carry the
        ``node.description``, ``node.MR_id``, ``edge.weight`` and
        ``edge.description`` attributes. Each ``<node>`` becomes a
        ``Node(MR_id, description, 1.0)`` (description defaults to
        "NoName"). Each ``<edge>`` becomes
        ``default_link(source, target, weight, description)`` and is added
        through ``self.add_relationship``. A missing or unreadable edge
        weight is logged and replaced by 1.0; an undeclared edge description
        key yields "". ``self.consolidate_graph()`` is called at the end.

        Raises KeyError if the node description/MR_id keys were never
        declared, or if an edge refers to a node id that has not been read.
        """
        from xml.etree.ElementTree import iterparse

        graphml_ns = "{http://graphml.graphdrawing.org/xmlns}"
        key_tag = graphml_ns + "key"
        node_tag = graphml_ns + "node"
        edge_tag = graphml_ns + "edge"
        data_tag = graphml_ns + "data"

        def get_subelement_data(elem, key):
            """Return the text of the first matching <data> element under
            (or at) ``elem`` whose 'key' attribute equals ``key``, or None."""
            for candidate in elem.iter():
                if candidate.tag == data_tag and candidate.get('key') == key:
                    return candidate.text
            return None

        nodes = {}
        # Discover the names of the attributes we're looking for by
        # investigating the keys, then actually read the file.
        keystore = {}
        for _event, element in iterparse(file_object):
            tag = element.tag
            if tag == key_tag:
                attr_name = element.get('attr.name')
                if attr_name is None:
                    continue
                keystore[element.get('for') + '.' + attr_name] = \
                    element.get('id')
            elif tag == node_tag:
                nodename = get_subelement_data(element,
                                               keystore['node.description'])
                if nodename is None:
                    nodename = "NoName"
                nodekey = get_subelement_data(element, keystore['node.MR_id'])
                nodes[element.get('id')] = Node(nodekey, nodename, 1.0)
            elif tag == edge_tag:
                n1 = nodes[element.get('source')]
                n2 = nodes[element.get('target')]
                try:
                    weight = float(
                        get_subelement_data(element, keystore['edge.weight']))
                except (KeyError, TypeError, ValueError):
                    # KeyError: no weight key declared; TypeError: the edge
                    # has no weight data (float(None)); ValueError: the
                    # weight text is not a number.
                    logging.warning(
                        'Failed at reading weight because of:\n%s',
                        traceback.format_exc())
                    weight = 1.0
                try:
                    relname = get_subelement_data(element,
                                                  keystore['edge.description'])
                except KeyError:
                    # No edge description key was declared in the file.
                    relname = ""
                self.add_relationship(default_link(n1, n2, weight, relname))
        self.consolidate_graph()
Пример #10
0
def end_element(name, record_callback):
    """Process the end of the element called ``name``.

    Presumably an end-of-element handler for a streaming XML parser (the
    caller is not visible here). The character data accumulated in the
    module-level ``current_content`` is split into non-empty lines and
    dispatched on the lower-cased, stripped element name, updating the
    module-level ``current_*`` state. When the element is 'article',
    ``record_callback(current_article, current_links)`` is called and all
    per-article state is reset. ``current_content`` is always emptied at
    the end.
    """
    global current_element, current_article, current_content
    global current_cuis, current_names, current_nodes
    global current_relations, current_strengths, current_links

    text = ''.join(current_content)
    data = [line for line in text.splitlines() if len(line) > 0]
    name = name.strip().lower()

    # Only one of these element names can match for a given call.
    if name == 'cuis':
        current_cuis = handle_cuis(data)
    elif name == 'names':
        current_names = handle_names(data)
    elif name == 'connectivities':
        # Uses the node list as it stood before the rebuild just below.
        current_strengths = handle_connectivities(data, current_nodes)

    # Rebuild the node list whenever both CUIs and names are known.
    if current_cuis is not None and current_names is not None:
        current_nodes = [
            Node(cui, label, 1)
            for cui, label in zip(current_cuis, current_names)
        ]

    if name == 'relations':
        current_relations = handle_relations(data, current_nodes)

    # Very inefficient if there are, in fact, relations, but if there aren't
    # this will make it actually work.
    if current_strengths is not None:
        current_links = make_links(current_strengths, current_relations)
        # Free some memory
        current_strengths = None
        current_relations = None

    if name == 'article':
        record_callback(current_article, current_links)
        # Reset all per-article state.
        current_element = None
        current_article = None
        current_cuis = None
        current_names = None
        current_nodes = None
        current_relations = None
        current_strengths = None
        current_links = None

    current_content = []
Пример #11
0
 def buildRankedResultSet(self):
     """Return a two-item list of ``(Node, score)`` pairs.

     A ranked result set holds nodes and scores instead of concepts. The
     nodes are built from the two concepts given by
     ``self.buildConcepts()``, with weights and scores 2 and 1.
     """
     first_concept, second_concept = self.buildConcepts()
     top_node = Node(first_concept.CUI, first_concept.concept_name, 2)
     second_node = Node(second_concept.CUI, second_concept.concept_name, 1)
     ranked = [(top_node, 2)]
     ranked.append((second_node, 1))
     return ranked
 def __init__(self, node_id, node_name, node_weight, mesh_expression):
     """Run Node's initialiser on this instance, then store
     ``mesh_expression`` unchanged in ``self._mesh``."""
     Node.__init__(self,
                   node_id,
                   node_name,
                   node_weight)
     self._mesh = mesh_expression
Пример #13
0
 def fill_in_graph(a_graph, link_type=Link):
     """Add a fixed set of weighted relationships between nodes '0'..'4'
     to ``a_graph`` using ``link_type``, then consolidate the graph.

     Relationships, in insertion order: 0->1, 1->2, 2->3 (twice), 3->4
     (weight -1.0) and 3->0; every other weight is 1.0. A fresh Node
     (name 'Node<id>', weight 1) is created for each endpoint of each
     relationship.
     """
     def make_node(ident):
         return Node(ident, 'Node' + ident, 1)

     relationships = (
         ('0', '1', 1.0),
         ('1', '2', 1.0),
         ('2', '3', 1.0),
         # NOTE(review): 2 -> 3 is added twice; presumably this exercises
         # duplicate handling - confirm before removing.
         ('2', '3', 1.0),
         ('3', '4', -1.0),
         ('3', '0', 1.0),
     )
     for source_id, target_id, strength in relationships:
         # Source node is built before the target node.
         source = make_node(source_id)
         target = make_node(target_id)
         a_graph.add_relationship(link_type(source, target, strength))
     a_graph.consolidate_graph()