def _node_factory(self, cui, description, weight, original_line=None):
    """Create a Node and register it in the internal node cache.

    When a TF*IDF score table is configured (self._tf_idf_scores is not
    None) the node's weight is multiplied by the score stored under its
    CUI. The node is always added to self._node_cache before it is
    returned."""
    node = Node(cui, description, weight, original_line)
    tf_idf = self._tf_idf_scores
    if tf_idf is not None:
        scaled_weight = node.weight * tf_idf[cui]
        node.weight = scaled_weight
    # Every node built through this factory is tracked in the cache
    self._node_cache.add(node)
    return node
def _node_factory(self, cui, description, weight, original_line=None):
    """Build a new Node, optionally re-weighted by TF*IDF, and cache it.

    The weight is scaled by self._tf_idf_scores[cui] only when a score
    table was supplied. The resulting node is added to self._node_cache
    and returned to the caller."""
    created = Node(cui, description, weight, original_line)
    score_table = self._tf_idf_scores
    if score_table is not None:
        adjusted = created.weight * score_table[cui]
        created.weight = adjusted
    # Keep the internal node list up to date
    self._node_cache.add(created)
    return created
def testNodeLinks(self):
    """Check equality and hashing of Links built from the same two nodes.

    A link with weight -1 from first to second is expected to compare
    (and hash) equal to a link with weight 1 from second to first, and
    both are expected to differ from the weight 1 link from first to
    second."""
    first = Node("c1234", "Fake node", 0.987)
    second = Node("c45678", "Another fake node", 0.123)
    forward = Link(first, second, 1)
    negative = Link(first, second, -1)
    backward = Link(second, first, 1)

    # Equality: the negative link points the opposite way to `forward`
    self.assertNotEqual(forward, negative)
    self.assertEqual(negative, backward)
    self.assertNotEqual(forward, backward)

    # The hashes must follow the same pattern as the equality checks
    self.assertNotEqual(hash(forward), hash(negative))
    self.assertEqual(hash(negative), hash(backward))
    self.assertNotEqual(hash(forward), hash(backward))
def testForHashClashesNumberOfRandomNodes(self):
    """Look for Link hash clashes over one million random node triples.

    Two links share their first node; their hashes are expected to match
    only when the second nodes happen to receive the same identifier."""

    def random_node_id():
        return "C%07d" % random.randint(0, 9999999)

    for _ in xrange(1000000):
        id_a = random_node_id()
        id_b = random_node_id()
        id_c = random_node_id()
        node_a = Node(id_a, "Fake node", 1)
        node_b = Node(id_b, "Fake node 2", 1)
        node_c = Node(id_c, "Yet Another Fake Node", 1)
        link_ab = Link(node_a, node_b, 1)
        link_ac = Link(node_a, node_c, 1)
        # Identical target ids are very infrequent, but then the hashes
        # have to agree; otherwise they have to differ.
        check = self.assertEqual if id_b == id_c else self.assertNotEqual
        check(hash(link_ab), hash(link_ac))
def testLinkMatrixConversion(self):
    """Convert the directional test graph and spot-check three cells of
    the resulting MappedLinkMatrix."""
    self.fill_in_graph(self.test_graph)
    matrix = self.test_graph.as_mapped_link_matrix()
    self.assert_(type(matrix) is MappedLinkMatrix)

    def cell(source, target):
        # Matrix value stored for the (source, target) pair of nodes
        return matrix[matrix.get_term_position(source),
                      matrix.get_term_position(target)]

    node4 = Node('4', 'Node4', 1)
    node2 = Node('2', 'Node2', 1)
    self.assertEqual(cell(node2, node4), 0.0)

    node3 = Node('3', 'Node3', 1)
    # There's no relation from 3 to 4 - it's from 4 to 3 (the weight was
    # negative)
    self.assertEqual(cell(node3, node4), 0.0)
    self.assertEqual(cell(node4, node3), 1.0)
def graph_and_rank(self, article):
    """Simulate graphing and ranking from the lines of an MTI article.

    Every line in article.lines becomes a (Node, score) tuple, where the
    node is built from the line's CUI, description and confidence and
    the score is that same confidence. The tuples are returned as a
    list, in line order, for a RankedConverter to consume."""
    return [(Node(line.CUI, line.description, line.confidence),
             line.confidence)
            for line in article.lines]
def testConvertingAdirectionalGraph(self):
    """Convert a test graph built from AdirectionalLinks and check that
    the 3/4 relation is present in both directions of the matrix."""
    self.fill_in_graph(self.test_graph, AdirectionalLink)
    matrix = self.test_graph.as_mapped_link_matrix()
    self.assert_(type(matrix) is MappedLinkMatrix)

    def cell(source, target):
        # Matrix value stored for the (source, target) pair of nodes
        return matrix[matrix.get_term_position(source),
                      matrix.get_term_position(target)]

    node4 = Node('4', 'Node4', 1)
    node2 = Node('2', 'Node2', 1)
    self.assertEqual(cell(node2, node4), 0.0)

    node3 = Node('3', 'Node3', 1)
    # There's no relation from 3 to 4 in the directional matrix, but
    # there should be one here
    self.assertEqual(cell(node3, node4), 1.0)
    self.assertEqual(cell(node4, node3), 1.0)
def main():
    """Print an extractive summary of a text file.

    Command line: sys.argv[1] is the path of the text to summarize and
    the optional sys.argv[2] is the fraction of sentences to keep. When
    the fraction is missing or cannot be turned into a sentence count,
    0.2 is used.

    The text is split into sentences, one graph node is created per
    sentence, every sentence pair is linked with its similarity, the
    graph is ranked, and the best-ranked sentences are printed in their
    original order as a single paragraph."""
    # Read the whole input. The context manager makes sure the file
    # handle is closed even if reading fails.
    with open(sys.argv[1], 'rU') as input_file:
        raw_text = input_file.read().strip()
    # Split into sentences, stripping surrounding whitespace from each
    sentences = [s.strip() for s in sentence_detector.tokenize(raw_text)]
    # Eliminate empty lines
    sentences = [s for s in sentences if len(s) > 0]
    # Create one Node per sentence, with a unique ID based on sequential
    # numbering, the contents of the sentence, and an initial node weight
    # of 1.0
    sentnodes = [Node(position, sentence, 1.0)
                 for position, sentence in enumerate(sentences)]
    # Create an empty graph
    sentgraph = Graph()
    # Compute the similarity between every pair of sentences and add a
    # link to the graph connecting those nodes.
    for p in sentence_pairs(sentences):
        n1, n2 = sentnodes[p[0]], sentnodes[p[1]]
        sentlink = AdirectionalLink(
            n1, n2, sentence_similarity(sentences[p[0]], sentences[p[1]]))
        sentgraph.add_relationship(sentlink)
    # Create a default TextRanker (that implements TextRank as described)
    # and wrap it in the MappedRanker class, which returns (node, score)
    # pairings instead of just scores
    ranker = MappedRanker(TextRanker())
    # Convert the graph to a link matrix
    matrix = sentgraph.as_mapped_link_matrix()
    # Run the ranker on the matrix
    results = ranker.evaluate(matrix)
    # The ranker returns a RankedResultSet that behaves like a list of
    # (node, score) pairings. By default these are sorted in reverse
    # order, i.e., the highest scores at the beginning. To obtain the
    # desired sentences we just trim the list to size.
    try:
        # Try to get a float from the command line
        desired_length = int(round(float(sys.argv[2]) * len(sentences)))
    except (IndexError, ValueError, OverflowError):
        # No fraction given (IndexError), not a number (ValueError), or
        # NaN/infinity (ValueError/OverflowError): use the default. Only
        # these are caught so Ctrl-C and SystemExit still propagate.
        desired_length = int(round(float(len(sentences)) * 0.2))
    # For the final output we only need the node, not its score
    shortened_results = [x[0] for x in results]
    # Now we trim it to the desired length
    shortened_results = shortened_results[:desired_length]
    # Now, for presentation purposes, we reorder the truncated list in
    # its original order.
    shortened_results.sort(cmp=cmp_two_nodes_by_id)
    # Output the summary as a paragraph. A single parenthesized argument
    # prints the same text whether print is a statement or a function.
    print(' '.join([x.name for x in shortened_results]))
def from_graphml_file(self, file_object, default_link=Link):
    """Load nodes and relationships from a GraphML document into self.

    file_object is handed to xml.etree.ElementTree.iterparse.
    default_link is called as default_link(node1, node2, weight, name)
    for every <edge> and the result is passed to self.add_relationship.

    <key> declarations are read first to learn which data ids hold the
    'node.description', 'node.MR_id', 'edge.weight' and
    'edge.description' attributes. A node without a description is named
    "NoName". An edge whose weight is undeclared, missing or not numeric
    gets weight 1.0 (a warning is logged); an edge in a file that
    declares no 'edge.description' key gets an empty name. A KeyError is
    raised if a node is read before the 'node.description' or
    'node.MR_id' key has been declared. self.consolidate_graph() is
    called once the whole document has been read."""
    from xml.etree.ElementTree import iterparse

    graphml_ns = "{http://graphml.graphdrawing.org/xmlns}"
    data_tag = graphml_ns + "data"
    key_tag = graphml_ns + "key"
    node_tag = graphml_ns + "node"
    edge_tag = graphml_ns + "edge"

    def get_subelement_data(elem, key):
        """Return the text of the first <data> element found under elem
        (elem included) whose 'key' attribute equals key, else None."""
        for candidate in elem.getiterator():
            if candidate.tag == data_tag and candidate.get('key') == key:
                return candidate.text
        return None

    nodes = {}
    # Discover the names of the attributes we're looking for by
    # investigating the keys, then actually read the file
    keystore = {}
    for _event, element in iterparse(file_object):
        if element.tag == key_tag:
            attr_name = element.get('attr.name')
            if attr_name is not None:
                keystore[element.get('for') + '.' + attr_name] = \
                    element.get('id')
        elif element.tag == node_tag:
            # The next line supports yEd's NodeLabel and Profuse's label
            nodename = get_subelement_data(element,
                                           keystore['node.description'])
            if nodename is None:
                nodename = "NoName"
            nodekey = get_subelement_data(element, keystore['node.MR_id'])
            nodes[element.get('id')] = Node(nodekey, nodename, 1.0)
        elif element.tag == edge_tag:
            n1 = nodes[element.get('source')]
            n2 = nodes[element.get('target')]
            try:
                weight = float(
                    get_subelement_data(element, keystore['edge.weight']))
            except (KeyError, TypeError, ValueError):
                # KeyError: the file declares no edge weight key.
                # TypeError: this edge has no weight data (float(None)).
                # ValueError: the weight text is not a number.
                logging.warning(
                    'Failed at reading weight because of:\n%s',
                    traceback.format_exc())
                weight = 1.0
            try:
                relname = get_subelement_data(element,
                                              keystore['edge.description'])
            except KeyError:
                # The file declares no edge description key
                relname = ""
            self.add_relationship(default_link(n1, n2, weight, relname))
    self.consolidate_graph()
# end_element(name, record_callback) -- presumably the end-of-element
# callback of an XML parser; confirm against the code that registers it.
#
# Works entirely on module-level state (the current_* globals declared at
# the top of the body):
#   * joins current_content into one string and keeps its non-empty lines;
#   * normalizes `name` with strip().lower() and reacts to 'cuis', 'names',
#     'connectivities', 'relations' and 'article';
#   * 'cuis' / 'names' are parsed with handle_cuis / handle_names;
#     'connectivities' / 'relations' are parsed with handle_connectivities /
#     handle_relations, both of which also receive current_nodes;
#   * when both current_cuis and current_names are set, current_nodes is
#     rebuilt as Node(cui, name, 1) for each zipped (cui, name) pair;
#   * when current_strengths is set, current_links is built with
#     make_links(current_strengths, current_relations);
#   * on 'article', record_callback(current_article, current_links) is
#     called.
#
# NOTE(review): `global current_links` is declared twice; the second
# declaration is redundant.
# NOTE(review): the line breaks and indentation of this function were lost,
# so it cannot be determined from here which of the trailing `... = None`
# resets and the final `current_content = []` belong inside the preceding
# `if` blocks. Restore the layout from version control before editing.
def end_element(name, record_callback): global current_links global current_element global current_article global current_cuis global current_names global current_nodes global current_relations global current_strengths global current_links global current_content data = ''.join(current_content) data = [x for x in data.splitlines() if len(x) > 0] name = name.strip().lower() if name == 'cuis': current_cuis = handle_cuis(data) if name == 'names': current_names = handle_names(data) if name == 'connectivities': current_strengths = handle_connectivities(data, current_nodes) if current_cuis is not None and current_names is not None: current_nodes = [ Node(x[0], x[1], 1) for x in zip(current_cuis, current_names) ] if name == 'relations': current_relations = handle_relations(data, current_nodes) # Very inefficient if there are, in fact, relations, but if they aren't this # will make it actually work. if current_strengths is not None: current_links = make_links(current_strengths, current_relations) # Free some memory current_strengths = None current_relations = None if name == 'article': record_callback(current_article, current_links) current_element = None current_article = None current_cuis = None current_names = None current_nodes = None current_relations = None current_strengths = None current_links = None current_content = []
def buildRankedResultSet(self):
    """Return a two-item list of (Node, score) pairs made from the
    concepts supplied by self.buildConcepts().

    A ranked result set holds nodes and scores instead of concepts: the
    first concept gets score 2 and the second gets score 1, and each
    node's weight equals its score."""
    first, second = self.buildConcepts()
    scored_concepts = ((first, 2), (second, 1))
    return [(Node(concept.CUI, concept.concept_name, score), score)
            for concept, score in scored_concepts]
def __init__(self, node_id, node_name, node_weight, mesh_expression):
    """Initialize the Node part of the object and keep mesh_expression.

    node_id, node_name and node_weight are forwarded unchanged to
    Node.__init__; mesh_expression is stored in self._mesh."""
    Node.__init__(self, node_id, node_name, node_weight)
    self._mesh = mesh_expression
def fill_in_graph(a_graph, link_type=Link):
    """Add a fixed set of six test relationships to a_graph and then
    consolidate it.

    Each relationship is link_type(source_node, target_node, weight)
    between freshly built nodes with weight 1. The 2 -> 3 relationship
    is added twice and the 3 -> 4 relationship carries weight -1.0."""
    # (source id, target id, link weight), in insertion order
    edges = (
        ('0', '1', 1.0),
        ('1', '2', 1.0),
        ('2', '3', 1.0),
        ('2', '3', 1.0),
        ('3', '4', -1.0),
        ('3', '0', 1.0),
    )
    for source_id, target_id, strength in edges:
        a_graph.add_relationship(
            link_type(Node(source_id, 'Node' + source_id, 1),
                      Node(target_id, 'Node' + target_id, 1),
                      strength))
    a_graph.consolidate_graph()