def enrich_treebank(self, a_treebank):
    """Enrich the parse trees with the speaker sentences held in this bank.

    ``abstract_bank.enrich_treebank`` runs first.  Then, for every document
    yielded by iterating ``self``, each sentence is handed the tree found at
    index ``line_number`` of that document's ``tree_document`` through the
    sentence's own ``enrich_tree`` method.

    Progress is reported on stderr: one ``.`` per document and a final
    newline once all documents have been processed.

    :param a_treebank: treebank forwarded to ``abstract_bank.enrich_treebank``.
    """
    abstract_bank.enrich_treebank(self, a_treebank)

    for document in self:
        sys.stderr.write(".")

        for sentence in document:
            # The sentence's line number doubles as the index of its tree
            # within the document's tree list.
            matching_tree = document.tree_document[sentence.line_number]
            sentence.enrich_tree(matching_tree)

    sys.stderr.write("\n")
def enrich_treebank(self, a_treebank, a_cursor=None):
    """Align every name entity in this bank with the trees of its document.

    ``abstract_bank.enrich_treebank`` runs first.  Then, for each document
    yielded by iterating ``self``, the per-sentence name-entity sets are
    paired positionally with the trees in the document's ``tree_document``
    and every entity is enriched against its tree.

    Handling of length mismatches between the two lists:

    * more name-entity sets than trees -- the mismatch is reported through
      ``on.common.log.report``, every entity of the document is marked
      ``valid = False`` and the document is skipped;
    * fewer name-entity sets than trees -- the document's
      ``name_entity_sets`` list is padded with fresh ``name_entity_set``
      objects until the lengths agree.

    Progress goes to stderr (one ``.`` per document, then a newline).  When
    debugging is on at maximum verbosity, alignment counts are written to
    stderr as well.

    :param a_treebank: treebank forwarded to ``abstract_bank.enrich_treebank``.
    :param a_cursor: not used by this method.
    :raises AssertionError: if a tree's sentence index differs from the
        ``sentence_index`` of an entity paired with it.
    """
    total_nes = 0
    total_ne_terminals = 0
    total_ne_non_terminals = 0
    total_ne_node_mismatches = 0

    abstract_bank.enrich_treebank(self, a_treebank)

    for a_name_tagged_document in self:
        sys.stderr.write(".")

        a_name_entity_sets = a_name_tagged_document.name_entity_sets
        a_tree_document = a_name_tagged_document.tree_document

        if len(a_name_entity_sets) > len(a_tree_document):
            on.common.log.report(
                "name",
                " found a mismatch in number of elements in the lists SERIOUS",
                nes=len(a_name_entity_sets),
                tids=len(a_tree_document),
                sets=a_name_entity_sets)

            # The sets cannot be paired with trees reliably, so invalidate
            # every entity in the document and move on to the next one.
            for a_name_entity_set in a_name_entity_sets:
                for a_name_entity_list in a_name_entity_set:
                    for a_name_entity in a_name_entity_list:
                        a_name_entity.valid = False
            continue

        # Pad with empty sets so that every tree has a partner below.
        # NOTE(review): termination relies on a_name_entity_sets being the
        # same list object as a_name_tagged_document.name_entity_sets.
        while len(a_name_entity_sets) < len(a_tree_document):
            a_name_tagged_document.name_entity_sets.append(
                name_entity_set(a_tree_document.document_id))

        for a_tree, a_name_entity_set in zip(a_tree_document,
                                             a_name_entity_sets):
            name_entity_hash = a_name_entity_set.name_entity_hash

            for a_name_entity_type in name_entity_hash:
                for a_name_entity in name_entity_hash[a_name_entity_type]:
                    assert (a_tree.get_sentence_index()
                            == a_name_entity.sentence_index), (a_tree,
                                                               a_name_entity)

                    a_name_entity.enrich_tree(a_tree)

                    # Without both boundary leaves there is nothing to
                    # align; such entities are left out of the statistics.
                    if (not a_name_entity.start_leaf
                            or not a_name_entity.end_leaf):
                        continue

                    a_name_entity.check_tree_alignment()

                    a_subtree = a_name_entity.subtree
                    if a_subtree:
                        total_nes += 1
                        if a_subtree.is_leaf():
                            total_ne_terminals += 1
                        else:
                            total_ne_non_terminals += 1
                    else:
                        # no legal tree node aligns with this name
                        total_ne_node_mismatches += 1

    sys.stderr.write("\n")

    # Summary of how many names found an aligning tree node.
    if (on.common.log.DEBUG
            and on.common.log.VERBOSITY >= on.common.log.MAX_VERBOSITY):
        for label, count in (
                ("total nes: ", total_nes),
                ("total ne-node mismatches: ", total_ne_node_mismatches),
                ("total ne-terminals: ", total_ne_terminals),
                ("total ne-non-terminals: ", total_ne_non_terminals)):
            sys.stderr.write(label + str(count) + "\n")
def enrich_treebank(self, a_treebank, a_cursor=None):
    """Align every name entity in this bank with the trees of its document.

    ``abstract_bank.enrich_treebank`` runs first.  Then, for each document
    yielded by iterating ``self``, the per-sentence name-entity sets are
    paired positionally with the trees in the document's ``tree_document``
    and every entity is enriched against its tree.

    Handling of length mismatches between the two lists:

    * more name-entity sets than trees -- the mismatch is reported through
      ``on.common.log.report``, every entity of the document is marked
      ``valid = False`` and the document is skipped;
    * fewer name-entity sets than trees -- the document's
      ``name_entity_sets`` list is padded with fresh ``name_entity_set``
      objects until the lengths agree.

    Progress goes to stderr (one ``.`` per document, then a newline).  When
    debugging is on at maximum verbosity, alignment counts are written to
    stderr as well.

    :param a_treebank: treebank forwarded to ``abstract_bank.enrich_treebank``.
    :param a_cursor: not used by this method.
    :raises AssertionError: if a tree's sentence index differs from the
        ``sentence_index`` of an entity paired with it.
    """
    total_nes = 0
    total_ne_terminals = 0
    total_ne_non_terminals = 0
    total_ne_node_mismatches = 0

    abstract_bank.enrich_treebank(self, a_treebank)

    for a_name_tagged_document in self:
        sys.stderr.write(".")

        a_name_entity_sets = a_name_tagged_document.name_entity_sets
        a_tree_document = a_name_tagged_document.tree_document

        if len(a_name_entity_sets) > len(a_tree_document):
            on.common.log.report(
                "name",
                " found a mismatch in number of elements in the lists SERIOUS",
                nes=len(a_name_entity_sets),
                tids=len(a_tree_document),
                sets=a_name_entity_sets)

            # The sets cannot be paired with trees reliably, so invalidate
            # every entity in the document and move on to the next one.
            for a_name_entity_set in a_name_entity_sets:
                for a_name_entity_list in a_name_entity_set:
                    for a_name_entity in a_name_entity_list:
                        a_name_entity.valid = False
            continue

        # Pad with empty sets so that every tree has a partner below.
        # NOTE(review): termination relies on a_name_entity_sets being the
        # same list object as a_name_tagged_document.name_entity_sets.
        while len(a_name_entity_sets) < len(a_tree_document):
            a_name_tagged_document.name_entity_sets.append(
                name_entity_set(a_tree_document.document_id))

        for a_tree, a_name_entity_set in zip(a_tree_document,
                                             a_name_entity_sets):
            name_entity_hash = a_name_entity_set.name_entity_hash

            for a_name_entity_type in name_entity_hash:
                for a_name_entity in name_entity_hash[a_name_entity_type]:
                    assert (a_tree.get_sentence_index()
                            == a_name_entity.sentence_index), (a_tree,
                                                               a_name_entity)

                    a_name_entity.enrich_tree(a_tree)

                    # Without both boundary leaves there is nothing to
                    # align; such entities are left out of the statistics.
                    if (not a_name_entity.start_leaf
                            or not a_name_entity.end_leaf):
                        continue

                    a_name_entity.check_tree_alignment()

                    a_subtree = a_name_entity.subtree
                    if a_subtree:
                        total_nes += 1
                        if a_subtree.is_leaf():
                            total_ne_terminals += 1
                        else:
                            total_ne_non_terminals += 1
                    else:
                        # no legal tree node aligns with this name
                        total_ne_node_mismatches += 1

    sys.stderr.write("\n")

    # Summary of how many names found an aligning tree node.
    if (on.common.log.DEBUG
            and on.common.log.VERBOSITY >= on.common.log.MAX_VERBOSITY):
        for label, count in (
                ("total nes: ", total_nes),
                ("total ne-node mismatches: ", total_ne_node_mismatches),
                ("total ne-terminals: ", total_ne_terminals),
                ("total ne-non-terminals: ", total_ne_non_terminals)):
            sys.stderr.write(label + str(count) + "\n")