def extract_entity(parse_tree, semantic_role=''):
    """Create an entity object from a snippet of a parse tree.

    Every subtree of parse_tree is visited in treepositions() order and its
    node label decides how the entity's quantifier, name and description are
    filled in.  Noun phrases nested below the root are extracted recursively
    (without a semantic role) and merged into the result.

    Args:
        parse_tree: Tree snippet to extract the entity from.
        semantic_role: Role the snippet fills.  'Location', 'Source' and
            'Destination' produce a Location; anything else produces an
            ObjectEntity.

    Returns:
        The populated Location or ObjectEntity.
    """
    location_roles = ('Location', 'Source', 'Destination')
    entity = Location() if semantic_role in location_roles else ObjectEntity()

    # Positions (relative to parse_tree) that were already handled by a
    # recursive call or folded into a description.  A set keeps the
    # per-position membership test constant-time.
    ignore_positions = set()
    previous_node = None
    previous_leaves = None

    for position in parse_tree.treepositions():
        subtree = parse_tree[position]
        # Leaves are plain words; only labelled subtrees are inspected
        if not isinstance(subtree, Tree):
            continue
        if position in ignore_positions:
            continue

        node = subtree.node
        leaves = ' '.join(subtree.leaves()).lower()

        # A noun phrase below the root is extracted recursively and merged.
        # Everything underneath it is then skipped by the outer walk.
        if subtree is not parse_tree and 'NP' in node:
            entity.merge(extract_entity(subtree))
            # ignore_positions must be relative to parse_tree, not subtree
            ignore_positions.update(position + subposition
                                    for subposition in subtree.treepositions())

        if 'DT' in node:
            # A determiner adds some information for the quantifier
            entity.quantifier.fill_determiner(leaves)
        elif node == 'CD':
            # A cardinal number sets the quantifier number
            entity.quantifier.fill_cardinal(leaves)
            if entity.quantifier.number is None:
                # Not actually a number, so treat the text as the name
                entity.name = leaves
        elif node == 'PRP':
            entity.name = 'Commander' if leaves in ('i', 'me') else leaves
        elif ('PP' in node and entity.name) or node in ('SBAR', 'JJ'):
            # The whole phrase becomes one description; its children must
            # not be visited again on their own
            entity.description.append(leaves)
            ignore_positions.update(position + subposition
                                    for subposition in subtree.treepositions())
        elif ('NN' in node and previous_node and 'NN' in previous_node
              and entity.name == previous_leaves):
            # Second noun directly after the noun currently used as the
            # name: the earlier noun becomes a description, this one the name
            entity.description.append(previous_leaves)
            entity.name = leaves
        elif 'NN' in node or node == '-NONE-':
            entity.name = morphy(leaves, 'n')
            if entity.name is None:
                # morphy returned nothing; fall back to the raw text
                entity.name = leaves
        elif node == 'RB' and leaves == 'there':
            entity.name = 'there'

        previous_node = node
        previous_leaves = leaves

    return entity
def get_semantics_from_parse_tree(parse_tree_string):
    """Return the semantic parse for a parse tree given as a string.

    The tree is split into clauses, each clause is passed through
    frames.activize_clause and frames.split_conjunctions, and every verb
    found in the resulting trees is matched against its verb frame objects.

    Args:
        parse_tree_string: String representation of the parse tree.

    Returns:
        A list that interleaves the non-empty conjunctions encountered with
        one (best_match, tree, tag_list, sense) tuple per verb for which
        frames.pick_best_match returned a match.
    """
    # Matches the word of every VB* preterminal in the printed tree.
    # Compiled once here rather than once per tree inside the loops below.
    verb_finder = re.compile(r'(?<=VB[ DGNPZ]) *\w*(?=\))')

    parse_tree = Tree.parse(parse_tree_string)

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses (only existing keys are reassigned)
    for key, (clause, conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, conjunction)

    result_list = []

    for clause, clause_conjunction in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if clause_conjunction != '':
            result_list.append(clause_conjunction)

        for split_tree, tree_conjunction in split_tree_dict.values():
            if tree_conjunction != '':
                result_list.append(tree_conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there; this has to
                # be checked before the transformations below rewrite the tree
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                # Get the verbs whose lemmas are used to search verbnet
                verbs = (word.strip().lower()
                         for word in verb_finder.findall(str(tree)))

                # Create VFOs for each verb, then match them to the parse tree
                for verb in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)

                    match_list = []
                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)
                        if match:
                            match_list.append((match, vfo.classid))

                    (best_match, sense) = frames.pick_best_match(match_list)
                    if best_match is not None:
                        result_list.append((best_match, tree, tag_list, sense))

    return result_list
def extract_entity_class(parse_tree, semantic_role=''):
    """Create an EntityClass from a snippet of a parse tree.

    The subtrees of parse_tree are visited in treepositions() order.  Their
    node labels update a Quantifier (which starts out as number 1, definite)
    and add Predicate objects, grouped by predicate type.

    Args:
        parse_tree: Tree snippet to extract from.
        semantic_role: Predicate type used for every predicate except those
            generated from a 'PP-LOC' phrase, which use 'Location'.

    Returns:
        EntityClass built from the quantifier and the predicates.
    """
    quantifier = Quantifier()
    quantifier.number = 1
    quantifier.definite = True

    predicates = defaultdict(list)

    for position in parse_tree.treepositions():
        subtree = parse_tree[position]
        # Leaves are plain words; only labelled subtrees are inspected
        if not isinstance(subtree, Tree):
            continue
        node = subtree.node

        # A determiner node adds some information for the quantifier
        if node == 'DT':
            determiner = ' '.join(subtree.leaves()).lower()
            if determiner == 'any':
                quantifier.definite = False
                quantifier.exhaustive = True
                quantifier.proportionality = 'at least'
                quantifier.number = 1
            elif determiner in ('a', 'an'):
                quantifier.plural = False
                quantifier.number = 1
                quantifier.definite = False
                quantifier.exhaustive = False
                quantifier.proportionality = 'at least'
            elif determiner == 'the':
                quantifier.definite = True
                quantifier.exhaustive = False

        # A personal pronoun adds some information for the quantifier
        elif node == 'PRP':
            pronoun = ' '.join(subtree.leaves()).lower()
            quantifier.definite = True
            quantifier.number = 1
            quantifier.proportionality = 'exact'
            quantifier.exhaustive = True
            if pronoun in ('i', 'me'):
                # The object is Commander; 'fulfilled' is left untouched
                obj = 'Commander'
            else:
                obj = pronoun
                quantifier.fulfilled = False
            predicates[semantic_role].append(Predicate(semantic_role, obj))

        # Prepositional phrase generates a location predicate
        elif node == 'PP-LOC':
            for subposition in subtree.treepositions():
                child = subtree[subposition]
                if not isinstance(child, Tree):
                    continue
                # Only noun phrases whose first child is not (and does not
                # contain) 'NP' contribute a predicate
                if 'NP' in child.node and 'NP' not in child[0]:
                    predicates['Location'].append(
                        Predicate('Location', ' '.join(child.leaves())))

        # Cardinal number sets the quantifier number
        elif node == 'CD':
            number_text = ' '.join(subtree.leaves()).lower()
            # Digit strings ("3") are converted directly; anything else
            # ("three") goes through text2int
            if number_text.isdigit():
                quantifier.number = int(number_text)
            else:
                quantifier.number = text2int(number_text)

        # A noun phrase might have sub-parts that we need to parse separately.
        # The substring test also covers labels such as 'NP-PRD-A'.
        elif 'NP' in node:
            obj_word_list = []
            for subposition in subtree.treepositions():
                child = subtree[subposition]
                # Only look at subtrees whose first child is a word, i.e.
                # skip leaves and subtrees that start with another subtree
                if not isinstance(child, Tree) or isinstance(child[0], Tree):
                    continue

                theme_word = ' '.join(child.leaves()).lower()
                label = child.node

                # Get the actual object in question.
                # NOTE(review): 'and' binds tighter than 'or', so the
                # duplicate check only applies to 'JJ' nodes -- confirm
                # whether it was meant to cover 'NN' and 'CD' as well.
                if ('NN' in label or 'CD' in label or
                        ('JJ' in label and theme_word not in obj_word_list)):
                    m_word = morphy(theme_word, 'n')
                    if m_word is None:
                        m_word = theme_word
                    obj_word_list.append(m_word)

                # Get the quantifier info
                if obj_word_list and quantifier.plural is not None:
                    if obj_word_list[0] != theme_word:
                        quantifier.proportionality = 'at least'
                    else:
                        quantifier.proportionality = 'exact'
                    quantifier.number = 1

            # Compile object reference into lower_case_with_underscores name
            if obj_word_list:
                obj_name = "_".join(word.lower() for word in obj_word_list)
                predicates[semantic_role].append(
                    Predicate(semantic_role, obj_name))
                # Stop scanning once an object name has been produced
                break

        # If it's just a noun, add it as a predicate
        elif 'N' in node and 'SBJ' not in node:
            predicates[semantic_role].append(
                Predicate(semantic_role, ' '.join(subtree.leaves())))
            quantifier.definite = True

        elif 'ADV' in node:
            predicates[semantic_role].append(
                Predicate(semantic_role, ' '.join(subtree.leaves())))

    return EntityClass(quantifier, predicates)
def extract_frames_from_parse(parse_tree_string): """Take a string representing the parse tree as input, and print the semantic parse. The result list consists of a list of tuples, with each tuple containing the VerbNet frame and its associated tree.""" result_list = [] # In case we're handed an bad string, bail somewhat gracefully try: parse_tree = Tree.parse(parse_tree_string) except ValueError: print "Warning: semantics could not parse tree", repr(parse_tree_string) return result_list # Split clauses to handle them separately split_clause_dict = frames.split_clauses(parse_tree) # Activize clauses for key, (clause, conjunction) in split_clause_dict.items(): activized_clause = frames.activize_clause(clause) split_clause_dict[key] = (activized_clause, conjunction) for (clause, conjunction) in split_clause_dict.values(): # Split conjunctions and duplicate arguments if necessary split_tree_dict = frames.split_conjunctions(clause) if conjunction != '': result_list.append(conjunction) for (split_tree, conjunction) in split_tree_dict.values(): if conjunction != '': result_list.append(conjunction) for tree in split_tree: tag_list = [] # Store whether there was an existential there if frames.is_existential(str(tree)): tag_list.append('ex') # Transformational grammar stuff tree = frames.existential_there_insertion(tree) tree = frames.invert_clause(tree) tree = frames.wh_movement(tree) if EXTRACT_DEBUG: print 'Transformed tree:' print str(tree) verbs = frames.find_verbs(tree) # Create VFOs for each verb, then match them to the parse tree for verb, negation in verbs: lemmatized_verb = morphy(verb, 'v') vfo_list = frames.create_VerbFrameObjects(lemmatized_verb) match_list = [] if EXTRACT_DEBUG: print 'VFO list for %s:' % verb print '\n'.join(str(vfo.frame_list) for vfo in vfo_list) for vfo in vfo_list: match = vfo.match_parse(tree) if match: if EXTRACT_DEBUG: print 'Matched:' print '\t', str(vfo.frame_list) print 'with' print '\t', str(tree) match_list.append((match, 
vfo.classid)) if EXTRACT_DEBUG: print 'Match list:' for m in match_list: print 'Sense:', m[1] for a, b in m[0].items(): print a, str(b) print '\n\n' (best_match, sense) = frames.pick_best_match(match_list) if EXTRACT_DEBUG: print 'Chose: ' if best_match: for a, b in best_match.items(): print a, str(b) else: print str(None) print '\n\n' if not best_match is None: result_list.append((best_match, tree, tag_list, sense, verb, negation)) return result_list