def getCoreferent(self, role):
    """Find the phrase that the given role refers to.

    Clauses of ``self.sentences`` are searched in this order:

    1. the clause containing the role's relation -- only when the role has
       no phrase or the tag of its phrase's first token does not contain
       ``'k3yR'`` (the original comment describes this as "the antecedent
       is not a pronoun"); otherwise that clause is skipped entirely,
    2. the clauses seen before the containing clause, most recent first,
    3. the clauses seen after the containing clause, in document order.

    Within a clause the phrases are scanned in order. A phrase qualifies
    when its role for ``role.second_level_role`` (looked up with
    ``hasRole`` and, failing that, ``hasBaseRole``) reports
    ``filledWithNE()``.

    Side effect: whenever a phrase only matches through ``hasBaseRole``,
    the returned base role gets its ``second_level_role`` overwritten with
    ``role.second_level_role`` -- even if it later turns out not to be
    filled with a named entity.

    Args:
        role: the role whose coreferent is wanted. Must provide ``phrase``
            (may be None), ``second_level_role`` and ``getRelation()``.

    Returns:
        The coreferent phrase, or None when no clause offers one. If the
        referring phrase has number category 1 and the match is a
        coordinated ``NPhrase``, the last named-entity member of that
        coordination is returned instead (when there is one).
    """
    coreferent_phrase = None
    # Remembered so that the members of a coordination can be looked up.
    coreferent_clause = None

    # Number category of the referring phrase; 0 when the role has no phrase.
    phrase_number = role.phrase.getNumberCategory() if role.phrase is not None else 0

    # Collect both ends of the search order by appending (O(1)) rather than
    # inserting at index 0; `front` is reversed once at the end.
    front = []
    back = []
    clause_found = False
    for sentence in self.sentences:
        for clause in sentence.clauses:
            if clause == role.getRelation().containing_clause:
                clause_found = True
                # The containing clause itself is searched (first) only when
                # the referring phrase is absent or not tagged 'k3yR'.
                if role.phrase is None or 'k3yR' not in role.phrase.tokens[0].tag:
                    front.append(clause)
            elif not clause_found:
                # Earlier clause: ends up searched nearest-first.
                front.append(clause)
            else:
                # Later clause: searched last, in document order.
                back.append(clause)
    clauses = front[::-1] + back

    # Take the first phrase, in search order, whose role is filled with a NE.
    for clause in clauses:
        for phrase in clause.phrases:
            phrase_role = phrase.hasRole(role.second_level_role)
            if not phrase_role:
                # Fall back to base roles and relabel the match.
                phrase_role = phrase.hasBaseRole(role.second_level_role)
                if phrase_role:
                    phrase_role.second_level_role = role.second_level_role
            if phrase_role and phrase_role.filledWithNE():
                coreferent_phrase = phrase
                coreferent_clause = clause
                break
        if coreferent_phrase is not None:
            break

    # The referring phrase wants a single entity but the match is a
    # coordination: narrow it down to its last named-entity member.
    if phrase_number == 1 and isinstance(coreferent_phrase, NPhrase) and coreferent_phrase.is_coordination:
        sub_phrases = coreferent_clause.getDependentPhrases(coreferent_phrase)
        for sub_phrase in reversed(sub_phrases):
            if Utils.isNamedEntity(sub_phrase):
                coreferent_phrase = sub_phrase
                break

    return coreferent_phrase
def getStocks(self, relation):
    """Collect the named entities filling the '<actor_stock:1>' roles.

    For every role returned by
    ``relation.getSecondLevelRoles('<actor_stock:1>')`` the role's
    ``coreferent`` is used when it is set, is recognised by
    ``Utils.isNamedEntity`` and is not itself one of the returned roles.
    Each string from ``Utils.getNamedEntities`` is added with underscores
    replaced by spaces.

    Args:
        relation: object providing ``getSecondLevelRoles(label)``.

    Returns:
        List of entity strings without duplicates, in first-seen order.
    """
    stocks = []
    stocks_phrases = relation.getSecondLevelRoles('<actor_stock:1>')

    for candidate in stocks_phrases:
        coreferent = candidate.coreferent
        # Only resolved coreferents that are named entities are of interest.
        if coreferent is None or not Utils.isNamedEntity(coreferent):
            continue
        # Skip coreferents that are themselves among the collected roles.
        if coreferent in stocks_phrases:
            continue
        for entity_str in Utils.getNamedEntities(coreferent):
            # Normalise once: underscores are replaced by spaces.
            stock_name = entity_str.replace('_', ' ')
            if stock_name not in stocks:
                stocks.append(stock_name)

    return stocks
def getAgencies(self, relation):
    """Collect the named entities filling the '<actor_agency:1>' roles.

    For every role returned by
    ``relation.getSecondLevelRoles('<actor_agency:1>')`` the role's
    ``coreferent`` is used when it is set, is recognised by
    ``Utils.isNamedEntity`` and is not itself one of the returned roles.
    Each string from ``Utils.getNamedEntities`` must additionally pass
    ``RoleResolver.isRelevantAgencyEntity``; accepted strings are added
    with underscores replaced by spaces.

    Args:
        relation: object providing ``getSecondLevelRoles(label)``.

    Returns:
        List of entity strings without duplicates, in first-seen order.
    """
    agencies = []
    agencies_phrases = relation.getSecondLevelRoles('<actor_agency:1>')

    for candidate in agencies_phrases:
        coreferent = candidate.coreferent
        # Only resolved coreferents that are named entities are of interest.
        if coreferent is None or not Utils.isNamedEntity(coreferent):
            continue
        # Skip coreferents that are themselves among the collected roles.
        if coreferent in agencies_phrases:
            continue
        for entity_str in Utils.getNamedEntities(coreferent):
            # Relevance filter defends against bugs and wrong parses; it
            # receives the raw string (underscores not yet replaced).
            if not RoleResolver.isRelevantAgencyEntity(entity_str):
                continue
            agency_name = entity_str.replace('_', ' ')
            if agency_name not in agencies:
                agencies.append(agency_name)

    return agencies
def filledWithNE(self):
    """Tell whether this object's phrase is a named entity.

    Returns:
        False when ``self.phrase`` is None; otherwise whatever
        ``Utils.isNamedEntity(self.phrase)`` returns.
    """
    # Identity test: does not dispatch to the phrase's own comparison methods.
    if self.phrase is None:
        return False
    return Utils.isNamedEntity(self.phrase)
def matchesPhrase(self, phrase):
    """Report whether ``phrase`` matches, i.e. whether it is a named entity.

    The decision is delegated entirely to ``Utils.isNamedEntity``; no
    instance state is consulted.

    Args:
        phrase: the phrase to check.

    Returns:
        The result of ``Utils.isNamedEntity(phrase)``.
    """
    is_named_entity = Utils.isNamedEntity(phrase)
    return is_named_entity