def __init__(self, session, node, parent):
    """Set up the merger and read its entity-type and POS settings.

    The 'entityTypes' setting is a whitespace-separated list of names.
    Each name is lower-cased and matched by prefix onto one of the labels
    'PERSON', 'GPE' or 'ORGANIZATION'; the resulting labels are stored in
    ``self.types`` in the order given.  When the setting is empty or
    missing, all three labels are used.  The 'pos' setting (default 0) is
    stored in ``self.keepPos``.

    Raises:
        MissingDependencyException: if the module-level ``nltk`` name is
            None.
        ConfigFileException: if a configured name matches none of the
            known prefixes.
    """
    SimpleTokenMerger.__init__(self, session, node, parent)
    if nltk is None:
        raise MissingDependencyException(self.objectType, 'nltk')

    # Accepted name prefixes paired with the label each one selects;
    # checked in this order.
    prefix_table = (
        ('pe', 'PERSON'),
        (('pl', 'g'), 'GPE'),
        (('org', 'co'), 'ORGANIZATION'),
    )

    configured = self.get_setting(session, 'entityTypes')
    if not configured:
        # Nothing configured: default to every supported label.
        self.types = ['PERSON', 'GPE', 'ORGANIZATION']
    else:
        self.types = []
        for raw_name in configured.split():
            name = raw_name.lower()
            for prefixes, label in prefix_table:
                if name.startswith(prefixes):
                    self.types.append(label)
                    break
            else:
                # No prefix matched this configured name.
                template = "Unknown entity type setting {0} on {1} {2}"
                raise ConfigFileException(
                    template.format(name, self.__class__.__name__, self.id)
                )

    # Whether to keep the /POS tag or strip it.
    self.keepPos = self.get_setting(session, 'pos', 0)
def __init__(self, session, config, parent):
    """Initialise the base merger, then cache two settings on the instance.

    The 'nonPhrases' and 'nounRequired' settings are each read with a
    default of 0 and stored as the attributes of the same name.
    """
    SimpleTokenMerger.__init__(self, session, config, parent)
    # Each setting is stored under an attribute sharing its name.
    for setting_name in ("nonPhrases", "nounRequired"):
        setattr(self, setting_name,
                self.get_setting(session, setting_name, 0))
def __init__(self, session, config, parent):
    """Run the base initialiser and load the phrase-related settings.

    Stores the 'nonPhrases' setting in ``self.nonPhrases`` and the
    'nounRequired' setting in ``self.nounRequired``; both fall back to 0
    when not configured.
    """
    SimpleTokenMerger.__init__(self, session, config, parent)

    def read_setting(key):
        # Both settings share the same default of 0.
        return self.get_setting(session, key, 0)

    self.nonPhrases = read_setting('nonPhrases')
    self.nounRequired = read_setting('nounRequired')