def find_tags(self, tokens, **kwargs): if kwargs.get("tagset") in (PENN, None): kwargs.setdefault("map", lambda token, tag: (token, tag)) if kwargs.get("tagset") == UNIVERSAL: kwargs.setdefault( "map", lambda token, tag: penntreebank2universal(token, tag)) return _Parser.find_tags(self, tokens, **kwargs)
def find_tags(self, tokens, **kwargs):
    # Parser.find_tags() can take an optional map(token, tag) function,
    # which returns an updated (token, tag)-tuple for each token.
    kwargs.setdefault("map", parole2penntreebank)
    return Parser.find_tags(self, tokens, **kwargs)
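
# Hedged sketch of the comment above: kwargs.setdefault() installs the
# default mapping only when the caller did not pass a "map" of their own.
# The PAROLE-style tag and the override below are demo assumptions.
def demo_tag_with_map(tagged, **kwargs):
    kwargs.setdefault("map", lambda token, tag: (token, tag))  # identity default
    f = kwargs["map"]
    return [f(w, t) for w, t in tagged]

tagged = [("perro", "NCMS000")]
print(demo_tag_with_map(tagged))                               # default: unchanged
print(demo_tag_with_map(tagged, map=lambda w, t: (w, t[:2])))  # caller override: coarse tag
# [('perro', 'NCMS000')]
# [('perro', 'NC')]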
def find_tokens(self, tokens, **kwargs):
    # 's in Dutch preceded by a vowel indicates plural ("auto's"): don't replace.
    kwargs.setdefault("abbreviations", ABBREVIATIONS)
    kwargs.setdefault("replace", {"'n": " 'n"})
    s = _Parser.find_tokens(self, tokens, **kwargs)
    s = [re.sub(r"' s (ochtends|morgens|middags|avonds)", "'s \\1", s) for s in s]
    return s
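
# Demo of the post-processing step above: the tokenizer splits the Dutch
# clitic in "'s ochtends" ("in the morning") into "' s ochtends", and the
# regex glues it back together. The sample sentence is made up.
import re

s = "' s ochtends drink ik koffie ."
print(re.sub(r"' s (ochtends|morgens|middags|avonds)", "'s \\1", s))
# 's ochtends drink ik koffie .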
def find_tokens(self, tokens, **kwargs): kwargs.setdefault("abbreviations", ABBREVIATIONS) kwargs.setdefault("replace", replacements) s = _Parser.find_tokens(self, tokens, **kwargs) s = [s.replace("&rsquo ;", u"’") if isinstance( s, unicode) else s for s in s] return s
def find_tokens(self, tokens, **kwargs): kwargs.setdefault("abbreviations", ABBREVIATIONS) kwargs.setdefault("replace", replacements) #return _Parser.find_tokens(self, tokens, **kwargs) s = _Parser.find_tokens(self, tokens, **kwargs) s = [s.replace(" &contraction ;", u"'").replace("XXX -", "-") for s in s] return s
def find_tags(self, tokens, **kwargs): if kwargs.get("tagset") != STTS: kwargs.setdefault("map", stts2penntreebank) # The lexicon uses Swiss spelling: "ss" instead of "ß". # We restore the "ß" after parsing. tokens_ss = [t.replace(u"ß", "ss") for t in tokens] tokens_ss = _Parser.find_tags(self, tokens_ss, **kwargs) return [[w] + tokens_ss[i][1:] for i, w in enumerate(tokens)]
def find_tokens(self, tokens, **kwargs): kwargs.setdefault("abbreviations", ABBREVIATIONS) kwargs.setdefault("replace", replacements) s = _Parser.find_tokens(self, tokens, **kwargs) s = [ s.replace("&rsquo ;", "’") if isinstance(s, str) else s for s in s ] return s
def find_tags(self, tokens, **kwargs): if kwargs.get("tagset") in (PENN, None): kwargs.setdefault("map", lambda token, tag: stts2penntreebank(token, tag)) if kwargs.get("tagset") == UNIVERSAL: kwargs.setdefault("map", lambda token, tag: stts2universal(token, tag)) if kwargs.get("tagset") is STTS: kwargs.setdefault("map", lambda token,tag: (token, tag)) # The lexicon uses Swiss spelling: "ss" instead of "ß". # We restore the "ß" after parsing. tokens_ss = [t.replace(u"ß", "ss") for t in tokens] tokens_ss = _Parser.find_tags(self, tokens_ss, **kwargs) return [[w] + tokens_ss[i][1:] for i, w in enumerate(tokens)]
def find_chunks(self, tokens, **kwargs):
    return _Parser.find_chunks(self, tokens, **kwargs)
def find_tags(self, tokens, **kwargs): kwargs.setdefault("map", tagset2penntreebank) return _Parser.find_tags(self, tokens, **kwargs)
def find_tokens(self, tokens, **kwargs): kwargs.setdefault("abbreviations", ABBREVIATIONS) kwargs.setdefault("replace", REPLACEMENTS) return _Parser.find_tokens(self, tokens, **kwargs)
def find_tags(self, tokens, **kwargs): if kwargs.get("tagset") != WOTAN: kwargs.setdefault("map", wotan2penntreebank) return _Parser.find_tags(self, tokens, **kwargs)
def find_tags(self, tokens, **kwargs): if kwargs.get("tagset") != PAROLE: kwargs.setdefault("map", parole2penntreebank) return _Parser.find_tags(self, tokens, **kwargs)
def find_tokens(self, tokens, **kwargs): kwargs.setdefault("abbreviations", ABBREVIATIONS) kwargs.setdefault("replace", CONTRACTIONS) return Parser.find_tokens(self, tokens, **kwargs)