def find_tags(self, tokens, **kwargs):
    """Tag tokens with the base parser, supplying a default "map" callback.

    The default callback is selected from the optional "tagset" keyword:

    * PENN or missing/None: each (token, tag) pair is passed through as-is.
    * UNIVERSAL: each pair is converted with penntreebank2universal().

    A "map" keyword supplied by the caller is never overwritten. For any
    other tagset value no default is installed here. All keyword arguments
    are forwarded to _Parser.find_tags(), whose result is returned.
    """
    tagset = kwargs.get("tagset")

    def _keep(token, tag):
        # Identity mapping: leave the tag untouched.
        return (token, tag)

    def _to_universal(token, tag):
        # Thin wrapper so the converter is looked up when the mapping runs.
        return penntreebank2universal(token, tag)

    # The two checks are deliberately independent; setdefault() makes the
    # second one a no-op whenever "map" has already been set.
    if tagset in (PENN, None):
        kwargs.setdefault("map", _keep)
    if tagset == UNIVERSAL:
        kwargs.setdefault("map", _to_universal)
    return _Parser.find_tags(self, tokens, **kwargs)
def find_tokens(self, tokens, **kwargs):
    """Tokenize with the base parser using this module's defaults.

    "abbreviations" defaults to ABBREVIATIONS and "replace" defaults to
    replacements unless the caller passes its own values. Every unicode
    item returned by _Parser.find_tokens() then has the sequence
    "&rsquo ;" replaced by a right single quotation mark; items of any
    other type are returned unchanged.

    Returns a new list with the post-processed items, in the same order.
    """
    kwargs.setdefault("abbreviations", ABBREVIATIONS)
    kwargs.setdefault("replace", replacements)
    tokenized = _Parser.find_tokens(self, tokens, **kwargs)
    restored = []
    for item in tokenized:
        if isinstance(item, unicode):
            # NOTE(review): presumably the base tokenizer splits "&rsquo;"
            # into "&rsquo ;" -- confirm against _Parser.find_tokens.
            item = item.replace("&rsquo ;", u"’")
        restored.append(item)
    return restored