def wotan2universal(token, tag):
    """ Maps a WOTAN part-of-speech tag onto the universal tagset.
        Tags starting with "Adv" yield (token, ADV) directly; every other
        tag is first converted with wotan2penntreebank() and the result is
        handed on to penntreebank2universal().
        For example: bokkenrijders/N(soort,mv,neut) => bokkenrijders/NOUN
    """
    if not tag.startswith("Adv"):
        # General case: use the Penn Treebank tag as an intermediate step.
        penn = wotan2penntreebank(token, tag)
        return penntreebank2universal(*penn)
    # "Adv..." tags bypass the Penn Treebank conversion.
    return (token, ADV)
def find_tags(self, tokens, **kwargs):
    """ Delegates to _Parser.find_tags(), supplying a default "map" keyword
        argument chosen from the "tagset" keyword argument:
        - tagset PENN or None (also when absent): (token, tag) is passed
          through unchanged,
        - tagset UNIVERSAL: (token, tag) goes through penntreebank2universal().
        A "map" given by the caller is never overwritten.
    """
    tagset = kwargs.get("tagset")
    if tagset in (PENN, None):
        def keep(token, tag):
            # Identity mapping: keep the tag as it is.
            return (token, tag)
        kwargs.setdefault("map", keep)
    if tagset == UNIVERSAL:
        def to_universal(token, tag):
            return penntreebank2universal(token, tag)
        kwargs.setdefault("map", to_universal)
    return _Parser.find_tags(self, tokens, **kwargs)
def parole2universal(token, tag):
    """ Maps a Parole part-of-speech tag onto the universal tagset.
        A few tags are mapped directly ("CS" => CONJ, "DP" => DET, and the
        listed "P*" tags => PRON); every other tag is first converted with
        parole2penntreebank() and then with penntreebank2universal().
        For example: importantísimo/AQ => importantísimo/ADJ
    """
    if tag == "CS":
        universal = CONJ
    elif tag == "DP":
        universal = DET
    elif tag in ("P0", "PD", "PI", "PP", "PR", "PT", "PX"):
        universal = PRON
    else:
        # No direct mapping: use the Penn Treebank tag as an intermediate step.
        penn = parole2penntreebank(token, tag)
        return penntreebank2universal(*penn)
    return (token, universal)
def stts2universal(token, tag):
    """ Converts an STTS tag to a universal tag.
        For example: ohne/APPR => ohne/PREP

        Conjunction, particle and pronoun tags listed below are mapped
        directly to CONJ, PRT and PRON. Every other tag is first converted
        with stts2penntreebank() and then with penntreebank2universal().
    """
    if tag in ("KON", "KOUI", "KOUS", "KOKOM"):
        return (token, CONJ)
    if tag in ("PTKZU", "PTKNEG", "PTKVZ", "PTKANT"):
        return (token, PRT)
    # "PDS" and "PPOSS" are the official STTS names of the substituting
    # demonstrative and possessive pronouns. The original list spelled them
    # "PDF" and "PPOS", so the real tags never reached this branch while their
    # attributive siblings (PDAT, PPOSAT) did. The legacy spellings are kept
    # so that callers passing them still get PRON.
    if tag in ("PDS", "PDF", "PDAT", "PIS", "PIAT", "PIDAT",
               "PPER", "PPOSS", "PPOS", "PPOSAT"):
        return (token, PRON)
    if tag in ("PRELS", "PRELAT", "PRF", "PWS", "PWAT", "PWAV", "PAV"):
        return (token, PRON)
    return penntreebank2universal(*stts2penntreebank(token, tag))