def prettify(
    self, tokens: List[str], tags: List[str], lemmas: List[str]
) -> Union[List[str], str]:
    """Render tagger and lemmatizer output as one formatted row per token.

    Tokens, tags and lemmas are walked in lockstep with ``zip``, so the
    output has as many rows as the shortest of the three lists. Each tag is
    split into a part-of-speech and a feature part by
    ``make_pos_and_tag(tag, sep=",")``, and the row is produced by
    ``self.format_string.format(index, word, pos, features, lemma)`` with a
    1-based index. The column layout is decided by ``self.format_string``,
    which is configured outside this method.

    Args:
        tokens: tokenized source sentence.
        tags: tags produced by the tagger, one per token.
        lemmas: lemmas produced by the lemmatizer, one per token.

    Returns:
        The list of rows when ``self.return_string`` is falsy; otherwise a
        single string ``self.begin + self.sep.join(rows) + self.end``.

    Examples:
        Illustrative output, assuming a CoNLL-U-style ``format_string``
        (columns are shown separated by spaces here):

        >>> sent = "John really likes pizza .".split()
        >>> tags = ["PROPN,Number=Sing", "ADV",
        ...         "VERB,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
        ...         "NOUN,Number=Sing", "PUNCT"]
        >>> lemmas = "John really like pizza .".split()
        >>> prettifier = LemmatizedOutputPrettifier()
        >>> prettifier.prettify(sent, tags, lemmas)
            1 John John PROPN _ Number=Sing _ _ _ _
            2 really really ADV _ _ _ _ _ _
            3 likes like VERB _ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin _ _ _ _
            4 pizza pizza NOUN _ Number=Sing _ _ _ _
            5 . . PUNCT _ _ _ _ _ _
    """
    rows = []
    for number, (token, raw_tag, lemma) in enumerate(zip(tokens, tags, lemmas), start=1):
        # The combined tag is "POS,features"; split it into its two parts.
        pos, features = make_pos_and_tag(raw_tag, sep=",")
        rows.append(self.format_string.format(number, token, pos, features, lemma))
    if not self.return_string:
        return rows
    return self.begin + self.sep.join(rows) + self.end
def prettify(sent, tags, return_string=True, begin="", end="", sep="\n"):
    """Format tagger output as numbered, tab-separated rows.

    Each row holds four tab-separated fields: the 1-based word index, the
    word, and the two values returned by ``make_pos_and_tag(tag)``.

    Args:
        sent: the source sentence. A ``str`` is tokenized here into runs of
            word characters and the punctuation marks ``,`` and ``.``
            (whitespace-only pieces are dropped); anything else is used
            as an already tokenized sequence of words.
        tags: tags produced by the tagger, one per word. Words and tags are
            paired with ``zip``, so surplus items on either side are
            silently ignored.
        return_string: if true, join the rows into a single string;
            otherwise return the list of rows.
        begin: text placed before the joined rows (string mode only).
        end: text placed after the joined rows (string mode only).
        sep: separator inserted between rows (string mode only).

    Returns:
        A list of row strings, or ``begin + sep.join(rows) + end`` when
        ``return_string`` is true.

    Examples:
        >>> sent = "John likes, really likes pizza"
        >>> tags = ["NNP", "VBZ", "PUNCT", "RB", "VBZ", "NN"]
        >>> prettify(sent, tags)  # six rows: "1 John ...", "2 likes ...", ...
    """
    if isinstance(sent, str):
        # Raw string: "\w" in a plain literal is an invalid escape sequence
        # (a SyntaxWarning on recent Pythons). The capturing group makes
        # re.split keep the matched tokens alongside the text between them.
        words = [piece for piece in re.split(r"(\w+|[,.])", sent) if piece.strip() != ""]
    else:
        words = sent
    answer = [
        "{}\t{}\t{}\t{}".format(index, word, *make_pos_and_tag(tag))
        for index, (word, tag) in enumerate(zip(words, tags), start=1)
    ]
    if return_string:
        answer = begin + sep.join(answer) + end
    return answer
def prettify(
    sent: Union[str, List[str]],
    tags: List[str],
    return_string: bool = True,
    begin: str = "",
    end: str = "",
    sep: str = "\n",
) -> Union[List[str], str]:
    """Prettifies output of morphological tagger.

    Each word analysis is a row of four tab-separated fields: the 1-based
    word index, the word, and the two values returned by
    ``make_pos_and_tag(tag)``.

    Args:
        sent: source sentence (either tokenized or not). A raw string is
            split into runs of word characters and the punctuation marks
            ``,`` and ``.``; whitespace-only pieces are discarded.
        tags: list of tags, the output of a tagger. Words and tags are
            paired with ``zip``, so surplus items on either side are
            silently ignored.
        return_string: whether to return a single string (true) or a list
            of strings (false).
        begin: a string to prepend to the joined output.
        end: a string to append to the joined output.
        sep: separator between word analyses.

    Returns:
        The prettified output of the tagger: a list of rows, or
        ``begin + sep.join(rows) + end`` when ``return_string`` is true.

    Examples:
        >>> sent = "John likes, really likes pizza"
        >>> tags = ["NNP", "VBZ", "PUNCT", "RB", "VBZ", "NN"]
        >>> prettify(sent, tags)  # six rows: "1 John ...", "2 likes ...", ...
    """
    if isinstance(sent, str):
        # Raw string: "\w" in a plain literal is an invalid escape sequence
        # (a SyntaxWarning on recent Pythons). The capturing group makes
        # re.split keep the matched tokens alongside the text between them.
        words = [piece for piece in re.split(r"(\w+|[,.])", sent) if piece.strip() != ""]
    else:
        words = sent
    answer = [
        "{}\t{}\t{}\t{}".format(index, word, *make_pos_and_tag(tag))
        for index, (word, tag) in enumerate(zip(words, tags), start=1)
    ]
    if return_string:
        answer = begin + sep.join(answer) + end
    return answer
def _make_tag_trie(self):
    """Build a trie over the tags in ``self._t2i`` and return ``self``.

    The trie is stored in three attributes that are reset on every call:

    * ``self._nodes`` -- list of nodes; each node maps a feature key to a
      dict from feature value to the index of the child node.
    * ``self._start_nodes_for_pos`` -- maps a part-of-speech to the index of
      the node where the paths for that part-of-speech begin.
    * ``self._data`` -- parallel to ``self._nodes``; holds the tag code at
      the node where a tag's path ends and ``None`` elsewhere.

    Index 0 is allocated up front; every start node and child node created
    here receives an index of 1 or greater.
    """
    self._nodes = trie = [defaultdict(dict)]
    self._start_nodes_for_pos = pos_roots = dict()
    self._data = payload = [None]

    def grow():
        # Append a blank node together with its empty data slot and
        # return the index of the new node.
        trie.append(defaultdict(dict))
        payload.append(None)
        return len(trie) - 1

    for full_tag, code in self._t2i.items():
        # Presumably yields the POS plus an ordered iterable of
        # (feature, value) pairs -- it is consumed as such below.
        pos, features = make_pos_and_tag(full_tag, sep=",", return_mode="sorted_items")
        if pos not in pos_roots:
            pos_roots[pos] = grow()
        node = pos_roots[pos]
        for key, value in features:
            # Indexing the defaultdict creates the per-key dict on demand.
            branches = trie[node][key]
            if value not in branches:
                branches[value] = grow()
            node = branches[value]
        # The node reached after consuming all features identifies the tag.
        payload[node] = code
    return self
def prettify(self, tokens: List[str], tags: List[str]) -> Union[List[str], str]:
    """Render tagger output as one formatted row per token.

    Tokens and tags are paired with ``zip`` (surplus items on either side
    are ignored). Every row is built by
    ``self.format_string.format(index, word, *make_pos_and_tag(tag))`` with
    a 1-based index; the column layout therefore depends on
    ``self.format_string``, which is configured outside this method.

    Args:
        tokens: tokenized source sentence.
        tags: list of tags, the output of a tagger.

    Returns:
        The list of rows when ``self.return_string`` is falsy; otherwise the
        single string ``self.begin + self.sep.join(rows) + self.end``.

    Examples:
        Illustrative output (columns shown separated by spaces):

        >>> sent = "John really likes pizza .".split()
        >>> tags = ["PROPN,Number=Sing", "ADV",
        ...         "VERB,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
        ...         "NOUN,Number=Sing", "PUNCT"]
        >>> prettifier = TagOutputPrettifier(mode='basic')
        >>> prettifier.prettify(sent, tags)
            1 John PROPN Number=Sing
            2 really ADV _
            3 likes VERB Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin
            4 pizza NOUN Number=Sing
            5 . PUNCT _
        >>> prettifier = TagOutputPrettifier(mode='ud')
        >>> prettifier.prettify(sent, tags)
            1 John _ PROPN _ Number=Sing _ _ _ _
            2 really _ ADV _ _ _ _ _ _
            3 likes _ VERB _ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin _ _ _ _
            4 pizza _ NOUN _ Number=Sing _ _ _ _
            5 . _ PUNCT _ _ _ _ _ _
    """
    rows = [
        self.format_string.format(number, token, *make_pos_and_tag(raw_tag))
        for number, (token, raw_tag) in enumerate(zip(tokens, tags), start=1)
    ]
    if not self.return_string:
        return rows
    return self.begin + self.sep.join(rows) + self.end
def prettify(self, tokens: List[str], tags: List[str]) -> Union[List[str], str]:
    """Turn parallel token and tag lists into formatted analysis rows.

    For each (token, tag) pair produced by ``zip`` the method fills
    ``self.format_string`` with the 1-based position, the token and the
    values unpacked from ``make_pos_and_tag(tag)``. The layout of a row is
    whatever ``self.format_string`` prescribes; it is set outside this
    method.

    Args:
        tokens: tokenized source sentence.
        tags: list of tags, the output of a tagger.

    Returns:
        A list of rows if ``self.return_string`` is falsy, else the rows
        joined with ``self.sep`` and wrapped in ``self.begin`` and
        ``self.end``.

    Examples:
        Illustrative output (columns shown separated by spaces):

        >>> sent = "John really likes pizza .".split()
        >>> tags = ["PROPN,Number=Sing", "ADV",
        ...         "VERB,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
        ...         "NOUN,Number=Sing", "PUNCT"]
        >>> prettifier = TagOutputPrettifier(mode='basic')
        >>> prettifier.prettify(sent, tags)
            1 John PROPN Number=Sing
            2 really ADV _
            3 likes VERB Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin
            4 pizza NOUN Number=Sing
            5 . PUNCT _
        >>> prettifier = TagOutputPrettifier(mode='ud')
        >>> prettifier.prettify(sent, tags)
            1 John _ PROPN _ Number=Sing _ _ _ _
            2 really _ ADV _ _ _ _ _ _
            3 likes _ VERB _ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin _ _ _ _
            4 pizza _ NOUN _ Number=Sing _ _ _ _
            5 . _ PUNCT _ _ _ _ _ _
    """
    lines = []
    position = 0
    for token, raw_tag in zip(tokens, tags):
        position += 1
        pos_and_features = make_pos_and_tag(raw_tag)
        lines.append(self.format_string.format(position, token, *pos_and_features))
    if self.return_string:
        return self.begin + self.sep.join(lines) + self.end
    return lines