def ids_to_tree(self, emb, postprocess=True):
    """Create a fake (flat) t-tree from token embeddings (IDs).

    The IDs are converted to token strings with `self.ids_to_strings`. The
    special tokens '<GO>', '<STOP>' and '<VOID>' are skipped; every other
    token is added to the tree as `NodeData(token, 'x')` through
    `tree.create_child(0, len(tree), ...)`.

    @param emb: source embeddings (token IDs)
    @param postprocess: postprocess the sentence? If set, tokens at the
        sentence start and after '.', '?', '!' are capitalized (only when
        `self.lowercase` is set), and the plural marker '<-s>' is merged
        into the preceding node. True by default.
    @return: the corresponding tree
    """
    tree = TreeData()

    for token in self.ids_to_strings(emb):
        # technical tokens never make it into the tree
        if token in ('<GO>', '<STOP>', '<VOID>'):
            continue

        if postprocess:
            # Casing -- only if set to lowercase. The parentheses matter:
            # `and` binds tighter than `or`, so without them the punctuation
            # check would trigger capitalization even with lowercasing off.
            # NOTE(review): `len(tree) == 1` presumably means that the tree
            # holds only its initial node so far -- confirm against TreeData.
            if self.lowercase and (
                    len(tree) == 1 or tree.nodes[-1].t_lemma in ('.', '?', '!')):
                # token[:1] (not token[0]) so that an empty token cannot raise
                token = token[:1].upper() + token[1:]

            # plural merging (if plural tokens come up)
            if token == '<-s>':
                prev_lemma = tree.nodes[-1].t_lemma
                if prev_lemma is None:
                    # nothing to attach the plural marker to -- drop it
                    continue
                # replace the preceding node by its plural form
                token = self._singular_to_plural(prev_lemma)
                tree.remove_node(len(tree) - 1)

        tree.create_child(0, len(tree), NodeData(token, 'x'))

    return tree