Пример #1
0
    def ids_to_tree(self, emb, postprocess=True):
        """Create a fake (flat) t-tree from token embeddings (IDs).

        @param emb: source embeddings (token IDs)
        @param postprocess: postprocess the sentence (capitalize sentence start, merge plural \
            markers)? True by default.
        @return: the corresponding tree
        """

        tree = TreeData()
        tokens = self.ids_to_strings(emb)

        for token in tokens:
            if token in ['<GO>', '<STOP>', '<VOID>']:
                continue
            if postprocess:
                # casing (only if set to lowercase)
                if self.lowercase and len(tree) == 1 or tree.nodes[-1].t_lemma in ['.', '?', '!']:
                    token = token[0].upper() + token[1:]
                # plural merging (if plural tokens come up)
                if token == '<-s>' and tree.nodes[-1].t_lemma is not None:
                    token = self._singular_to_plural(tree.nodes[-1].t_lemma)
                    tree.remove_node(len(tree) - 1)
                elif token == '<-s>':
                    continue

            tree.create_child(0, len(tree), NodeData(token, 'x'))

        return tree
Пример #2
0
    def ids_to_tree(self, emb, postprocess=True):
        """Create a fake (flat) t-tree from token embeddings (IDs).

        @param emb: source embeddings (token IDs)
        @param postprocess: postprocess the sentence (capitalize sentence start, merge plural \
            markers)? True by default.
        @return: the corresponding tree
        """

        tree = TreeData()
        tokens = self.ids_to_strings(emb)

        for token in tokens:
            if token in ['<GO>', '<STOP>', '<VOID>']:
                continue
            if postprocess:
                # casing (only if set to lowercase)
                if self.lowercase and len(tree) == 1 or tree.nodes[-1].t_lemma in ['.', '?', '!']:
                    token = token[0].upper() + token[1:]
                # plural merging (if plural tokens come up)
                if token == '<-s>' and tree.nodes[-1].t_lemma is not None:
                    token = self._singular_to_plural(tree.nodes[-1].t_lemma)
                    tree.remove_node(len(tree) - 1)
                elif token == '<-s>':
                    continue

            tree.create_child(0, len(tree), NodeData(token, 'x'))

        return tree