Example No. 1
    def test_deps_getter(self):
        """Test enhanced dependencies."""
        # Create a path to the test CoNLLU file.
        data_filename = os.path.join(os.path.dirname(__file__), 'data',
                                     'enh_deps.conllu')

        # Read a test CoNLLU file.
        document = Document()
        reader = Conllu(files=data_filename)
        reader.process_document(document)

        # Exactly one bundle should be loaded.
        self.assertEqual(len(document.bundles), 1)

        # Obtain the dependency tree and check its sentence ID.
        root = document.bundles[0].get_tree()
        self.assertEqual(root.bundle.bundle_id, 'a-mf920901-001-p1s1A')

        # Check raw secondary dependencies for each node.
        nodes = root.descendants()
        self.assertEqual(nodes[0].raw_deps, '0:root|2:amod')
        self.assertEqual(nodes[1].raw_deps, '0:root')
        self.assertEqual(nodes[2].raw_deps, '0:root')
        self.assertEqual(nodes[3].raw_deps, '0:root')
        self.assertEqual(nodes[4].raw_deps, '1:amod')
        self.assertEqual(nodes[5].raw_deps, '5:conj')

        # Check deserialized dependencies.
        self.assertEqual(nodes[0].deps[0]['parent'], root)
        self.assertEqual(nodes[0].deps[0]['deprel'], 'root')
        self.assertEqual(nodes[5].deps[0]['parent'], nodes[4])
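
For context, `raw_deps` mirrors the DEPS column of the CoNLL-U file verbatim, while `deps` deserializes it into a list of dicts with 'parent' and 'deprel' keys, as the asserts above show. A minimal, self-contained sketch of the same round trip from an in-memory string (the two-token sentence is invented for illustration):

from io import StringIO
from udapi.block.read.conllu import Conllu

lines = [
    "# sent_id = s1",
    "\t".join("1 Hello hello INTJ _ _ 0 root 0:root _".split()),
    "\t".join("2 world world NOUN _ _ 1 vocative 1:vocative _".split()),
]
data = "\n".join(lines) + "\n\n"  # blank line terminates the sentence
root = Conllu(filehandle=StringIO(data)).read_tree()
for node in root.descendants:
    # Each deps item points at a real node, so it can be followed like parent.
    print(node.form, node.raw_deps,
          [(d['parent'].ord, d['deprel']) for d in node.deps])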
Example No. 3
def __init__(self, model):
    """Create the UDPipe tool object."""
    self.model = model
    path = require_file(model)
    self.tool = Model.load(path)
    if not self.tool:
        raise IOError("Cannot load model from file '%s'" % path)
    self.error = ProcessingError()
    self.conllu_reader = ConlluReader()
    self.tokenizer = self.tool.newTokenizer(Model.DEFAULT)
Example No. 5
def get_ud_analysis(analysis):
    """Parse a CoNLL-U string; return 0-based head indices and word forms."""
    tree = Conllu(filehandle=StringIO(analysis)).read_tree()
    words = [node.form for node in tree.descendants]
    heads = []
    for token in tree.descendants:
        if token.deprel == 'root':
            heads.append(-1)
        else:
            # Use the parent's 1-based ord rather than words.index(form),
            # which returns the wrong head when a form occurs twice.
            heads.append(token.parent.ord - 1)
    return heads, words
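
A hedged usage sketch for the helper above; the CoNLL-U string would normally come from a parser such as UDPipe, so this hand-written two-token input is purely illustrative:

conllu_str = "\n".join([
    "\t".join("1 dogs dog NOUN _ _ 2 nsubj _ _".split()),
    "\t".join("2 bark bark VERB _ _ 0 root _ _".split()),
]) + "\n\n"
heads, words = get_ud_analysis(conllu_str)
print(words)   # ['dogs', 'bark']
print(heads)   # [1, -1]: 'dogs' attaches to index 1 ('bark'); the root gets -1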
Example No. 6
    def execute(self):
        """Parse given scenario and execute it."""

        # Parse the given scenario from the command line.
        block_names, block_args = _parse_command_line_arguments(
            self.args.scenario)

        # Import blocks (classes) and construct block instances.
        blocks = _import_blocks(block_names, block_args)

        # Initialize blocks (process_start).
        for block in blocks:
            block.process_start()

        readers = []
        for block in blocks:
            try:
                block.finished  # pylint: disable=pointless-statement
                readers.append(block)
            except AttributeError:
                pass
        if not readers:
            logging.info('No reader specified, using read.Conllu')
            conllu_reader = Conllu()
            readers = [conllu_reader]
            blocks = readers + blocks

        # Apply blocks on the data.
        finished = False
        filenames_iterator = 0  # !!! ADDED !!!
        while not finished:
            document = Document()
            logging.info(" ---- ROUND ----")
            for block in blocks:
                filenames = getattr(block, 'filenames', [])  # !!! only readers have filenames
                if filenames_iterator < len(filenames):  # !!!
                    document.set_filename(filenames[filenames_iterator])  # ADDED
                    filenames_iterator += 1  # !!!
                logging.info("Executing block " + block.__class__.__name__)
                block.before_process_document(document)
                result = block.process_document(document)
                if isinstance(result, int):
                    init_cluster_id = result
                block.after_process_document(document)

            finished = True

            for reader in readers:
                finished = finished and reader.finished

        # 6. close blocks (process_end)
        for block in blocks:
            block.process_end()
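
The loop above relies on the block lifecycle contract: process_start once, then before_process_document / process_document / after_process_document per round, then process_end. A toy block honoring that contract, sketched against the udapi Block base class (the counting logic is invented for illustration):

from udapi.core.block import Block

class CountTokens(Block):
    """Toy block: count every token seen during the run."""

    def process_start(self):
        self.n_tokens = 0

    def process_tree(self, tree):
        # The default process_document walks the bundles and calls this
        # once per tree.
        self.n_tokens += len(tree.descendants)

    def process_end(self):
        print('tokens seen:', self.n_tokens)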
Example No. 7
def __call__(self,
             q_tokens_batch: List[List[str]]) -> List[Tuple[str, str]]:
    entity_rel_list = []
    for q_tokens in q_tokens_batch:
        q_str = '\n'.join(q_tokens)
        s = self.full_ud_model.process(q_str)
        tree = Conllu(filehandle=StringIO(s)).read_tree()
        fnd, detected_entity, detected_rel = self.find_entity(tree, q_tokens)
        if not fnd:
            fnd, detected_entity, detected_rel = self.find_entity_adj(tree)
        # Strip the leading Russian word "первый" ("first") from the entity.
        detected_entity = detected_entity.replace("первый ", '')
        entity_rel_list.append((detected_entity, detected_rel))
    return entity_rel_list
Example No. 8
def _get_groups_from_tree(sentence):
    groups = defaultdict(list)

    to_tree = UDPIPE.process(sentence)
    tree = Conllu(filehandle=StringIO(to_tree)).read_tree()
    nodes = tree.descendants

    for node in nodes:
        parent = node.parent
        parent_form = parent.form
        parent_tag = parent.upos
        _node = node.form
        if parent_tag in ['NOUN', 'PROPN', 'VERB']:
            groups[(parent_form, parent_tag)].append((_node, node.upos))
    return groups
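
A hedged usage sketch; `UDPIPE` must already hold a loaded model whose `process()` returns CoNLL-U text (an assumption carried over from the snippet), and the grouping shown in the comment is only indicative:

groups = _get_groups_from_tree("The quick fox jumped.")
# Indicatively: {('fox', 'NOUN'): [('The', 'DET'), ('quick', 'ADJ')],
#                ('jumped', 'VERB'): [('fox', 'NOUN'), ('.', 'PUNCT')]}
for (head_form, head_tag), children in groups.items():
    print(head_form, head_tag, '->', children)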
Example No. 9
    def execute(self):
        """Parse given scenario and execute it."""

        # Parse the given scenario from the command line.
        block_names, block_args = _parse_command_line_arguments(
            self.args.scenario)

        # Import blocks (classes) and construct block instances.
        blocks = _import_blocks(block_names, block_args)

        # Initialize blocks (process_start).
        for block in blocks:
            block.process_start()

        readers = []
        for block in blocks:
            try:
                block.finished  # pylint: disable=pointless-statement
                readers.append(block)
            except AttributeError:
                pass
        if not readers:
            logging.info('No reader specified, using read.Conllu')
            conllu_reader = Conllu()
            readers = [conllu_reader]
            blocks = readers + blocks

        # Apply blocks on the data.
        finished = False
        while not finished:
            document = Document()
            logging.info(" ---- ROUND ----")
            for block in blocks:
                logging.info("Executing block " + block.__class__.__name__)
                block.apply_on_document(document)

            finished = True

            for reader in readers:
                finished = finished and reader.finished

        # 6. close blocks (process_end)
        for block in blocks:
            block.process_end()
Example No. 10
def from_conllu_string(self, string):
    """Load a document from a conllu-formatted string."""
    reader = ConlluReader(filehandle=io.StringIO(string))
    reader.apply_on_document(self)
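
A minimal round trip with the method above, using udapi's Document class; the one-token input is invented for illustration:

from udapi.core.document import Document

conllu_text = "\t".join("1 Hi hi INTJ _ _ 0 root _ _".split()) + "\n\n"
doc = Document()
doc.from_conllu_string(conllu_text)
for bundle in doc.bundles:
    print(bundle.bundle_id, len(bundle.get_tree().descendants))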
Example No. 11
    def __call__(
        self, syntax_tree_batch: List[str],
        positions_batch: List[List[List[int]]]
    ) -> Tuple[List[str], List[List[str]], List[Dict[str, str]], List[Dict[str, str]]]:
        log.debug(f"positions of entity tokens {positions_batch}")
        query_nums_batch = []
        entities_dict_batch = []
        types_dict_batch = []
        questions_batch = []
        count = False
        for syntax_tree, positions in zip(syntax_tree_batch, positions_batch):
            log.debug(f"\n{syntax_tree}")
            tree = Conllu(filehandle=StringIO(syntax_tree)).read_tree()
            root = self.find_root(tree)
            tree_desc = tree.descendants
            unknown_node = ""
            if root:
                log.debug(f"syntax tree info, root: {root.form}")
                unknown_node, unknown_branch = self.find_branch_with_unknown(
                    root)
            positions = [num for position in positions for num in position]
            if unknown_node:
                log.debug(
                    f"syntax tree info, unknown node: {unknown_node.form}, unknown branch: {unknown_branch.form}"
                )
                log.debug(f"wh_leaf: {self.wh_leaf}")
                clause_node, clause_branch = self.find_clause_node(
                    root, unknown_branch)
                modifiers, clause_modifiers = self.find_modifiers_of_unknown(
                    unknown_node)
                log.debug(
                    f"modifiers: {[modifier.form for modifier in modifiers]}")
                if f"{tree_desc[0].form.lower()} {tree_desc[1].form.lower()}" in self.change_root_tokens:
                    new_root = root.children[0]
                else:
                    new_root = root
                root_desc = defaultdict(list)
                for node in new_root.children:
                    if node.deprel not in ["punct", "advmod", "cop", "mark"]:
                        if node == unknown_branch:
                            root_desc[node.deprel].append(node)
                        else:
                            if self.find_entities(node, positions, cut_clause=False) or \
                               (self.find_year_or_number(node) and node.deprel in ["obl", "nummod"]):
                                root_desc[node.deprel].append(node)

                if root.form.lower() == self.how_many or ("nsubj" in root_desc.keys() and \
                                        self.how_many in [nd.form.lower() for nd in root_desc["nsubj"]]):
                    count = True
                log.debug(f"root_desc {root_desc.keys()}")
                appos_token_nums = sorted(self.find_appos_tokens(root, []))
                appos_tokens = [
                    elem.form for elem in tree_desc
                    if elem.ord in appos_token_nums
                ]
                clause_token_nums = sorted(
                    self.find_clause_tokens(root, clause_node, []))
                clause_tokens = [
                    elem.form for elem in tree_desc
                    if elem.ord in clause_token_nums
                ]
                log.debug(f"appos tokens: {appos_tokens}")
                log.debug(f"clause_tokens: {clause_tokens}")
                self.root_entity = False
                if root.ord - 1 in positions:
                    self.root_entity = True
                query_nums, entities_dict, types_dict = self.build_query(
                    new_root,
                    unknown_branch,
                    root_desc,
                    unknown_node,
                    modifiers,
                    clause_modifiers,
                    clause_node,
                    positions,
                    count=count)

                if self.lang == "rus":
                    # Keep tokens outside both the appositive and the clause spans.
                    question = ' '.join([node.form for node in tree.descendants
                                         if node.ord not in appos_token_nums
                                         and node.ord not in clause_token_nums])
                else:
                    question = ' '.join(
                        [node.form for node in tree.descendants])
                log.debug(f"sanitized question: {question}")
                query_nums_batch.append(query_nums)
                entities_dict_batch.append(entities_dict)
                types_dict_batch.append(types_dict)
                questions_batch.append(question)
        return questions_batch, query_nums_batch, entities_dict_batch, types_dict_batch
Example No. 12
class UDPipe:
    """Wrapper for UDPipe (more pythonic than ufal.udpipe)."""
    def __init__(self, model):
        """Create the UDPipe tool object."""
        self.model = model
        path = require_file(model)
        self.tool = Model.load(path)
        if not self.tool:
            raise IOError("Cannot load model from file '%s'" % path)
        self.error = ProcessingError()
        self.conllu_reader = ConlluReader()
        self.tokenizer = self.tool.newTokenizer(Model.DEFAULT)

    def tag_parse_tree(self, root):
        """Tag (+lemmatize, fill FEATS) and parse a tree (already tokenized)."""
        descendants = root.descendants
        if not descendants:
            return
        pipeline = Pipeline(self.tool, 'horizontal', Pipeline.DEFAULT,
                            Pipeline.DEFAULT, 'conllu')
        in_data = " ".join([n.form for n in descendants])
        out_data = pipeline.process(in_data, self.error)
        if self.error.occurred():
            raise IOError("UDPipe error " + self.error.message)
        self.conllu_reader.files.filehandle = io.StringIO(out_data)
        parsed_root = self.conllu_reader.read_tree()
        nodes = [root] + descendants
        for parsed_node in parsed_root.descendants:
            node = nodes[parsed_node.ord]
            node.parent = nodes[parsed_node.parent.ord]
            for attr in 'upos xpos lemma feats deprel'.split():
                setattr(node, attr, getattr(parsed_node, attr))

        # TODO: benchmark which solution is the fastest one. E.g. we could also do
        # for node, parsed_node in zip(root.descendants, parsed_root.descendants):
        #    parsed_node.misc = node.misc
        # pylint: disable=protected-access
        #root._children, root._descendants = parsed_root._children, parsed_root._descendants

    def tokenize_tag_parse_tree(self,
                                root,
                                resegment=False,
                                tag=True,
                                parse=True):
        """Tokenize, tag (+lemmatize, fill FEATS) and parse the text stored in `root.text`.

        If resegment=True, the returned list of Udapi trees may contain multiple trees.
        """
        if root.children:
            raise ValueError(
                'Tree already contained nodes before tokenization')

        # Tokenize and segment the text (segmentation cannot be turned off in older UDPipe versions).
        self.tokenizer.setText(root.text)
        is_another = True
        u_sentences = []
        while is_another:
            u_sentence = Sentence()
            is_another = self.tokenizer.nextSentence(u_sentence)
            if is_another:
                u_sentences.append(u_sentence)

        # If resegmentation was not required, we need to join the segments.
        if not resegment and len(u_sentences) > 1:
            first_sent = u_sentences[0]
            n_words = first_sent.words.size() - 1
            for other_sent in u_sentences[1:]:
                other_words = other_sent.words.size() - 1
                for i in range(1, other_words + 1):
                    u_w = other_sent.words[i]
                    n_words += 1
                    u_w.id = n_words
                    first_sent.words.append(u_w)
            u_sentences = [first_sent]

        # tagging and parsing
        if tag:
            for u_sentence in u_sentences:
                self.tool.tag(u_sentence, Model.DEFAULT)
                if parse:
                    self.tool.parse(u_sentence, Model.DEFAULT)
        elif parse:
            raise ValueError(
                'Combination parse=True tag=False is not allowed.')

        # converting UDPipe nodes to Udapi nodes
        new_root = root
        trees = []
        for u_sentence in u_sentences:
            if not new_root:
                new_root = Root()
            heads, nodes = [], [new_root]
            u_words = u_sentence.words
            for i in range(1, u_words.size()):
                u_w = u_words[i]
                node = new_root.create_child(
                    form=u_w.form,
                    lemma=u_w.lemma,
                    upos=u_w.upostag,
                    xpos=u_w.xpostag,
                    feats=u_w.feats,
                    deprel=u_w.deprel,
                    misc=u_w.misc,
                )
                if parse:
                    heads.append(u_w.head)
                    nodes.append(node)
            if parse:
                for node in nodes[1:]:
                    head = heads.pop(0)
                    node.parent = nodes[head]
            trees.append(new_root)
            new_root = None
        return trees
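
A hedged end-to-end sketch of the wrapper above; the model filename is illustrative, not a real bundled path, and `require_file` must be able to locate it:

from udapi.core.root import Root

udpipe = UDPipe('english-ewt-ud-2.5-191206.udpipe')  # illustrative model path
root = Root()
root.text = "Hello world. Second sentence."
trees = udpipe.tokenize_tag_parse_tree(root, resegment=True)
for tree in trees:
    print([(n.form, n.upos, n.parent.ord, n.deprel) for n in tree.descendants])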
Example No. 13
def load_conllu(self, filename):
    """Load a document from a conllu-formatted file."""
    reader = ConlluReader(files=filename)
    reader.process_document(self)
Example No. 15
def load_conllu(self, filename=None):
    """Load a document from a conllu-formatted file."""
    reader = ConlluReader(files=filename)
    reader.apply_on_document(self)
Example No. 16
    def __call__(
        self, syntax_tree_batch: List[str],
        positions_batch: List[List[List[int]]]
    ) -> Tuple[List[str], List[List[str]], List[Dict[str, str]], List[Dict[str, str]]]:
        log.debug(f"positions of entity tokens {positions_batch}")
        query_nums_batch = []
        entities_dict_batch = []
        types_dict_batch = []
        questions_batch = []
        for syntax_tree, positions in zip(syntax_tree_batch, positions_batch):
            log.debug(f"\n{syntax_tree}")
            tree = Conllu(filehandle=StringIO(syntax_tree)).read_tree()
            root = self.find_root(tree)
            tree_desc = tree.descendants
            log.debug(f"syntax tree info, root: {root.form}")
            unknown_node, unknown_branch = self.find_branch_with_unknown(root)
            positions = [num for position in positions for num in position]
            if unknown_node:
                log.debug(
                    f"syntax tree info, unknown node: {unknown_node.form}, unknown branch: {unknown_branch.form}"
                )
                clause_node, clause_branch = self.find_clause_node(
                    root, unknown_branch)
                modifiers, clause_modifiers = self.find_modifiers_of_unknown(
                    unknown_node)
                log.debug(
                    f"modifiers: {[modifier.form for modifier in modifiers]}")
                if f"{tree_desc[0].form.lower()} {tree_desc[1].form.lower()}" in [
                        "каким был", "какой была"
                ]:
                    new_root = root.children[0]
                else:
                    new_root = root
                root_desc = defaultdict(list)
                for node in new_root.children:
                    if node.deprel not in ["punct", "advmod", "cop"]:
                        if node == unknown_branch:
                            root_desc[node.deprel].append(node)
                        else:
                            if self.find_entities(node,
                                                  positions,
                                                  cut_clause=False):
                                root_desc[node.deprel].append(node)

                appos_token_nums = sorted(self.find_appos_tokens(root, []))
                appos_tokens = [
                    elem.form for elem in tree_desc
                    if elem.ord in appos_token_nums
                ]
                clause_token_nums = sorted(
                    self.find_clause_tokens(root, clause_node, []))
                clause_tokens = [
                    elem.form for elem in tree_desc
                    if elem.ord in clause_token_nums
                ]
                log.debug(f"appos tokens: {appos_tokens}")
                log.debug(f"clause tokens: {clause_tokens}")
                query_nums, entities_dict, types_dict = self.build_query(
                    new_root, unknown_branch, root_desc, unknown_node,
                    modifiers, clause_modifiers, positions)

                # Keep tokens outside both the appositive and the clause spans.
                question = ' '.join([
                    node.form for node in tree.descendants
                    if (node.ord not in appos_token_nums
                        and node.ord not in clause_token_nums)
                ])
                log.debug(f"sanitized question: {question}")
                query_nums_batch.append(query_nums)
                entities_dict_batch.append(entities_dict)
                types_dict_batch.append(types_dict)
                questions_batch.append(question)
        return questions_batch, query_nums_batch, entities_dict_batch, types_dict_batch
Example No. 17
class UDPipe:
    """Wrapper for UDPipe (more pythonic than ufal.udpipe)."""

    def __init__(self, model):
        """Create the UDPipe tool object."""
        self.model = model
        path = require_file(model)
        self.tool = Model.load(path)
        if not self.tool:
            raise IOError("Cannot load model from file '%s'" % path)
        self.error = ProcessingError()
        self.conllu_reader = ConlluReader()
        self.tokenizer = self.tool.newTokenizer(Model.DEFAULT)

    def tag_parse_tree(self, root):
        """Tag (+lemmatize, fill FEATS) and parse a tree (already tokenized)."""
        pipeline = Pipeline(self.tool, 'horizontal', Pipeline.DEFAULT, Pipeline.DEFAULT, 'conllu')
        in_data = " ".join([n.form for n in root.descendants])
        out_data = pipeline.process(in_data, self.error)
        if self.error.occurred():
            raise IOError("UDPipe error " + self.error.message)
        self.conllu_reader.files.filehandle = io.StringIO(out_data)
        parsed_root = self.conllu_reader.read_tree()
        nodes = [root] + root.descendants
        for parsed_node in parsed_root.descendants:
            node = nodes[parsed_node.ord]
            node.parent = nodes[parsed_node.parent.ord]
            for attr in 'upos xpos lemma feats'.split():
                setattr(node, attr, getattr(parsed_node, attr))

        # TODO: benchmark which solution is the fastest one. E.g. we could also do
        # for node, parsed_node in zip(root.descendants, parsed_root.descendants):
        #    parsed_node.misc = node.misc
        # pylint: disable=protected-access
        #root._children, root._descendants = parsed_root._children, parsed_root._descendants

    def tokenize_tag_parse_tree(self, root):
        """Tokenize, tag (+lemmatize, fill FEATS) and parse the text stored in `root.text`."""
        if root.children:
            raise ValueError('Tree already contained nodes before tokenization')

        # tokenization (I cannot turn off segmenter, so I need to join the segments)
        self.tokenizer.setText(root.text)
        u_sentence = Sentence()
        is_another = self.tokenizer.nextSentence(u_sentence)
        u_words = u_sentence.words
        n_words = u_words.size() - 1
        if is_another:
            u_sent_cont = Sentence()
            while self.tokenizer.nextSentence(u_sent_cont):
                n_cont = u_sent_cont.words.size() - 1
                for i in range(1, n_cont + 1):
                    u_w = u_sent_cont.words[i]
                    n_words += 1
                    u_w.id = n_words
                    u_words.append(u_w)

        # tagging and parsing
        self.tool.tag(u_sentence, Model.DEFAULT)
        self.tool.parse(u_sentence, Model.DEFAULT)

        # converting UDPipe nodes to Udapi nodes
        heads, nodes = [], [root]
        for i in range(1, u_words.size()):
            u_w = u_words[i]
            node = root.create_child(
                form=u_w.form, lemma=u_w.lemma, upos=u_w.upostag,
                xpos=u_w.xpostag, feats=u_w.feats, deprel=u_w.deprel,
            )
            node.misc = u_w.misc
            heads.append(u_w.head)
            nodes.append(node)
        for node in nodes[1:]:
            head = heads.pop(0)
            node.parent = nodes[head]
Example No. 18
class UDPipe:
    """Wrapper for UDPipe (more pythonic than ufal.udpipe)."""

    def __init__(self, model):
        """Create the UDPipe tool object."""
        self.model = model
        path = require_file(model)
        self.tool = Model.load(path)
        if not self.tool:
            raise IOError("Cannot load model from file '%s'" % path)
        self.error = ProcessingError()
        self.conllu_reader = ConlluReader()
        self.tokenizer = self.tool.newTokenizer(Model.DEFAULT)

    def tag_parse_tree(self, root):
        """Tag (+lemmatize, fill FEATS) and parse a tree (already tokenized)."""
        descendants = root.descendants
        if not descendants:
            return
        pipeline = Pipeline(self.tool, 'horizontal', Pipeline.DEFAULT, Pipeline.DEFAULT, 'conllu')
        in_data = " ".join([n.form for n in descendants])
        out_data = pipeline.process(in_data, self.error)
        if self.error.occurred():
            raise IOError("UDPipe error " + self.error.message)
        self.conllu_reader.files.filehandle = io.StringIO(out_data)
        parsed_root = self.conllu_reader.read_tree()
        nodes = [root] + descendants
        for parsed_node in parsed_root.descendants:
            node = nodes[parsed_node.ord]
            node.parent = nodes[parsed_node.parent.ord]
            for attr in 'upos xpos lemma feats'.split():
                setattr(node, attr, getattr(parsed_node, attr))

        # TODO: benchmark which solution is the fastest one. E.g. we could also do
        # for node, parsed_node in zip(root.descendants, parsed_root.descendants):
        #    parsed_node.misc = node.misc
        # pylint: disable=protected-access
        #root._children, root._descendants = parsed_root._children, parsed_root._descendants

    def tokenize_tag_parse_tree(self, root):
        """Tokenize, tag (+lemmatize, fill FEATS) and parse the text stored in `root.text`."""
        if root.children:
            raise ValueError('Tree already contained nodes before tokenization')

        # tokenization (I cannot turn off segmenter, so I need to join the segments)
        self.tokenizer.setText(root.text)
        u_sentence = Sentence()
        is_another = self.tokenizer.nextSentence(u_sentence)
        u_words = u_sentence.words
        n_words = u_words.size() - 1
        if is_another:
            u_sent_cont = Sentence()
            while self.tokenizer.nextSentence(u_sent_cont):
                n_cont = u_sent_cont.words.size() - 1
                for i in range(1, n_cont + 1):
                    u_w = u_sent_cont.words[i]
                    n_words += 1
                    u_w.id = n_words
                    u_words.append(u_w)

        # tagging and parsing
        self.tool.tag(u_sentence, Model.DEFAULT)
        self.tool.parse(u_sentence, Model.DEFAULT)

        # converting UDPipe nodes to Udapi nodes
        heads, nodes = [], [root]
        for i in range(1, u_words.size()):
            u_w = u_words[i]
            node = root.create_child(
                form=u_w.form, lemma=u_w.lemma, upos=u_w.upostag,
                xpos=u_w.xpostag, feats=u_w.feats, deprel=u_w.deprel,
            )
            node.misc = u_w.misc
            heads.append(u_w.head)
            nodes.append(node)
        for node in nodes[1:]:
            head = heads.pop(0)
            node.parent = nodes[head]