Пример #1
0
    def train_from_file(self, conll_file, verbose=False):
        """
        Train MaltParser from a CoNLL-formatted file.

        When ``conll_file`` is a ``ZipFilePathPointer`` its content is first
        copied to a temporary file inside ``self.working_dir``; training then
        runs on that copy and the copy is deleted afterwards.

        :param conll_file: filename of the training input data, or a
            ``ZipFilePathPointer`` to it
        :type conll_file: str or ZipFilePathPointer
        :param verbose: passed through to ``self._execute``
        :type verbose: bool
        :raises Exception: if the MaltParser command exits with a
            non-zero code
        """

        # If conll_file is a ZipFilePathPointer, MaltParser cannot read it
        # directly, so materialise it as a plain file first.
        if isinstance(conll_file, ZipFilePathPointer):
            import os  # function-scope import: this block did not use os before

            input_file = tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
                                                     dir=self.working_dir,
                                                     mode='w',
                                                     delete=False)
            try:
                # Leaving this ``with`` closes (and therefore flushes) the
                # copy *before* MaltParser is asked to read it.
                with input_file:
                    with conll_file.open() as conll_input_file:
                        conll_str = conll_input_file.read()
                        # NOTE(review): if the stream yields bytes on
                        # Python 3, text_type() would write its repr --
                        # confirm the pointer is opened as text.
                        input_file.write(text_type(conll_str))
                return self.train_from_file(input_file.name, verbose=verbose)
            finally:
                # delete=False means nobody else cleans this up.
                os.remove(input_file.name)

        # Generate command to run maltparser.
        cmd = self.generate_malt_command(conll_file, mode="learn")
        ret = self._execute(cmd, verbose)
        if ret != 0:
            raise Exception("MaltParser training (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))
        self._trained = True
Пример #2
0
    def parse_tagged_sents(self, sentences, verbose=False, top_relation_label='null'):
        """
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        This is a generator: nothing (including the "trained" check) runs
        until the result is first iterated.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :param verbose: passed through to ``self._execute``
        :type verbose: bool
        :param top_relation_label: passed through to ``DependencyGraph``
        :type top_relation_label: str
        :return: iter(iter(``DependencyGraph``)) the dependency graph
            representation of each sentence
        :raises Exception: if the parser has not been trained, or if the
            MaltParser command exits with a non-zero code
        """
        if not self._trained:
            raise Exception("Parser has not been trained. Call train() first.")

        input_path = output_path = None
        try:
            # Convert list of sentences to CONLL format. The file is closed
            # on leaving the ``with`` so MaltParser sees the full content.
            with tempfile.NamedTemporaryFile(prefix='malt_input.conll.',
                                             dir=self.working_dir, mode='w',
                                             delete=False) as input_file:
                input_path = input_file.name
                for line in taggedsents_to_conll(sentences):
                    input_file.write(text_type(line))

            # Only reserve a name for the output; close our handle so that
            # MaltParser is the sole writer of that file.
            with tempfile.NamedTemporaryFile(prefix='malt_output.conll.',
                                             dir=self.working_dir, mode='w',
                                             delete=False) as output_file:
                output_path = output_file.name

            # Generate command to run maltparser.
            cmd = self.generate_malt_command(input_path, output_path,
                                             mode="parse")

            # This is a maltparser quirk, it needs to be run
            # where the model file is. otherwise it goes into an awkward
            # missing .jars or strange -w working_dir problem.
            _current_path = os.getcwd()  # Remembers the current path.
            try:
                try:  # Best effort: change to modelfile path.
                    os.chdir(os.path.split(self.model)[0])
                except (OSError, TypeError):
                    # e.g. the model is a bare filename with no directory
                    # part; run from the current directory instead.
                    pass
                ret = self._execute(cmd, verbose)  # Run command.
            finally:
                # Restore the caller's cwd even if execution raised.
                os.chdir(_current_path)

            if ret != 0:
                raise Exception("MaltParser parsing (%s) failed with exit "
                                "code %d" % (' '.join(cmd), ret))

            # Must return iter(iter(Tree))
            with open(output_path) as infile:
                tree_strs = infile.read().split('\n\n')
            for tree_str in tree_strs:
                # Splitting on blank lines leaves an empty trailing chunk
                # when the output ends with one; skip it rather than yield
                # a graph built from no data.
                if not tree_str.strip():
                    continue
                yield (iter([DependencyGraph(tree_str, top_relation_label=top_relation_label)]))
        finally:
            # Runs on normal exhaustion, on error, and when the generator is
            # closed before being fully consumed.
            for path in (input_path, output_path):
                if path is None:
                    continue
                try:
                    os.remove(path)
                except OSError:
                    pass
Пример #3
0
    def parse_tagged_sents(self, sentences, verbose=False, top_relation_label='null'):
        """
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        This is a generator: nothing (including the "trained" check) runs
        until the result is first iterated.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :param verbose: passed through to ``self._execute``
        :type verbose: bool
        :param top_relation_label: passed through to ``DependencyGraph``
        :type top_relation_label: str
        :return: iter(iter(``DependencyGraph``)) the dependency graph
            representation of each sentence
        :raises Exception: if the parser has not been trained, or if the
            MaltParser command exits with a non-zero code
        """
        if not self._trained:
            raise Exception("Parser has not been trained. Call train() first.")

        input_path = output_path = None
        try:
            # Convert list of sentences to CONLL format. The file is closed
            # on leaving the ``with`` so MaltParser sees the full content.
            with tempfile.NamedTemporaryFile(prefix='malt_input.conll.',
                                             dir=self.working_dir, mode='w',
                                             delete=False) as input_file:
                input_path = input_file.name
                for line in taggedsents_to_conll(sentences):
                    input_file.write(text_type(line))

            # Only reserve a name for the output; close our handle so that
            # MaltParser is the sole writer of that file.
            with tempfile.NamedTemporaryFile(prefix='malt_output.conll.',
                                             dir=self.working_dir, mode='w',
                                             delete=False) as output_file:
                output_path = output_file.name

            # Generate command to run maltparser.
            cmd = self.generate_malt_command(input_path, output_path,
                                             mode="parse")

            # This is a maltparser quirk, it needs to be run
            # where the model file is. otherwise it goes into an awkward
            # missing .jars or strange -w working_dir problem.
            _current_path = os.getcwd()  # Remembers the current path.
            try:
                try:  # Best effort: change to modelfile path.
                    os.chdir(os.path.split(self.model)[0])
                except (OSError, TypeError):
                    # e.g. the model is a bare filename with no directory
                    # part; run from the current directory instead.
                    pass
                ret = self._execute(cmd, verbose)  # Run command.
            finally:
                # Restore the caller's cwd even if execution raised.
                os.chdir(_current_path)

            if ret != 0:
                raise Exception("MaltParser parsing (%s) failed with exit "
                                "code %d" % (' '.join(cmd), ret))

            # Must return iter(iter(Tree))
            with open(output_path) as infile:
                tree_strs = infile.read().split('\n\n')
            for tree_str in tree_strs:
                # Splitting on blank lines leaves an empty trailing chunk
                # when the output ends with one; skip it rather than yield
                # a graph built from no data.
                if not tree_str.strip():
                    continue
                yield (iter([DependencyGraph(tree_str, top_relation_label=top_relation_label)]))
        finally:
            # Runs on normal exhaustion, on error, and when the generator is
            # closed before being fully consumed.
            for path in (input_path, output_path):
                if path is None:
                    continue
                try:
                    os.remove(path)
                except OSError:
                    pass
Пример #4
0
    def train(self, depgraphs, verbose=False):
        """
        Train MaltParser from a list of ``DependencyGraph`` objects

        The graphs are serialised with ``to_conll(10)`` into a temporary
        file inside ``self.working_dir``, which is handed to
        ``train_from_file`` and deleted afterwards.

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: list(DependencyGraph)
        :param verbose: passed through to ``train_from_file``
        :type verbose: bool
        """

        # Write the conll_str to a malt_train.conll.* file in working_dir.
        input_file = tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
                                                 dir=self.working_dir,
                                                 mode='w', delete=False)
        try:
            # The file is closed on leaving the ``with`` so the trainer
            # reads the complete content.
            with input_file:
                input_str = '\n'.join(dg.to_conll(10) for dg in depgraphs)
                input_file.write(text_type(input_str))
            # Trains the model with the malt_train.conll
            self.train_from_file(input_file.name, verbose=verbose)
        finally:
            # Removes the malt_train.conll whether or not training succeeded
            # (delete=False means nothing else will).
            os.remove(input_file.name)
Пример #5
0
    def train(self, depgraphs, verbose=False):
        """
        Train MaltParser from a list of ``DependencyGraph`` objects

        The graphs are serialised with ``to_conll(10)`` into a temporary
        file inside ``self.working_dir``, which is handed to
        ``train_from_file`` and deleted afterwards.

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: list(DependencyGraph)
        :param verbose: passed through to ``train_from_file``
        :type verbose: bool
        """

        # Write the conll_str to a malt_train.conll.* file in working_dir.
        input_file = tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
                                                 dir=self.working_dir,
                                                 mode='w', delete=False)
        try:
            # The file is closed on leaving the ``with`` so the trainer
            # reads the complete content.
            with input_file:
                input_str = '\n'.join(dg.to_conll(10) for dg in depgraphs)
                input_file.write(text_type(input_str))
            # Trains the model with the malt_train.conll
            self.train_from_file(input_file.name, verbose=verbose)
        finally:
            # Removes the malt_train.conll whether or not training succeeded
            # (delete=False means nothing else will).
            os.remove(input_file.name)
Пример #6
0
    def train_from_file(self, conll_file, verbose=False):
        """
        Train MaltParser from a CoNLL-formatted file.

        When ``conll_file`` is a ``ZipFilePathPointer`` its content is first
        copied to a temporary file inside ``self.working_dir``; training then
        runs on that copy and the copy is deleted afterwards.

        :param conll_file: filename of the training input data, or a
            ``ZipFilePathPointer`` to it
        :type conll_file: str or ZipFilePathPointer
        :param verbose: passed through to ``self._execute``
        :type verbose: bool
        :raises Exception: if the MaltParser command exits with a
            non-zero code
        """

        # If conll_file is a ZipFilePathPointer, MaltParser cannot read it
        # directly, so materialise it as a plain file first.
        if isinstance(conll_file, ZipFilePathPointer):
            import os  # function-scope import: this block did not use os before

            input_file = tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
                                                     dir=self.working_dir,
                                                     mode='w',
                                                     delete=False)
            try:
                # Leaving this ``with`` closes (and therefore flushes) the
                # copy *before* MaltParser is asked to read it.
                with input_file:
                    with conll_file.open() as conll_input_file:
                        conll_str = conll_input_file.read()
                        # NOTE(review): if the stream yields bytes on
                        # Python 3, text_type() would write its repr --
                        # confirm the pointer is opened as text.
                        input_file.write(text_type(conll_str))
                return self.train_from_file(input_file.name, verbose=verbose)
            finally:
                # delete=False means nobody else cleans this up.
                os.remove(input_file.name)

        # Generate command to run maltparser.
        cmd = self.generate_malt_command(conll_file, mode="learn")
        ret = self._execute(cmd, verbose)
        if ret != 0:
            raise Exception("MaltParser training (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))
        self._trained = True