Пример #1
0
    def tagged_parse(self, sentence, verbose=False):
        """
        Use MaltParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        The sentence is written to C{malt_input.conll} in the system temp
        directory (one tab-separated, ten-column row per token, followed by
        a blank line), the parser is invoked through C{self._execute}, and
        the result is loaded from C{malt_output.conll} in the same directory.

        @param sentence: Input sentence to parse
        @type sentence: L{list} of (word, tag) L{tuple}s.
        @param verbose: Passed through unchanged to C{self._execute}.
        @type verbose: L{bool}
        @return: C{DependencyGraph} the dependency graph representation of the sentence
        @raise Exception: If the MaltParser location has not been configured
            or the parser has not been trained.
        """

        if not self._malt_bin:
            raise Exception(
                "MaltParser location is not configured.  Call config_malt() first."
            )
        if not self._trained:
            raise Exception(
                "Parser has not been trained.  Call train() first.")

        # NOTE(review): both files use fixed names in the shared temp
        # directory, so two parses running at the same time would overwrite
        # each other's files.
        temp_dir = tempfile.gettempdir()
        input_file = os.path.join(temp_dir, 'malt_input.conll')
        output_file = os.path.join(temp_dir, 'malt_output.conll')

        # The context manager closes (and therefore flushes) the input file
        # before the parser is launched, including when a write fails.
        with open(input_file, 'w') as f:
            for (i, (word, tag)) in enumerate(sentence):
                # Token ids are 1-based; the tag fills both the 4th and 5th
                # columns and the remaining columns are fixed placeholders.
                f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                        (i + 1, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
            # Blank line terminates the sentence.
            f.write('\n')

        # Each option is kept together with its value in a single element,
        # exactly as self._execute has always received it.
        # NOTE(review): presumably _execute joins these into one command
        # line -- confirm before splitting flags from their values.
        cmd = [
            'java',
            '-jar %s' % self._malt_bin,
            '-w %s' % temp_dir,
            '-c %s' % self.mco,
            '-i %s' % input_file,
            '-o %s' % output_file,
            '-m parse',
        ]

        self._execute(cmd, 'parse', verbose)

        return DependencyGraph.load(output_file)
Пример #2
0
    def tagged_parse(self, sentence, verbose=False):
        """
        Use MaltParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        Steps: write the sentence to C{malt_input.conll} in the system temp
        directory, run the parser through C{self._execute}, then load the
        graph from C{malt_output.conll} in that same directory.

        @param sentence: Input sentence to parse
        @type sentence: L{list} of (word, tag) L{tuple}s.
        @param verbose: Handed on as-is to C{self._execute}.
        @type verbose: L{bool}
        @return: C{DependencyGraph} the dependency graph representation of the sentence
        @raise Exception: If C{self._malt_bin} or C{self._trained} is falsy,
            i.e. the parser is not configured or not trained.
        """

        if not self._malt_bin:
            raise Exception("MaltParser location is not configured.  Call config_malt() first.")
        if not self._trained:
            raise Exception("Parser has not been trained.  Call train() first.")

        # NOTE(review): the file names are fixed, so concurrent calls that
        # share a temp directory would clobber each other's input/output.
        input_file = os.path.join(tempfile.gettempdir(), 'malt_input.conll')
        output_file = os.path.join(tempfile.gettempdir(), 'malt_output.conll')

        # One row per token: 1-based index, word, '_', tag, tag, then fixed
        # placeholder columns; a trailing blank line ends the sentence.
        row = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
        # 'with' guarantees the file is closed before the parser reads it,
        # and also when writing raises.
        with open(input_file, 'w') as f:
            for (i, (word, tag)) in enumerate(sentence):
                f.write(row % (i+1, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
            f.write('\n')

        # Flags stay fused with their values ('-i <path>' as one element),
        # matching what self._execute has always been given.
        # NOTE(review): verify how _execute consumes this list before
        # changing its shape.
        cmd = ['java', '-jar %s' % self._malt_bin, '-w %s' % tempfile.gettempdir(),
               '-c %s' % self.mco, '-i %s' % input_file, '-o %s' % output_file, '-m parse']

        self._execute(cmd, 'parse', verbose)

        return DependencyGraph.load(output_file)