Example #1
    def parse(self, text, relationships=None, dependencies=None):
        """Tokenize and parse text, creating ``Sentence`` objects and
        extracting dependencies, parse trees, etc.

        :param str text: The text to tokenize and parse.
        :param relationships: Optional data passed through to
            ``add_grammatical_relations``.
        :param dependencies: Optional data passed through to
            ``add_grammatical_relations``.
        :return list: The ``Sentence`` objects created from ``text``.
        """

        start_time = datetime.now()
        parsed = self.parse_with_error_handling(text)
        end_time = datetime.now()

        # If the parse was unsuccessful, exit
        if parsed is None:
            return []

        # Timing report
        parsetime = end_time - start_time
        self.parsetime += parsetime.total_seconds()

        sentences = []

        for parsed_sentence in parsed['sentences']:
            sentence = Sentence(text=parsed_sentence['text'],
                                project=self.project)
            sentence.save(False)

            self.add_words(sentence, parsed_sentence, text)
            self.add_grammatical_relations(sentence, parsed_sentence,
                                           relationships, dependencies)

            sentence.save(False)
            sentences.append(sentence)

        return sentences
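
A minimal usage sketch for the parse() method above; the Parser class, its constructor, and the project object are illustrative assumptions, not taken from the example:

    # Hypothetical setup: ``Parser`` and ``project`` are illustrative names only.
    parser = Parser(project=project)

    sentences = parser.parse("The quick brown fox jumped over the lazy dog.")
    for sentence in sentences:
        # parse() has already saved each Sentence along with its words and
        # grammatical relations.
        print(sentence.text)

    # If parse_with_error_handling() returns None, parse() returns [].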
Example #2
    def test_process(self):
        """Test process()."""
        document = Document()
        sentence = Sentence(text="The quick brown fox jumped over the lazy dog",
                            document=document, project=self.project)
        words = [
            Word(lemma="the", surface="the"),
            Word(lemma="fox", surface="fox"),
            Word(lemma="jump", surface="jumped"),
            Word(lemma="over", surface="over"),
            Word(lemma="the", surface="the"),
            Word(lemma="dog", surface="dog")]
        for index, word in enumerate(words):
            word.save()
            sentence.add_word(word, index + 1, " ", word.surface, self.project)
        sentence.save()

        result = self.seq_proc.process(sentence)
        sequences = split_sequences(result)
        sequence_sequences = get_sequence_text(sequences)

        # Create four lists of sequences based on the categories and then
        # check the output
        key = {
            "words": {
                "stops": [
                    "the",
                    "the fox",
                    "the fox jumped",
                    "the fox jumped over",
                    "fox jumped over",
                    "fox jumped over the",
                    "jumped over",
                    "jumped over the",
                    "jumped over the dog",
                    "over",
                    "over the",
                    "over the dog",
                    "the",
                    "the dog"],
                "nostops": [
                    "fox",
                    "fox jumped",
                    "jumped",
                    "jumped dog",
                    "dog"]
            },
            "lemmas": {
                "stops": [
                    "the",
                    "the fox",
                    "the fox jump",
                    "the fox jump over",
                    "fox jump over",
                    "fox jump over the",
                    "jump over",
                    "jump over the",
                    "jump over the dog",
                    "over",
                    "over the",
                    "over the dog",
                    "the",
                    "the dog"],
                "nostops": [
                    "fox",
                    "fox jump",
                    "jump",
                    "jump dog",
                    "dog"]
            }
        }

        print(sequence_sequences)
        # TODO: the sequence processor isn't making phrases from words separated
        # by a stopword, but this expected output assumes that it does.
        self.assertEqual(sequence_sequences, key)
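
One way to read the expected key above, as a hedged sketch: "stops" holds every contiguous n-gram (up to four words) over the six words actually added to the sentence that contains at least one stopword, while "nostops" holds n-grams built after dropping the stopwords entirely. This is an illustrative reconstruction that reproduces the "words" lists in the key, not the actual sequence processor; the stopword set, the length limits, and the helper below are assumptions, and per the TODO the real processor does not join words across a stopword.

    # Illustrative reconstruction only; not the real sequence processor.
    STOPWORDS = {"the", "over"}  # assumed stopword set for this sentence

    def ngrams(tokens, max_len):
        """Yield every contiguous subsequence of length 1..max_len."""
        for i in range(len(tokens)):
            for j in range(i + 1, min(i + max_len, len(tokens)) + 1):
                yield tokens[i:j]

    # Surface forms of the six words added above; lemmas work the same way.
    tokens = "the fox jumped over the dog".split()

    # "stops": any n-gram that contains at least one stopword.
    stops = [" ".join(g) for g in ngrams(tokens, max_len=4)
             if STOPWORDS.intersection(g)]

    # "nostops": n-grams built after removing stopwords first; the length
    # limit of 2 simply matches the expected lists and is not a known rule.
    content = [t for t in tokens if t not in STOPWORDS]
    nostops = [" ".join(g) for g in ngrams(content, max_len=2)]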