Exemplo n.º 1
0
    def test_multiple_speech(self):
        content = '''
“If you did sign it,” said the King, “that only makes the matter worse. You _must_ have meant some mischief, or else you would have signed your name like an honest man.”
'''
        sentences = [Sentence(s) for s in quote_aware_sent_tokenize(content)]

        speech_parts = flatten([s.speech for s in sentences if s.speech])

        self.assertEqual([
            s.serialize() for s in speech_parts if s
        ], [{
            'text': 'If you did mark it ,',
            'speaker': {
                'name': 'King',
                'gender': 'male'
            },
            'inflection': 'said'
        }, {
            'text':
            'that only makes the matter worse . You  must  have meant some mischief , or else you would have signed your name love an right man .',
            'speaker': {
                'name': 'King',
                'gender': 'male'
            },
            'inflection': 'said'
        }])
Exemplo n.º 2
0
    def assignments(self):
        """
        Collect the assignments of every sentence in the content.

        The content is split with nltk's sentence tokenizer, each piece is
        wrapped in a Sentence and the per-sentence ``assignments`` are
        flattened into one result.

        :return: the flattened assignments, or an empty list when
                 ``self.content`` is None
        """
        text = self.content

        if text is not None:
            per_sentence = [
                Sentence(chunk).assignments
                for chunk in nltk.tokenize.sent_tokenize(text)
            ]
            return flatten(per_sentence)

        return []
Exemplo n.º 3
0
 def people(self) -> People:
     """
     Gather the people mentioned in every sentence of the text.

     Detection is naive, so expect plenty of false positives.

     :return: a People collection holding each sentence's people,
              appended in sentence order
     """
     found = People()
     for raw_sentence in self.sentences:
         sentence_people = Sentence(raw_sentence).people
         for someone in sentence_people:
             found.append(someone)
     return found
Exemplo n.º 4
0
    def test_extract_from_sentence(self):
        # Every extracted conditional is a (condition, marker, consequence)
        # triple; only the first one of each sentence is checked.

        # Leading "If": the sentence is split at the comma.
        condition, marker, consequence = extract_conditionals(
            Sentence("If you went to bed earlier, you'd be better rested")
        )[0]
        # Forget about the extra if for the moment
        self.assertEqual(condition.text, 'If you went to bed earlier')
        self.assertEqual(marker.text, 'If')
        self.assertEqual(consequence.text, "you 'd be better rested")

        # Leading "If" with "then" acting as the separator.
        condition, marker, consequence = extract_conditionals(
            Sentence('If and only if something then something')
        )[0]
        self.assertEqual(condition.text, 'If and only if something')
        self.assertEqual(marker.text, 'If')
        self.assertEqual(consequence.text, 'something')

        # Trailing "if": the condition follows the consequence.
        condition, marker, consequence = extract_conditionals(
            Sentence('You can have some chocolate if you want')
        )[0]
        self.assertEqual(condition.text, 'you want')
        self.assertEqual(marker.text, 'if')
        self.assertEqual(consequence.text, 'You can have some chocolate')
Exemplo n.º 5
0
def extract_conditionals(sentence):
    '''
    Split a sentence around each conditional marker found in its deptree.

    For a marker at index 0 ("If X, Y" / "If X then Y") the sentence is cut
    at the closest ',' or 'then' following the marker: everything before
    the cut is the condition, everything after it is the consequence.

    For a marker anywhere else ("Y if X") the text after the marker is the
    condition and the text before it is the consequence.

    :param sentence: object exposing ``deptree`` with ``conditionals``,
                     ``get_closest_after``, ``get_before_node`` and
                     ``get_after_node``
    :return: list of ``(condition, marker, consequence)`` tuples where
             condition and consequence are Sentence objects and marker is
             the conditional node itself
    '''
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import -- confirm).
    from mauve.models.sentence import Sentence

    deptree = sentence.deptree

    def as_sentence(nodes):
        # Re-join node texts with single spaces and wrap them in a Sentence
        return Sentence(' '.join([node.text for node in nodes]))

    conditionals = []
    for conditional in deptree.conditionals:

        if conditional.idx == 0:
            # Look the separator up once and reuse it for both halves
            split_node = deptree.get_closest_after(
                conditional,
                text=[',', 'then']
            )
            condition = as_sentence(deptree.get_before_node(split_node))
            consequence = as_sentence(deptree.get_after_node(split_node))
        else:
            consequence = as_sentence(deptree.get_before_node(conditional))
            condition = as_sentence(deptree.get_after_node(conditional))

        conditionals.append((condition, conditional, consequence))

    return conditionals
Exemplo n.º 6
0
    def test_real_text(self):
        content = '''
Hallward painted away with that marvellous bold touch of his, that had the true refinement and perfect delicacy that in art, at any rate comes only from strength.  He was unconscious of the silence.

"Basil, I am tired of standing," cried Dorian Gray suddenly.  "I must go out and sit in the garden.  The air is stifling here."

"My dear fellow, I am so sorry.  When I am painting, I can not think of anything else.  But you never sat better.  You were perfectly still.  And I have caught the effect I wanted--the half-parted lips and the bright look in the eyes.  I do not know what Harry has been saying to you, but he has certainly made you have the most wonderful expression.  I suppose he has been paying you compliments.  You must not believe a word that he says."
        '''

        sentences = [Sentence(s) for s in quote_aware_sent_tokenize(content)]

        speech_parts = flatten([s.speech for s in sentences if s.speech])

        self.assertEquals([
            s.serialize() for s in speech_parts if s
        ], [{
            'text': 'Basil , I am tired of standing ,',
            'speaker': {
                'name': 'Dorian Gray',
                'gender': 'male'
            },
            'inflection': 'cried'
        }, {
            'text':
            'I must go out and sit in the garden . The air is stifling here .',
            'speaker': {
                'name': '',
                'gender': None
            },
            'inflection': None
        }, {
            'text':
            'My dear fellow , I am so sorry . When I am painting , I can not think of anything else . But you never sat better . You were perfectly still . And I have caught the put I wanted -- the half-parted lips and the interesting look in the eyes . I do not know what Harry has been saying to you , but he has certainly made you have the most good expression . I suppose he has been paying you compliments . You must not think a word that he says .',
            'speaker': {
                'name': '',
                'gender': None
            },
            'inflection': None
        }])
Exemplo n.º 7
0
    def get_speech(self) -> Iterable:
        """
        Extract the speech items found in ``self.text``.

        The text is split with ``quote_aware_sent_tokenize``,
        ``extract_speech`` is run on a Sentence built from each piece, and
        the flattened items are all given one shared speaker -- unless more
        than one distinct capitalised speaker name is present, in which
        case the items are returned untouched.

        :return: the flattened speech items
        """

        def assign_best_name(speech_parts: Iterable) -> Iterable:
            # FIXME: what if a line has multiple speakers? Does this happen somewhere?

            capitalised_names = {
                part.speaker.name
                for part in speech_parts
                if part.speaker.name and part.speaker.name[0].isupper()
            }
            if len(capitalised_names) > 1:
                # Several distinct named speakers: keep each attribution
                return speech_parts

            # First non-empty name wins, but a name listed in NAMES
            # replaces a chosen name that is not in NAMES
            chosen = None
            for part in speech_parts:
                candidate = part.speaker.name
                if candidate == '':
                    continue
                if chosen is None:
                    chosen = candidate
                elif candidate in NAMES and chosen not in NAMES:
                    chosen = candidate

            for part in speech_parts:
                part.speaker = Person(name=chosen)

            return speech_parts

        # can probably use extract speech from here without copy paste
        # by using quote_aware_sent_tokenize

        extracted = [
            extract_speech(Sentence(piece))
            for piece in quote_aware_sent_tokenize(self.text)
        ]
        return assign_best_name(rflatten(extracted))
Exemplo n.º 8
0
    def get_pre_post(self, phrase, simple=False):
        """
        Get the segments directly before and after a phrase

        For every sentence of ``self.basic_sentences`` that contains the
        phrase, the first token equal to the phrase is located and its
        immediate neighbours are collected. Neighbours found in
        EXTENDED_PUNCTUATION are skipped.

        :param phrase: str can be a word or many or whatever
        :kwarg simple: if not simple, split by segments, otherwise
                       simple string splitting
        :return: defaultdict(list) that may hold a 'pre' and / or a 'post'
                 list; empty when the phrase is not in the content

        Usage:
            >>> TextBody(content='He ran to the fridge').get_pre_post('ran')
            {'pre': ['he'], 'post': ['to']}

            >>> TextBody(content='I went to the shop. The shop was closed').get_pre_post('shop')
            {'pre': ['the', 'the'], 'post': ['was']}
        """

        # FIXME multi instances in sentence. Don't care enough for the moment

        pairs = defaultdict(list)

        if phrase not in self.basic_content:
            return pairs

        # TODO: make the phrase a segment if multi word
        # Multi word phrases are joined with underscores so they can be
        # looked up as one token (same for every sentence, so done once)
        unsplit_phrase = phrase.replace(' ', '_')

        for sentence in self.basic_sentences:
            if phrase not in sentence:
                continue

            if simple:
                # NOTE(review): the sentence is split as-is here, so a
                # multi word phrase looks like it can never be found in
                # simple mode -- confirm whether that is intended
                texts = split_include(
                    split_include(
                        sentence.split(' '),
                        ','
                    ),
                    '.'
                )
            else:
                s = Sentence(sentence.replace(phrase, unsplit_phrase))
                texts = [i.text.lower() for i in s.segments]

            try:
                idx = texts.index(unsplit_phrase)
            except ValueError:
                # The phrase only occurs inside a larger token
                continue

            # Bounds are checked before indexing: a phrase at the very end
            # of the sentence must still contribute its 'pre' neighbour
            # instead of being dropped by an IndexError
            pre = texts[idx - 1] if idx > 0 else None
            post = texts[idx + 1] if idx + 1 < len(texts) else None

            if pre is not None and pre not in EXTENDED_PUNCTUATION:
                pairs['pre'].append(pre)

            if post is not None and post not in EXTENDED_PUNCTUATION:
                pairs['post'].append(post)

        return pairs
Exemplo n.º 9
0
 def test_speech_speech_before(self):
     # Quote first, followed by a pronoun speaker and the speech verb
     parts = extract_speech(Sentence('"Shut up" he said'))
     first = parts[0]
     self.assertEqual(first.text.strip(), 'Shut up')
     self.assertEqual(first.inflection.strip(), 'said')
     self.assertEqual(first.speaker.name.strip(), 'he')
Exemplo n.º 10
0
 def test_extra_speech_before(self):
     # Quote first; the attribution carries trailing words ("to Mikey")
     # that must not leak into the speaker or the inflection
     parts = extract_speech(Sentence('"Shut up" Robert exclaimed to Mikey'))
     first = parts[0]
     self.assertEqual(first.text.strip(), 'Shut up')
     self.assertEqual(first.inflection.strip(), 'exclaimed')
     self.assertEqual(first.speaker.name.strip(), 'Robert')
Exemplo n.º 11
0
 def test_extra_speech_after(self):
     # Attribution first (with leading filler "And then"), quote last
     parts = extract_speech(Sentence('And then Robert exclaimed "Shut up"'))
     first = parts[0]
     self.assertEqual(first.text.strip(), 'Shut up')
     self.assertEqual(first.inflection.strip(), 'exclaimed')
     self.assertEqual(first.speaker.name.strip(), 'Robert')
Exemplo n.º 12
0
 def test_as_long_as(self):
     # A multi word marker: the condition text keeps it joined with
     # underscores while the marker node itself reads "as long as"
     condition, marker, consequence = extract_conditionals(
         Sentence('as long as you fed him, he would be cooperative')
     )[0]
     self.assertEqual(condition.text, 'as_long_as you fed him')
     self.assertEqual(marker.text, 'as long as')
     self.assertEqual(consequence.text, 'he would be cooperative')