Пример #1
0
    def how_many_nouns(self, passage):
        """Build a counting sample whose answer is the number of noun spans.

        Args:
            passage: Source text handed to ``randomize_scope`` (called with
                ``passage_prob=0.25``).

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'SPAN'``).
        """
        scope, _sen_index, scoped_text = randomize_scope(passage,
                                                         passage_prob=0.25)

        # Only the 'NN' tag is requested from the POS extractor.
        # NOTE(review): the spans are forwarded exactly as extract_pos returns
        # them; no offset of the scoped text inside the passage is added here
        # -- confirm they are already relative to what consumers expect.
        noun_spans, _ = extract_pos(scoped_text, pos_to_extract=['NN'])

        question_text = randomize_total_question(string_category='noun',
                                                 context_scope=scope)

        return {
            'question': question_text,
            'answer': len(noun_spans),
            'spans': noun_spans,
            'span_type': 'SPAN',
        }
Пример #2
0
    def how_many_sentences_in_total(self, passage):
        """Build a counting sample whose answer is the number of sentences.

        ``randomize_scope`` is called with ``passage_prob=1.0`` and the text
        it returns replaces ``passage`` for the rest of the method.

        Args:
            passage: Source text to count sentences in.

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'SENTENCE'``). ``spans`` holds one
            ``(start, end)`` character range per extracted sentence.

        Raises:
            ValueError: If an extracted sentence cannot be found in the text.
        """
        context_scope, _sen_index, passage = randomize_scope(passage,
                                                             passage_prob=1.0)

        sentences = extract_sentences(passage)

        spans = []
        cursor = 0  # end of the furthest sentence located so far
        for sentence in sentences:
            try:
                # Search forward from the previous sentence so a repeated
                # sentence maps to its own occurrence, not to the first one.
                start = passage.index(sentence, cursor)
            except ValueError:
                # Sentences not in document order: fall back to the original
                # whole-passage search (still raises if truly absent).
                start = passage.index(sentence)
            end = start + len(sentence)
            spans.append((start, end))
            cursor = max(cursor, end)

        answer = len(sentences)

        question = randomize_total_question(string_category='sentence',
                                            context_scope=context_scope)

        sample_details = dict(question=question,
                              answer=answer,
                              spans=spans,
                              span_type='SENTENCE')

        return sample_details
Пример #3
0
    def how_many_words_in_total(self, passage):
        """Build a counting sample whose answer is the number of words.

        Args:
            passage: Source text handed to ``randomize_scope`` (called with
                ``passage_prob=0.0``).

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'WORD'``).

        Raises:
            ValueError: If the scoped text is not a substring of ``passage``.
        """
        scope, _sen_index, scoped_text = randomize_scope(passage,
                                                         passage_prob=0.0)

        tokens = extract_words(scoped_text)

        # Shift token offsets by the position of the first occurrence of the
        # scoped text inside the full passage.
        # presumably ``token.idx`` is a character offset within scoped_text
        # -- confirm against extract_words.
        offset = passage.index(scoped_text)
        spans = []
        for token in tokens:
            begin = offset + token.idx
            spans.append((begin, begin + len(str(token))))

        word_count = len(tokens)

        question_text = randomize_total_question(string_category='word',
                                                 context_scope=scope)

        return {
            'question': question_text,
            'answer': word_count,
            'spans': spans,
            'span_type': 'WORD',
        }
Пример #4
0
    def how_many_numbers(self, passage):
        """Build a counting sample whose answer is how many numbers appear.

        Args:
            passage: Source text handed to ``randomize_scope`` (called with
                ``passage_prob=0.5``).

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'WORD'``).

        Raises:
            ValueError: If the scoped text is not a substring of ``passage``.
        """
        scope, _sen_index, scoped_text = randomize_scope(passage,
                                                         passage_prob=0.5)

        # Position of the first occurrence of the scoped text in the passage;
        # added to every index reported by the number extractor.
        offset = passage.index(scoped_text)
        numbers, positions = extract_passage_numbers(scoped_text)

        spans = []
        for number, position in zip(numbers, positions):
            begin = offset + position
            # Span length is the length of the number's string form.
            spans.append((begin, begin + len(str(number))))

        question_text = randomize_total_question(string_category='number',
                                                 context_scope=scope)

        return {
            'question': question_text,
            'answer': len(numbers),
            'spans': spans,
            'span_type': 'WORD',
        }
Пример #5
0
    def how_many_words_shorter_than(self, passage):
        """Build a sample counting words shorter than a random length.

        The length threshold is drawn uniformly from 1..5 inclusive; a word
        qualifies when the length of its string form is strictly below it
        (so a threshold of 1 yields an answer of 0).

        Args:
            passage: Source text handed to ``randomize_scope`` (called with
                ``passage_prob=0.2``).

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'WORD'``).

        Raises:
            ValueError: If the scoped text is not a substring of ``passage``.
        """
        context_scope, _sen_index, text = randomize_scope(passage, passage_prob=0.2)

        words = extract_words(text)

        # randint's upper bound is exclusive, so (1, 6) keeps the inclusive
        # 1..5 range of the deprecated np.random.random_integers(5).
        target_len = np.random.randint(1, 6)

        # Measure the whole token, matching the span length computed below;
        # the previous ``tok[0]`` looked only at the token's first element.
        target_words = [tok for tok in words if len(str(tok)) < target_len]

        # Shift token offsets by the position of the first occurrence of the
        # scoped text inside the full passage.
        start_idx = passage.index(text)
        spans = [(start_idx + w.idx, start_idx + w.idx + len(str(w))) for w in target_words]
        answer = len(target_words)

        question = randomize_total_question(string_category=f'words shorter than {target_len} character',
                                            context_scope=context_scope)

        sample_details = dict(
            question=question,
            answer=answer,
            spans=spans,
            span_type='WORD'
        )

        return sample_details
Пример #6
0
    def how_many_title_case_words_in_total(self, passage):
        """Build a sample counting words that start with an uppercase letter.

        Args:
            passage: Source text handed to ``randomize_scope`` (called with
                ``passage_prob=0.5``).

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'WORD'``).

        Raises:
            ValueError: If the scoped text is not a substring of ``passage``.
        """
        scope, _sen_index, scoped_text = randomize_scope(passage,
                                                         passage_prob=0.5)

        tokens = extract_words(scoped_text)

        # Slicing with [:1] yields '' for an empty token text, and
        # ''.isupper() is False, so empty tokens are skipped without a
        # separate length check.
        capitalized = [tok for tok in tokens if tok.text[:1].isupper()]

        # Shift token offsets by the position of the first occurrence of the
        # scoped text inside the full passage.
        offset = passage.index(scoped_text)
        spans = []
        for token in capitalized:
            begin = offset + token.idx
            spans.append((begin, begin + len(str(token))))

        question_text = randomize_total_question(
            string_category='title case word',
            context_scope=scope)

        return {
            'question': question_text,
            'answer': len(capitalized),
            'spans': spans,
            'span_type': 'WORD',
        }
Пример #7
0
    def how_many_times_vowels_appears(self, passage):
        """Build a sample counting vowel characters in the selected scope.

        Reads ``self.limit_classes``: when it is not ``None``, letters whose
        frequency is not strictly below it are discarded before the vowels
        are summed.

        Args:
            passage: Source text handed to ``randomize_scope`` (called with
                ``passage_prob=0.0``).

        Returns:
            dict with the keys ``question``, ``answer``, ``spans`` and
            ``span_type`` (always ``'CHAR'``). Each span covers a single
            character.

        Raises:
            ValueError: If the scoped text is not a substring of ``passage``.
        """
        scope, _sen_index, scoped_text = randomize_scope(passage, passage_prob=0.0)

        frequencies = extract_letters_frequency(scoped_text)

        limit = self.limit_classes
        if limit is not None:
            frequencies = {
                letter: count
                for letter, count in frequencies.items()
                if count < limit
            }

        vowels = ('a', 'e', 'i', 'u', 'o')
        vowel_counts = {
            letter: count
            for letter, count in frequencies.items()
            if letter.lower() in vowels
        }
        total = sum(vowel_counts.values())

        # Shift character positions by the position of the first occurrence
        # of the scoped text inside the full passage.
        offset = passage.index(scoped_text)
        spans = []
        for position, char in enumerate(scoped_text):
            # Membership is an exact (case-sensitive) match against the
            # surviving frequency keys.
            if char in vowel_counts:
                spans.append((offset + position, offset + position + 1))

        question_text = randomize_total_question(string_category='vowel character', context_scope=scope)

        return {
            'question': question_text,
            'answer': total,
            'spans': spans,
            'span_type': 'CHAR',
        }