Пример #1
0
 def test_counts_correct_amount_of_paragraphs_for_complex_56(self):
     """Zipping the paragraphs of complex PDFs 5 and 6 gives 69 entries."""
     # Each fixture goes through the same sanitize -> build pipeline
     # before the two results are zipped together.
     paragraphs_5 = interleave.build_paragraphs(
         interleave.sanitize_text(self.complex_PDF_5))
     paragraphs_6 = interleave.build_paragraphs(
         interleave.sanitize_text(self.complex_PDF_6))
     zipped = interleave.zip_sentences(paragraphs_5, paragraphs_6)
     self.assertEqual(69, len(zipped))
Пример #2
0
    def test_zip_sentences_to_tuple(self):
        """Zipping the short text with itself equals ``processed_text``."""
        # Both sides are the same fixture, prefixed with two newlines.
        left_source = '\n\n' + self.short_text
        right_source = '\n\n' + self.short_text

        left_paragraphs = interleave.build_paragraphs(
            interleave.sanitize_text(left_source))
        right_paragraphs = interleave.build_paragraphs(
            interleave.sanitize_text(right_source))
        zipped = interleave.zip_sentences(left_paragraphs, right_paragraphs)

        self.assertEqual(self.processed_text, zipped)
Пример #3
0
 def test_edge_valid_nonroman_string(self):
     """The ``edge_complex_7`` fixture parses into four paragraphs."""
     # The fixture is prefixed with two newlines before sanitizing.
     cleaned = interleave.sanitize_text('\n\n' + self.edge_complex_7)
     paragraphs = interleave.build_paragraphs(cleaned)
     self.assertEqual(4, len(paragraphs))
Пример #4
0
 def test_edge_marks_bad_paragraph_parse(self):
     """The ``edge_bad_parse`` fixture gives 3 items incl. a parse error."""
     cleaned = interleave.sanitize_text('\n\n' + self.edge_bad_parse)
     paragraphs = interleave.build_paragraphs(cleaned)
     # Check the count first, then that the error marker is present.
     self.assertEqual(3, len(paragraphs))
     self.assertIn('2. PARSE ERROR', paragraphs)
Пример #5
0
 def test_edge_case_isolate_first_paragraph(self):
     """The ``edge_first_paragraph`` fixture yields exactly one paragraph."""
     # The fixture is sanitized as-is; nothing is prepended to it here.
     cleaned = interleave.sanitize_text(self.edge_first_paragraph)
     paragraphs = interleave.build_paragraphs(cleaned)
     self.assertEqual(1, len(paragraphs))
Пример #6
0
 def test_edge_case_missing_paragraphs(self):
     """The ``edge_missing_paragraphs`` fixture yields seven paragraphs."""
     cleaned = interleave.sanitize_text(
         '\n\n' + self.edge_missing_paragraphs)
     paragraphs = interleave.build_paragraphs(cleaned)
     # Seven entries are expected in the built result.
     self.assertEqual(7, len(paragraphs))
Пример #7
0
 def test_edge_case_line_starts_with_numeric_sentence_end(self):
     """The ``edge_numbers`` fixture yields seven paragraphs."""
     cleaned = interleave.sanitize_text('\n\n' + self.edge_numbers)
     paragraphs = interleave.build_paragraphs(cleaned)
     # Seven entries are expected in the built result.
     self.assertEqual(7, len(paragraphs))
Пример #8
0
 def test_builds_paragraphs_correctly(self):
     """Building from the raw short text equals ``split_simple_text``."""
     # The text goes straight to build_paragraphs here, without a
     # sanitize_text pass in between.
     raw_text = '\n\n' + self.short_text
     paragraphs = interleave.build_paragraphs(raw_text)
     self.assertEqual(self.split_simple_text, paragraphs)
Пример #9
0
 def test_counts_up_to_1000_paragraphs(self):
     """The ``thousand_paragraphs`` fixture yields 1000 paragraphs."""
     cleaned = interleave.sanitize_text(self.thousand_paragraphs)
     paragraphs = interleave.build_paragraphs(cleaned)
     self.assertEqual(1000, len(paragraphs))
Пример #10
0
 def test_splits_multipage_paragraphs(self):
     """The multipage text splits into ``split_multipage_text``."""
     # The fixture is prefixed with two newlines before sanitizing.
     cleaned = interleave.sanitize_text('\n\n' + self.multipage_text)
     paragraphs = interleave.build_paragraphs(cleaned)
     self.assertEqual(self.split_multipage_text, paragraphs)
Пример #11
0
 def test_splits_short_sentences(self):
     """The sanitized short text splits into ``split_simple_text``."""
     # The fixture is prefixed with two newlines before sanitizing.
     cleaned = interleave.sanitize_text('\n\n' + self.short_text)
     paragraphs = interleave.build_paragraphs(cleaned)
     self.assertEqual(self.split_simple_text, paragraphs)