示例#1
0
 def test_004(self):
     """A single paragraph spanning two lines yields exactly one segment.

     The input contains one newline and no blank line; the expected result
     is a single entry tagged 1002 whose text is the input unchanged.
     """
     segment = Segment("first\nsecond")
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(segment.segments, [{
         'tag': 1002,
         'text': 'first\nsecond'
     }])
示例#2
0
 def test_005(self):
     """A single paragraph followed by trailing whitespace yields one segment.

     The input ends with two newlines; the expected segment text carries
     none of that trailing whitespace.
     """
     segment = Segment("first\nsecond\n\n")
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(segment.segments, [{
         'tag': 1002,
         'text': 'first\nsecond'
     }])
示例#3
0
 def test_009(self):
     """Abbreviated page markers ("p 1", "p. 1") become their own segment.

     Each input is a two-line paragraph, a blank line, the page marker and
     a final newline. The expected result is the paragraph (tag 1002)
     followed by the marker alone (tag 1003), without the final newline.
     """
     # Both spellings share the same expectation, so they are checked in a
     # loop; the marker is passed as msg so a failure names the variant.
     for marker in ("p 1", "p. 1"):
         segment = Segment("first\nsecond\n\n" + marker + "\n")
         # assertEqual, not assertEquals: the alias is deprecated and was
         # removed from unittest in Python 3.12.
         self.assertEqual(segment.segments, [{
             'tag': 1002,
             'text': 'first\nsecond'
         }, {
             'tag': 1003,
             'text': marker
         }], msg=marker)
示例#4
0
 def test_008(self):
     """A "Page N" line at the bottom of the text becomes its own segment.

     The input is a two-line paragraph, a blank line, then "Page 1" and a
     final newline. The expected result is the paragraph (tag 1002)
     followed by "Page 1" (tag 1003).
     """
     segment = Segment("first\nsecond\n\nPage 1\n")
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(segment.segments, [{
         'tag': 1002,
         'text': 'first\nsecond'
     }, {
         'tag': 1003,
         'text': 'Page 1'
     }])
示例#5
0
 def test_006(self):
     """Headings read from a fixture file are each tagged 1001.

     The text comes from files/segment_heading.txt (path relative to the
     working directory the tests run from). Three segments are expected,
     all tagged 1001, in the order listed below.
     """
     # Explicit encoding so the fixture is decoded the same way regardless
     # of the platform's default locale encoding.
     with open("files/segment_heading.txt", "r", encoding="utf-8") as f:
         text = f.read()
     segment = Segment(text)
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(segment.segments, [{
         'tag': 1001,
         'text': 'TABLE OF CONTENTS'
     }, {
         'tag': 1001,
         'text': '1. Section One'
     }, {
         'tag': 1001,
         'text': '2. Section Two'
     }])
示例#6
0
 def test_003(self):
     """Two paragraphs read from a fixture file yield two 1002 segments.

     The text comes from files/segment_para.txt (path relative to the
     working directory the tests run from). Each expected segment keeps the
     line break inside its paragraph.
     """
     # Explicit encoding so the fixture is decoded the same way regardless
     # of the platform's default locale encoding.
     with open("files/segment_para.txt", "r", encoding="utf-8") as f:
         text = f.read()
     segment = Segment(text)
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         segment.segments,
         [{
             'tag': 1002,
             'text':
             'This is a first paragraph\nand continues to next line.'
         }, {
             'tag': 1002,
             'text': 'Then this is the second\nparagraph.'
         }])
示例#7
0
 def test_039(self):
     """Segment option: a Page built from paragraph segments.

     Segments produced from two blank-line-separated paragraphs are passed
     to Page via the ``text`` keyword; the page's text, size and words are
     then checked against the literal values below.
     """
     paragraphs = Segment('para 1\n\npara 2')
     page = Page(text=paragraphs.segments)

     # Both paragraphs are expected to produce the same entry: tag 1002
     # holding the single word 'para' with tag 0.
     expected_words = [
         {'tag': 1002, 'words': [{'tag': 0, 'word': 'para'}]}
         for _ in range(2)
     ]

     self.assertEqual(page.text, 'para 1\n\npara 2')
     self.assertEqual(page.size, 16)
     self.assertEqual(page.words, expected_words)
示例#8
0
 def test_010(self):
     """Paragraphs interleaved with several page-number styles.

     The text comes from files/segment_page.txt (path relative to the
     working directory the tests run from). The expected result alternates
     paragraph segments (tag 1002) with page-number segments (tag 1003)
     written as "Page 1", "p. 2" and a bare "3".
     """
     # Explicit encoding so the fixture is decoded the same way regardless
     # of the platform's default locale encoding.
     with open("files/segment_page.txt", "r", encoding="utf-8") as f:
         text = f.read()
     segment = Segment(text)
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(segment.segments, [{
         'tag': 1002,
         'text': 'first para'
     }, {
         'tag': 1003,
         'text': 'Page 1'
     }, {
         'tag': 1002,
         'text': 'second para'
     }, {
         'tag': 1003,
         'text': 'p. 2'
     }, {
         'tag': 1002,
         'text': 'third para'
     }, {
         'tag': 1003,
         'text': '3'
     }])
示例#9
0
 def test_040(self):
     """Segment option: Page given both a path and pre-computed segments.

     No assertions follow the construction in this test; it passes as long
     as building the Segment and the Page does not raise.
     """
     fixture = 'files/segment_para.txt'
     with open(fixture, 'r', encoding="utf-8") as f:
         segment = Segment(f.read())
     page = Page(fixture, segment.segments)
示例#10
0
 def test_002(self):
     """Segment constructor called with no arguments raises TypeError."""
     with pytest.raises(TypeError):
         # The call is expected to raise, so its result is not bound to a
         # name (the original assigned it to an unused local).
         Segment()
示例#11
0
 def test_001(self):
     """Segment constructor given a non-string (an int) raises TypeError."""
     with pytest.raises(TypeError):
         # The call is expected to raise, so its result is not bound to a
         # name (the original assigned it to an unused local).
         Segment(12)
示例#12
0
 def test_011(self):
     """A dash-wrapped page number ("- 2 -") is tagged as a page segment.

     The input is the marker followed by a newline; the expected result is
     a single segment tagged 1003 without the trailing newline.
     """
     segment = Segment("- 2 -\n")
     # assertEqual, not assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(segment.segments, [{'tag': 1003, 'text': '- 2 -'}])