def test_slice_start_from_space(self):
    """A full-range slice of a space-prefixed text must mirror the source map.

    Builds a TextMap from a text with 20 leading spaces, slices it over
    its whole token range and checks that the length, the first span,
    the text and the char-to-token lookups agree between both maps.
    """
    padding = 20
    padded_text = ' ' * padding + '''основании Устава, с одной стороны, и Фонд «Благо»'''
    tm = TextMap(padded_text)

    # Diagnostic output for the source map.
    print(tm.map[0])
    print(tm.tokens[11])
    print(tm.map[11])
    print(f'[{tm.text}]')
    print(len(tm))

    whole_range = slice(0, len(tm))
    tm_sliced = tm.slice(whole_range)

    # Diagnostic output comparing the first span before and after slicing.
    print('span-0')
    print(tm.map[0])
    print(tm_sliced.map[0])

    self.assertEqual(len(tm), len(tm_sliced))
    self.assertEqual(tm.map[0], tm_sliced.map[0])

    # Offsets are counted from the start of the text, one per character
    # of the first token; every one of them must resolve to token 0 in
    # both maps.
    # NOTE(review): with the 20-space prefix these offsets presumably all
    # fall inside the leading padding -- confirm this is intended.
    first_token_length = len(tm.tokens[0])
    for char_pos in range(first_token_length):
        print(char_pos)
        for text_map in (tm, tm_sliced):
            self.assertEqual(0, text_map.token_index_by_char(char_pos))

    self.assertEqual(tm.text, tm_sliced.text)
    self.assertEqual(0, tm.token_index_by_char(0))
def test_tokens_in_range(self):
    """Character offsets must resolve to the expected token index."""
    # Single word: every character offset belongs to token 0.
    single_word_map = TextMap('мама')
    for char_pos in range(4):
        self.assertEqual(0, single_word_map.token_index_by_char(char_pos))

    # Three words: (character offset, expected token index), checked in
    # this order. Offsets 4 and 9 are the spaces between the words; the
    # expectations below map each of them to the token that follows.
    phrase_map = TextMap('мама выла папу')
    expectations = (
        (5, 1),
        (6, 1),
        (7, 1),
        (8, 1),
        (9, 2),
        (4, 1),
    )
    for char_pos, expected_token in expectations:
        self.assertEqual(expected_token, phrase_map.token_index_by_char(char_pos))
def test_tokens_in_range_start_from_space(self):
    """Check token lookup for a text that begins with whitespace.

    The first part asserts on a TextMap built directly from a string with
    one leading space. The second part parses a space-prefixed text through
    LegalDocument and prints parts of the resulting token map.
    """
    leading_space_map = TextMap(' мама')
    # The first token's span starts at offset 1, right after the space ...
    self.assertEqual(1, leading_space_map.map[0][0])
    # ... yet offset 0 (the space itself) still resolves to token 0.
    self.assertEqual(0, leading_space_map.token_index_by_char(0))

    padded_text = ' ' * 20 + '''основании Устава, с одной стороны, и Фонд «Благо»'''
    doc = LegalDocument(padded_text).parse()
    tm = doc.tokens_map

    # NOTE(review): from here on the test only prints diagnostics and makes
    # no assertions about the parsed document.
    print(tm.map[0])
    print(tm.tokens[11])
    print(tm.map[11])
    print(f'[{doc.tokens_map.text}]')
    print(f'[{doc.text}]')
def test_slice(self):
    """Slicing a TextMap by token range keeps tokens, text and lookups consistent."""
    tm = TextMap('этилен мама ಶ್ರೀರಾಮ')

    # Cut out the middle token only.
    middle: TextMap = tm.slice(slice(1, 2))
    self.assertEqual(middle[0], 'мама')
    self.assertEqual(middle.text, 'мама')

    # Slicing the one-token slice again must give the same token.
    middle_again = middle.slice(slice(0, 1))
    self.assertEqual(middle_again[0], 'мама')

    # Character offset 1 resolves to token 0 in the source and in both slices.
    for text_map in (tm, middle, middle_again):
        self.assertEqual(0, text_map.token_index_by_char(1))

    self.assertEqual('мама', middle_again.text)

    # Both token ranges are expected to yield the full slice text, including
    # the one whose end index (2) goes past the single token checked above.
    for token_range in ([0, 1], [0, 2]):
        self.assertEqual('мама', middle_again.text_range(token_range))