Example #1
  def test_slice_start_from_space(self):
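    # The text is padded with 20 leading spaces; the assertions below check that a
    # full-length slice of the TextMap keeps the same token spans, text and
    # char-to-token lookup as the original map.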
    offset = 20
    txt = ' ' * offset + '''основании Устава, с одной стороны, и Фонд «Благо»'''
    tm = TextMap(txt)
    print(tm.map[0])
    print(tm.tokens[11])
    print(tm.map[11])
    print(f'[{tm.text}]')

    print(len(tm))
    tm_sliced = tm.slice(slice(0, len(tm)))
    print('span-0')
    print(tm.map[0])
    print(tm_sliced.map[0])

    self.assertEqual(len(tm), len(tm_sliced))
    self.assertEqual(tm.map[0], tm_sliced.map[0])

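    # Character positions 0..len(first token)-1 all fall inside the leading padding;
    # they should still resolve to token 0 in both the original and the sliced map.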
    for c in range(len(tm.tokens[0])):
      print(c)
      self.assertEqual(0, tm.token_index_by_char(c))
      self.assertEqual(0, tm_sliced.token_index_by_char(c))

    self.assertEqual(tm.text, tm_sliced.text)

    self.assertEqual(0, tm.token_index_by_char(0))
Example #2
  def test_tokens_in_range(self):
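    # token_index_by_char(c) is expected to return the index of the token covering
    # character position c; for the single word 'мама' every position maps to token 0,
    # and in the three-word text the second word (and the space before it) maps to token 1.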
    text = 'мама'
    tm = TextMap(text)

    self.assertEqual(0, tm.token_index_by_char(0))
    self.assertEqual(0, tm.token_index_by_char(1))
    self.assertEqual(0, tm.token_index_by_char(2))
    self.assertEqual(0, tm.token_index_by_char(3))

    text = 'мама выла папу'
    tm = TextMap(text)

    self.assertEqual(1, tm.token_index_by_char(5))
    self.assertEqual(1, tm.token_index_by_char(6))
    self.assertEqual(1, tm.token_index_by_char(7))
    self.assertEqual(1, tm.token_index_by_char(8))

    self.assertEqual(2, tm.token_index_by_char(9))
    self.assertEqual(1, tm.token_index_by_char(4))
Example #3
  def test_tokens_in_range_start_from_space(self):
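    # With a leading space the first token starts at character 1 (tm.map[0][0] == 1),
    # yet character 0 is still attributed to token 0 by token_index_by_char.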
    text = ' мама'
    tm = TextMap(text)

    self.assertEqual(1, tm.map[0][0])
    self.assertEqual(0, tm.token_index_by_char(0))

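    # Same leading-whitespace case, but the text is wrapped in a LegalDocument from the
    # same project; the prints below only expose the parsed token map for visual
    # inspection, there are no assertions for this part.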
    txt = ' ' * 20 + '''основании Устава, с одной стороны, и Фонд «Благо»'''
    doc = LegalDocument(txt).parse()
    tm = doc.tokens_map
    print(tm.map[0])
    print(tm.tokens[11])
    print(tm.map[11])
    print(f'[{doc.tokens_map.text}]')
    print(f'[{doc.text}]')
Example #4
  def test_slice(self):
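    # slice(1, 2) should give a one-token TextMap holding only 'мама'; slicing that
    # result again must preserve the text, the char-to-token lookup and text_range.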
    text = 'этилен мама   ಶ್ರೀರಾಮ'
    tm = TextMap(text)
    tm2: TextMap = tm.slice(slice(1, 2))

    self.assertEqual('мама', tm2[0])
    self.assertEqual('мама', tm2.text)

    tm3 = tm2.slice(slice(0, 1))
    self.assertEqual('мама', tm3[0])

    self.assertEqual(0, tm.token_index_by_char(1))
    self.assertEqual(0, tm2.token_index_by_char(1))
    self.assertEqual(0, tm3.token_index_by_char(1))

    self.assertEqual('мама', tm3.text)
    self.assertEqual('мама', tm3.text_range([0, 1]))
    self.assertEqual('мама', tm3.text_range([0, 2]))