示例#1
0
 def _prepare_token_array(self):
     """Build the combined token array from all witnesses.

     Resets ``self.cached_suffix_array`` and then, for every witness in
     ``self.witnesses``:

     * records the witness' span ``[counter, counter + token count)`` in
       ``self.witness_ranges`` under the witness sigil and advances
       ``self.counter`` by the token count plus one (the marker slot);
     * tags each token's ``token_data`` with ``'_sigil'`` and
       ``'_token_array_position'``;
     * appends the tokens, followed by a ``'$<idx>'`` marker token, to
       ``self.token_array``.

     The marker that follows the last witness is removed again, so
     markers only separate witnesses. When there are no witnesses nothing
     is appended and nothing is popped (previously the trailing ``pop``
     raised ``IndexError`` on an empty array).
     """
     # TODO: the lazy init should move to somewhere else
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     token_array_position = 0
     marker_appended = False
     for idx, witness in enumerate(self.witnesses):
         # Fetch the tokens once so the objects that get annotated are
         # guaranteed to be the very objects stored in the token array.
         tokens = witness.tokens()
         sigil = witness.sigil
         witness_range = RangeSet()
         witness_range.add_range(self.counter, self.counter + len(tokens))
         # the extra one is for the marker token
         self.counter += len(tokens) + 1
         self.witness_ranges[sigil] = witness_range
         for token in tokens:
             token.token_data['_sigil'] = sigil
             token.token_data['_token_array_position'] = token_array_position
             token_array_position += 1
         self.token_array.extend(tokens)
         # add marker token; it occupies one position of its own
         self.token_array.append(Token({"n": '$' + str(idx), '_sigil': sigil}))
         token_array_position += 1
         marker_appended = True
     if marker_appended:
         # remove last marker: nothing follows the final witness
         self.token_array.pop()
示例#2
0
 def _prepare_token_array(self):
     """Build the combined token array from all witnesses.

     Resets ``self.cached_suffix_array`` and then, for every witness in
     ``self.witnesses``, records the witness' span
     ``[counter, counter + token count)`` in ``self.witness_ranges`` under
     the witness sigil, advances ``self.counter`` by the token count plus
     one (the marker slot), and appends the witness tokens to
     ``self.token_array``. Consecutive witnesses are separated by a
     ``'$<previous idx>'`` marker token.

     The marker is emitted before every witness except the very first
     one. The earlier check looked only at whether the token array was
     still empty, so a leading witness without tokens suppressed the
     marker even though ``self.counter`` had already reserved its slot,
     leaving later witness ranges shifted relative to the array.
     """
     # TODO: the lazy init should move to somewhere else
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     for idx, witness in enumerate(self.witnesses):
         # Fetch the tokens once instead of once per use.
         tokens = witness.tokens()
         witness_range = RangeSet()
         witness_range.add_range(self.counter, self.counter + len(tokens))
         # the extra one is for the marker token
         self.counter += len(tokens) + 1
         self.witness_ranges[witness.sigil] = witness_range
         # `idx > 0` keeps positions aligned with self.counter even when
         # earlier witnesses were empty; the truthiness test keeps the
         # previous behaviour for an array that already holds tokens.
         if idx > 0 or self.token_array:
             # add marker token
             self.token_array.append(Token({"n": "$" + str(idx - 1)}))
         self.token_array.extend(tokens)
示例#3
0
 def test_invalid_token_raises_exception(self):
     """Constructing a Token from an empty dict must raise TokenError."""
     empty_data = {}
     self.assertRaises(TokenError, Token, empty_data)
示例#4
0
 def test_creation_token_none(self):
     """Token(None) must expose an empty token_string and no token_data."""
     token = Token(None)
     expected_string = ''
     self.assertEqual(token.token_string, expected_string)
     self.assertIsNone(token.token_data)
示例#5
0
 def test_creation_token_n(self):
     """With both 't' and 'n' given, token_string must be the 'n' value."""
     payload = {
         't': 'kitten',
         'n': 'cat',
     }
     token = Token(payload)
     self.assertEqual(token.token_string, 'cat')
     # token_data is expected to compare equal to the dict passed in
     self.assertEqual(token.token_data, payload)
示例#6
0
 def test_creation_token_t(self):
     """Without an 'n' entry, token_string must be the 't' value."""
     payload = {
         't': 'fox',
         'id': 123,
     }
     token = Token(payload)
     self.assertEqual(token.token_string, 'fox')
     # extra keys such as 'id' are expected to stay in token_data
     self.assertEqual(token.token_data, payload)