def test_23bit_file_offset_too_small(self):
    """Check that 3-byte file offsets overflow and 4-byte ones do not.

    Builds a trie large enough that serializing it with
    ``file_offset_bytes=3`` is expected to raise ``OverflowError`` with a
    message pointing at ``SEARCH_FILE_OFFSET_BYTES``, then serializes the
    same trie with ``file_offset_bytes=4``, which is expected to succeed.
    """
    # Local import so the block does not depend on the file's import list.
    import re

    trie = Trie()
    # The high bit of the child offset stores a lookahead barrier, so the
    # file has to be smaller than 8M, not 16. Python has a recursion limit
    # of 1000, so we can't really insert a 8M character long string.
    # Instead, insert 130 keys ('a' repeated 0..129 times), each carrying
    # 32k 16bit result IDs. 129 isn't enough to overflow the offsets.
    results_32k = list(range(32767))
    for i in range(130):
        trie.insert('a' * i, results_32k)

    # Escape the message so '.' and friends are matched literally instead
    # of being interpreted as regex metacharacters.
    with self.assertRaisesRegex(
            OverflowError,
            re.escape(
                "Trie child offset too large to store in 23 bits, set SEARCH_FILE_OFFSET_BYTES = 4 in your conf.py."
            )):
        trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=2,
                       name_size_bytes=1))

    # This should work
    trie.serialize(
        Serializer(file_offset_bytes=4,
                   result_id_bytes=2,
                   name_size_bytes=1))
def test_16bit_result_count(self):
    """Serialize a node with 128 results plus a child, for every size combo.

    For each entry of ``trie_type_sizes`` the trie is serialized, compared
    against the expected dump and its byte length is checked: exact values
    for the smallest and largest combination, a strict in-between range for
    everything else.
    """
    smallest_size = 377
    largest_size = 657

    trie = Trie()
    for result_id in range(128):
        trie.insert("__init__", result_id)
    # It's __init_subclass__ (one underscore, not two), but here I want to
    # trigger the case of both a high amount of results and some children
    # as well.
    for result_id in (203, 215, 267):
        trie.insert("__init__subclass__", result_id)

    for sizes in trie_type_sizes:
        with self.subTest(**sizes):
            serialized = trie.serialize(Serializer(**sizes))
            deserializer = Deserializer(**sizes)
            # NOTE(review): the expected literal is kept exactly as it
            # appears in this view; its line breaks look collapsed --
            # confirm against the canonical file.
            all_ids = ', '.join(str(n) for n in range(128))
            expected = """ __init__ [{}] subclass__ [203, 215, 267] """.format(all_ids)
            self.compare(deserializer, serialized, expected)

            # Verify just the smallest and largest size, everything else
            # should fit in between
            size = len(serialized)
            is_smallest = (sizes['file_offset_bytes'] == 3
                           and sizes['result_id_bytes'] == 2)
            is_largest = (sizes['file_offset_bytes'] == 4
                          and sizes['result_id_bytes'] == 4)
            if is_smallest:
                self.assertEqual(size, smallest_size)
            elif is_largest:
                self.assertEqual(size, largest_size)
            else:
                self.assertGreater(size, smallest_size)
                self.assertLess(size, largest_size)
def test_unicode(self):
    """Serialize two non-ASCII keys with the 3/2/1-byte size combination.

    Compares the dump of the serialized trie against the expected text and
    checks that the serialized data is 82 bytes long.
    """
    sizes = dict(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1)

    trie = Trie()
    for result_id, key in enumerate(("hýždě", "hárá")):
        trie.insert(key, result_id)

    serialized = trie.serialize(Serializer(**sizes))
    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ h0xc3 0xbd 0xc5 | 0xbe | d0xc4 | 0x9b | [0] 0xa1 r0xc3 | 0xa1 | [1] """
    self.compare(Deserializer(**sizes), serialized, expected)
    self.assertEqual(len(serialized), 82)
def test_single(self):
    """Serialize a trie holding one key with two results (default sizes).

    Inserts result IDs 1337 and 21 under ``"magnum"``, compares the dump of
    the serialized data and checks that it is 46 bytes long.
    """
    trie = Trie()
    for result_id in (1337, 21):
        trie.insert("magnum", result_id)

    serialized = trie.serialize()
    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ magnum [1337, 21] """
    self.compare(serialized, expected)
    self.assertEqual(len(serialized), 46)
def test(self):
    """Serialize a small trie + result map for every entry of ``type_sizes``.

    Registers one namespace and two classes, serializes the search data
    with each size combination, compares the dump against the expected
    text and checks the byte length: exact values for the smallest and
    largest combination, a strict in-between range for everything else.
    """
    smallest_size = 282
    largest_size = 317

    trie = Trie()
    result_map = ResultMap()

    namespace_id = result_map.add(
        "Math", "namespaceMath.html",
        flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))
    trie.insert("math", namespace_id)

    # (result name, URL, trie keys pointing at that result)
    classes = (
        ("Math::Vector", "classMath_1_1Vector.html",
         ("math::vector", "vector")),
        ("Math::Range", "classMath_1_1Range.html",
         ("math::range", "range")),
    )
    for name, url, keys in classes:
        class_id = result_map.add(
            name, url,
            flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
        for key in keys:
            trie.insert(key, class_id)

    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ 3 symbols math [0] | ::vector [1] | range [2] vector [1] range [2] 0: Math [type=NAMESPACE] -> namespaceMath.html 1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html 2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html (EntryType.PAGE, CssClass.SUCCESS, 'page'), (EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'), (EntryType.CLASS, CssClass.PRIMARY, 'class'), (EntryType.FUNC, CssClass.INFO, 'func') """

    for sizes in type_sizes:
        with self.subTest(**sizes):
            serialized = serialize_search_data(
                Serializer(**sizes), trie, result_map, search_type_map, 3)
            self.compare(serialized, expected)

            # Verify just the smallest and largest size, everything else
            # should fit in between
            size = len(serialized)
            is_smallest = (sizes['file_offset_bytes'] == 3
                           and sizes['result_id_bytes'] == 2
                           and sizes['name_size_bytes'] == 1)
            is_largest = (sizes['file_offset_bytes'] == 4
                          and sizes['result_id_bytes'] == 4
                          and sizes['name_size_bytes'] == 2)
            if is_smallest:
                self.assertEqual(size, smallest_size)
            elif is_largest:
                self.assertEqual(size, largest_size)
            else:
                self.assertGreater(size, smallest_size)
                self.assertLess(size, largest_size)
def test_24bit_result_id_too_small(self):
    """Check that 3-byte result IDs overflow and 4-byte ones do not.

    Inserts a single result with ID ``16 * 1024 * 1024``. Serializing with
    ``result_id_bytes=3`` is expected to raise ``OverflowError`` with a
    message pointing at ``SEARCH_RESULT_ID_BYTES``; serializing the same
    trie with ``result_id_bytes=4`` is expected to succeed.
    """
    # Local import so the block does not depend on the file's import list.
    import re

    trie = Trie()
    trie.insert("a", 16 * 1024 * 1024)

    # Escape the message so '.' and friends are matched literally instead
    # of being interpreted as regex metacharacters.
    with self.assertRaisesRegex(
            OverflowError,
            re.escape(
                "Trie result ID too large to store in 24 bits, set SEARCH_RESULT_ID_BYTES = 4 in your conf.py."
            )):
        trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=3,
                       name_size_bytes=1))

    # This should work
    trie.serialize(
        Serializer(file_offset_bytes=3,
                   result_id_bytes=4,
                   name_size_bytes=1))
def test_single(self):
    """Serialize one key with two results for every ``trie_type_sizes`` entry.

    Each serialization is compared against the expected dump and its byte
    length is checked: exact values for the smallest and largest size
    combination, a strict in-between range for everything else.
    """
    smallest_size = 46
    largest_size = 56

    trie = Trie()
    for result_id in (1337, 21):
        trie.insert("magnum", result_id)

    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ magnum [1337, 21] """

    for sizes in trie_type_sizes:
        with self.subTest(**sizes):
            serialized = trie.serialize(Serializer(**sizes))
            self.compare(Deserializer(**sizes), serialized, expected)

            # Verify just the smallest and largest size, everything else
            # should fit in between
            size = len(serialized)
            is_smallest = (sizes['file_offset_bytes'] == 3
                           and sizes['result_id_bytes'] == 2)
            is_largest = (sizes['file_offset_bytes'] == 4
                          and sizes['result_id_bytes'] == 4)
            if is_smallest:
                self.assertEqual(size, smallest_size)
            elif is_largest:
                self.assertEqual(size, largest_size)
            else:
                self.assertGreater(size, smallest_size)
                self.assertLess(size, largest_size)
def test(self):
    """Serialize a small trie + result map and verify the dump and size.

    Registers one namespace and two classes, serializes the search data
    with a symbol count of 3, compares the dump against the expected text
    and checks that the serialized data is 277 bytes long.
    """
    trie = Trie()
    result_map = ResultMap()

    namespace_id = result_map.add(
        "Math", "namespaceMath.html",
        flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))
    trie.insert("math", namespace_id)

    # (result name, URL, trie keys pointing at that result)
    classes = (
        ("Math::Vector", "classMath_1_1Vector.html",
         ("math::vector", "vector")),
        ("Math::Range", "classMath_1_1Range.html",
         ("math::range", "range")),
    )
    for name, url, keys in classes:
        class_id = result_map.add(
            name, url,
            flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
        for key in keys:
            trie.insert(key, class_id)

    serialized = serialize_search_data(trie, result_map, search_type_map, 3)
    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ 3 symbols math [0] | ::vector [1] | range [2] vector [1] range [2] 0: Math [type=NAMESPACE] -> namespaceMath.html 1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html 2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html (EntryType.PAGE, CssClass.SUCCESS, 'page'), (EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'), (EntryType.CLASS, CssClass.PRIMARY, 'class'), (EntryType.FUNC, CssClass.INFO, 'func') """
    self.compare(serialized, expected)
    self.assertEqual(len(serialized), 277)
def test_unicode(self):
    """Serialize two non-ASCII keys with the default serializer settings.

    Compares the dump of the serialized trie against the expected text and
    checks that the serialized data is 82 bytes long.
    """
    trie = Trie()
    for result_id, key in enumerate(("hýždě", "hárá")):
        trie.insert(key, result_id)

    serialized = trie.serialize()
    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ h0xc3 0xbd 0xc5 | 0xbe | d0xc4 | 0x9b | [0] 0xa1 r0xc3 | 0xa1 | [1] """
    self.compare(serialized, expected)
    self.assertEqual(len(serialized), 82)
# Empty file, in all possible type size combinations for i in type_sizes: with open(basedir / 'empty-{}.bin'.format(type_size_suffix(**i)), 'wb') as f: f.write( serialize_search_data(Serializer(**i), Trie(), ResultMap(), [], 0)) # General test, in all possible type size combinations trie = Trie() map = ResultMap() trie.insert( "math", map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))) index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)) trie.insert("math::min()", index, lookahead_barriers=[4]) trie.insert("min()", index) index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.DEPRECATED, EntryType.CLASS)) trie.insert("math::vector", index) trie.insert("vector", index) index = map.add("Math::Vector::min() const",
def test_multiple(self):
    """Serialize a trie with many overlapping keys (default sizes).

    Inserts namespaced and bare variants of several names, some with
    lookahead barriers, then compares the dump of the serialized trie and
    checks that the serialized data is 340 bytes long.
    """
    # (key, result ID, lookahead barriers or None when the call passes
    # none); insertion order is significant and kept as-is.
    entries = (
        ("math", 0, None),
        ("math::vector", 1, [4]),
        ("vector", 1, None),
        ("math::range", 2, None),
        ("range", 2, None),
        ("math::min", 3, None),
        ("min", 3, None),
        ("math::max", 4, None),
        ("max", 4, None),
        ("math::minmax", 5, None),
        ("minmax", 5, None),
        ("math::vector::minmax", 6, [4, 12]),
        ("vector::minmax", 6, [6]),
        ("minmax", 6, None),
        ("math::vector::min", 7, None),
        ("vector::min", 7, None),
        ("min", 7, None),
        ("math::vector::max", 8, None),
        ("vector::max", 8, None),
        ("max", 8, None),
        ("math::range::min", 9, [4, 11]),
        ("range::min", 9, [5]),
        ("min", 9, None),
        ("math::range::max", 10, None),
        ("range::max", 10, None),
        ("max", 10, None),
    )

    trie = Trie()
    for key, result_id, barriers in entries:
        # Only pass lookahead_barriers where the test specifies them, so
        # the remaining calls keep relying on the parameter's default.
        extra = {} if barriers is None else {'lookahead_barriers': barriers}
        trie.insert(key, result_id, **extra)

    serialized = trie.serialize()
    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ math [0] ||| :$ ||| :vector [1] ||| | :$ ||| | :min [7] ||| | | max [6] ||| | ax [8] ||| range [2] ||| | :$ ||| | :min [9] ||| | ax [10] ||| min [3] ||| || max [5] ||| |ax [4] ||x [4, 8, 10] |in [3, 7, 9] || max [5, 6] vector [1] | :$ | :min [7] | | max [6] | ax [8] range [2] | :$ | :min [9] | ax [10] """
    self.compare(serialized, expected)
    self.assertEqual(len(serialized), 340)
def test_multiple(self):
    """Serialize a trie with many overlapping keys for every size combo.

    Inserts namespaced and bare variants of several names, some with
    lookahead barriers. For each entry of ``trie_type_sizes`` the trie is
    serialized, compared against the expected dump and its byte length is
    checked: exact values for the smallest and largest combination, a
    strict in-between range for everything else.
    """
    smallest_size = 340
    largest_size = 428

    # (key, result ID, lookahead barriers or None when the call passes
    # none); insertion order is significant and kept as-is.
    entries = (
        ("math", 0, None),
        ("math::vector", 1, [4]),
        ("vector", 1, None),
        ("math::range", 2, None),
        ("range", 2, None),
        ("math::min", 3, None),
        ("min", 3, None),
        ("math::max", 4, None),
        ("max", 4, None),
        ("math::minmax", 5, None),
        ("minmax", 5, None),
        ("math::vector::minmax", 6, [4, 12]),
        ("vector::minmax", 6, [6]),
        ("minmax", 6, None),
        ("math::vector::min", 7, None),
        ("vector::min", 7, None),
        ("min", 7, None),
        ("math::vector::max", 8, None),
        ("vector::max", 8, None),
        ("max", 8, None),
        ("math::range::min", 9, [4, 11]),
        ("range::min", 9, [5]),
        ("min", 9, None),
        ("math::range::max", 10, None),
        ("range::max", 10, None),
        ("max", 10, None),
    )

    trie = Trie()
    for key, result_id, barriers in entries:
        # Only pass lookahead_barriers where the test specifies them, so
        # the remaining calls keep relying on the parameter's default.
        extra = {} if barriers is None else {'lookahead_barriers': barriers}
        trie.insert(key, result_id, **extra)

    # NOTE(review): the expected literal is kept exactly as it appears in
    # this view; its line breaks look collapsed -- confirm against the
    # canonical file.
    expected = """ math [0] ||| :$ ||| :vector [1] ||| | :$ ||| | :min [7] ||| | | max [6] ||| | ax [8] ||| range [2] ||| | :$ ||| | :min [9] ||| | ax [10] ||| min [3] ||| || max [5] ||| |ax [4] ||x [4, 8, 10] |in [3, 7, 9] || max [5, 6] vector [1] | :$ | :min [7] | | max [6] | ax [8] range [2] | :$ | :min [9] | ax [10] """

    for sizes in trie_type_sizes:
        with self.subTest(**sizes):
            serialized = trie.serialize(Serializer(**sizes))
            self.compare(Deserializer(**sizes), serialized, expected)

            # Verify just the smallest and largest size, everything else
            # should fit in between
            size = len(serialized)
            is_smallest = (sizes['file_offset_bytes'] == 3
                           and sizes['result_id_bytes'] == 2)
            is_largest = (sizes['file_offset_bytes'] == 4
                          and sizes['result_id_bytes'] == 4)
            if is_smallest:
                self.assertEqual(size, smallest_size)
            elif is_largest:
                self.assertEqual(size, largest_size)
            else:
                self.assertGreater(size, smallest_size)
                self.assertLess(size, largest_size)