Пример #1
0
    def test_23bit_file_offset_too_small(self):
        """Check that 3-byte file offsets overflow on a large trie.

        Serializing with ``file_offset_bytes=3`` is expected to raise
        ``OverflowError`` with a message pointing at
        ``SEARCH_FILE_OFFSET_BYTES``, while the same trie serialized with
        ``file_offset_bytes=4`` is expected to succeed.
        """
        # Function-scope import so this test does not rely on a module-level
        # ``re`` import being present.
        import re

        trie = Trie()

        # The high bit of the child offset stores a lookahead barrier, so the
        # file has to be smaller than 8M, not 16. Python has a recursion limit
        # of 1000, so we can't really insert a 8M character long string.
        # Instead, insert 130 nested prefixes of 'a' (lengths 0 to 129) where
        # each one carries 32k 16bit result IDs. 129 isn't enough to overflow
        # the offsets.
        results_32k = list(range(32767))
        for i in range(130):
            trie.insert('a' * i, results_32k)

        # The message contains literal dots ("conf.py."); escape it so the
        # regex matches the exact text instead of treating '.' as a wildcard.
        expected_message = re.escape(
            "Trie child offset too large to store in 23 bits, set SEARCH_FILE_OFFSET_BYTES = 4 in your conf.py."
        )
        with self.assertRaisesRegex(OverflowError, expected_message):
            trie.serialize(
                Serializer(file_offset_bytes=3,
                           result_id_bytes=2,
                           name_size_bytes=1))

        # This should work
        trie.serialize(
            Serializer(file_offset_bytes=4,
                       result_id_bytes=2,
                       name_size_bytes=1))
Пример #2
0
    def test_16bit_result_count(self):
        trie = Trie()

        for i in range(128):
            trie.insert("__init__", i)
        # It's __init_subclass__ (one underscore, not two), but here I want to
        # trigger the case of both a high amount of results and some children
        # as well.
        for i in [203, 215, 267]:
            trie.insert("__init__subclass__", i)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
__init__ [{}]
        subclass__ [203, 215, 267]
""".format(', '.join([str(i) for i in range(128)])))
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 377)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 657)
                else:
                    self.assertGreater(len(serialized), 377)
                    self.assertLess(len(serialized), 657)
Пример #3
0
    def test_unicode(self):
        trie = Trie()

        trie.insert("hýždě", 0)
        trie.insert("hárá", 1)

        serialized = trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=2,
                       name_size_bytes=1))
        self.compare(
            Deserializer(file_offset_bytes=3,
                         result_id_bytes=2,
                         name_size_bytes=1), serialized, """
h0xc3
  0xbd
   0xc5
  | 0xbe
  |  d0xc4
  |    0x9b
  |      [0]
  0xa1
   r0xc3
  |  0xa1
  |    [1]
""")
        self.assertEqual(len(serialized), 82)
Пример #4
0
    def test_single(self):
        trie = Trie()
        trie.insert("magnum", 1337)
        trie.insert("magnum", 21)

        serialized = trie.serialize()
        self.compare(serialized, """
magnum [1337, 21]
""")
        self.assertEqual(len(serialized), 46)
Пример #5
0
    def test(self):
        trie = Trie()
        map = ResultMap()

        trie.insert(
            "math",
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)))
        index = map.add("Math::Vector",
                        "classMath_1_1Vector.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::vector", index)
        trie.insert("vector", index)
        index = map.add("Math::Range",
                        "classMath_1_1Range.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::range", index)
        trie.insert("range", index)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = serialize_search_data(Serializer(**i), trie, map,
                                                   search_type_map, 3)
                self.compare(
                    serialized, """
3 symbols
math [0]
|   ::vector [1]
|     range [2]
vector [1]
range [2]
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
(EntryType.PAGE, CssClass.SUCCESS, 'page'),
(EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'),
(EntryType.CLASS, CssClass.PRIMARY, 'class'),
(EntryType.FUNC, CssClass.INFO, 'func')
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i[
                        'result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 282)
                elif i['file_offset_bytes'] == 4 and i[
                        'result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 317)
                else:
                    self.assertGreater(len(serialized), 282)
                    self.assertLess(len(serialized), 317)
Пример #6
0
    def test_24bit_result_id_too_small(self):
        """Check that a result ID of 2**24 is rejected with 3-byte result IDs.

        Serializing with ``result_id_bytes=3`` is expected to raise
        ``OverflowError`` with a message pointing at
        ``SEARCH_RESULT_ID_BYTES``, while ``result_id_bytes=4`` is expected
        to succeed.
        """
        # Function-scope import so this test does not rely on a module-level
        # ``re`` import being present.
        import re

        trie = Trie()
        # 16 * 1024 * 1024 == 2**24, one past the largest value 24 bits hold.
        trie.insert("a", 16 * 1024 * 1024)

        # The message contains literal dots ("conf.py."); escape it so the
        # regex matches the exact text instead of treating '.' as a wildcard.
        expected_message = re.escape(
            "Trie result ID too large to store in 24 bits, set SEARCH_RESULT_ID_BYTES = 4 in your conf.py."
        )
        with self.assertRaisesRegex(OverflowError, expected_message):
            trie.serialize(
                Serializer(file_offset_bytes=3,
                           result_id_bytes=3,
                           name_size_bytes=1))

        # This should work
        trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=4,
                       name_size_bytes=1))
Пример #7
0
    def test_single(self):
        trie = Trie()
        trie.insert("magnum", 1337)
        trie.insert("magnum", 21)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(Deserializer(**i), serialized, """
magnum [1337, 21]
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 46)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 56)
                else:
                    self.assertGreater(len(serialized), 46)
                    self.assertLess(len(serialized), 56)
Пример #8
0
    def test(self):
        trie = Trie()
        map = ResultMap()

        trie.insert(
            "math",
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)))
        index = map.add("Math::Vector",
                        "classMath_1_1Vector.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::vector", index)
        trie.insert("vector", index)
        index = map.add("Math::Range",
                        "classMath_1_1Range.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::range", index)
        trie.insert("range", index)

        serialized = serialize_search_data(trie, map, search_type_map, 3)
        self.compare(
            serialized, """
3 symbols
math [0]
|   ::vector [1]
|     range [2]
vector [1]
range [2]
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
(EntryType.PAGE, CssClass.SUCCESS, 'page'),
(EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'),
(EntryType.CLASS, CssClass.PRIMARY, 'class'),
(EntryType.FUNC, CssClass.INFO, 'func')
""")
        self.assertEqual(len(serialized), 277)
Пример #9
0
    def test_unicode(self):
        trie = Trie()

        trie.insert("hýždě", 0)
        trie.insert("hárá", 1)

        serialized = trie.serialize()
        self.compare(
            serialized, """
h0xc3
  0xbd
   0xc5
  | 0xbe
  |  d0xc4
  |    0x9b
  |      [0]
  0xa1
   r0xc3
  |  0xa1
  |    [1]
""")
        self.assertEqual(len(serialized), 82)
Пример #10
0
# Empty file, in all possible type size combinations

# Each entry of type_sizes is unpacked as keyword arguments twice: once into
# type_size_suffix() to build the output file name and once into Serializer().
# The data written is an empty Trie and an empty ResultMap.
# NOTE(review): the trailing `[], 0` arguments are presumably the type map and
# the symbol count -- confirm against serialize_search_data().
for i in type_sizes:
    with open(basedir / 'empty-{}.bin'.format(type_size_suffix(**i)),
              'wb') as f:
        f.write(
            serialize_search_data(Serializer(**i), Trie(), ResultMap(), [], 0))

# General test, in all possible type size combinations

# Shared trie and result map, populated by the statements below. Note that
# `map` shadows the builtin of the same name for the rest of this module.
trie = Trie()
map = ResultMap()

# Namespace entry, inserted under its lowercase name only.
trie.insert(
    "math",
    map.add("Math",
            "namespaceMath.html",
            flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)))
# Function entry, inserted under both its qualified and unqualified name.
# NOTE(review): suffix_length=8 and lookahead_barriers=[4] look tied to the
# "(int, int)" signature and the "::" separator position -- confirm.
index = map.add("Math::min(int, int)",
                "namespaceMath.html#min",
                suffix_length=8,
                flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))
trie.insert("math::min()", index, lookahead_barriers=[4])
trie.insert("min()", index)
# Class entry flagged as deprecated, again inserted under both names.
index = map.add("Math::Vector",
                "classMath_1_1Vector.html",
                flags=ResultFlag.from_type(ResultFlag.DEPRECATED,
                                           EntryType.CLASS))
trie.insert("math::vector", index)
trie.insert("vector", index)
index = map.add("Math::Vector::min() const",
Пример #11
0
    def test_multiple(self):
        trie = Trie()

        trie.insert("math", 0)
        trie.insert("math::vector", 1, lookahead_barriers=[4])
        trie.insert("vector", 1)
        trie.insert("math::range", 2)
        trie.insert("range", 2)

        trie.insert("math::min", 3)
        trie.insert("min", 3)
        trie.insert("math::max", 4)
        trie.insert("max", 4)
        trie.insert("math::minmax", 5)
        trie.insert("minmax", 5)

        trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12])
        trie.insert("vector::minmax", 6, lookahead_barriers=[6])
        trie.insert("minmax", 6)
        trie.insert("math::vector::min", 7)
        trie.insert("vector::min", 7)
        trie.insert("min", 7)
        trie.insert("math::vector::max", 8)
        trie.insert("vector::max", 8)
        trie.insert("max", 8)

        trie.insert("math::range::min", 9, lookahead_barriers=[4, 11])
        trie.insert("range::min", 9, lookahead_barriers=[5])
        trie.insert("min", 9)

        trie.insert("math::range::max", 10)
        trie.insert("range::max", 10)
        trie.insert("max", 10)

        serialized = trie.serialize()
        self.compare(
            serialized, """
math [0]
||| :$
|||  :vector [1]
|||   |     :$
|||   |      :min [7]
|||   |        | max [6]
|||   |        ax [8]
|||   range [2]
|||   |    :$
|||   |     :min [9]
|||   |       ax [10]
|||   min [3]
|||   || max [5]
|||   |ax [4]
||x [4, 8, 10]
|in [3, 7, 9]
|| max [5, 6]
vector [1]
|     :$
|      :min [7]
|        | max [6]
|        ax [8]
range [2]
|    :$
|     :min [9]
|       ax [10]
""")
        self.assertEqual(len(serialized), 340)
Пример #12
0
    def test_multiple(self):
        trie = Trie()

        trie.insert("math", 0)
        trie.insert("math::vector", 1, lookahead_barriers=[4])
        trie.insert("vector", 1)
        trie.insert("math::range", 2)
        trie.insert("range", 2)

        trie.insert("math::min", 3)
        trie.insert("min", 3)
        trie.insert("math::max", 4)
        trie.insert("max", 4)
        trie.insert("math::minmax", 5)
        trie.insert("minmax", 5)

        trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12])
        trie.insert("vector::minmax", 6, lookahead_barriers=[6])
        trie.insert("minmax", 6)
        trie.insert("math::vector::min", 7)
        trie.insert("vector::min", 7)
        trie.insert("min", 7)
        trie.insert("math::vector::max", 8)
        trie.insert("vector::max", 8)
        trie.insert("max", 8)

        trie.insert("math::range::min", 9, lookahead_barriers=[4, 11])
        trie.insert("range::min", 9, lookahead_barriers=[5])
        trie.insert("min", 9)

        trie.insert("math::range::max", 10)
        trie.insert("range::max", 10)
        trie.insert("max", 10)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
math [0]
||| :$
|||  :vector [1]
|||   |     :$
|||   |      :min [7]
|||   |        | max [6]
|||   |        ax [8]
|||   range [2]
|||   |    :$
|||   |     :min [9]
|||   |       ax [10]
|||   min [3]
|||   || max [5]
|||   |ax [4]
||x [4, 8, 10]
|in [3, 7, 9]
|| max [5, 6]
vector [1]
|     :$
|      :min [7]
|        | max [6]
|        ax [8]
range [2]
|    :$
|     :min [9]
|       ax [10]
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 340)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 428)
                else:
                    self.assertGreater(len(serialized), 340)
                    self.assertLess(len(serialized), 428)