Exemplo n.º 1
0
    def _decode_key_string(self, i):
        """ Decode the key string that starts at position i and return its
        wrapped value.

        Keys tend to repeat a lot, so every key decoded here without escape
        sequences is looked up in (and, on a miss, added to) self.cache_keys.
        On success self.pos is left one past the character that ended the
        scan; if a backslash is found, self.pos points at it and the work is
        delegated to decode_string_escaped. """
        begin = i
        chars = self.ll_chars

        # the scan hands back a hash of the scanned characters, a non-ascii
        # flag and the index of the character at which it stopped
        keyhash, has_nonascii, end = simd.find_end_of_string(
            chars, begin, len(self.s))

        stop_char = chars[end]
        if stop_char == '\\':
            # escape sequence: the escaped decoder resumes from self.pos
            self.pos = end
            return self.decode_string_escaped(begin, has_nonascii)
        if stop_char < '\x20':
            self._raise_control_char_in_string(stop_char, begin, end)

        size = end - begin
        # mix the length into the hash before using it as the cache key
        keyhash ^= size
        self.pos = end + 1

        try:
            cached = self.cache_keys[keyhash]
        except KeyError:
            # not cached yet: handled below, outside of the handler
            pass
        else:
            if cached.compare(chars, begin, size):
                return cached.w_uni
            # hash collision (hopefully rare): build a fresh string and leave
            # the existing cache entry alone
            return self._create_string_wrapped(begin, end, has_nonascii)

        # cache miss: create the wrapped string and remember it
        w_key = self._create_string_wrapped(begin, end, has_nonascii)
        self.cache_keys[keyhash] = StringCacheEntry(
            self.getslice(begin, begin + size), w_key)
        return w_key
Exemplo n.º 2
0
    def decode_string(self, i, contextmap=None):
        """ Decode the string whose content starts at position i, i.e. right
        after the opening double quote, and return the wrapped result.

        contextmap is optional; it is passed when the string is the value of
        a dict entry and is used to keep per-field statistics
        (decoded_strings, cache_hits) that decide whether caching strings for
        that field is worthwhile.

        On success self.pos is left one past the closing quote. If a
        backslash is found, self.pos points at it and decoding is delegated
        to decode_string_escaped. """
        begin = i
        chars = self.ll_chars

        if chars[begin] == '"':
            # the empty string is surprisingly common, so it is special-cased
            self.pos = begin + 1
            return self.w_empty_string

        if contextmap is not None:
            # Statistics about how useful the string cache is are kept on the
            # contextmap. The intuition: some dict fields hold values that
            # can never be cached usefully (e.g. unique ids of objects), so
            # strings *in those fields* should bypass the cache, while the
            # contents of other fields may still benefit from it.
            contextmap.decoded_strings += 1
            if not contextmap.should_cache_strings():
                return self.decode_string_uncached(begin)
        if len(self.s) < self.MIN_SIZE_FOR_STRING_CACHE:
            # input below the size threshold: skip the cache entirely
            return self.decode_string_uncached(begin)

        # the scan hands back a hash of the scanned characters, a non-ascii
        # flag and the index of the character at which it stopped
        strhash, has_nonascii, end = simd.find_end_of_string(
            chars, begin, len(self.s))

        stop_char = chars[end]
        if stop_char == '\\':
            # escape sequence: the escaped decoder resumes from self.pos
            self.pos = end
            return self.decode_string_escaped(begin, has_nonascii)
        if stop_char < '\x20':
            self._raise_control_char_in_string(stop_char, begin, end)
        else:
            assert stop_char == '"'

        self.pos = end + 1

        size = end - begin
        # mix the length into the hash before using it as the cache key
        strhash ^= size

        try:
            cached = self.cache_values[strhash]
        except KeyError:
            # not cached yet: handled below, outside of the handler
            pass
        else:
            if not cached.compare(chars, begin, size):
                # hash collision (hopefully rare): build a fresh string and
                # leave the existing cache entry alone
                return self._create_string_wrapped(begin, end, has_nonascii)
            if contextmap is not None:
                contextmap.cache_hits += 1
            return cached.w_uni

        # Cache miss. Keeping every string would be far too expensive, so
        # only *some* are admitted: either the contextmap is still within its
        # evaluation phase, or the same hash has been recorded in
        # self.lru_cache recently.
        w_res = self._create_string_wrapped(begin, end, has_nonascii)
        still_evaluating = (
            contextmap is not None and
            contextmap.decoded_strings < self.STRING_CACHE_EVALUATION_SIZE)
        if still_evaluating or strhash in self.lru_cache:
            self.cache_values[strhash] = StringCacheEntry(
                self.getslice(begin, begin + size), w_res)
        else:
            # remember the hash, so that a second occurrence gets cached;
            # the slot index is advanced and masked with LRU_MASK
            slot = self.lru_index
            self.lru_cache[slot] = strhash
            self.lru_index = (slot + 1) & self.LRU_MASK
        return w_res