def _decode_key_string(self, i):
    """ Decode the key starting at position i as a string.

    Key strings are always cached, since the same keys tend to repeat
    across many objects in a typical JSON document.
    """
    chars = self.ll_chars
    begin = i
    strhash, nonascii, i = simd.find_end_of_string(chars, i, len(self.s))
    terminator = chars[i]
    if terminator == '\\':
        # escape sequence inside the key: take the slow path
        self.pos = i
        return self.decode_string_escaped(begin, nonascii)
    if terminator < '\x20':
        self._raise_control_char_in_string(terminator, begin, i)
    length = i - begin
    strhash ^= length
    self.pos = i + 1
    # consult the key cache before building a fresh string object
    entry = None
    try:
        entry = self.cache_keys[strhash]
    except KeyError:
        pass
    if entry is None:
        # first time we see this hash: wrap the string and remember it
        w_res = self._create_string_wrapped(begin, i, nonascii)
        self.cache_keys[strhash] = StringCacheEntry(
            self.getslice(begin, begin + length), w_res)
        return w_res
    if entry.compare(chars, begin, length):
        return entry.w_uni
    # hash collision! hopefully rare
    return self._create_string_wrapped(begin, i, nonascii)
def decode_string(self, i, contextmap=None):
    """ Decode a string at position i (which is right after the opening ").

    Optionally pass a contextmap, if the value is decoded as the value of a
    dict. The contextmap carries per-field statistics that decide whether
    values of that field are worth caching.

    Side effect: advances self.pos past the closing quote (or up to the
    first backslash when falling back to the escaped slow path).
    """
    ll_chars = self.ll_chars
    start = i
    ch = ll_chars[i]
    if ch == '"':
        self.pos = i + 1
        return self.w_empty_string  # surprisingly common
    cache = True
    if contextmap is not None:
        # keep some statistics about the usefulness of the string cache on
        # the contextmap
        # the intuition about the contextmap is as follows:
        # often there are string values stored in dictionaries that can
        # never be usefully cached, like unique ids of objects. Then the
        # strings *in those fields* of all objects should never be cached.
        # However, the content of other fields can still be useful to
        # cache.
        contextmap.decoded_strings += 1
        if not contextmap.should_cache_strings():
            cache = False
    if len(self.s) < self.MIN_SIZE_FOR_STRING_CACHE:
        # tiny documents can't profit from caching at all
        cache = False
    if not cache:
        return self.decode_string_uncached(i)
    strhash, nonascii, i = simd.find_end_of_string(ll_chars, i, len(self.s))
    ch = ll_chars[i]
    if ch == '\\':
        # escape sequence: fall back to the slow, uncachable path
        self.pos = i
        return self.decode_string_escaped(start, nonascii)
    if ch < '\x20':
        self._raise_control_char_in_string(ch, start, i)
    else:
        assert ch == '"'
    self.pos = i + 1
    length = i - start
    # fold the length into the hash so equal prefixes of different
    # lengths don't collide systematically
    strhash ^= length
    # check cache first:
    try:
        entry = self.cache_values[strhash]
    except KeyError:
        w_res = self._create_string_wrapped(start, i, nonascii)
        # only add *some* strings to the cache, because keeping them all is
        # way too expensive. first we check if the contextmap has caching
        # disabled completely. if not, we check whether we have recently
        # seen the same hash already, if yes, we cache the string.
        if ((contextmap is not None and
                contextmap.decoded_strings < self.STRING_CACHE_EVALUATION_SIZE) or
                strhash in self.lru_cache):
            entry = StringCacheEntry(
                self.getslice(start, start + length), w_res)
            self.cache_values[strhash] = entry
        else:
            # not cached yet: record the hash so a repeat occurrence
            # gets admitted to the cache next time
            self.lru_cache[self.lru_index] = strhash
            self.lru_index = (self.lru_index + 1) & self.LRU_MASK
        return w_res
    if not entry.compare(ll_chars, start, length):
        # collision! hopefully rare
        return self._create_string_wrapped(start, i, nonascii)
    if contextmap is not None:
        contextmap.cache_hits += 1
    return entry.w_uni