def test_missing_inapp_and_memcache_get_repopulated_from_datastore(self):
    '''Tests if result from datastore resaves data to higher levels.

    It performs the checks on large values that will make use of the
    ChunkedResult.
    '''
    @layer_cache.cache(
        layer=layer_cache.Layers.Memcache |
              layer_cache.Layers.Datastore |
              layer_cache.Layers.InAppMemory,
        compress_chunks=False)
    def func(result):
        return result

    func(_BIG_STRING)

    # make sure instance_cache is flushed
    instance_cache.flush()
    self.assertIsNone(instance_cache.get(self.key))

    # force removal from memcache
    memcache.delete(self.key)

    # make sure removal worked
    self.assertIsNone(memcache.get(self.key))

    # make sure we are still able to get the value from datastore
    self.assertEqualTruncateError(_BIG_STRING, func("a"))

    # make sure instance_cache has been filled again
    self.assertEqualTruncateError(_BIG_STRING, instance_cache.get(self.key))

    # make sure memcache value has been readded
    self.assertIsInstance(
        memcache.get(self.key), layer_cache.ChunkedResult)
def test_repopulate_missing_inapp_cache_when_reading_from_memcache(self):
    '''Tests if missing inapp cache gets repopulated from memcache.

    It performs the checks on large values that will make use of the
    ChunkedResult.
    '''
    @layer_cache.cache(
        layer=layer_cache.Layers.Memcache | layer_cache.Layers.InAppMemory,
        compress_chunks=False)
    def func(result):
        return result

    func(_BIG_STRING)

    instance_cache.flush()

    # make sure instance_cache's value is gone
    self.assertIsNone(instance_cache.get(self.key))

    # make sure we are still able to get the value from memcache
    self.assertEqualTruncateError(_BIG_STRING, func("a"))

    # make sure instance_cache has been filled again
    self.assertEqualTruncateError(_BIG_STRING, instance_cache.get(self.key))
def test_write_thread_storage_in_flush(self):
    request_cache.set('key', 'main')
    self.assertTrue(request_cache.has('key'))
    self.assertEqual('main', request_cache.get('key'))

    thread_output = {}

    def thread_func(thread_output):
        request_cache.set('key', 'thread')
        thread_output['value_of_get_before_flush'] = (
            request_cache.get('key'))
        request_cache.flush()
        thread_output['value_of_get_after_flush'] = (
            request_cache.get('key'))

    thread = threading.Thread(target=thread_func, args=[thread_output])
    thread.start()
    thread.join()

    # The main thread should not see changes made by the second thread.
    self.assertTrue(request_cache.has('key'))
    self.assertEqual('main', request_cache.get('key'))
    self.assertEqual('thread', thread_output['value_of_get_before_flush'])
    self.assertIsNone(thread_output['value_of_get_after_flush'])
def get_cached_result(key, namespace, expiration, layer):
    if layer & Layers.InAppMemory:
        result = cachepy.get(key)
        if result is not None:
            return result

    if layer & Layers.Memcache:
        result = memcache.get(key, namespace=namespace)
        if result is not None:
            # Found in memcache, fill upward layers
            if layer & Layers.InAppMemory:
                cachepy.set(key, result, expiry=expiration)
            return result

    if layer & Layers.Datastore:
        result = KeyValueCache.get(key, namespace=namespace)
        if result is not None:
            # Found in datastore, fill upward layers
            if layer & Layers.InAppMemory:
                cachepy.set(key, result, expiry=expiration)
            if layer & Layers.Memcache:
                memcache.set(key, result, time=expiration,
                             namespace=namespace)
            return result
def get_cached_result(key, namespace, expiration, layer):
    if layer & Layers.InAppMemory:
        result = cachepy.get(key)
        if result is not None:
            return result

    if layer & Layers.Memcache:
        result = get_from_memcache(key, namespace=namespace)
        if result is not None:
            # Found in memcache, fill upward layers
            if layer & Layers.InAppMemory:
                cachepy.set(key, result, expiry=expiration)
            return result

    if layer & Layers.Datastore:
        result = KeyValueCache.get(key, namespace=namespace)
        if result is not None:
            # Found in datastore, fill upward layers
            if layer & Layers.InAppMemory:
                cachepy.set(key, result, expiry=expiration)
            if layer & Layers.Memcache:
                set_to_memcache(key, result, time=expiration,
                                namespace=namespace)
            return result

    if layer & Layers.Blobstore:
        result = BlobCache.get(key, namespace=namespace)
        # TODO: fill upward layers if the size of the dumped result is going
        # to be less than 1MB (might be too costly to figure that out)
        return result
def test_read_thread_storage_in_get(self):
    self.assertIsNone(request_cache.get('key'))
    request_cache.set('key', 'main')
    self.assertEqual('main', request_cache.get('key'))

    thread_output = {}

    def thread_func(thread_output):
        thread_output['value_of_get'] = request_cache.get('key')

    thread = threading.Thread(target=thread_func, args=[thread_output])
    thread.start()
    thread.join()

    # The second thread should see different values than the main thread.
    self.assertIsNone(thread_output['value_of_get'])
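# The two tests above assume request_cache keeps a separate store per thread.
# A minimal self-contained sketch of such a module built on threading.local();
# this illustrates the behavior the tests check and is not the real
# request_cache implementation.
import threading

_local = threading.local()


def _cache():
    # Lazily create this thread's private dict.
    if not hasattr(_local, 'cache'):
        _local.cache = {}
    return _local.cache


def set(key, value):
    _cache()[key] = value


def get(key):
    return _cache().get(key)


def has(key):
    return key in _cache()


def flush():
    # Clears only the calling thread's store; other threads are unaffected.
    _local.cache = {}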
def get_cached_result(key, namespace, expiration, layer):
    if layer & Layers.InAppMemory:
        result = cachepy.get(key)
        if result is not None:
            return result

    if layer & Layers.Memcache:
        maybe_chunked_result = memcache.get(key, namespace=namespace)
        if maybe_chunked_result is not None:
            if isinstance(maybe_chunked_result, ChunkedResult):
                result = maybe_chunked_result.get_result(
                    memcache, namespace=namespace)
            else:
                result = maybe_chunked_result

            # Found in memcache, fill upward layers
            if layer & Layers.InAppMemory:
                cachepy.set(key, result, expiry=expiration)
            return result

    if layer & Layers.Datastore:
        maybe_chunked_result = KeyValueCache.get(key, namespace=namespace)
        if maybe_chunked_result is not None:
            # Found in datastore. Unchunk results if needed, and fill upward
            # layers.
            if isinstance(maybe_chunked_result, ChunkedResult):
                result = maybe_chunked_result.get_result(
                    KeyValueCache, namespace=namespace)
                if layer & Layers.Memcache:
                    # Since the result in the datastore needed to be chunked,
                    # we will need to use ChunkedResult for memcache as well.
                    ChunkedResult.set(key, result, expiration, namespace,
                                      cache_class=memcache)
            else:
                result = maybe_chunked_result
                if layer & Layers.Memcache:
                    # Since the datastore wasn't using a chunked result,
                    # this memcache.set should succeed as well.
                    memcache.set(key, result, time=expiration,
                                 namespace=namespace)

            if layer & Layers.InAppMemory:
                cachepy.set(key, result, expiry=expiration)
            return result
def get_cached_result(key, namespace, expiration, layer):
    if layer & Layers.InAppMemory:
        result = instance_cache.get(key)
        if result is not None:
            return result

    if layer & Layers.Memcache:
        maybe_chunked_result = memcache.get(key, namespace=namespace)
        if maybe_chunked_result is not None:
            if isinstance(maybe_chunked_result, ChunkedResult):
                result = maybe_chunked_result.get_result(
                    memcache, namespace=namespace)
            else:
                result = maybe_chunked_result

            # Found in memcache, fill upward layers
            if layer & Layers.InAppMemory:
                instance_cache.set(key, result, expiry=expiration)
            return result

    if layer & Layers.Datastore:
        maybe_chunked_result = KeyValueCache.get(key, namespace=namespace)
        if maybe_chunked_result is not None:
            # Found in datastore. Unchunk results if needed, and fill upward
            # layers.
            if isinstance(maybe_chunked_result, ChunkedResult):
                result = maybe_chunked_result.get_result(
                    KeyValueCache, namespace=namespace)
                if layer & Layers.Memcache:
                    # Since the result in the datastore needed to be chunked,
                    # we will need to use ChunkedResult for memcache as well.
                    ChunkedResult.set(key, result, expiration, namespace,
                                      cache_class=memcache)
            else:
                result = maybe_chunked_result
                if layer & Layers.Memcache:
                    # Since the datastore wasn't using a chunked result,
                    # this memcache.set should succeed as well.
                    memcache.set(key, result, time=expiration,
                                 namespace=namespace)

            if layer & Layers.InAppMemory:
                instance_cache.set(key, result, expiry=expiration)
            return result
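# Minimal self-contained sketch of the "fill upward on a hit" pattern that
# the get_cached_result variants above implement. Plain dicts stand in for
# the instance cache, memcache, and datastore layers; names here are
# illustrative only, not the real APIs.
def fill_upward_get(key, l1, l2, l3):
    # L1 hit: fastest layer, nothing to refill.
    if key in l1:
        return l1[key]
    # L2 hit: refill L1 before returning.
    if key in l2:
        l1[key] = l2[key]
        return l2[key]
    # L3 hit: refill both faster layers.
    if key in l3:
        l1[key] = l2[key] = l3[key]
        return l3[key]
    return None


# A value present only in the slowest layer propagates upward on read.
l1, l2, l3 = {}, {}, {'k': 'v'}
assert fill_upward_get('k', l1, l2, l3) == 'v'
assert l1['k'] == 'v' and l2['k'] == 'v'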
def fetch_publisher(publisher_id):
    doc = BeautifulSoup(
        get(f'https://boardgamegeek.com/boardgamepublisher/{publisher_id}'),
        'html.parser')
    name = html.unescape(doc.find('meta', {'name': 'title'}).attrs['content'])
    desc = html.unescape(
        doc.find('meta', {'name': 'description'}).attrs['content'])
    return {
        'id': publisher_id,
        'name': name,
        'description': desc,
    }
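# fetch_publisher (and the fetch functions below) rely on a get() helper that
# is not part of this excerpt. A plausible sketch, assuming the requests
# library, that returns the response body as text:
import requests


def get(url):
    # Fail loudly on HTTP errors rather than handing an error page to the
    # parsers downstream.
    response = requests.get(url)
    response.raise_for_status()
    return response.text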
def thread_func(thread_output):
    request_cache.set('key', 'thread')
    thread_output['value_of_get_after_set'] = request_cache.get('key')
def fetch_games(all_ids):
    # Resulting games
    games = []
    # Counter to keep track of which batch we are currently processing
    current_batch = 0
    # How many games to fetch in a single request
    batch_size = 200
    # Number of comments we'll be fetching per request
    comments_per_page = 100
    # Per-game comment limit
    comment_limit = 500

    for ids in split(all_ids, batch_size):
        page = 1
        # Set page to 0 or lower to stop fetching comments
        while page > 0:
            # Tells whether each game has had all of its comments fetched
            has_fetched_all_comments = [True] * batch_size
            i = 0
            url = (f'https://api.geekdo.com/xmlapi2/thing?type=boardgame'
                   f'&id={",".join(map(str, ids))}&comments=1'
                   f'&pagesize={comments_per_page}&page={page}')
            data = xml.fromstring(get(url))
            for item in data:
                if page == 1:
                    game = {
                        'id': int(item.get('id')),
                        'categories': [],
                        'mechanics': [],
                        'expansions': [],
                        'publishers': [],
                        'comments': [],
                    }
                    games.append(game)
                else:
                    game = games[current_batch * batch_size + i]

                for el in item:
                    # Process some properties only if we are seeing this game
                    # for the first time
                    if page == 1:
                        if el.tag == 'name' and el.get('type') == 'primary':
                            game['name'] = el.get('value')
                        elif el.tag == 'description':
                            game['description'] = html.unescape(el.text)
                        elif el.tag == 'yearpublished':
                            game['year'] = int(el.get('value'))
                        elif el.tag == 'minplayers':
                            game['min_players'] = int(el.get('value'))
                        elif el.tag == 'maxplayers':
                            game['max_players'] = int(el.get('value'))
                        elif el.tag == 'minplaytime':
                            game['min_playtime'] = int(el.get('value'))
                        elif el.tag == 'maxplaytime':
                            game['max_playtime'] = int(el.get('value'))
                        elif el.tag == 'minage':
                            game['min_age'] = int(el.get('value'))
                        elif el.tag == 'link':
                            link_type = el.get('type')
                            if link_type == 'boardgamecategory':
                                game['categories'].append({
                                    'id': int(el.get('id')),
                                    'name': el.get('value'),
                                })
                            if link_type == 'boardgamemechanic':
                                game['mechanics'].append({
                                    'id': int(el.get('id')),
                                    'name': el.get('value'),
                                })
                            if link_type == 'boardgameexpansion':
                                game['expansions'].append({
                                    'id': int(el.get('id')),
                                    'game_id': game['id'],
                                })
                            if link_type == 'boardgamepublisher':
                                game['publishers'].append(int(el.get('id')))

                    if el.tag == 'comments':
                        total_comments = int(el.get('totalitems'))
                        # Check if there are still more comments after this
                        # page
                        if page * comments_per_page < min(total_comments,
                                                          comment_limit):
                            has_fetched_all_comments[i] = False
                        for comment in el:
                            rating = comment.get('rating')
                            if rating == 'N/A':
                                rating = 0
                            else:
                                rating = float(rating)
                            text = comment.get('value')
                            game['comments'].append({
                                'rating': rating,
                                'text': text,
                            })

                # Next game
                i += 1

            # Check if all of the comments have been fetched
            if all(has_fetched_all_comments):
                page = -1
            else:
                page += 1

        # Next games batch
        current_batch += 1

    return games
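# fetch_games iterates split(all_ids, batch_size), but split is not defined
# in this excerpt. A minimal sketch that yields consecutive fixed-size
# batches (the last batch may be shorter):
def split(items, size):
    items = list(items)
    for start in range(0, len(items), size):
        yield items[start:start + size]


# e.g. list(split([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]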
def layer_cache_check_set_return(
        target,
        key_fxn,
        expiration=DEFAULT_LAYER_CACHE_EXPIRATION_SECONDS,
        layer=Layers.Memcache | Layers.InAppMemory,
        persist_across_app_versions=False,
        permanent_key_fxn=None,
        bigdata=False,
        *args,
        **kwargs):

    if bigdata:
        get_from_memcache = big_memcache_get
        set_to_memcache = big_memcache_set
    else:
        get_from_memcache = memcache.get
        set_to_memcache = memcache.set

    def get_cached_result(key, namespace, expiration, layer):
        if layer & Layers.InAppMemory:
            result = cachepy.get(key)
            if result is not None:
                return result

        if layer & Layers.Memcache:
            result = get_from_memcache(key, namespace=namespace)
            if result is not None:
                # Found in memcache, fill upward layers
                if layer & Layers.InAppMemory:
                    cachepy.set(key, result, expiry=expiration)
                return result

        if layer & Layers.Datastore:
            result = KeyValueCache.get(key, namespace=namespace)
            if result is not None:
                # Found in datastore, fill upward layers
                if layer & Layers.InAppMemory:
                    cachepy.set(key, result, expiry=expiration)
                if layer & Layers.Memcache:
                    set_to_memcache(key, result, time=expiration,
                                    namespace=namespace)
                return result

        if layer & Layers.Blobstore:
            result = BlobCache.get(key, namespace=namespace)
            # TODO: fill upward layers if the size of the dumped result is
            # going to be less than 1MB (might be too costly to figure that
            # out)
            return result

    def set_cached_result(key, namespace, expiration, layer, result):
        # Cache the result
        if layer & Layers.InAppMemory:
            cachepy.set(key, result, expiry=expiration)

        if layer & Layers.Memcache:
            if not set_to_memcache(key, result, time=expiration,
                                   namespace=namespace):
                logging.error("Memcache set failed for %s" % key)

        if layer & Layers.Datastore:
            KeyValueCache.set(key, result, time=expiration,
                              namespace=namespace)

        if layer & Layers.Blobstore:
            BlobCache.set(key, result, time=expiration, namespace=namespace)

    bust_cache = False
    if "bust_cache" in kwargs:
        bust_cache = kwargs["bust_cache"]
        # delete from kwargs so it's not passed to the target
        del kwargs["bust_cache"]

    key = key_fxn(*args, **kwargs)

    # If key is None, or layer_cache is disabled, don't bother trying to get
    # the value from the cache; just execute the function and return it.
    if key is None or request_cache.get("layer_cache_disabled"):
        return target(*args, **kwargs)

    namespace = App.version
    if persist_across_app_versions:
        namespace = None

    if not bust_cache:
        try:
            result = get_cached_result(key, namespace, expiration, layer)
        except IOError:
            logging.exception("Exception loading from %s cache", key)
            result = None

        if result is not None:
            return result

    try:
        result = target(*args, **kwargs)
    # An error happened trying to recompute the result; see if there is a
    # value for it in the permanent cache.
    except Exception, e:
        import traceback, StringIO
        fp = StringIO.StringIO()
        traceback.print_exc(file=fp)
        logging.info(fp.getvalue())

        if permanent_key_fxn is not None:
            permanent_key = permanent_key_fxn(*args, **kwargs)
            result = get_cached_result(permanent_key, namespace, expiration,
                                       layer)
            if result is not None:
                logging.info("resource is not available, restoring from "
                             "permanent cache")
                # In case the key's value has been changed by target's
                # execution
                key = key_fxn(*args, **kwargs)
                # Retrieved item from permanent cache - save it to the more
                # temporary cache and then return it.
                set_cached_result(key, namespace, expiration, layer, result)
                return result

        # Could not retrieve item from a permanent cache; raise the error on
        # up.
        raise e

    # Save the freshly computed result to all requested layers and return it.
    set_cached_result(key, namespace, expiration, layer, result)
    return result
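# Self-contained sketch of the fallback logic above: a cache miss triggers a
# recompute, and if the recompute raises, a previously stored permanent copy
# is restored and re-primed into the temporary cache. Dicts stand in for the
# real cache layers; how the permanent copy gets written is outside this
# sketch, just as it is outside the function above.
def check_set_return(target, key, permanent_key, cache, permanent_cache):
    if key in cache:
        return cache[key]
    try:
        result = target()
    except Exception:
        if permanent_key in permanent_cache:
            # Recompute failed; restore from the permanent copy and re-prime
            # the temporary cache.
            result = permanent_cache[permanent_key]
            cache[key] = result
            return result
        raise
    cache[key] = result
    return result


# A failing recompute falls back to the permanent copy:
assert check_set_return(lambda: 1 / 0, 'k', 'k_perm',
                        {}, {'k_perm': 42}) == 42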
def is_disabled():
    return request_cache.get("layer_cache_disabled") or False
            ChunkedResult.set(key, result, time=expiration,
                              namespace=namespace, compress=compress_chunks,
                              cache_class=KeyValueCache)

    bust_cache = False
    if "bust_cache" in kwargs:
        bust_cache = kwargs["bust_cache"]
        # delete from kwargs so it's not passed to the target
        del kwargs["bust_cache"]

    key = key_fxn(*args, **kwargs)

    # If key is None, or layer_cache is disabled, don't bother trying to get
    # the value from the cache; just execute the function and return it.
    if key is None or request_cache.get("layer_cache_disabled"):
        return target(*args, **kwargs)

    namespace = App.version
    if persist_across_app_versions:
        namespace = None

    if not bust_cache:
        result = get_cached_result(key, namespace, expiration, layer)
        if result is not None:
            return result

    try:
        result = target(*args, **kwargs)
def fetch_games_expansions(games):
    expansions = reduce(reduce_extend, map(lambda g: g['expansions'], games),
                        [])
    res = []
    # Counter to keep track of which batch we are currently processing
    current_batch = 0
    # How many games to fetch in a single request
    batch_len = 500
    # Number of comments we'll be fetching per request
    comments_per_page = 100
    # Per-game comment limit
    comment_limit = 500

    for exps in split(expansions, batch_len):
        page = 1
        while page > 0:
            # Tells whether each expansion has had all of its comments
            # fetched
            has_fetched_all_comments = [True] * batch_len
            ids = [e['id'] for e in exps]
            i = 0
            data = xml.fromstring(get(
                f'https://api.geekdo.com/xmlapi2/thing'
                f'?type=boardgameexpansion&id={",".join(map(str, ids))}'
                f'&comments=1&pagesize={comments_per_page}&page={page}'))
            for item in data:
                if page == 1:
                    expansion = {
                        'game_id': exps[i]['game_id'],
                        'id': exps[i]['id'],
                        'comments': [],
                    }
                    res.append(expansion)
                else:
                    expansion = res[batch_len * current_batch + i]

                for el in item:
                    # Process some information only if we're looking at this
                    # expansion for the first time
                    if page == 1:
                        if el.tag == 'name' and el.get('type') == 'primary':
                            expansion['name'] = el.get('value')
                        elif el.tag == 'description':
                            expansion['description'] = html.unescape(el.text)
                        elif el.tag == 'yearpublished':
                            expansion['year'] = int(el.get('value'))

                    if el.tag == 'comments':
                        total_comments = int(el.get('totalitems'))
                        # Check if there are still more comments after this
                        # page
                        if page * comments_per_page < min(total_comments,
                                                          comment_limit):
                            has_fetched_all_comments[i] = False
                        for comment in el:
                            rating = comment.get('rating')
                            if rating == 'N/A':
                                rating = 0
                            else:
                                rating = float(rating)
                            text = comment.get('value')
                            expansion['comments'].append({
                                'rating': rating,
                                'text': text,
                            })

                # Next expansion
                i += 1

            # Check if all of the comments have been fetched
            if all(has_fetched_all_comments):
                page = -1
            else:
                page += 1

        # Next expansion batch
        current_batch += 1

    return res
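# fetch_games_expansions flattens the per-game expansion lists with
# reduce(reduce_extend, ...), but reduce_extend is not shown in this excerpt.
# A minimal sketch (reduce lives in functools on Python 3):
from functools import reduce


def reduce_extend(acc, items):
    # Append all items to the accumulator list and hand it back to reduce.
    acc.extend(items)
    return acc


# e.g. reduce(reduce_extend, [[1], [2, 3], []], []) == [1, 2, 3]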