def finish_async_redshift_query(event, context):
    """A handler triggered by the arrival of an UNLOAD manifest on S3

    The S3 trigger must be configured manually with:
        prefix = PROD
        suffix = manifest
    """
    logger = logging.getLogger("hsreplaynet.lambdas.finish_async_redshift_query")
    catalogue = get_redshift_catalogue()

    s3_event = event["Records"][0]["s3"]
    bucket = s3_event["bucket"]["name"]
    manifest_key = unquote(s3_event["object"]["key"])

    # Objects landing anywhere but the UNLOAD bucket are not ours to finish.
    if bucket != settings.S3_UNLOAD_BUCKET:
        return

    logger.info("Finishing query: %s", manifest_key)
    unload_started = time.time()
    parameterized_query = catalogue.refresh_cache_from_s3_manifest_key(
        manifest_key=manifest_key
    )

    metric_fields = {
        "duration_seconds": parameterized_query.most_recent_duration,
        "unload_seconds": time.time() - unload_started,
        "query_handle": parameterized_query.most_recent_query_handle,
    }
    metric_fields.update(parameterized_query.supplied_non_filters_dict)

    influx_metric(
        "finished_async_redshift_query",
        metric_fields,
        query_name=parameterized_query.query_name,
        **parameterized_query.supplied_filters_dict
    )
def refresh_stale_redshift_queries(event, context):
    """A cron'd handler that attempts to refresh queries queued in Redis"""
    logger = logging.getLogger(
        "hsreplaynet.lambdas.refresh_stale_redshift_queries")
    start_time = time.time()
    logger.info("Start Time: %s" % str(start_time))
    catalogue = get_redshift_catalogue()
    scheduler = catalogue.scheduler
    target_duration_seconds = 55
    duration = 0

    # Emit a depth metric for every refresh queue before doing any work,
    # so queue backlog is visible even when no refreshes happen this run.
    for queue_name in scheduler.query_queues:
        influx_metric(
            "redshift_refresh_queue_depth",
            {"depth": scheduler.queue_size(queue_name)},
            queue_name=queue_name,
        )

    # We run for 55 seconds, since the majority of queries take < 5 seconds to finish
    # And the next scheduled invocation of this will be starting a minute after this one.
    while duration < target_duration_seconds:
        logger.info("Runtime duration: %s" % str(duration))
        # Slots already consumed by queued queries don't count as available.
        available_slots = scheduler.get_available_slots(
        ) - scheduler.get_queued_query_count()
        logger.info("Available cluster slots: %s" % str(available_slots))

        if available_slots <= 1:
            sleep_duration = 5
            logger.info("Sleeping for %i until more slots are available" % sleep_duration)
            # If only 1 slot remains leave it for ETL or an IMMEDIATE query.
            time.sleep(sleep_duration)
            current_time = time.time()
            duration = current_time - start_time
            continue

        # Never block past the target window; bail if less than a second remains.
        remaining_seconds = target_duration_seconds - duration
        if remaining_seconds < 1:
            break

        logger.info("Will block for queued query for %s seconds" % str(remaining_seconds))
        try:
            refreshed_query = scheduler.refresh_next_pending_query(
                block_for=remaining_seconds,
                # This uses available cluster resources to refresh queries early
                force=REDSHIFT_PREEMPTIVELY_REFRESH_QUERIES)
            if refreshed_query:
                logger.info("Refreshed: %s" % refreshed_query.cache_key)
            else:
                logger.info("Nothing to refresh.")
        except Exception:
            # Best effort: record the failure and keep looping until the
            # time budget is spent rather than aborting the whole run.
            logger.error("Error refreshing query")
            sentry.captureException()

        current_time = time.time()
        duration = current_time - start_time
def handle(self, *args, **options):
    """Mark every archetype-backed query stale; schedule configured ones to refresh."""
    catalogue = get_redshift_catalogue()
    for name, query in catalogue.registry.items():
        if not query.uses_archetypes:
            continue
        query.mark_all_stale()
        if name not in settings.ARCHETYPE_QUERIES_FOR_IMMEDIATE_REFRESH:
            continue
        # Queue a refresh for every cachable parameter permutation.
        for permutation in query.generate_cachable_parameter_permutations():
            query.build_full_params(permutation).schedule_refresh()
def get_queries_for_cache_warming(eligible_queries=None):
    """Build the cache-warming work list.

    Returns a list of {"query_name", "supplied_parameters"} dicts, one per
    cachable parameter permutation of each warm-eligible query. When
    ``eligible_queries`` is None every eligible query is included; otherwise
    only queries whose name appears in it.
    """
    catalogue = redshift.get_redshift_catalogue()
    return [
        {"query_name": query.name, "supplied_parameters": permutation}
        for query in catalogue.cache_warm_eligible_queries
        if eligible_queries is None or query.name in eligible_queries
        for permutation in query.generate_cachable_parameter_permutations()
    ]
def get_personalized_queries_for_cache_warming(contexts, eligible_queries=None, catalogue=None):
    """Build the per-user cache-warming work list.

    Expands each personalized query's base parameter permutations across every
    Blizzard account in ``contexts`` (filling in Region / account_lo), and —
    for deck-aware queries — across each account's decks whose game types
    match the permutation. Returns {"query_name", "supplied_parameters"}
    dicts. ``catalogue`` overrides the default Redshift catalogue (useful for
    tests); ``eligible_queries`` restricts output to the named queries.
    """
    redshift_catalogue = catalogue if catalogue else redshift.get_redshift_catalogue(
    )
    queries = []
    for query in redshift_catalogue.personalized_queries:
        is_eligible = eligible_queries is None or query.name in eligible_queries
        if query.cache_warming_enabled and is_eligible:
            for permutation in query.generate_personalized_parameter_permutation_bases(
            ):
                # Each permutation will still be missing a Region and account_lo value
                for ctx in contexts:
                    for blizzard_account in ctx.blizzard_accounts:
                        # Shallow copy: each account gets its own dict so the
                        # base permutation is never mutated.
                        new_permutation = copy.copy(permutation)
                        new_permutation[
                            "Region"] = blizzard_account.region.name
                        new_permutation[
                            "account_lo"] = blizzard_account.account_lo
                        if "deck_id" in query.get_available_non_filter_parameters(
                        ):
                            # Deck-aware query: fan out one work item per deck
                            # whose game types match this permutation.
                            deck_map_for_account = ctx.get_deck_map_for_account(
                                blizzard_account)
                            for deck, gts in deck_map_for_account.items():
                                if _permutation_matches_game_types(
                                        new_permutation, gts):
                                    new_permutation_for_deck = copy.copy(
                                        new_permutation)
                                    new_permutation_for_deck[
                                        "deck_id"] = deck.id
                                    log.info("Warming: %s: %s",
                                             new_permutation_for_deck,
                                             query.name)
                                    queries.append({
                                        "query_name": query.name,
                                        "supplied_parameters":
                                        new_permutation_for_deck,
                                    })
                        else:
                            log.info("Warming: %s: %s", new_permutation,
                                     query.name)
                            queries.append({
                                "query_name": query.name,
                                "supplied_parameters": new_permutation
                            })
    return queries