def do_execute_redshift_query(query_name, supplied_params, queue_name):
    """A common entry point for the actual execution of queries on Lambda.

    Returns True when the query executed, False when the query name is
    unknown or the target queue was already at max concurrency.
    """
    logger = logging.getLogger("hsreplaynet.lambdas.execute_redshift_query")
    # Lazy %-style args: formatting only happens if the record is emitted.
    logger.info("Query Name: %s", query_name)
    logger.info("Query Params: %s", supplied_params)

    query = get_redshift_query(query_name)
    if not query:
        return False

    parameterized_query = query.build_full_params(supplied_params)
    try:
        wlm_queue = settings.REDSHIFT_QUERY_QUEUES[queue_name]["wlm_queue"]
        # The semaphore bounds how many concurrent executions may hit
        # Redshift for this queue; NotAvailable means we're at the cap.
        with get_concurrent_redshift_query_queue_semaphore(queue_name):
            _do_execute_query(parameterized_query, wlm_queue)
        logger.info("Query Execution Complete")
        return True
    except NotAvailable:
        # BUGFIX: logger.warn() is deprecated; use warning().
        logger.warning("The Redshift query queue was already at max concurrency. Skipping query.")
        metric_fields = {
            "count": 1
        }
        metric_fields.update(parameterized_query.supplied_non_filters_dict)
        influx_metric(
            "redshift_query_lambda_execution_concurrency_exceeded",
            metric_fields,
            query_name=query_name,
            **parameterized_query.supplied_filters_dict
        )
        return False
def _get_global_stats_eligible_decks():
    """Return the set of deck digests eligible for global stats.

    The result is memoized in ``_eligible_decks_cache`` and only rebuilt
    when either the standard or wild "list_decks_by_win_rate" result has
    moved to a newer as-of timestamp (or nothing is cached yet).
    """
    query = redshift.get_redshift_query("list_decks_by_win_rate")
    standard_query = query.build_full_params(
        dict(
            TimeRange="LAST_30_DAYS",
            GameType="RANKED_STANDARD",
        ))
    wild_query = query.build_full_params(
        dict(
            TimeRange="LAST_30_DAYS",
            GameType="RANKED_WILD",
        ))

    digest_cache_missing = _eligible_decks_cache.get("eligible_decks", None) is None
    standard_as_of = _eligible_decks_cache.get("standard_as_of", None)
    standard_is_stale = standard_as_of != standard_query.result_as_of
    wild_as_of = _eligible_decks_cache.get("wild_as_of", None)
    wild_is_stale = wild_as_of != wild_query.result_as_of

    if digest_cache_missing or standard_is_stale or wild_is_stale:
        result = set()
        # Payload shape: {"series": {"data": {player_class: [deck, ...]}}};
        # the player class itself is irrelevant here, only the digests matter.
        for decks in standard_query.response_payload["series"]["data"].values():
            for deck in decks:
                result.add(deck["digest"])
        for decks in wild_query.response_payload["series"]["data"].values():
            for deck in decks:
                result.add(deck["digest"])
        _eligible_decks_cache["standard_as_of"] = standard_query.result_as_of
        # BUGFIX: this previously cached standard_query.result_as_of under
        # "wild_as_of", which kept wild_is_stale permanently True whenever
        # the two timestamps differed, forcing a rebuild on every call.
        _eligible_decks_cache["wild_as_of"] = wild_query.result_as_of
        _eligible_decks_cache["eligible_decks"] = result

    return _eligible_decks_cache["eligible_decks"]
def validate_query(self, value):
    """Ensure *value* names an existing Redshift query.

    Caches the resolved query object on ``self.query`` for later use and
    returns the (unchanged) value, per the serializer validation contract.
    """
    self.query = get_redshift_query(value)
    if self.query:
        return value
    raise ValidationError("Query does not exist")
def get_parameterized_query(self):
    """Build the parameterized account leaderboard query from request filters."""
    # Support RANKED_STANDARD and RANKED_WILD leaderboards in addition to
    # the base leaderboard filters.
    params = self.get_parameterized_query_filters()
    params["GameType"] = self.get_parameterized_query_filter(
        "GameType",
        filters.GameType,
        ["RANKED_STANDARD", "RANKED_WILD"],
    )
    query = get_redshift_query("account_lo_leaderboard_by_winrate")
    return query.build_full_params(params)
def run_local_warm_queries(eligible_queries=None):
    """Warm the query cache by locally executing all stale global permutations."""
    messages = get_queries_for_cache_warming(eligible_queries)
    log.info("Generated %i global query permutations for cache warming." % len(messages))

    stale_queries = filter_freshly_cached_queries(messages)
    log.info("%i permutations remain after filtering fresh queries" % len(stale_queries))

    for message in stale_queries:
        query = redshift.get_redshift_query(message["query_name"])
        parameterized = query.build_full_params(message["supplied_parameters"])
        execute_query(parameterized, run_local=True)
def _get_query_data(self, query_name, params=None) -> Tuple[Dict, Dict]:
    """Fetch a Redshift query's cached payload.

    Returns the ``(data, metadata)`` pair from the response's "series"
    section. Raises QueryDataNotAvailableException when no result has
    been cached yet.
    """
    parameterized = get_redshift_query(query_name).build_full_params(params or dict())

    # Kick off a refresh when the cached result is stale. A failure to
    # trigger is non-fatal: we still serve whatever is currently cached.
    try:
        trigger_if_stale(parameterized)
    except OSError as err:
        get_logger().warning("Failed to trigger stale query refresh: " + str(err))

    if not parameterized.result_available:
        raise QueryDataNotAvailableException()

    series = parameterized.response_payload["series"]
    return series["data"], series["metadata"]
def get_parameterized_query(self):
    """Build the parameterized archetype leaderboard query.

    Raises ValidationError when the required ``archetype_id`` request
    parameter is missing or not an integer.
    """
    query = get_redshift_query(
        "account_lo_archetype_leaderboard_by_winrate")
    if "archetype_id" not in self.request.GET:
        # BUGFIX: the original raised {f"archetype_id": "Invalid value:
        # {filter_value}"} — the f-prefix was on the dict KEY, so the
        # literal "{filter_value}" text was emitted and the message made
        # no sense for a *missing* parameter.
        raise ValidationError({"archetype_id": "This parameter is required."})
    # This query is implicitly filtered by GameType=RANKED_STANDARD at the moment.
    param_dict = self.get_parameterized_query_filters()
    try:
        param_dict["archetype_id"] = int(self.request.GET["archetype_id"])
    except ValueError:
        # Reject non-numeric values with a 400 instead of an unhandled
        # ValueError (server error).
        raise ValidationError({
            "archetype_id": "Invalid value: %s" % self.request.GET["archetype_id"]
        })
    return query.build_full_params(param_dict)
def refresh_meta_preview():
    """Trigger refreshes of the archetype popularity stats for every
    rank/region combination used by the meta preview."""
    from hsreplaynet.utils.aws.redshift import get_redshift_query

    query = get_redshift_query("archetype_popularity_distribution_stats")
    for rank in range(0, 21):
        for region in ("REGION_EU", "REGION_US", "REGION_KR", "REGION_CN"):
            parameterized = query.build_full_params(dict(
                TimeRange="LAST_1_DAY",
                GameType="RANKED_STANDARD",
                RankRange=RANK_MAP[rank],
                Region=region,
            ))
            # Only enqueues work when the cached result is out of date.
            trigger_if_stale(parameterized)
def _list_decks_from_redshift_for_format(self, game_format):
    """Return the decks-by-winrate payload for the given game format."""
    query = get_redshift_query("list_decks_by_win_rate")
    # Both branches only differ in GameType, so compute it up front.
    if game_format == enums.FormatType.FT_STANDARD:
        game_type = "RANKED_STANDARD"
    else:
        game_type = "RANKED_WILD"
    parameterized_query = query.build_full_params(dict(
        TimeRange="LAST_30_DAYS",
        GameType=game_type,
    ))
    return parameterized_query.response_payload["series"]["data"]
def get_cluster_set_data(game_format=FormatType.FT_STANDARD, lookback=7,
                         min_observations=100, min_pilots=10, block=True):
    """Return the clustering input payload for the given format.

    When the cached result is missing or stale: if ``block`` is True,
    trigger a local execution and poll for up to two minutes before
    raising RuntimeError; otherwise trigger an async execution and
    return [] immediately.
    """
    from hsreplaynet.utils.aws.redshift import get_redshift_query

    game_type = "RANKED_STANDARD" if game_format == FormatType.FT_STANDARD else "RANKED_WILD"

    time_range = "LAST_%i_DAY" % lookback
    if lookback > 1:
        time_range += "S"

    parameterized_query = get_redshift_query("list_cluster_set_data").build_full_params(
        dict(
            TimeRange=time_range,
            min_games=min_observations,
            min_pilots=min_pilots,
            GameType=game_type,
        ))

    def _fresh():
        # A usable result must exist AND not be stale.
        return (parameterized_query.result_available and
                not parameterized_query.result_is_stale)

    if not _fresh():
        if not block:
            # Fire-and-forget refresh; the caller gets no data this time.
            attempt_request_triggered_query_execution(parameterized_query)
            return []
        attempt_request_triggered_query_execution(parameterized_query, run_local=True)
        # Poll once a second, for at most 120 seconds.
        remaining = 120
        while not _fresh():
            if remaining <= 0:
                raise RuntimeError(
                    "Waited %i seconds, clustering data not available" % 120)
            time.sleep(1)
            remaining -= 1

    return parameterized_query.response_payload
def filter_freshly_cached_queries(messages):
    """Drop messages whose query results are already cached and fresh."""
    if not settings.ENV_AWS:
        # We can only reach the cache from inside AWS, so we cannot take
        # advantage of this optimization outside AWS. Skipping filtering is
        # okay as up-to-date queries will still get skipped at query
        # execution time.
        return messages

    def _needs_refresh(message):
        # Keep a message when its cache entry is missing or stale.
        query = redshift.get_redshift_query(message["query_name"])
        parameterized = query.build_full_params(message["supplied_parameters"])
        return not parameterized.result_available or parameterized.result_is_stale

    return [message for message in messages if _needs_refresh(message)]
def get_mulligan_preview():
    """Pick a deterministic "mulligan guide of the day" deck.

    Selects a popular deck (>= 25000 games over the last 30 days) whose
    per-class mulligan guides are sufficiently distinct from each other.
    Candidate order is shuffled with a seed derived from the current UTC
    date, so the pick is stable within a day. Returns {} when no suitable
    deck or data is available.
    """
    from hsreplaynet.utils.aws.redshift import get_redshift_query

    decks_query = get_redshift_query("list_decks_by_win_rate")
    parameterized_decks_query = decks_query.build_full_params(
        dict(GameType="RANKED_STANDARD", RankRange="ALL", Region="ALL",
             TimeRange="LAST_30_DAYS"))
    if not parameterized_decks_query.result_available:
        return {}

    decks_response = parameterized_decks_query.response_payload
    # Group sufficiently-played decks by archetype.
    archetype_decks = defaultdict(list)
    for class_decks in decks_response["series"]["data"].values():
        for deck in class_decks:
            if deck["total_games"] >= 25000:
                archetype_decks[deck["archetype_id"]].append(deck)

    def is_sufficiently_distinct(mulligan_data):
        """True when the per-class top-5 mulligan cards are not mostly identical."""
        # Top five cards by opening-hand winrate for each class.
        card_data = [
            sorted(cards, key=lambda x: x["opening_hand_winrate"], reverse=True)[:5]
            for cards in mulligan_data["series"]["data"].values()
        ]
        num_classes = len(card_data)

        def matching_cards(i, num_cards):
            # Number of other classes whose top num_cards match class i exactly.
            return sum(
                1 for j in range(num_classes)
                if j != i and all(
                    card_data[i][k]["dbf_id"] == card_data[j][k]["dbf_id"]
                    for k in range(num_cards))
            )

        for i in range(num_classes):
            if matching_cards(i, 5) > 3 or matching_cards(i, 1) > 4:
                return False
        return True

    # Seed the global RNG with today's UTC date for a stable daily pick.
    # BUGFIX: seed() returns None; the old code captured that and passed it
    # as shuffle's "random" argument, which raises TypeError on Python 3.11+
    # where the parameter was removed. Seeding alone already makes the
    # subsequent shuffle() calls deterministic.
    now = datetime.utcnow()
    seed(int(datetime(now.year, now.month, now.day).timestamp()))

    archetype_decks = list(archetype_decks.values())
    shuffle(archetype_decks)
    for decks in archetype_decks:
        shuffle(decks)
        for deck in decks:
            mulligan_query = get_redshift_query(
                "single_deck_mulligan_guide_by_class")
            deck_obj = Deck.objects.get_by_shortid(deck["deck_id"])
            parameterized_mulligan_query = mulligan_query.build_full_params(
                dict(GameType="RANKED_STANDARD", RankRange="ALL", Region="ALL",
                     PlayerInitiative="ALL", deck_id=str(deck_obj.id)))
            if not parameterized_mulligan_query.result_available:
                continue
            mulligan_response = parameterized_mulligan_query.response_payload
            if not is_sufficiently_distinct(mulligan_response):
                continue
            return {
                "deck": deck,
                "data": mulligan_response["series"]["data"],
                "meta_data": mulligan_response["series"]["metadata"]
            }
    return {}
def get_meta_preview(num_items=10):
    """Return up to ``num_items`` (rank, region, archetype) highlights.

    Kicks off an async refresh lambda, then reads whatever distribution
    stats are already cached, preferring one entry per unique archetype
    and padding with leftovers. The final ordering is shuffled with a
    seed derived from the current UTC hour, so it is stable within the
    hour.
    """
    from hsreplaynet.utils.aws.clients import LAMBDA
    from hsreplaynet.utils.aws.redshift import get_redshift_query

    # Fire-and-forget refresh of the underlying query results.
    LAMBDA.invoke(FunctionName="do_refresh_meta_preview", InvocationType="Event")

    query = get_redshift_query("archetype_popularity_distribution_stats")
    unique_archetypes = set()
    data = []
    for rank in range(0, 21):
        for region in ["REGION_EU", "REGION_US", "REGION_KR", "REGION_CN"]:
            parameterized_query = query.build_full_params(
                dict(TimeRange="LAST_1_DAY", GameType="RANKED_STANDARD",
                     RankRange=RANK_MAP[rank], Region=region))
            if not parameterized_query.result_available:
                continue
            response = parameterized_query.response_payload
            # Keep archetypes that are real (>0), popular, sampled and winning.
            archetypes = []
            for class_values in response["series"]["data"].values():
                for value in class_values:
                    if (value["archetype_id"] > 0 and value["pct_of_total"] > 0.5 and
                            value["total_games"] > 30 and value["win_rate"] > 51):
                        archetypes.append(value)
            if not archetypes:
                continue
            # Highest winrate wins; max() replaces sorted(...)[0].
            archetype = max(archetypes, key=lambda a: a["win_rate"])
            unique_archetypes.add(archetype["archetype_id"])
            data.append({
                "rank": rank,
                "region": region,
                "data": archetype,
                "as_of": response["as_of"]
            })

    # One entry per unique archetype first.
    results = []
    for archetype_id in unique_archetypes:
        for datum in data:
            if datum["data"]["archetype_id"] == archetype_id:
                results.append(datum)
                break
        # datum is the entry found above (every id in unique_archetypes came
        # from data, so the inner loop always breaks); remove it so the
        # remainder can serve as filler below.
        data.remove(datum)

    # Seed the global RNG with the current UTC hour for a stable ordering.
    # BUGFIX: seed() returns None; the old code captured that and passed it
    # as shuffle's "random" argument, which raises TypeError on Python
    # 3.11+ where the parameter was removed. Seeding alone is enough.
    hour = datetime.utcnow().replace(minute=0, second=0, microsecond=0)
    seed(int(hour.timestamp()))

    if len(results) < num_items:
        shuffle(data)
        # BUGFIX: bound by len(data) to avoid an IndexError when there are
        # not enough leftover entries to fill the preview.
        for i in range(min(num_items - len(results), len(data))):
            results.append(data[i])
    shuffle(results)
    return results[:num_items]
def _get_query_and_params(request, name):
    """Resolve a named Redshift query and build its parameters from the request.

    Returns a parameterized query object on success. On failure it returns
    an error HttpResponse (400 JsonResponse / 403 HttpResponseForbidden) or
    raises Http404 — callers must check the return type before using it.
    Personalized queries additionally require an authenticated user who
    owns (or, for staff, merely names) the target Blizzard account.
    """
    query = get_redshift_query(name)
    if not query:
        raise Http404("No query named: %s" % name)

    supplied_params = request.GET.dict()
    deck = None
    if "deck_id" in supplied_params and not supplied_params["deck_id"].isdigit():
        # We got sent a shortid, so we need to translate it into a deck_id int
        try:
            deck = Deck.objects.get_by_shortid(supplied_params["deck_id"])
            supplied_params["deck_id"] = str(deck.id)
        except Deck.DoesNotExist:
            raise Http404("Deck does not exist")

    if query.is_personalized:
        if request.user and request.user.is_authenticated:
            if "Region" in supplied_params and "account_lo" in supplied_params:
                # The parameters region and account_lo were both supplied, use these
                supplied_region = supplied_params["Region"]
                supplied_account_lo = supplied_params["account_lo"]
                if not (supplied_region.isdigit() and supplied_account_lo.isdigit()):
                    return JsonResponse(
                        {
                            "msg": "Both account_lo and Region should be numeric."
                        },
                        status=400)
                # Non-staff users may only query accounts they own.
                user_owns_blizzard_account = request.user.blizzard_accounts.filter(
                    region__exact=int(supplied_region),
                    account_lo__exact=int(supplied_account_lo)).exists()
                if not user_owns_blizzard_account and not request.user.is_staff:
                    return HttpResponseForbidden()
            elif "Region" not in supplied_params and "account_lo" not in supplied_params:
                # Neither region nor account_lo were supplied, default to first
                default_blizzard_account = request.user.blizzard_accounts.first()
                if default_blizzard_account:
                    supplied_params["Region"] = default_blizzard_account.region.name
                    supplied_params["account_lo"] = default_blizzard_account.account_lo
                else:
                    raise Http404("User does not have any Blizzard Accounts.")
            else:
                # Supplying only either Region or account_lo is a bad request
                msg = "Personalized queries require both region and account_lo to be present."
                return JsonResponse({"msg": msg}, status=400)

            # Map numeric region to FilterEnum
            if supplied_params["Region"].isdigit():
                supplied_region = supplied_params["Region"]
                region_member = Region.from_int(int(supplied_region))
                supplied_params["Region"] = region_member.name

            try:
                personal_parameterized_query = query.build_full_params(supplied_params)
            except InvalidOrMissingQueryParameterError as e:
                # Return a 400 Bad Request response
                # BUGFIX: log.warn() is deprecated; use warning().
                log.warning(str(e))
                return JsonResponse({"msg": str(e)}, status=400)

            if not user_is_eligible_for_query(request.user, query,
                                              personal_parameterized_query):
                return HttpResponseForbidden()
            return personal_parameterized_query
        else:
            # Anonymous or Fake Users Can Never Request Personal Stats
            return HttpResponseForbidden()
    else:
        # Global (non-personalized) stats path.
        if deck and not deck.eligible_for_global_stats:
            return HttpResponseForbidden()
        try:
            parameterized_query = query.build_full_params(supplied_params)
        except InvalidOrMissingQueryParameterError as e:
            # Return a 400 Bad Request response
            # BUGFIX: log.warn() is deprecated; use warning().
            log.warning(str(e))
            return JsonResponse({"msg": str(e)}, status=400)
        if not user_is_eligible_for_query(request.user, query, parameterized_query):
            return HttpResponseForbidden()
        return parameterized_query