def get(self, request, id): claim = get_uuid_object_or_404(AccountClaim, id=id) log.info("Claim %r: Token=%r, User=%r", claim, claim.token, claim.token.user) if claim.token.user: if claim.token.user.is_fake: count = GameReplay.objects.filter( user=claim.token.user).update(user=request.user) log.info("Updated %r replays. Deleting %r.", count, claim.token.user) # For now we just delete the fake user, because we are not using it. claim.token.user.delete() else: log.warning("%r is a real user. Deleting %r.", claim.token.user, claim) # Something's wrong. Get rid of the claim and reject the request. claim.delete() influx_metric("hsreplaynet_account_claim", {"count": 1}, error=1) return HttpResponseForbidden( "This token has already been claimed.") claim.token.user = request.user claim.token.save() # Replays are claimed in AuthToken post_save signal (games.models) claim.delete() messages.info(request, "You have claimed your account. Nice!") influx_metric("hsreplaynet_account_claim", {"count": 1}) return redirect(self.get_redirect_url(request))
def delete(self):
    # We only perform delete on NEW raw uploads because when we get to this point we have
    # a copy of the log and descriptor attached to the UploadEvent
    if self.state == RawUploadState.NEW:
        log.info("Deleting files from S3")
        aws.S3.delete_object(Bucket=self.bucket, Key=self.log_key)
        aws.S3.delete_object(Bucket=self.bucket, Key=self.descriptor_key)

def _do_execute_query_work(parameterized_query, wlm_queue=None):
    if not parameterized_query.result_is_stale:
        log.info("Up-to-date cached data exists. Exiting without running query.")
    else:
        log.info("Cached data missing or stale. Executing query now.")
        # DO EXPENSIVE WORK
        start_ts = time.time()
        exception_raised = False
        exception_msg = None
        try:
            parameterized_query.refresh_result(wlm_queue)
        except Exception as e:
            exception_raised = True
            exception_msg = str(e)
            raise
        finally:
            end_ts = time.time()
            duration_seconds = round(end_ts - start_ts, 2)

            query_execute_metric_fields = {
                "duration_seconds": duration_seconds,
                "exception_message": exception_msg,
            }
            query_execute_metric_fields.update(
                parameterized_query.supplied_non_filters_dict
            )

            influx_metric(
                "redshift_query_execute",
                query_execute_metric_fields,
                exception_thrown=exception_raised,
                query_name=parameterized_query.query_name,
                **parameterized_query.supplied_filters_dict
            )

def get(self, request, id):
    claim = get_uuid_object_or_404(AccountClaim, id=id)
    log.info("Claim %r: Token=%r, User=%r", claim, claim.token, claim.token.user)
    if claim.token.user:
        if claim.token.user.is_fake:
            count = GameReplay.objects.filter(user=claim.token.user).update(user=request.user)
            log.info("Updated %r replays. Deleting %r.", count, claim.token.user)
            # For now we just delete the fake user, because we are not using it.
            claim.token.user.delete()
        else:
            log.warning("%r is a real user. Deleting %r.", claim.token.user, claim)
            # Something's wrong. Get rid of the claim and reject the request.
            claim.delete()
            return HttpResponseForbidden("This token has already been claimed.")
    claim.token.user = request.user
    claim.token.save()
    # Replays are claimed in AuthToken post_save signal (games.models)
    claim.delete()
    msg = "You have claimed your account. Yay!"
    # XXX: using WARNING as a hack to ignore login/logout messages for now
    messages.add_message(request, messages.WARNING, msg)
    return redirect(settings.LOGIN_REDIRECT_URL)

def reap_orphans_for_date(reaping_date):
    inventory = get_reaping_inventory_for_date(reaping_date)

    for hour, hour_inventory in inventory.items():
        reaped_orphan_count = 0
        for minute, minute_inventory in hour_inventory.items():
            for shortid, keys in minute_inventory.items():
                descriptor = keys.get("descriptor")
                if is_safe_to_reap(shortid, keys):
                    log.debug("Reaping Descriptor: %r", descriptor)
                    aws.S3.delete_object(
                        Bucket=settings.S3_RAW_LOG_UPLOAD_BUCKET,
                        Key=keys["descriptor"],
                    )
                    reaped_orphan_count += 1
                else:
                    log.debug("Skipping: %r (Unsafe to reap)", descriptor)

        log.info(
            "A total of %s descriptors reaped for hour: %s"
            % (str(reaped_orphan_count), str(hour))
        )

        # Report count of orphans to Influx
        fields = {"count": reaped_orphan_count}
        influx_metric(
            "orphan_descriptors_reaped",
            fields=fields,
            timestamp=reaping_date,
            hour=hour,
        )

def reap_orphans_for_date(reaping_date):
    inventory = get_reaping_inventory_for_date(reaping_date)

    for hour, hour_inventory in inventory.items():
        reaped_orphan_count = 0
        for minute, minute_inventory in hour_inventory.items():
            for shortid, keys in minute_inventory.items():
                if is_safe_to_reap(shortid, keys):
                    log.debug("Reaping Descriptor: %r", keys["descriptor"])
                    aws.S3.delete_object(
                        Bucket=settings.S3_RAW_LOG_UPLOAD_BUCKET,
                        Key=keys["descriptor"]
                    )
                    reaped_orphan_count += 1
                else:
                    log.debug("Skipping: %r (Unsafe To Reap)", keys["descriptor"])

        log.info(
            "A total of %s descriptors reaped for hour: %s" % (
                str(reaped_orphan_count), str(hour)
            )
        )

        # Report count of orphans to Influx
        fields = {"count": reaped_orphan_count}
        influx_metric(
            "orphan_descriptors_reaped",
            fields=fields,
            timestamp=reaping_date,
            hour=hour
        )

def on_premium_purchased(sender, event, **kwargs):
    if event.customer and event.customer.subscriber:
        user = event.customer.subscriber
        log.info("Received premium purchased signal for user: %s" % user.username)
        log.info("Scheduling personalized stats for immediate cache warming.")
        context = PremiumUserCacheWarmingContext.from_user(user)
        warm_redshift_cache_for_user_context(context)

def _fetch_query_results(parameterized_query, run_local=False, user=None, priority=None):
    cache_is_populated = parameterized_query.cache_is_populated
    is_cache_hit = parameterized_query.result_available
    triggered_refresh = False

    if is_cache_hit:
        triggered_refresh = trigger_if_stale(parameterized_query, run_local, priority)
        response = HttpResponse(
            content=parameterized_query.response_payload_data,
            content_type=parameterized_query.response_payload_type
        )
    elif cache_is_populated and parameterized_query.is_global:
        if parameterized_query.is_backfillable and parameterized_query.is_personalized:
            # Premium users should have cache entries even if the result set is empty,
            # so we should only reach this block if the user just subscribed
            # and we haven't rerun the global query yet.
            triggered_refresh = True
            attempt_request_triggered_query_execution(parameterized_query, run_local, priority)
            result = {"msg": "Query is processing. Check back later."}
            response = JsonResponse(result, status=202)
        else:
            # There is no content for this permutation of parameters.
            # For deck related queries this most likely means that someone hand crafted the URL,
            # or if it's a card related query, it's a corner case where there is no data.
            response = HttpResponse(status=204)
    else:
        # The cache is not populated yet for this query.
        # Perhaps it's a new query or perhaps the cache was recently flushed,
        # so attempt to trigger populating it.
        attempt_request_triggered_query_execution(parameterized_query, run_local, priority)
        result = {"msg": "Query is processing. Check back later."}
        response = JsonResponse(result, status=202)

    log.info(
        "Query: %s Cache Populated: %s Cache Hit: %s Is Stale: %s" % (
            parameterized_query.cache_key, cache_is_populated, is_cache_hit, triggered_refresh
        )
    )

    query_fetch_metric_fields = {
        "count": 1,
    }
    query_fetch_metric_fields.update(parameterized_query.supplied_non_filters_dict)

    influx.influx_metric(
        "redshift_query_fetch",
        query_fetch_metric_fields,
        cache_populated=cache_is_populated,
        cache_hit=is_cache_hit,
        query_name=parameterized_query.query_name,
        triggered_refresh=triggered_refresh,
        **parameterized_query.supplied_filters_dict
    )

    return response

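# Summary of the response matrix implemented above (derived from the branches):
#   cache hit                                        -> 200 with the cached payload
#   cache populated, global, backfillable + personal -> 202 and a refresh is triggered
#   cache populated, global, otherwise               -> 204 (no content for these parameters)
#   cache not populated                              -> 202 and a refresh is triggered
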
def fill_global_query_queue(eligible_queries=None):
    queue_name = settings.REDSHIFT_ANALYTICS_QUERY_QUEUE_NAME
    messages = get_queries_for_cache_warming(eligible_queries)
    log.info("Generated %i global query permutations for cache warming." % len(messages))

    stale_queries = filter_freshly_cached_queries(messages)
    log.info("%i permutations remain after filtering fresh queries" % len(stale_queries))

    write_messages_to_queue(queue_name, stale_queries)

def reap_orphan_descriptors_handler(event, context):
    """A daily job to clean up orphan descriptors in the raw uploads bucket."""
    current_date = date.today()
    reaping_delay = settings.LAMBDA_ORPHAN_REAPING_DELAY_DAYS
    assert reaping_delay >= 1  # Protect against descriptors just created
    reaping_date = current_date - timedelta(days=reaping_delay)

    log.info("Reaping Orphan Descriptors For: %r", reaping_date.isoformat())
    reap_orphans_for_date(reaping_date)
    log.info("Finished.")

def fill_personalized_query_queue(contexts, eligible_queries=None):
    queue_name = settings.REDSHIFT_PERSONALIZED_QUERY_QUEUE_NAME
    messages = get_personalized_queries_for_cache_warming(contexts, eligible_queries)
    log.info("Generated %i personalized permutations for cache warming." % len(messages))

    stale_queries = filter_freshly_cached_queries(messages)
    log.info("%i personalized perms remain after filtering fresh queries" % len(stale_queries))

    write_messages_to_queue(queue_name, stale_queries)

def get_valid_match_start(match_start, upload_date):
    """
    Returns a valid match_start value given the match_start and upload_date.

    If the upload_date is greater than the match_start, return the match_start.
    If the match_start is greater than the upload_date, it cannot be trusted, so
    return the upload_date instead, adjusted to the match_start's timezone.
    """
    if upload_date > match_start:
        return match_start

    log.info("match_start=%r > upload_date=%r - rejecting match_start", match_start, upload_date)
    return upload_date.astimezone(match_start.tzinfo)

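# Usage sketch for get_valid_match_start (illustrative timestamps only; assumes the
# function is importable from its module): a match_start that claims to be later
# than the upload is clamped to the upload time.
from datetime import datetime, timezone

upload_date = datetime(2018, 1, 1, 12, 0, tzinfo=timezone.utc)   # uploaded at 12:00

match_start = datetime(2018, 1, 1, 12, 30, tzinfo=timezone.utc)  # claims to start at 12:30
assert get_valid_match_start(match_start, upload_date) == upload_date  # clamped to upload time

match_start = datetime(2018, 1, 1, 11, 30, tzinfo=timezone.utc)  # starts before the upload
assert get_valid_match_start(match_start, upload_date) == match_start  # accepted as-is
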
def get_personalized_queries_for_cache_warming(contexts, eligible_queries=None, catalogue=None):
    redshift_catalogue = catalogue if catalogue else redshift.get_redshift_catalogue()
    queries = []
    for query in redshift_catalogue.personalized_queries:
        is_eligible = eligible_queries is None or query.name in eligible_queries
        if query.cache_warming_enabled and is_eligible:
            for permutation in query.generate_personalized_parameter_permutation_bases():
                # Each permutation will still be missing a Region and account_lo value
                for ctx in contexts:
                    for blizzard_account in ctx.blizzard_accounts:
                        new_permutation = copy.copy(permutation)
                        new_permutation["Region"] = blizzard_account.region.name
                        new_permutation["account_lo"] = blizzard_account.account_lo
                        if "deck_id" in query.get_available_non_filter_parameters():
                            deck_map_for_account = ctx.get_deck_map_for_account(blizzard_account)
                            for deck, gts in deck_map_for_account.items():
                                if _permutation_matches_game_types(new_permutation, gts):
                                    new_permutation_for_deck = copy.copy(new_permutation)
                                    new_permutation_for_deck["deck_id"] = deck.id
                                    log.info("Warming: %s: %s", new_permutation_for_deck, query.name)
                                    queries.append({
                                        "query_name": query.name,
                                        "supplied_parameters": new_permutation_for_deck,
                                    })
                        else:
                            log.info("Warming: %s: %s", new_permutation, query.name)
                            queries.append({
                                "query_name": query.name,
                                "supplied_parameters": new_permutation
                            })
    return queries

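# Illustration of the queued message shape produced above (query name and values
# are hypothetical, for illustration only):
#   {
#       "query_name": "single_deck_winrate_over_time",
#       "supplied_parameters": {"Region": "REGION_US", "account_lo": 12345, "deck_id": 678, ...},
#   }
# Any additional keys come from the parameter permutation base generated by the query.
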
def run_local_warm_queries(eligible_queries=None):
    messages = get_queries_for_cache_warming(eligible_queries)
    log.info("Generated %i global query permutations for cache warming." % len(messages))

    stale_queries = filter_freshly_cached_queries(messages)
    log.info("%i permutations remain after filtering fresh queries" % len(stale_queries))

    for msg in stale_queries:
        query = redshift.get_redshift_query(msg["query_name"])
        parameterized_query = query.build_full_params(msg["supplied_parameters"])
        execute_query(parameterized_query, run_local=True)

def _do_execute_query(parameterized_query, wlm_queue=None):
    # This method should always be getting executed within a Lambda context

    # Distributed dog pile lock pattern
    # From: https://pypi.python.org/pypi/python-redis-lock
    log.info("About to attempt acquiring lock...")
    redis_client = redshift.get_redshift_cache_redis_client()

    with RedisLock(redis_client, parameterized_query.cache_key, expire=300):
        # Get a lock with a 5-minute lifetime since that's the maximum duration of a Lambda,
        # to ensure the lock is held for as long as the Python process / Lambda is running.
        log.info("Lock acquired.")
        return _do_execute_query_work(parameterized_query, wlm_queue)

def update_signature_for_archetype(self, archetype_id, format):
    input_decks_qs = Deck.objects.filter(archetype_id=archetype_id)
    input_decks = [d for d in input_decks_qs if d.format == format and d.size == 30]
    deck_observation_counts = self._get_deck_observation_counts_from_redshift(format)

    card_prevalence_counts = defaultdict(int)
    total_occurrences_of_decks = 0
    for deck in input_decks:
        if deck.shortid in deck_observation_counts:
            total_occurrences_of_decks += deck_observation_counts[deck.shortid]
            for card in deck:
                card_prevalence_counts[card] += deck_observation_counts[deck.shortid]

    archetype = Archetype.objects.get(id=archetype_id)
    log.info(
        "Generating new %s signature for archetype: %s" % (format.name, archetype.name)
    )
    log.info(
        "Deck occurrences contributing to signature: %s" % str(total_occurrences_of_decks)
    )

    signature = Signature.objects.create(
        archetype=archetype, format=format, as_of=timezone.now()
    )

    for card, observation_count in card_prevalence_counts.items():
        prevalence = float(observation_count) / total_occurrences_of_decks
        if prevalence >= settings.ARCHETYPE_CORE_CARD_THRESHOLD:
            log.info("card: %s with prevalence: %s is CORE" % (card.name, prevalence))
            SignatureComponent.objects.create(
                signature=signature,
                card=card,
                weight=settings.ARCHETYPE_CORE_CARD_WEIGHT * prevalence
            )
        elif prevalence >= settings.ARCHETYPE_TECH_CARD_THRESHOLD:
            log.info("card: %s with prevalence: %s is TECH" % (card.name, prevalence))
            SignatureComponent.objects.create(
                signature=signature,
                card=card,
                weight=settings.ARCHETYPE_TECH_CARD_WEIGHT * prevalence
            )
        else:
            log.info("card: %s with prevalence: %s is DISCARD" % (card.name, prevalence))

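# Worked example of the weighting above (threshold values are hypothetical, for
# illustration only): with ARCHETYPE_CORE_CARD_THRESHOLD = 0.8 and
# ARCHETYPE_TECH_CARD_THRESHOLD = 0.3, a card appearing in decks that account for
# 90 of 100 observed deck occurrences has prevalence 0.9 and becomes a CORE
# component with weight ARCHETYPE_CORE_CARD_WEIGHT * 0.9; a card at prevalence 0.4
# becomes a TECH component; a card at prevalence 0.1 is left out of the signature.
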
def prepare_upload_event_log_location(self, bucket, key, descriptor):
    self._upload_event_log_bucket = bucket
    self._upload_event_log_key = key
    self._upload_event_descriptor_key = descriptor

    if key != self.log_key:
        copy_source = "%s/%s" % (self.bucket, self.log_key)
        log.info("%r: Copying power.log %r to %r:%r" % (self, copy_source, bucket, key))
        aws.S3.copy_object(Bucket=bucket, Key=key, CopySource=copy_source)

    if descriptor != self.descriptor_key:
        copy_source = "%s/%s" % (self.bucket, self.descriptor_key)
        log.info("%r: Copying descriptor %r to %r:%r" % (self, copy_source, bucket, descriptor))
        aws.S3.copy_object(Bucket=bucket, Key=descriptor, CopySource=copy_source)

    self._upload_event_location_populated = True

def execute_query(parameterized_query, run_local=False, priority=None):
    if run_local:
        # IMMEDIATE will cause the query to get run synchronously
        log.info("run_local async refresh for: %s" % parameterized_query.cache_key)
        parameterized_query.schedule_refresh(priority=QueryRefreshPriority.IMMEDIATE)
        # _do_execute_query_work(parameterized_query)  # Uncomment to cut-over to async redshift queries
    else:
        # This will queue the query for refresh as resources are available
        log.info("Scheduling refresh for: %s (priority=%s)" % (
            parameterized_query.unload_key,
            priority,
        ))
        parameterized_query.schedule_refresh(priority=priority)

def customer_subscription_created_handler(event, event_data, event_type, event_subtype):
    if event.customer and event.customer.subscriber:
        user = event.customer.subscriber
        log.info("Received premium purchased signal for user: %s" % user.username)
        log.info("Scheduling personalized stats for immediate cache warming.")
        context = PremiumUserCacheWarmingContext.from_user(user)

        fields = {"count": 1, "user": user.username}
        influx_metric(
            "premium_purchase_cache_warm_event",
            fields,
        )

        warm_redshift_cache_for_user_context(context)

def get_personalized_queries_for_cache_warming(contexts, eligible_queries=None):
    queries = []
    for query in get_redshift_catalogue().personalized_queries:
        is_eligible = eligible_queries is None or query.name in eligible_queries
        if query.cache_warming_enabled and is_eligible:
            for permutation in query.generate_personalized_parameter_permutation_bases():
                # Each permutation will still be missing a Region and account_lo value
                for ctx in contexts:
                    for pegasus_account in ctx.pegasus_accounts:
                        new_permutation = copy.copy(permutation)
                        new_permutation["Region"] = pegasus_account.region.name
                        new_permutation["account_lo"] = pegasus_account.account_lo
                        if "deck_id" in query.get_available_non_filter_parameters():
                            for deck, gts in ctx.decks.items():
                                if _permutation_matches_game_types(new_permutation, gts):
                                    new_permutation_for_deck = copy.copy(new_permutation)
                                    new_permutation_for_deck["deck_id"] = deck.id
                                    log.info("Warming: %s: %s", new_permutation_for_deck, query.name)
                                    queries.append({
                                        "query_name": query.name,
                                        "supplied_parameters": new_permutation_for_deck
                                    })
                        else:
                            log.info("Warming: %s: %s", new_permutation, query.name)
                            queries.append({
                                "query_name": query.name,
                                "supplied_parameters": new_permutation
                            })
    return queries

def _fetch_query_results(parameterized_query, run_local=False):
    is_cache_hit = parameterized_query.result_available
    triggered_refresh = False

    if is_cache_hit:
        if parameterized_query.result_is_stale:
            triggered_refresh = True
            execute_query(parameterized_query, run_local)

        staleness = (datetime.utcnow() - parameterized_query.result_as_of).total_seconds()
        query_fetch_metric_fields = {"count": 1, "staleness": int(staleness)}
        query_fetch_metric_fields.update(parameterized_query.supplied_non_filters_dict)

        influx.influx_metric(
            "redshift_response_payload_staleness",
            query_fetch_metric_fields,
            query_name=parameterized_query.query_name,
            **parameterized_query.supplied_filters_dict
        )

        response = JsonResponse(
            parameterized_query.response_payload,
            json_dumps_params=dict(separators=(",", ":"))
        )
    else:
        execute_query(parameterized_query, run_local)
        result = {"msg": "Query is processing. Check back later."}
        response = JsonResponse(result, status=202)

    log.info(
        "Query: %s Cache Hit: %s Is Stale: %s" % (
            parameterized_query.cache_key, is_cache_hit, triggered_refresh
        )
    )

    query_fetch_metric_fields = {
        "count": 1,
    }
    query_fetch_metric_fields.update(parameterized_query.supplied_non_filters_dict)

    influx.influx_metric(
        "redshift_query_fetch",
        query_fetch_metric_fields,
        cache_hit=is_cache_hit,
        query_name=parameterized_query.query_name,
        triggered_refresh=triggered_refresh,
        **parameterized_query.supplied_filters_dict
    )

    return response

def post(self, request):
    # Prevent staff and current subscribers from deleting accounts
    if not self.can_delete():
        messages.error(request, _("This account cannot be deleted."))
        return redirect("account_delete")

    log.info("Deleting account: %s", request.user)

    # Record reason and message in influx
    influx_metric("hsreplaynet_account_delete", {
        "count": 1,
        "message": request.POST.get("message", "")
    }, reason=request.POST.get("reason", ""))

    # Sign out, then delete the account
    user = request.user
    logout(request)
    user.delete()

    return redirect(self.success_url)

def merge_users(base_user, user):
    """
    Merge user into base_user
    """

    def do_queryset(qs, **kwargs):
        if not kwargs:
            kwargs = {"user": base_user}
        ret = qs.update(**kwargs)
        log.info("Merging %r -> %r: %r", user, base_user, ret)
        return ret

    # Auth tokens
    do_queryset(user.auth_tokens)

    # Replays
    do_queryset(user.replays)

    # Comments
    do_queryset(user.comment_comments)
    do_queryset(user.comment_flags)

    # Pegasus Accounts
    do_queryset(user.pegasusaccount_set)

    # Emails
    do_queryset(user.emailaddress_set)

    # Social accounts
    do_queryset(user.socialaccount_set)

    # Webhooks
    do_queryset(user.webhooks)

    # Stripe customers
    # Always delete test-mode (non-livemode) customers
    Customer.objects.filter(livemode=False, subscriber=user).delete()
    customers = Customer.objects.filter(livemode=True)
    user_has_customer = customers.filter(subscriber=user).exists()
    base_user_has_customer = customers.filter(subscriber=base_user).exists()

    if user_has_customer and base_user_has_customer:
        cus1 = customers.get(subscriber=user)
        cus2 = customers.get(subscriber=base_user)
        log.warning("Found customers for both users: %r and %r. Merge manually!", cus1, cus2)
    elif user_has_customer:
        cus = customers.get(subscriber=user)
        log.info("Moving Customer %r from user %r to user %r", cus, user, base_user)
        cus.subscriber = base_user
        cus.metadata["django_account"] = str(base_user.pk)
        cus.save()
    elif base_user_has_customer:
        log.info("Found Stripe customer on %r, nothing to do.", base_user)
    else:
        log.info("No Stripe customers to merge.")

def execute_query(parameterized_query, run_local=False):
    if run_local:
        _do_execute_query_work(parameterized_query)
        return

    # It's safe to launch multiple attempts to execute for the same query
    # because the dogpile lock will only allow one to execute.
    # But we can save resources by not even launching the attempt
    # if we see that the lock already exists.
    if not _lock_exists(parameterized_query.cache_key):
        log.info("No lock already exists for query. Will attempt to execute async.")

        if settings.ENV_AWS and settings.PROCESS_REDSHIFT_QUERIES_VIA_LAMBDA:
            # In PROD use Lambdas so the web-servers don't get overloaded
            LAMBDA.invoke(
                FunctionName="execute_redshift_query",
                InvocationType="Event",  # Triggers asynchronous invocation
                Payload=_to_lambda_payload(parameterized_query),
            )
        else:
            _do_execute_query_work(parameterized_query)
    else:
        msg = "An async attempt to run this query is in-flight. Will not launch another."
        log.info(msg)

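# _lock_exists is referenced above but not shown here. A minimal sketch of what it
# could look like, assuming python-redis-lock's convention of storing each lock
# under a "lock:"-prefixed key in Redis (an assumption, not confirmed by this code):
def _lock_exists(cache_key):
    redis_client = redshift.get_redshift_cache_redis_client()
    return bool(redis_client.exists("lock:%s" % cache_key))
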
def find_or_create_global_game(entity_tree, meta):
    ladder_season = meta.get("ladder_season")
    if not ladder_season:
        ladder_season = guess_ladder_season(meta["end_time"])

    defaults = {
        "game_handle": meta.get("game_handle"),
        "server_address": meta.get("server_ip"),
        "server_port": meta.get("server_port"),
        "server_version": meta.get("server_version"),
        "game_type": meta.get("game_type", 0),
        "format": meta.get("format", 0),
        "build": meta["build"],
        "match_start": meta["start_time"],
        "match_end": meta["end_time"],
        "brawl_season": meta.get("brawl_season", 0),
        "ladder_season": ladder_season,
        "scenario_id": meta.get("scenario_id"),
        "num_entities": len(entity_tree.entities),
        "num_turns": entity_tree.tags.get(GameTag.TURN),
    }

    if eligible_for_unification(meta):
        # If the GlobalGame is eligible for unification, generate a digest
        # and get_or_create the object
        players = entity_tree.players
        lo1, lo2 = players[0].account_lo, players[1].account_lo
        digest = generate_globalgame_digest(meta, lo1, lo2)
        log.info("GlobalGame digest is %r" % digest)
        global_game, created = GlobalGame.objects.get_or_create(digest=digest, defaults=defaults)
    else:
        global_game = GlobalGame.objects.create(digest=None, **defaults)
        created = True

    log.debug("Prepared GlobalGame(id=%r), created=%r", global_game.id, created)
    return global_game, created

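# generate_globalgame_digest is not shown here. A plausible sketch, assuming the
# digest is simply a stable hash over fields that both players' uploads of the same
# game would share (illustrative only; the real implementation may differ):
import hashlib

def generate_globalgame_digest(meta, lo1, lo2):
    # Both uploads of one game should agree on these values, so they hash identically
    values = (meta.get("server_ip"), meta.get("game_handle"), meta["start_time"].isoformat(), lo1, lo2)
    return hashlib.sha1(str(values).encode("utf-8")).hexdigest()
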
def merge_users(base_user, user):
    """
    Merge user into base_user
    """

    def do_queryset(qs, **kwargs):
        if not kwargs:
            kwargs = {"user": base_user}
        ret = qs.update(**kwargs)
        log.info("Merging %r -> %r: %r", user, base_user, ret)
        return ret

    # Auth tokens
    do_queryset(user.auth_tokens)

    # Replays
    do_queryset(user.replays)

    # Packs
    do_queryset(user.packs)

    # Blizzard Accounts
    do_queryset(user.blizzard_accounts)

    # Emails
    if base_user.emailaddress_set.filter(primary=True).count():
        user.emailaddress_set.update(primary=False)
    do_queryset(user.emailaddress_set)

    # OAuth2
    do_queryset(user.oauth2_application)
    do_queryset(user.oauth2_provider_accesstoken)
    do_queryset(user.oauth2_provider_refreshtoken)
    do_queryset(user.oauth2_provider_grant)

    # Social accounts
    do_queryset(user.socialaccount_set)

    # Webhooks
    do_queryset(user.webhook_endpoints)

    # Stripe customers
    # Always delete test-mode (non-livemode) customers
    Customer.objects.filter(livemode=False, subscriber=user).delete()
    customers = Customer.objects.filter(livemode=True)
    user_has_customer = customers.filter(subscriber=user).exists()
    base_user_has_customer = customers.filter(subscriber=base_user).exists()

    if user_has_customer and base_user_has_customer:
        cus1 = customers.get(subscriber=user)
        cus2 = customers.get(subscriber=base_user)
        log.warning("Found customers for both users: %r and %r. Merge manually!", cus1, cus2)
    elif user_has_customer:
        cus = customers.get(subscriber=user)
        log.info("Moving Customer %r from user %r to user %r", cus, user, base_user)
        cus.subscriber = base_user
        cus.metadata["django_account"] = str(base_user.pk)
        cus.save()
    elif base_user_has_customer:
        log.info("Found Stripe customer on %r, nothing to do.", base_user)
    else:
        log.info("No Stripe customers to merge.")

    # dj-paypal
    do_queryset(user.paypal_payers)
    do_queryset(user.preparedbillingagreement_set)
    do_queryset(user.billingagreement_set)

def do_queryset(qs, **kwargs):
    # Helper nested inside merge_users; base_user and user come from the enclosing scope.
    if not kwargs:
        kwargs = {"user": base_user}
    ret = qs.update(**kwargs)
    log.info("Merging %r -> %r: %r", user, base_user, ret)
    return ret

def _fetch_query_results(parameterized_query, run_local=False):
    cache_is_populated = parameterized_query.cache_is_populated
    is_cache_hit = parameterized_query.result_available
    triggered_refresh = False

    if is_cache_hit:
        if parameterized_query.result_is_stale:
            triggered_refresh = True
            attempt_request_triggered_query_execution(parameterized_query, run_local)

        staleness = (datetime.utcnow() - parameterized_query.result_as_of).total_seconds()
        query_fetch_metric_fields = {"count": 1, "staleness": int(staleness)}
        query_fetch_metric_fields.update(parameterized_query.supplied_non_filters_dict)

        influx.influx_metric(
            "redshift_response_payload_staleness",
            query_fetch_metric_fields,
            query_name=parameterized_query.query_name,
            **parameterized_query.supplied_filters_dict
        )

        if settings.REDSHIFT_PRETTY_PRINT_QUERY_RESULTS:
            json_params = dict(indent=4)
        else:
            json_params = dict(separators=(",", ":"))

        response = JsonResponse(parameterized_query.response_payload, json_dumps_params=json_params)
    elif cache_is_populated and parameterized_query.is_global:
        # There is no content for this permutation of parameters.
        # For deck related queries this most likely means that someone hand crafted the URL,
        # or if it's a card related query, it's a corner case where there is no data.
        response = HttpResponse(status=204)
    else:
        # The cache is not populated yet for this query.
        # Perhaps it's a new query or perhaps the cache was recently flushed,
        # so attempt to trigger populating it.
        attempt_request_triggered_query_execution(parameterized_query, run_local)
        result = {"msg": "Query is processing. Check back later."}
        response = JsonResponse(result, status=202)

    log.info(
        "Query: %s Cache Populated: %s Cache Hit: %s Is Stale: %s" % (
            parameterized_query.cache_key, cache_is_populated, is_cache_hit, triggered_refresh
        )
    )

    query_fetch_metric_fields = {
        "count": 1,
    }
    query_fetch_metric_fields.update(parameterized_query.supplied_non_filters_dict)

    influx.influx_metric(
        "redshift_query_fetch",
        query_fetch_metric_fields,
        cache_populated=cache_is_populated,
        cache_hit=is_cache_hit,
        query_name=parameterized_query.query_name,
        triggered_refresh=triggered_refresh,
        **parameterized_query.supplied_filters_dict
    )

    return response

def snapshot(self, game_format=enums.FormatType.FT_STANDARD, num_clusters=20,
             merge_threshold=0.85, inherit_threshold=0.85, lookback=7,
             min_observations=100, min_pilots=10, experimental_threshold=0.01,
             dry_run=False):
    from hsreplaynet.analytics.processing import get_cluster_set_data

    data = get_cluster_set_data(
        game_format=game_format,
        lookback=lookback,
        min_observations=min_observations,
        min_pilots=min_pilots
    )

    log.info("\nClustering Raw Data Volume:")
    total_data_points = 0
    total_observations = 0
    experimental_thresholds = {}
    for player_class_name, data_points in data.items():
        data_points_for_class = len(data_points)
        observations_for_class = sum(d["observations"] for d in data_points)
        total_observations += observations_for_class
        threshold_for_class = int(observations_for_class * experimental_threshold)
        experimental_thresholds[player_class_name] = threshold_for_class
        total_data_points += data_points_for_class
        log.info(
            "\t%s: %i Data Points,\t%i Observations,\tExperimental Threshold: %i" % (
                player_class_name, data_points_for_class, observations_for_class,
                threshold_for_class
            )
        )
    log.info(
        "\tTotal Data Points: %i, Observations: %i\n" % (total_data_points, total_observations)
    )

    inheritance_missed = []
    with transaction.atomic():
        cs_snapshot = create_cluster_set(
            data,
            factory=ClusterSetSnapshot,
            num_clusters=num_clusters,
            merge_similarity=merge_threshold,
            consolidate=False,
            create_experimental_cluster=False
        )

        previous_snapshot = ClusterSetSnapshot.objects.filter(live_in_production=True).first()

        cs_snapshot.consolidate_clusters(merge_threshold)

        ClusterSetSnapshot.objects.update(latest=False)
        cs_snapshot.game_format = game_format
        cs_snapshot.latest = True
        cs_snapshot.save()

        uninherited_id_set = cs_snapshot.inherit_from_previous(
            previous_snapshot, merge_threshold=inherit_threshold
        )
        for uninherited_id in uninherited_id_set:
            inheritance_missed.append(str(uninherited_id))

        cs_snapshot.create_experimental_clusters(
            experimental_cluster_thresholds=experimental_thresholds
        )

        if not dry_run:
            for class_cluster in cs_snapshot.class_clusters:
                class_cluster.cluster_set = cs_snapshot
                # Don't use pcp_adjustments after consolidation is complete,
                # in order to leave signatures for tooltips unaffected.
                class_cluster.update_cluster_signatures(use_pcp_adjustment=False)
                class_cluster.save()

                for cluster in class_cluster.clusters:
                    cluster.class_cluster = class_cluster
                    cluster.save()

    if inheritance_missed:
        log.warning("Inheritance missed on archetypes {}".format(", ".join(inheritance_missed)))

    return cs_snapshot