def refresh_user_cache():
    logger.info("Refreshing user cache")
    redis = RedisClient.get()
    for user in list(User.objects.all().exclude(profile__isnull=True)):
        bundle = get_user_bundle(user)
        redis.set(f"users/{user.username}", json.dumps(bundle))
    redis.set("users_updated", timezone.now().timestamp())
def get_workflow_usage_timeseries(owner: str, name: str, branch: str, invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get(f"workflow_timeseries/{owner}/{name}/{branch}")

    if cached is None or invalidate:
        series = dict()
        tasks = Task.objects.filter(
            workflow__repo__owner=owner,
            workflow__repo__name=name,
            workflow__repo__branch=branch)

        # return early if no tasks
        if len(tasks) == 0:
            return series

        # count tasks per day
        for task in tasks:
            timestamp = datetime.combine(task.created.date(), datetime.min.time()).isoformat()
            if timestamp not in series:
                series[timestamp] = 0
            series[timestamp] = series[timestamp] + 1

        redis.set(f"workflow_timeseries/{owner}/{name}/{branch}", json.dumps(series))
    else:
        series = json.loads(cached)

    return series
def healthcheck(request, name):
    try:
        agent = Agent.objects.get(name=name)

        # if the requesting user doesn't own the agent and isn't on its
        # list of authorized users, they're not authorized to access it
        if not agent.public and agent.user != request.user and request.user.username not in [
                u.username for u in agent.users_authorized.all()]:
            return HttpResponseNotFound()
    except Agent.DoesNotExist:
        return HttpResponseNotFound()

    healthy, output = is_healthy(agent)
    check = {
        'timestamp': timezone.now().isoformat(),
        'healthy': healthy,
        'output': output
    }

    # persist health status to DB
    agent.is_healthy = healthy
    agent.save()

    # update cache
    redis = RedisClient.get()
    length = redis.llen(f"healthchecks/{agent.name}")
    checks_saved = int(settings.AGENTS_HEALTHCHECKS_SAVED)
    if length > checks_saved:
        redis.rpop(f"healthchecks/{agent.name}")
    redis.lpush(f"healthchecks/{agent.name}", json.dumps(check))

    return JsonResponse(check)
def get_user_bundle(user: User):
    profile = Profile.objects.get(user=user)

    # if the user hasn't linked a GitHub account, return basic info only
    if not has_github_info(profile):
        return {
            'username': user.username,
            'first_name': user.first_name,
            'last_name': user.last_name,
        }
    else:
        # serve the bundle from the cache if it's there
        redis = RedisClient.get()
        cached = redis.get(f"users/{user.username}")
        if cached is not None:
            return json.loads(cached)

        # otherwise fetch the user's GitHub profile and organizations, then cache the bundle
        github_profile = async_to_sync(get_user_github_profile)(user)
        github_organizations = async_to_sync(get_user_github_organizations)(user)
        bundle = {
            'username': user.username,
            'first_name': user.first_name,
            'last_name': user.last_name,
            'github_username': profile.github_username,
            'github_profile': github_profile,
            'github_organizations': github_organizations,
        } if 'login' in github_profile else {
            'username': user.username,
            'first_name': user.first_name,
            'last_name': user.last_name,
        }
        redis.set(f"users/{user.username}", json.dumps(bundle))
        return bundle
def agents_healthchecks():
    task_name = agents_healthchecks.name
    if not __acquire_lock(task_name):
        logger.warning(
            f"Task '{task_name}' is already running, aborting (maybe consider a longer scheduling interval?)")
        return

    try:
        for agent in Agent.objects.all():
            healthy, output = is_healthy(agent)

            # persist health status to DB
            agent.is_healthy = healthy
            agent.save()

            # trim the agent's healthcheck list if it's full, then push the new check
            redis = RedisClient.get()
            length = redis.llen(f"healthchecks/{agent.name}")
            checks_saved = int(settings.AGENTS_HEALTHCHECKS_SAVED)
            if length > checks_saved:
                redis.rpop(f"healthchecks/{agent.name}")
            check = {
                'timestamp': timezone.now().isoformat(),
                'healthy': healthy,
                'output': output
            }
            redis.lpush(f"healthchecks/{agent.name}", json.dumps(check))
    finally:
        __release_lock(task_name)
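# The periodic tasks in this listing (agents_healthchecks, refresh_all_users_stats,
# refresh_user_institutions) guard against overlapping runs with __acquire_lock /
# __release_lock helpers whose implementation isn't shown here. A minimal sketch of
# what such a pair might look like, assuming a Redis-backed mutex; the "locks/" key
# prefix, the timeout, and the use of SET NX are illustrative assumptions, not the
# actual implementation:
def __acquire_lock(name: str, timeout: int = 3600) -> bool:
    # SET with nx=True succeeds only if the key doesn't already exist,
    # so two workers can't both hold the lock; ex=timeout guards against
    # a crashed worker leaving the lock stuck forever
    return bool(RedisClient.get().set(f"locks/{name}", 1, nx=True, ex=timeout))


def __release_lock(name: str):
    # delete the lock key so the next scheduled run can acquire it
    RedisClient.get().delete(f"locks/{name}")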
async def refresh_user_workflow_cache(github_username: str):
    if github_username is None or github_username == '':
        raise ValueError("No GitHub username provided")

    try:
        profile = await sync_to_async(Profile.objects.get)(github_username=github_username)
        user = await get_profile_user(profile)
    except MultipleObjectsReturned:
        logger.warning(f"Multiple users bound to Github user {github_username}!")
        return
    except Profile.DoesNotExist:
        logger.warning(f"Github user {github_username} does not exist")
        return

    # scrape GitHub to synchronize repos and workflow config
    profile = await sync_to_async(Profile.objects.get)(user=user)
    workflows = await github.list_connectable_repos_by_owner(github_username, profile.github_token)

    # update the cache, first removing workflows that no longer exist
    redis = RedisClient.get()
    removed = 0
    updated = 0
    added = 0
    old_keys = [
        key.decode('utf-8')
        for key in redis.scan_iter(match=f"workflows/{github_username}/*")
    ]
    new_keys = [
        f"workflows/{github_username}/{wf['repo']['name']}/{wf['branch']['name']}"
        for wf in workflows
    ]
    for old_key in old_keys:
        if old_key not in new_keys:
            logger.debug(f"Removing user workflow {old_key}")
            removed += 1
            redis.delete(old_key)
        else:
            logger.debug(f"Updating user workflow {old_key}")
            updated += 1

    # ...then adding/updating the workflows we just scraped
    for wf in workflows:
        # set flag if this is a featured workflow
        wf['featured'] = await is_featured(github_username, wf['repo']['name'], wf['branch']['name'])

        key = f"workflows/{github_username}/{wf['repo']['name']}/{wf['branch']['name']}"
        if key not in old_keys:
            logger.debug(f"Adding user workflow {key}")
            added += 1
        redis.set(key, json.dumps(del_none(wf)))

    redis.set(f"workflows_updated/{github_username}", timezone.now().timestamp())
    logger.info(
        f"{len(workflows)} workflow(s) now in GitHub user {github_username}'s workflow cache "
        f"(added {added}, updated {updated}, removed {removed})")
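# Several of the cache writers above and below pass workflow dicts through del_none
# before JSON-serializing them. That helper isn't shown in this listing; a plausible
# sketch, assuming it simply strips None-valued entries recursively (the behavior is
# inferred from usage, not confirmed against the real implementation):
def del_none(d):
    # drop dict entries whose value is None, recursing into nested dicts and lists
    if isinstance(d, dict):
        return {k: del_none(v) for k, v in d.items() if v is not None}
    if isinstance(d, list):
        return [del_none(v) for v in d]
    return d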
def get_institutions(invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = list(redis.scan_iter(match="institutions/*"))
    institutions = dict()

    if invalidate:
        # count members per institution
        counts = {i['institution'].lower(): i['institution__count'] for i in count_institutions()}
        for k in counts.keys():
            # get institution information (TODO: can we send all the requests concurrently?)
            # TODO: need to make sure this doesn't exceed the free plan rate limit
            result = async_to_sync(mapbox.get_institution)(k, settings.MAPBOX_TOKEN)

            # reconstruct institution name with proper capitalization from Mapbox result
            # TODO: are there any edge cases this might fail for?
            name = ' '.join(result['query'])

            # if we can't match the institution name, skip it
            if name not in counts:
                logger.warning(f"Failed to match {name} to any institution")
                continue

            # number of members in this institution
            count = counts[name]

            # if Mapbox returned no results, we can't return geocode information
            if len(result['features']) == 0:
                logger.warning(f"No results from Mapbox for institution: {name}")
                institutions[name] = {
                    'institution': name,
                    'count': count,
                    'geocode': None
                }
            # if we got results, pick the top one
            else:
                feature = result['features'][0]
                feature['id'] = name
                feature['properties'] = {'name': name, 'count': count}
                institutions[name] = {
                    'institution': name,
                    'count': count,
                    'geocode': feature
                }

        for name, institution in institutions.items():
            redis.set(f"institutions/{name}", json.dumps(institution))
    else:
        for institution in cached:
            if institution is not None:
                institutions[institution.decode('utf-8')] = json.loads(redis.get(institution))

    return institutions
async def get_workflow(owner: str,
                       name: str,
                       branch: str,
                       github_token: str,
                       cyverse_token: str,
                       invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    updated = redis.get(f"workflows_updated/{owner}")
    workflow = redis.get(f"workflows/{owner}/{name}/{branch}")

    if updated is None or workflow is None or invalidate:
        bundle = await github.get_repo_bundle(owner, name, branch, github_token, cyverse_token)
        workflow = {
            'config': bundle['config'],
            'repo': bundle['repo'],
            'validation': bundle['validation'],
            'branch': branch,
            'featured': FeaturedWorkflow.objects.filter(owner=owner, name=name, branch=branch).exists()
        }
        redis.set(f"workflows/{owner}/{name}/{branch}", json.dumps(del_none(workflow)))
        return workflow
    else:
        return json.loads(workflow)
def refresh_all_users_stats():
    task_name = refresh_all_users_stats.name
    if not __acquire_lock(task_name):
        logger.warning(
            f"Task '{task_name}' is already running, aborting (maybe consider a longer scheduling interval?)")
        return

    try:
        # TODO: move caching to query layer
        redis = RedisClient.get()

        for user in User.objects.all():
            logger.info(f"Computing statistics for {user.username}")

            # overall statistics (no need to save result, just trigger reevaluation)
            async_to_sync(q.get_user_statistics)(user, True)

            # timeseries (no need to save result, just trigger reevaluation)
            q.get_user_timeseries(user, invalidate=True)

        logger.info("Computing aggregate statistics")
        redis.set("stats_counts", json.dumps(q.get_total_counts(True)))
        redis.set("total_timeseries", json.dumps(q.get_aggregate_timeseries(True)))
    finally:
        __release_lock(task_name)
def list_project_workflows(project: Investigation) -> List[dict]:
    redis = RedisClient.get()
    proj_dict = project_to_dict(project)
    return [
        json.loads(wf) for wf in [
            redis.get(key)
            for key in [f"workflows/{name}" for name in proj_dict['workflows']]
        ] if wf is not None
    ]
def list_public_workflows() -> List[dict]:
    redis = RedisClient.get()
    workflows = [
        wf for wf in [
            json.loads(redis.get(key))
            for key in redis.scan_iter(match='workflows/*')
        ] if 'public' in wf['config'] and wf['config']['public']
    ]
    return workflows
def get_project_workflows(project: Investigation):
    redis = RedisClient.get()
    workflows = [
        wf for wf in [
            json.loads(redis.get(key))
            for key in redis.scan_iter(match='workflows/*')
        ] if 'projects' in wf['config'] and project.guid in wf['config']['projects']
    ]
    return workflows
def refresh_user_institutions():
    task_name = refresh_user_institutions.name
    if not __acquire_lock(task_name):
        logger.warning(
            f"Task '{task_name}' is already running, aborting (maybe consider a longer scheduling interval?)")
        return

    try:
        # TODO: move caching to query layer
        redis = RedisClient.get()
        institutions = q.get_institutions(True)
        for name, institution in institutions.items():
            redis.set(f"institutions/{name}", json.dumps(institution))
    finally:
        __release_lock(task_name)
def get_user_timeseries(user: User, invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get(f"user_timeseries/{user.username}")

    if cached is None or invalidate:
        tasks_usage = get_tasks_usage_timeseries(user=user)
        workflows_usage = get_workflows_usage_timeseries(user)
        agents_usage = get_agents_usage_timeseries(user)
        series = {
            'tasks_usage': tasks_usage,
            'agents_usage': agents_usage,
            'workflows_usage': workflows_usage
        }
        redis.set(f"user_timeseries/{user.username}", json.dumps(series))
    else:
        series = json.loads(cached)

    return series
async def refresh_org_workflow_cache(org_name: str, github_token: str):
    # scrape GitHub to synchronize repos and workflow config
    workflows = await github.list_connectable_repos_by_org(org_name, github_token)

    # update the cache, first removing workflows that no longer exist
    redis = RedisClient.get()
    removed = 0
    updated = 0
    added = 0
    old_keys = [
        key.decode('utf-8')
        for key in redis.scan_iter(match=f"workflows/{org_name}/*")
    ]
    new_keys = [
        f"workflows/{org_name}/{wf['repo']['name']}/{wf['branch']['name']}"
        for wf in workflows
    ]
    for old_key in old_keys:
        if old_key not in new_keys:
            logger.debug(f"Removing org workflow {old_key}")
            removed += 1
            redis.delete(old_key)
        else:
            logger.debug(f"Updating org workflow {old_key}")
            updated += 1

    # ...then adding/updating the workflows we just scraped
    for wf in workflows:
        # set flag if this is a featured workflow
        wf['featured'] = await is_featured(org_name, wf['repo']['name'], wf['branch']['name'])

        key = f"workflows/{org_name}/{wf['repo']['name']}/{wf['branch']['name']}"
        if key not in old_keys:
            logger.debug(f"Adding org workflow {key}")
            added += 1
        redis.set(key, json.dumps(del_none(wf)))

    redis.set(f"workflows_updated/{org_name}", timezone.now().timestamp())
    logger.info(
        f"{len(workflows)} workflow(s) now in GitHub organization {org_name}'s workflow cache "
        f"(added {added}, updated {updated}, removed {removed})")
def healthchecks(request, name):
    try:
        agent = Agent.objects.get(name=name)

        # if the requesting user doesn't own the agent and isn't on its
        # list of authorized users, they're not authorized to access it
        if not agent.public and agent.user != request.user and request.user.username not in [
                u.username for u in agent.users_authorized.all()]:
            return HttpResponseNotFound()
    except Agent.DoesNotExist:
        return HttpResponseNotFound()

    redis = RedisClient.get()
    checks = [
        json.loads(check)
        for check in redis.lrange(f"healthchecks/{agent.name}", 0, -1)
    ]
    return JsonResponse({'healthchecks': checks})
def get_total_counts(invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get("stats_counts")

    if cached is None or invalidate:
        users = User.objects.count()
        online = len(filter_online(User.objects.all()))  # TODO store this in the DB each time the user logs in
        wfs = [json.loads(redis.get(key)) for key in redis.scan_iter('workflows/*')]
        devs = list(set([wf['repo']['owner']['login'] for wf in wfs]))
        workflows = len(wfs)
        developers = len(devs)
        agents = Agent.objects.count()
        tasks = TaskCounter.load().count
        running = len(list(Task.objects.exclude(status__in=[
            TaskStatus.COMPLETED, TaskStatus.FAILURE, TaskStatus.TIMEOUT, TaskStatus.CANCELED
        ])))
        counts = {
            'users': users,
            'online': online,
            'workflows': workflows,
            'developers': developers,
            'agents': agents,
            'tasks': tasks,
            'running': running
        }
        redis.set("stats_counts", json.dumps(counts))
    else:
        counts = json.loads(cached)

    return counts
def get_aggregate_timeseries(invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get("total_timeseries")

    if cached is None or invalidate:
        users_total = get_users_total_timeseries()
        tasks_total = get_tasks_total_timeseries()
        tasks_usage = get_tasks_usage_timeseries()
        workflows_usage = get_workflows_usage_timeseries()
        agents_usage = get_agents_usage_timeseries()
        series = {
            'users_total': users_total,
            'tasks_total': tasks_total,
            'tasks_usage': tasks_usage,
            'agents_usage': agents_usage,
            'workflows_usage': workflows_usage,
        }
        redis.set("total_timeseries", json.dumps(series))
    else:
        series = json.loads(cached)

    return series
def list_users(invalidate: bool = False) -> List[dict]:
    redis = RedisClient.get()
    updated = redis.get("users_updated")

    # repopulate if empty or invalidation requested
    if updated is None or len(list(redis.scan_iter(match="users/*"))) == 0 or invalidate:
        refresh_user_cache()
    else:
        age = datetime.now() - datetime.fromtimestamp(float(updated))
        age_secs = age.total_seconds()
        max_secs = int(settings.USERS_REFRESH_MINUTES) * 60

        # otherwise only if stale
        if age_secs > max_secs:
            logger.info(
                f"User cache is stale ({age_secs}s old, {age_secs - max_secs}s past limit), repopulating")
            refresh_user_cache()

    return [
        json.loads(redis.get(key))
        for key in redis.scan_iter(match="users/*")
    ]
def task_to_dict(task: Task) -> dict:
    orchestrator_log_file_path = get_task_orchestrator_log_file_path(task)
    if Path(orchestrator_log_file_path).is_file():
        with open(orchestrator_log_file_path, 'r') as log:
            orchestrator_logs = [line.strip() for line in log.readlines()[-1000000:]]
    else:
        orchestrator_logs = []

    # try:
    #     AgentAccessPolicy.objects.get(user=task.user, agent=task.agent, role__in=[AgentRole.admin, AgentRole.guest])
    #     can_restart = True
    # except:
    #     can_restart = False

    results = RedisClient.get().get(f"results/{task.guid}")

    return {
        # 'can_restart': can_restart,
        'guid': task.guid,
        'status': task.status,
        'owner': task.user.username,
        'name': task.name,
        'project': {
            'title': task.project.title,
            'owner': task.project.owner.username,
            'description': task.project.description
        } if task.project is not None else None,
        'study': {
            'title': task.study.title,
            'description': task.study.description
        } if task.study is not None else None,
        'work_dir': task.workdir,
        'orchestrator_logs': orchestrator_logs,
        'inputs_detected': task.inputs_detected,
        'inputs_downloaded': task.inputs_downloaded,
        'inputs_submitted': task.inputs_submitted,
        'inputs_completed': task.inputs_completed,
        'agent': agent_to_dict(task.agent) if task.agent is not None else None,
        'created': task.created.isoformat(),
        'updated': task.updated.isoformat(),
        'completed': task.completed.isoformat() if task.completed is not None else None,
        'due_time': None if task.due_time is None else task.due_time.isoformat(),
        'cleanup_time': None if task.cleanup_time is None else task.cleanup_time.isoformat(),
        'workflow_owner': task.workflow_owner,
        'workflow_name': task.workflow_name,
        'workflow_branch': task.workflow_branch,
        'workflow_image_url': task.workflow_image_url,
        'input_path': task.workflow['input']['path'] if 'input' in task.workflow else None,
        'output_path': task.workflow['output']['to']
        if ('output' in task.workflow and 'to' in task.workflow['output']) else None,
        'tags': [str(tag) for tag in task.tags.all()],
        'is_complete': task.is_complete,
        'is_success': task.is_success,
        'is_failure': task.is_failure,
        'is_cancelled': task.is_cancelled,
        'is_timeout': task.is_timeout,
        'result_previews_loaded': task.previews_loaded,
        'result_transfer': has_output_target(task),
        'results_retrieved': task.results_retrieved,
        'results_transferred': task.results_transferred,
        'cleaned_up': task.cleaned_up,
        'transferred': task.transferred,
        'transfer_path': task.transfer_path,
        'output_files': json.loads(results) if results is not None else [],
        'job_id': task.job_id,
        'job_status': task.job_status,
        'job_walltime': task.job_consumed_walltime,
        'delayed_id': task.delayed_id,
        'repeating_id': task.repeating_id
    }
def test_push(self, guid: str):
    if guid is None:
        logger.warning("Aborting")
        self.request.callbacks = None
        return

    try:
        task = Task.objects.get(guid=guid)
    except Task.DoesNotExist:
        logger.warning(f"Could not find task with GUID {guid} (might have been deleted?)")
        return

    try:
        # check the expected filenames against the contents of the CyVerse collection
        path = task.workflow['output']['to']
        actual = [
            file.rpartition('/')[2]
            for file in terrain.list_dir(path, task.user.profile.cyverse_access_token)
        ]
        expected = [
            file['name']
            for file in json.loads(RedisClient.get().get(f"results/{task.guid}"))
            if file['exists']
        ]

        from pprint import pprint
        pprint(actual)
        pprint(expected)

        if not set(expected).issubset(set(actual)):
            message = f"Transfer to CyVerse directory {path} incomplete: expected {len(expected)} files but found {len(actual)}"
            logger.warning(message)

            # mark the task failed
            now = timezone.now()
            task.updated = now
            task.completed = now
            task.status = TaskStatus.FAILURE
            task.transferred = True
            task.results_transferred = len(expected)
            task.transfer_path = path
            task.save()
        else:
            message = f"Transfer to CyVerse directory {path} completed"
            logger.info(message)

            # mark the task succeeded (unless it has already failed)
            now = timezone.now()
            task.updated = now
            task.completed = now
            task.status = TaskStatus.COMPLETED if task.status != TaskStatus.FAILURE else task.status
            task.transferred = True
            task.results_transferred = len(expected)
            task.transfer_path = path
            task.save()

        # log status update and push it to clients
        log_task_orchestrator_status(task, [message])
        async_to_sync(push_task_channel_event)(task)

        return guid
    except Exception:
        self.request.callbacks = None
        message = f"Failed to test CyVerse transfer: {traceback.format_exc()}"

        # mark the task failed and persist it
        task.status = TaskStatus.FAILURE
        now = timezone.now()
        task.updated = now
        task.completed = now
        task.save()

        # log status update and push it to client
        log_task_orchestrator_status(task, [message])
        async_to_sync(push_task_channel_event)(task)

        # push AWS SNS notification
        if task.user.profile.push_notification_status == 'enabled':
            SnsClient.get().publish_message(
                task.user.profile.push_notification_topic_arn,
                f"PlantIT task {task.guid}", message, {})

        # revoke access to the user's datasets then clean up the task
        unshare_data.s(task.guid).apply_async()
        tidy_up.s(task.guid).apply_async(countdown=int(environ.get('TASKS_CLEANUP_MINUTES')) * 60)
def test_results(self, guid: str):
    if guid is None:
        logger.warning("Aborting")
        self.request.callbacks = None
        return

    try:
        task = Task.objects.get(guid=guid)
    except Task.DoesNotExist:
        logger.warning(f"Could not find task with GUID {guid} (might have been deleted?)")
        self.request.callbacks = None
        return

    try:
        # get logs from agent filesystem
        ssh = get_task_ssh_client(task)
        get_task_remote_logs(task, ssh)

        # get results from agent filesystem, then save them to cache and update the task
        results = list_result_files(task)
        found = [r for r in results if r['exists']]
        redis = RedisClient.get()
        redis.set(f"results/{task.guid}", json.dumps(found))
        task.results_retrieved = True
        task.save()

        # make sure we got the results we expected
        missing = [r for r in results if not r['exists']]
        if len(missing) > 0:
            message = f"Found {len(found)} results, missing {len(missing)}: {', '.join([m['name'] for m in missing])}"
        else:
            message = f"Found {len(found)} results"

        # log status update and push it to client(s)
        log_task_orchestrator_status(task, [message])
        async_to_sync(push_task_channel_event)(task)

        return guid
    except Exception:
        self.request.callbacks = None
        message = f"Failed to check results: {traceback.format_exc()}"

        # mark the task failed and persist it
        task.status = TaskStatus.FAILURE
        now = timezone.now()
        task.updated = now
        task.completed = now
        task.save()

        # log status to file
        log_task_orchestrator_status(task, [message])

        # push status to client(s)
        async_to_sync(push_task_channel_event)(task)

        # push AWS SNS notification
        if task.user.profile.push_notification_status == 'enabled':
            SnsClient.get().publish_message(
                task.user.profile.push_notification_topic_arn,
                f"PlantIT task {task.guid}", message, {})

        # revoke access to the user's datasets then clean up the task
        unshare_data.s(task.guid).apply_async()
        tidy_up.s(task.guid).apply_async(countdown=int(environ.get('TASKS_CLEANUP_MINUTES')) * 60)
def list_user_workflows(owner: str) -> List[dict]:
    redis = RedisClient.get()
    return [
        json.loads(redis.get(key))
        for key in redis.scan_iter(match=f"workflows/{owner}/*")
    ]
def get_last_task_config(username, owner, name, branch):
    redis = RedisClient.get()
    last_config = redis.get(f"workflow_configs/{username}/{owner}/{name}/{branch}")
    return None if last_config is None else json.loads(last_config)
def list_org_workflows(organization: str) -> List[dict]:
    redis = RedisClient.get()
    return [
        json.loads(redis.get(key))
        for key in redis.scan_iter(match=f"workflows/{organization}/*")
    ]
def create_immediate_task(user: User, config: dict):
    # set submission time so we can persist configuration
    # and show recent submissions to the user in the UI
    config['timestamp'] = timezone.now().isoformat()

    # parse GitHub repo info
    repo_owner = config['repo']['owner']
    repo_name = config['repo']['name']
    repo_branch = config['repo']['branch']

    # persist task configuration
    redis = RedisClient.get()
    redis.set(
        f"workflow_configs/{user.username}/{repo_owner}/{repo_name}/{repo_branch}",
        json.dumps(config))

    # get the task GUID and name
    guid = config.get('guid', None) if config['type'] == 'Now' else str(uuid.uuid4())
    name = config.get('name', None)

    # if the browser client hasn't set a GUID, create one
    if guid is None:
        guid = str(uuid.uuid4())

    # get the agent this task should be submitted on
    agent = Agent.objects.get(name=config['agent'])

    # if we have a time limit, calculate due time
    time = config.get('time', None)
    if time is not None:
        time_limit = parse_task_time_limit(time)
        logger.info(f"Using task time limit {time_limit}s")
        due_time = timezone.now() + timedelta(seconds=time_limit)
    else:
        due_time = None

    # create the task right meow
    now = timezone.now()
    task = Task.objects.create(
        guid=guid,
        name=guid if name is None or name == '' else name,
        user=user,
        workflow=config['workflow'],
        workflow_owner=repo_owner,
        workflow_name=repo_name,
        workflow_branch=repo_branch,
        agent=agent,
        status=TaskStatus.CREATED,
        created=now,
        updated=now,
        due_time=due_time,
        token=binascii.hexlify(os.urandom(20)).decode())

    # add MIAPPE info, if we have any
    project, study = parse_task_miappe_info(config['miappe'])
    if project is not None:
        task.project = Investigation.objects.get(owner=user, title=project)
    if study is not None:
        task.study = Study.objects.get(investigation=task.project, title=study)

    # add repo logo
    if 'logo' in config['workflow']:
        logo_path = config['workflow']['logo']
        task.workflow_image_url = f"https://raw.githubusercontent.com/{repo_owner}/{repo_name}/master/{logo_path}"

    # add task tags
    for tag in config['tags']:
        task.tags.add(tag)

    # use GUID for working directory name
    task.workdir = f"{task.guid}/"
    task.save()

    # increment task count for aggregate statistics
    counter = TaskCounter.load()
    counter.count = counter.count + 1
    counter.save()

    return task
def cyverse_handle_temporary_code(self, request):
    session_state = request.GET.get('session_state', None)
    code = request.GET.get('code', None)

    # missing session state string or code indicates a mis-configured redirect from the KeyCloak client?
    if session_state is None:
        return HttpResponseBadRequest("Missing param: 'session_state'")
    if code is None:
        return HttpResponseBadRequest("Missing param: 'code'")

    # send the authorization request
    response = requests.post(
        "https://kc.cyverse.org/auth/realms/CyVerse/protocol/openid-connect/token",
        data={
            'grant_type': 'authorization_code',
            'client_id': os.environ.get('CYVERSE_CLIENT_ID'),
            'code': code,
            'redirect_uri': os.environ.get('CYVERSE_REDIRECT_URL')
        },
        auth=HTTPBasicAuth(request.user.username, os.environ.get('CYVERSE_CLIENT_SECRET')))

    # if we have anything other than a 200 the auth request did not succeed
    if response.status_code == 400:
        return HttpResponse('Unauthorized for KeyCloak token endpoint', status=401)
    elif response.status_code != 200:
        return HttpResponse('Bad response from KeyCloak token endpoint', status=500)

    # get the response body
    content = response.json()

    # make sure we have CyVerse access & refresh tokens
    if 'access_token' not in content:
        return HttpResponseBadRequest("Missing param on token response: 'access_token'")
    if 'refresh_token' not in content:
        return HttpResponseBadRequest("Missing param on token response: 'refresh_token'")

    # decode them
    access_token = content['access_token']
    refresh_token = content['refresh_token']
    decoded_access_token = jwt.decode(access_token, options={
        'verify_signature': False,
        'verify_aud': False,
        'verify_iat': False,
        'verify_exp': False,
        'verify_iss': False
    })
    decoded_refresh_token = jwt.decode(refresh_token, options={
        'verify_signature': False,
        'verify_aud': False,
        'verify_iat': False,
        'verify_exp': False,
        'verify_iss': False
    })

    # retrieve the user entry (or create if it's their first time logging in)
    user, _ = User.objects.get_or_create(username=decoded_access_token['preferred_username'])

    # update the user's personal info
    user.first_name = decoded_access_token['given_name']
    user.last_name = decoded_access_token['family_name']
    user.email = decoded_access_token['email']
    user.save()

    # update the user's profile (CyVerse tokens, etc)
    profile, created = Profile.objects.get_or_create(user=user)
    if created:
        profile.created = timezone.now()
    profile.cyverse_access_token = access_token
    profile.cyverse_refresh_token = refresh_token
    user.profile = profile
    profile.save()
    user.save()

    # if user's stats haven't been calculated yet, schedule it
    redis = RedisClient.get()
    cached_stats = redis.get(f"stats/{user.username}")
    if cached_stats is None:
        self.logger.info(f"No usage statistics for {user.username}. Scheduling refresh...")
        refresh_user_stats.s(user.username).apply_async()

    # log the user into the builtin django backend
    login(request, user, backend='django.contrib.auth.backends.ModelBackend')

    # open the dashboard
    return redirect("/home/")
async def get_user_statistics(user: User, invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get(f"stats/{user.username}")

    if cached is None or invalidate:
        profile = await sync_to_async(Profile.objects.get)(user=user)
        all_tasks = await filter_tasks(user=user)
        completed_tasks = await filter_tasks(user=user, completed=True)
        total_tasks = len(all_tasks)
        total_time = sum([(task.completed - task.created).total_seconds() for task in completed_tasks])
        total_results = sum([
            len(task.results if task.results is not None else [])
            for task in completed_tasks
        ])
        owned_workflows = [
            f"{workflow['repo']['owner']['login']}/{workflow['name'] if 'name' in workflow else '[unnamed]'}"
            for workflow in list_user_workflows(owner=profile.github_username)
        ] if profile.github_username != '' else []
        used_workflows = [f"{task.workflow_owner}/{task.workflow_name}" for task in all_tasks]
        used_workflows_counter = Counter(used_workflows)
        unique_used_workflows = list(np.unique(used_workflows))
        owned_agents = [
            (await sync_to_async(agent_to_dict)(agent, user.username))['name']
            for agent in [agent for agent in await filter_agents(user=user) if agent is not None]
        ]
        guest_agents = [
            (await sync_to_async(agent_to_dict)(agent, user.username))['name']
            for agent in [agent for agent in await filter_agents(user=user) if agent is not None]
        ]
        used_agents = [
            (await sync_to_async(agent_to_dict)(agent, user.username))['name']
            for agent in [a for a in [await get_task_agent(task) for task in all_tasks] if a is not None]
        ]
        used_projects = [
            (await sync_to_async(project_to_dict)(project))
            for project in [p for p in [await get_task_project(task) for task in all_tasks] if p is not None]
        ]
        used_agents_counter = Counter(used_agents)
        used_projects_counter = Counter([
            f"{project['guid']} ({project['title']})" for project in used_projects
        ])
        unique_used_agents = list(np.unique(used_agents))

        # owned_datasets = terrain.list_dir(f"/iplant/home/{user.username}", profile.cyverse_access_token)
        # guest_datasets = terrain.list_dir(f"/iplant/home/", profile.cyverse_access_token)

        tasks_running = await sync_to_async(get_tasks_usage_timeseries)(600, user)

        stats = {
            'total_tasks': total_tasks,
            'total_task_seconds': total_time,
            'total_task_results': total_results,
            'owned_workflows': owned_workflows,
            'workflow_usage': {
                'values': [used_workflows_counter[workflow] for workflow in unique_used_workflows],
                'labels': unique_used_workflows,
            },
            'agent_usage': {
                'values': [used_agents_counter[agent] for agent in unique_used_agents],
                'labels': unique_used_agents,
            },
            'project_usage': {
                'values': list(dict(used_projects_counter).values()),
                'labels': list(dict(used_projects_counter).keys()),
            },
            'task_status': {
                'values': [1 if task.status == 'success' else 0 for task in all_tasks],
                'labels': ['SUCCESS' if task.status == 'success' else 'FAILURE' for task in all_tasks],
            },
            'owned_agents': owned_agents,
            'guest_agents': guest_agents,
            'institution': profile.institution,
            'tasks_running': tasks_running
        }
        redis.set(f"stats/{user.username}", json.dumps(stats))
    else:
        stats = json.loads(cached)

    return stats