def get_level_info_data():
    """Collect deployment-wide summary numbers for the overview panels.

    Returns:
        dict with workspace/cluster/user counts, the daily DBU consumption
        across all workspaces, its cost, and the DBU prices used to price it.
    """
    session = create_session()
    workspaces = session.query(Workspace)

    price_settings = get_price_settings()
    interactive_dbu_price = price_settings['interactive']
    job_dbu_price = price_settings['job']

    workspace_count = workspaces.count()
    # Generator expressions: no need to materialize intermediate lists.
    # NOTE(review): iterating the query re-executes it each time — presumably
    # acceptable for these small result sets; confirm against create_session.
    cluster_count = sum(
        len(workspace.active_clusters()) for workspace in workspaces)
    user_count = sum(len(workspace.users()) for workspace in workspaces)

    # One frame of state rows for currently-active clusters in all workspaces.
    actives = concat_dfs(
        workspace.state_df(active_only=True) for workspace in workspaces)
    dbu_counts = get_cluster_dbus(actives)
    dbu_count_dict = dbu_counts.to_dict()
    # A missing cluster type defaults to 0.0 so the cost stays well-defined.
    dbu_cost = (
        dbu_count_dict.get('interactive', 0.0) * interactive_dbu_price
        + dbu_count_dict.get('job', 0.0) * job_dbu_price)

    return {
        "clusters": cluster_count,
        "workspaces": workspace_count,
        "user_count": user_count,
        "daily_dbu": dbu_counts.sum(),
        "daily_dbu_cost": dbu_cost,
        "interactive_dbu_price": interactive_dbu_price,
        "job_dbu_price": job_dbu_price
    }
def view_clusters():
    """Render the clusters page: per-type listings, 7-day DBU totals and
    per-cluster-type time series."""
    session = create_session()
    all_clusters = session.query(Cluster).all()
    states = concat_dfs(c.state_df() for c in all_clusters)

    level_info_data = get_level_info_data()
    price_settings = get_price_settings()

    # Start from empty series so the template always sees both cluster types.
    time_stats_dict = {
        kind: empty_timeseries() for kind in ('interactive', 'job')
    }

    if states.empty:
        # No recorded state: every cluster contributed zero DBUs.
        cluster_dbus = {c.cluster_id: 0.0 for c in all_clusters}
    else:
        aggregated = aggregate_by_types(states, aggregate_for_entity)
        for kind, (_, time_stats) in aggregated.items():
            time_stats_dict[kind] = time_stats.to_dict("records")
        weekly = aggregate(df=states,
                           col="interval_dbu",
                           by="cluster_id",
                           since_days=7)
        cluster_dbus = (weekly.rename(columns={'interval_dbu': 'dbu'})
                        .dbu.to_dict())

    # Group clusters by their type for the template's sectioned listing.
    clusters_by_type = {}
    for c in all_clusters:
        clusters_by_type.setdefault(c.cluster_type(), []).append(c)

    return render_template('clusters.html',
                           clusters_by_type=clusters_by_type,
                           price_settings=price_settings,
                           data=level_info_data,
                           cluster_dbus=cluster_dbus,
                           time_stats=time_stats_dict)
def view_dashboard():
    """Render the dashboard: running-job counts, last-7-day DBU totals and
    cumulative DBU time series per cluster type."""
    session = create_session()
    clusters = session.query(Cluster).all()
    jobs = session.query(JobRun).all()
    states = concat_dfs(cluster.state_df() for cluster in clusters)

    level_info_data = get_level_info_data()
    numjobs_dict = get_running_jobs(jobs)
    last7dbu_dict = aggregate_by_types(states, get_last_7_days_dbu)

    # Default to empty series so the template can rely on both keys existing.
    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }
    if not states.empty:
        results = aggregate_by_types(states, aggregate_for_entity)
        # The cost-summary half of each result is unused on this page.
        for key, (_, time_stats) in results.items():
            # Running total of DBUs feeds the cumulative-usage chart.
            time_stats['dbu_cumsum'] = time_stats['interval_dbu_sum'].cumsum()
            time_stats_dict[key] = time_stats.to_dict("records")

    return render_template('dashboard.html',
                           time_stats=time_stats_dict,
                           last7dbu=last7dbu_dict,
                           numjobs=numjobs_dict,
                           data=level_info_data)
def view_jobs():
    """Render the jobs page: per-job medians over the last 7 runs plus a
    30-day time series of run counts, DBUs and durations."""
    session = create_session()
    all_jobs = session.query(Job).all()
    level_info_data = get_level_info_data()
    price_settings = get_price_settings()

    # Median cost / DBU / duration of each job's recent runs.
    aggregations = {
        'cost': ['median'],
        'dbu': ['median'],
        'duration': ['median']
    }
    # Zero-filled frame substituted when a job has no recent runs at all.
    empty_agg = pd.DataFrame({
        col: {agg_name: 0 for agg_name in agg_names}
        for col, agg_names in aggregations.items()
    })

    extra_stats = {}
    for job in all_jobs:
        recent = job.runs(as_df=True, price_config=price_settings, last=7)
        if recent.empty:
            extra_stats[job.job_id] = empty_agg
        else:
            extra_stats[job.job_id] = recent.agg(aggregations)

    # All runs from the past 30 days across every job, bucketed by start time.
    monthly = concat_dfs(
        job.runs(as_df=True, price_config=price_settings, since_days=30)
        for job in all_jobs)
    if monthly.empty:
        time_stats = empty_timeseries(as_df=True)
    else:
        grouped = monthly.groupby(get_time_grouper('start_time'))
        time_stats = (grouped
                      .agg({'run_id': 'count', 'dbu': 'sum', 'duration': 'sum'})
                      .fillna(0.)
                      .reindex(get_time_index(30), fill_value=0))
    time_stats['ts'] = time_stats.index.format()
    time_stats_dict = time_stats.to_dict("records")

    return render_template('jobs.html',
                           jobs=all_jobs,
                           price_settings=price_settings,
                           data=level_info_data,
                           time_stats=time_stats_dict,
                           extra_stats=extra_stats)
def view_users():
    """Render the users page, ordered by each user's last-7-day DBU usage,
    with weekly time series of active users and average DBU per user."""
    session = create_session()
    users = session.query(User).all()
    level_info_data = get_level_info_data()

    # Attach each user's 7-day DBU total for display, then order the
    # listing from heaviest to lightest consumer.
    for user in users:
        user.dbu = aggregate(df=user.state_df(),
                             col='interval_dbu',
                             since_days=7)
    users.sort(key=lambda u: u.dbu, reverse=True)

    states = concat_dfs(u.state_df() for u in users)

    # Distinct active users per time bucket over the last week.
    active_users = (aggregate(df=states,
                              col='user_id',
                              by=get_time_grouper('timestamp'),
                              aggfunc='nunique',
                              since_days=7)
                    .reindex(get_time_index(7), fill_value=0))
    active_users['ts'] = active_users.index.format()

    # Total DBUs per bucket, and the per-active-user average (0 when the
    # division is undefined).
    dbus = (aggregate(df=states,
                      col='interval_dbu',
                      by=get_time_grouper('timestamp'),
                      aggfunc='sum',
                      since_days=7)
            .reindex(get_time_index(7), fill_value=0))
    active_users['sum_dbus'] = dbus.interval_dbu
    active_users['average_dbu'] = (
        (active_users.sum_dbus / active_users.user_id).fillna(0.))

    return render_template('users.html',
                           users=users,
                           active_users=active_users.to_dict('records'),
                           data=level_info_data)
def view_user(username):
    """Render the detail page for a single user.

    Shows the user's DBU usage per workspace (last 7 days and all time),
    per-cluster-type time series, and an aggregated cost summary. An
    unknown username falls through to the missing-entity page.
    """
    session = create_session()
    try:
        user = (session.query(User).filter(User.username == username).one())
    except Exception:
        # .one() raises when the user is absent (or duplicated); either way
        # the view treats it as "not found".
        return view_missing(type="user", id=username)
    states = user.state_df()
    # Defaults so the template always sees both cluster types.
    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }
    if not states.empty:
        # One frame of all workspace states keyed by (id, name), then the
        # key levels are flattened into ordinary columns.
        workspaces = (concat_dfs({
            (w.workspace.id, w.workspace.name): w.workspace.state_df()
            for w in user.user_workspaces
        }).reset_index([0, 1]).rename(columns={
            'level_0': 'workspace_id',
            'level_1': 'workspace_name'
        }))
        # Per-workspace DBU totals: last 7 days and all time.
        last7_workspaces = (aggregate(
            df=workspaces,
            col='interval_dbu',
            by=['workspace_id', 'workspace_name'],
            since_days=7).rename(columns={'interval_dbu': 'last7dbu'}))
        all_workspaces = (aggregate(
            df=workspaces,
            col='interval_dbu',
            by=['workspace_id', 'workspace_name'
                ]).rename(columns={'interval_dbu': 'alltimedbu'}))
        # Left-merge keeps workspaces with all-time usage but no recent
        # usage; their last7dbu is filled with 0.0.
        workspaces_dict = (pd.merge(
            all_workspaces,
            last7_workspaces,
            how='left',
            left_index=True,
            right_index=True).fillna(0.0).reset_index().sort_values(
                'last7dbu').to_dict('records'))
        price_settings = get_price_settings()
        results = aggregate_by_types(states, aggregate_for_entity)
        cost_summary_dict = {}
        for key, (cost_summary, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")
            cost_summary = cost_summary.to_dict()
            # Convert DBU counts into money with the type-specific price.
            cost = cost_summary['interval_dbu'] * price_settings[key]
            weekly_cost = (cost_summary['weekly_interval_dbu_sum'] *
                           price_settings[key])
            cost_summary['cost'] = cost
            cost_summary['weekly_cost'] = weekly_cost
            cost_summary_dict[key] = cost_summary
        # We aren't sure if we have both interactive and job
        present_key = list(cost_summary_dict.keys())[0]
        # Collapse the per-type summaries into a single summary by summing
        # each field across whichever types are present.
        # NOTE(review): raises IndexError if results is empty while states
        # is not — presumably aggregate_by_types always yields at least one
        # type for non-empty states; confirm.
        cost_summary_dict = {
            key: sum([cost_summary_dict[type][key] for type in results.keys()])
            for key in cost_summary_dict[present_key]
        }
    else:
        # No recorded state: zero usage for every workspace and cost field.
        workspaces_dict = [{
            'workspace_id': w.workspace.id,
            'workspace_name': w.workspace.name,
            'last7dbu': 0.0,
            'alltimedbu': 0.0
        } for w in user.user_workspaces]
        cost_summary_dict = {
            "interval": 0.0,
            "interval_dbu": 0.0,
            "weekly_interval_sum": 0.0,
            "weekly_interval_dbu_sum": 0.0,
            "cost": 0.0,
            "weekly_cost": 0.0
        }
    return render_template('user.html',
                           user=user,
                           workspaces=workspaces_dict,
                           cost=cost_summary_dict,
                           time_stats=time_stats_dict)