def _refresh_session(host: str, user: str) -> None:
    """Keep a session around if the user is still logged in."""
    with get_connection() as c:
        c.execute(
            'UPDATE `session` SET `last_update` = NOW() '
            'WHERE `host` = %s AND `user` = %s AND `end` IS NULL',
            (host, user),
        )
def _bandwidth_by_dist(start):
    with get_connection() as c:
        c.execute(
            'SELECT `dist`, SUM(`up` + `down`) as `bandwidth` '
            'FROM `mirrors_public` WHERE `date` > %s '
            'GROUP BY `dist` ORDER BY `bandwidth` DESC',
            (start,),
        )
        return [(i['dist'], _humanize(float(i['bandwidth']))) for i in c]
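# `_humanize` is referenced above but not defined in this module. A minimal
# sketch of such a helper (hypothetical; the real one may live elsewhere and
# format differently): turn a byte count into a human-readable string.
def _humanize(num_bytes):
    for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
        if abs(num_bytes) < 1024 or unit == 'PiB':
            return '{:.1f} {}'.format(num_bytes, unit)
        num_bytes /= 1024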
def _close_sessions(host: str) -> None:
    """Close all sessions for a particular host."""
    with get_connection() as c:
        c.execute(
            'UPDATE `session` SET `end` = NOW(), `last_update` = NOW() '
            'WHERE `host` = %s AND `end` IS NULL',
            (host,),
        )
def get_sessions_plot(start_day: date, end_day: date) -> Figure:
    """Return matplotlib plot representing mean session duration between start and end day."""
    with get_connection() as c:
        query = '''
            SELECT CAST(start AS DATE) AS date, AVG(TIME_TO_SEC(duration)) as mean_duration_seconds
            FROM session_duration_public
            WHERE CAST(start AS DATE) BETWEEN %s AND %s
                AND end IS NOT NULL
            GROUP BY date
        '''
        c.execute(query, (start_day, end_day))
        days = {r['date']: r for r in c}

    fig = Figure(figsize=(10, 3))
    ax = fig.add_subplot(1, 1, 1)

    x = []
    mean_duration_hours = []

    day = start_day
    while day <= end_day:
        x.append(time.mktime(day.timetuple()))
        row = days.get(day)
        # days with a mean duration over 4 hours are likely stale sessions, so plot them as 0
        mean_duration_hours.append(
            row['mean_duration_seconds'] / 3600
            if (row and row['mean_duration_seconds'] / 3600 <= 4)
            else 0,
        )
        day += ONE_DAY

    ax.grid(True)

    # we want to show an "o" marker to suggest the data points are discrete,
    # but it harms readability with too much data
    kwargs = {'marker': 'o'}
    if end_day - start_day > timedelta(days=60):
        del kwargs['marker']

    ax.plot(x, mean_duration_hours, linewidth=2, **kwargs)

    ax.set_xlim(x[0], x[-1])
    skip = max(1, len(x) // 5)  # target 5 labels
    ax.set_xticks(x[::skip])
    ax.set_xticklabels(list(map(date.fromtimestamp, x))[::skip])
    ax.set_ylim(bottom=0)
    ax.set_ylabel('Duration (hours)')
    ax.set_title(
        'Mean session duration {} to {}'.format(
            start_day.isoformat(),
            end_day.isoformat(),
        ),
    )

    return fig
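# A usage sketch (not part of the original module): rendering the returned
# Figure to PNG bytes with matplotlib's Agg backend, e.g. for serving the plot
# from a web view. The helper name is ours.
from io import BytesIO

from matplotlib.backends.backend_agg import FigureCanvasAgg


def _sessions_plot_png(start_day, end_day):
    fig = get_sessions_plot(start_day, end_day)
    buf = BytesIO()
    FigureCanvasAgg(fig).print_png(buf)
    return buf.getvalue()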
def _new_session(host: str, user: str) -> None:
    """Register a new session when a user logs into a desktop."""
    _close_sessions(host)

    with get_connection() as c:
        c.execute(
            'INSERT INTO `session` (`host`, `user`, `start`, `last_update`) '
            'VALUES (%s, %s, NOW(), NOW())',
            (host, user),
        )
def _session_exists(host: str, user: str) -> bool:
    """Return whether an open session already exists for a given host and user."""
    with get_connection() as c:
        c.execute(
            'SELECT COUNT(*) AS `count` FROM `session` '
            'WHERE `host` = %s AND `user` = %s AND `end` IS NULL',
            (host, user),
        )
        return c.fetchone()['count'] > 0
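# A sketch (assumed glue, not part of the original code) of how the session
# helpers above might be wired together when a desktop reports a login:
# refresh the session if one is already open, otherwise start a new one
# (_new_session itself closes any stale sessions for the host first).
def _log_session(host, user):
    if _session_exists(host, user):
        _refresh_session(host, user)
    else:
        _new_session(host, user)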
def _get_desktops_in_use():
    """List which desktops are currently in use."""
    # https://github.com/ocf/ocflib/blob/90f9268a89ac9d53c089ab819c1aa95bdc38823d/ocflib/lab/ocfstats.sql#L70
    # we don't use users_in_lab_count_public because we're looking for
    # desktops in use, and the view does COUNT(DISTINCT users)
    with get_connection() as c:
        c.execute('SELECT * FROM `desktops_in_use_public`;')
        return {hostname_from_domain(session['host']) for session in c}
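# `hostname_from_domain` is imported from elsewhere in the codebase (ocflib in
# the OCF's case). If it were unavailable, a minimal stand-in could be:
def _hostname_from_domain(fqdn):
    """Return the short hostname from a fully-qualified domain name."""
    return fqdn.split('.')[0]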
def get_sessions_plot(start_day, end_day):
    """Return matplotlib plot representing mean session duration between start and end day."""
    with get_connection() as c:
        query = '''
            SELECT CAST(start AS DATE) AS date, AVG(TIME_TO_SEC(duration)) as mean_duration_seconds
            FROM session_duration_public
            WHERE CAST(start AS DATE) BETWEEN %s AND %s
                AND end IS NOT NULL
            GROUP BY date
        '''
        c.execute(query, (start_day, end_day))
        days = {r['date']: r for r in c}

    fig = Figure(figsize=(10, 3))
    ax = fig.add_subplot(1, 1, 1)

    x = []
    mean_duration_hours = []

    day = start_day
    while day <= end_day:
        x.append(time.mktime(day.timetuple()))
        row = days.get(day)
        mean_duration_hours.append(row['mean_duration_seconds'] / 3600 if row else 0)
        day += ONE_DAY

    ax.grid(True)

    # we want to show an "o" marker to suggest the data points are discrete,
    # but it harms readability with too much data
    kwargs = {'marker': 'o'}
    if end_day - start_day > timedelta(days=60):
        del kwargs['marker']

    ax.plot(x, mean_duration_hours, linewidth=2, **kwargs)

    ax.set_xlim(x[0], x[-1])
    skip = max(1, len(x) // 5)  # target 5 labels
    ax.set_xticks(x[::skip])
    ax.set_xticklabels(list(map(date.fromtimestamp, x))[::skip])
    ax.set_ylim(ymin=0)
    ax.set_ylabel('Duration (hours)')
    ax.set_title('Mean session duration {} to {}'.format(
        start_day.isoformat(),
        end_day.isoformat(),
    ))

    return fig
def collect_desktops():
    """Collect the currently in use desktops for today."""
    with get_connection() as c:
        query = '''
            SELECT `host`
            FROM `session_duration_public`
            WHERE `duration` is NULL
        '''
        c.execute(query)
        # trim the '.ocf.berkeley.edu' suffix (17 characters) from each host
        desktops = {r['host'][:-17] for r in c}
    return desktops
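# An alternative sketch (assumption, not the original code): strip the domain
# suffix by name rather than relying on the magic length 17, using
# str.removesuffix (Python 3.9+).
def _strip_lab_domain(host):
    return host.removesuffix('.ocf.berkeley.edu')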
async def lab_desktops():
    public_desktops = list_desktops(public_only=True)

    with get_connection() as c:
        c.execute("SELECT * FROM `desktops_in_use_public`;")
        desktops_in_use = {hostname_from_domain(session["host"]) for session in c}

    return {
        "public_desktops_in_use": desktops_in_use.intersection(public_desktops),
        "public_desktops_num": len(public_desktops),
    }
def _toner_used_by_printer(printer, cutoff=.05, since=date(2017, 8, 20)):
    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            CREATE TEMPORARY TABLE ordered1 (PRIMARY KEY (position)) AS (
                SELECT * FROM (
                    SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                        (
                            SELECT * FROM printer_toner_public
                            WHERE printer = %s AND date > %s
                            ORDER BY date
                        ) AS T,
                        (SELECT @rownum := 0) AS r
                    )
                ) AS x
            )
            ''',
            (printer, since.strftime('%Y-%m-%d')),
        )
        cursor.execute('''
            CREATE TEMPORARY TABLE ordered2 (PRIMARY KEY (position)) AS
                (SELECT * FROM ordered1)
        ''')
        cursor.execute('''
            CREATE TEMPORARY TABLE diffs AS (
                SELECT B.date AS date, A.value/A.max - B.value/B.max as pct_diff
                FROM ordered1 as A, ordered2 as B
                WHERE B.position = A.position + 1
            )
        ''')
        cursor.execute(
            '''
            SELECT SUM(pct_diff) as toner_used FROM diffs
            WHERE ABS(pct_diff) < %s
            ''',
            (cutoff,),
        )
        return float(cursor.fetchone()['toner_used'])
def get_sessions_plot(start_day: date, end_day: date) -> Figure:
    """Return matplotlib plot representing unique logins between start and end day."""
    with get_connection() as c:
        query = '''
            SELECT `date`, `unique_logins`
            FROM `daily_sessions_public`
            WHERE `date` BETWEEN %s AND %s
        '''
        c.execute(query, (start_day, end_day))
        days = {r['date']: r for r in c}

    fig = Figure(figsize=(10, 3))
    ax = fig.add_subplot(1, 1, 1)

    x = []
    unique_logins = []

    day = start_day
    while day <= end_day:
        x.append(time.mktime(day.timetuple()))
        row = days.get(day)
        unique_logins.append(row['unique_logins'] if row else 0)
        day += ONE_DAY

    ax.grid(True)

    # we want to show an "o" marker to suggest the data points are discrete,
    # but it harms readability with too much data
    kwargs = {'marker': 'o'}
    if end_day - start_day > timedelta(days=60):
        del kwargs['marker']

    ax.plot(x, unique_logins, linewidth=2, **kwargs)

    ax.set_xlim(x[0], x[-1])
    skip = max(1, len(x) // 5)  # target 5 labels
    ax.set_xticks(x[::skip])
    ax.set_xticklabels(list(map(date.fromtimestamp, x))[::skip])
    ax.set_ylim(bottom=0)
    ax.set_title(
        'Unique lab logins {} to {}'.format(
            start_day.isoformat(),
            end_day.isoformat(),
        ),
    )

    return fig
def get_sessions_plot(start_day, end_day):
    """Return matplotlib plot representing unique logins between start and end day."""
    with get_connection() as c:
        query = '''
            SELECT `date`, `unique_logins`
            FROM `daily_sessions_public`
            WHERE `date` BETWEEN %s AND %s
        '''
        c.execute(query, (start_day, end_day))
        days = {r['date']: r for r in c}

    fig = Figure(figsize=(10, 3))
    ax = fig.add_subplot(1, 1, 1)

    x = []
    unique_logins = []

    day = start_day
    while day <= end_day:
        x.append(time.mktime(day.timetuple()))
        row = days.get(day)
        unique_logins.append(row['unique_logins'] if row else 0)
        day += ONE_DAY

    ax.grid(True)

    # we want to show an "o" marker to suggest the data points are discrete,
    # but it harms readability with too much data
    kwargs = {'marker': 'o'}
    if end_day - start_day > timedelta(days=60):
        del kwargs['marker']

    ax.plot(x, unique_logins, linewidth=2, **kwargs)

    ax.set_xlim(x[0], x[-1])
    skip = max(1, len(x) // 5)  # target 5 labels
    ax.set_xticks(x[::skip])
    ax.set_xticklabels(list(map(date.fromtimestamp, x))[::skip])
    ax.set_ylim(ymin=0)
    ax.set_title('Unique lab logins {} to {}'.format(
        start_day.isoformat(),
        end_day.isoformat(),
    ))

    return fig
def _toner_changes_for_printer(printer):
    with stats.get_connection() as cursor:
        cursor.execute(
            """
            CREATE TEMPORARY TABLE ordered1 (PRIMARY KEY (position)) AS (
                SELECT * FROM (
                    SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                        (
                            SELECT * FROM printer_toner_public
                            WHERE printer = %s
                            ORDER BY date
                        ) AS T,
                        (SELECT @rownum := 0) AS r
                    )
                ) AS x
            )
            """,
            (printer,),
        )
        cursor.execute(
            """
            CREATE TEMPORARY TABLE ordered2 (PRIMARY KEY (position)) AS
                (SELECT * FROM ordered1)
            """
        )
        cursor.execute(
            """
            SELECT B.date AS date, A.value as pages_before, B.value as pages_after
            FROM ordered1 as A, ordered2 as B
            WHERE B.position = A.position + 1
                AND B.value > A.value
                AND A.value > 0
            """
        )
        return reversed(list(cursor))
def _toner_changes_for_printer(printer):
    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            CREATE TEMPORARY TABLE ordered1 (PRIMARY KEY (position)) AS (
                SELECT * FROM (
                    SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                        (
                            SELECT * FROM printer_toner_public
                            WHERE printer = %s
                            ORDER BY date
                        ) AS T,
                        (SELECT @rownum := 0) AS r
                    )
                ) AS x
            )
            ''',
            (printer,),
        )
        cursor.execute('''
            CREATE TEMPORARY TABLE ordered2 (PRIMARY KEY (position)) AS
                (SELECT * FROM ordered1)
        ''')
        cursor.execute('''
            SELECT B.date AS date, A.value as pages_before, B.value as pages_after
            FROM ordered1 as A, ordered2 as B
            WHERE B.position = A.position + 1
                AND B.value > A.value
                AND A.value > 0
            LIMIT 20;
        ''')
        return reversed(list(cursor))
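# A usage sketch (assumed, and assuming a dict-style cursor as the other
# helpers here use): each row from _toner_changes_for_printer carries the
# `date`, `pages_before`, and `pages_after` aliases selected above, so listing
# recent toner changes is just:
def _print_toner_changes(printer):
    for row in _toner_changes_for_printer(printer):
        print('{date}: {pages_before} -> {pages_after}'.format(**row))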
def _pages_per_day():
    with stats.get_connection() as cursor:
        cursor.execute('''
            SELECT max(value) as value, cast(date as date) as date, printer
            FROM printer_pages_public GROUP BY cast(date as date), printer
            ORDER BY date ASC, printer ASC
        ''')

        last_seen = {}
        pages_printed = {}
        for row in cursor:
            if row['printer'] in last_seen:
                pages_printed.setdefault(row['date'], defaultdict(int))
                pages_printed[row['date']][row['printer']] = (
                    row['value'] - last_seen[row['printer']])
            last_seen[row['printer']] = row['value']

    return pages_printed
def _pages_per_day():
    with stats.get_connection() as cursor:
        cursor.execute(
            """
            SELECT max(value) as value, cast(date as date) as date, printer
            FROM printer_pages_public GROUP BY cast(date as date), printer
            ORDER BY date ASC, printer ASC
            """
        )

        last_seen = {}
        pages_printed = {}
        for row in cursor:
            if row["printer"] in last_seen:
                pages_printed.setdefault(row["date"], defaultdict(int))
                pages_printed[row["date"]][row["printer"]] = row["value"] - last_seen[row["printer"]]
            last_seen[row["printer"]] = row["value"]

    return pages_printed
def _pages_printed_for_printer(printer, resolution=100):
    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            SELECT Z.date, Z.value FROM (
                SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                    (
                        SELECT * FROM printer_pages_public
                        WHERE printer = %s
                        ORDER BY date
                    ) AS T,
                    (SELECT @rownum := 0) AS r
                )
            ) as Z
            WHERE Z.position mod %s = 0
            ''',
            (printer, resolution),
        )
        return [(time.mktime(row['date'].timetuple()) * 1000, row['value'])
                for row in cursor]
def _pages_per_day():
    with stats.get_connection() as cursor:
        cursor.execute('''
            SELECT max(value) as value, cast(date as date) as date, printer
            FROM printer_pages_public GROUP BY cast(date as date), printer
            ORDER BY date ASC, printer ASC
        ''')

        last_seen = {}
        pages_printed = {}
        for row in cursor:
            if row['printer'] in last_seen:
                pages_printed.setdefault(row['date'], defaultdict(int))
                pages_printed[row['date']][row['printer']] = (
                    row['value'] - last_seen[row['printer']]
                )
            last_seen[row['printer']] = row['value']

    return pages_printed
def _pages_printed_for_printer(printer, resolution=100):
    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            SELECT Z.date, Z.value FROM (
                SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                    (
                        SELECT * FROM printer_pages_public
                        WHERE printer = %s
                        ORDER BY date
                    ) AS T,
                    (SELECT @rownum := 0) AS r
                )
            ) as Z
            WHERE Z.position mod %s = 0
            ''',
            (printer, resolution),
        )
        return [
            (time.mktime(row['date'].timetuple()) * 1000, row['value'])
            for row in cursor
        ]
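# A usage sketch (assumed): the (epoch-milliseconds, value) pairs returned by
# _pages_printed_for_printer map directly onto the [x, y] series format common
# to JS charting libraries, so serializing them for a chart endpoint could be:
import json


def _pages_series_json(printer):
    # each point is (timestamp_ms, pages); coerce to plain numbers for JSON
    return json.dumps([[x, int(y)] for x, y in _pages_printed_for_printer(printer)])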
def _pages_per_day() -> Dict[str, int]:
    with stats.get_connection() as cursor:
        cursor.execute('''
            SELECT max(value) as value, cast(date as date) as date, printer
            FROM printer_pages_public GROUP BY cast(date as date), printer
            ORDER BY date ASC, printer ASC
        ''')

        # Resolves the issue of possible missing dates.
        # defaultdict(lambda: defaultdict(int)) doesn't work due to inability to
        # pickle local objects like lambdas; this effectively does the same thing
        # as that.
        pages_printed: Dict[Any, Any] = defaultdict(partial(defaultdict, int))
        last_seen: Dict[Any, Any] = {}

        for row in cursor:
            if row['printer'] in last_seen:
                pages_printed.setdefault(row['date'], defaultdict(int))
                pages_printed[row['date']][row['printer']] = (
                    row['value'] - last_seen[row['printer']]
                )
            last_seen[row['printer']] = row['value']

    return pages_printed
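# A usage sketch (not part of the original module): flattening the per-day,
# per-printer counts from _pages_per_day() into one time series per printer,
# e.g. as input for a chart. The helper name is ours.
from collections import defaultdict


def _daily_series_by_printer(pages_printed):
    series = defaultdict(list)  # printer -> [(date, pages printed that day), ...]
    for day in sorted(pages_printed):
        for printer, pages in pages_printed[day].items():
            series[printer].append((day, pages))
    return series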
def get_data(user):
    """Generate a JSON blob of data for the given user."""
    cnx = stats.get_connection()
    cursor = cnx.cursor()
    query = """
        SELECT `start`, `end` FROM `staff_session_duration_public`
        WHERE `user` = %s AND `duration` IS NOT NULL
        ORDER BY start ASC"""
    cursor.execute(query, (user,))
    cleaned = cursor.fetchall()

    if cleaned:
        step = datetime.timedelta(days=1)
        start_date = cleaned[0]['start'].replace(hour=0, minute=0, second=0, microsecond=0)
        end_date = datetime.datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)

        frequency = defaultdict(lambda: 0)
        for row in cleaned:
            start = row['start']
            start_0 = start.replace(hour=0, minute=0, second=0, microsecond=0)
            end = row['end']
            end_0 = end.replace(hour=0, minute=0, second=0, microsecond=0)

            # split each session across the days it spans, crediting each day
            # with the minutes actually spent in it
            num_days = (end_0 - start_0).days
            for day_num in range(num_days + 1):
                day = start_0 + datetime.timedelta(days=day_num)
                effective_start = max(start, day)
                effective_end = min(end, day + step)
                frequency[day] += (effective_end - effective_start).total_seconds() // 60

        # collect daily totals for every day in the range
        cur_date = start_date
        freqs = []
        while cur_date <= end_date:
            freqs.append(frequency[cur_date])
            ts = cur_date.replace(tzinfo=timezone.utc).timestamp()
            cur_date += step

        cumu_freqs = freqs.copy()
        for i in range(1, len(cumu_freqs)):
            cumu_freqs[i] += cumu_freqs[i - 1]
    else:
        cumu_freqs = None
        freqs = None
        start_date = datetime.datetime.today()

    base_data = {
        "name": user,
        "unit": "minutes",
        "year": start_date.year,
        "month": start_date.month - 1,
        "day": start_date.day,
    }

    return {
        "datasets": [{
            "data": cumu_freqs,
            "title": 'Cumulative lab usage in minutes',
            "type": "line",
            **base_data,
        }, {
            "data": freqs,
            "title": 'Daily lab usage in minutes',
            "type": "line",
            **base_data,
        }],
    }
def _toner_used_by_printer(printer, cutoff=.05, since=date(2017, 8, 20)):
    """Return toner used by a printer since a given date.

    Toner readings can be significantly noisy, including large diffs whenever
    a cartridge gets taken out and put back in after a jam, which makes it
    hard to tell whether a new cartridge was actually inserted. To reduce this
    noise we only count diffs that are smaller than a cutoff, which
    empirically seems to be more accurate.
    """
    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            CREATE TEMPORARY TABLE ordered1 (PRIMARY KEY (position)) AS (
                SELECT * FROM (
                    SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                        (
                            SELECT * FROM printer_toner_public
                            WHERE printer = %s AND date > %s
                            ORDER BY date
                        ) AS T,
                        (SELECT @rownum := 0) AS r
                    )
                ) AS x
            )
            ''',
            (printer, since.strftime('%Y-%m-%d')),
        )
        cursor.execute('''
            CREATE TEMPORARY TABLE ordered2 (PRIMARY KEY (position)) AS
                (SELECT * FROM ordered1)
        ''')
        cursor.execute('''
            CREATE TEMPORARY TABLE diffs AS (
                SELECT B.date AS date, A.value/A.max - B.value/B.max as pct_diff
                FROM ordered1 as A, ordered2 as B
                WHERE B.position = A.position + 1
            )
        ''')
        cursor.execute(
            '''
            SELECT SUM(pct_diff) as toner_used FROM diffs
            WHERE ABS(pct_diff) < %s
            ''',
            (cutoff,),
        )
        result = cursor.fetchone()['toner_used']
        assert result is not None, 'No data exists for printer \'{}\''.format(printer)
        return float(result)
def _toner_used_by_printer(
    printer: str,
    cutoff: float = .05,
    since: date = stats.current_semester_start(),
) -> float:
    """Returns toner used for a printer since a given date (by default it
    returns toner used for this semester).

    Toner numbers can be significantly noisy, including significant diffs
    whenever toner gets taken out and put back in whenever there is a jam.
    Because of this it's hard to determine if a new toner is inserted into a
    printer or if it was the same toner again. To reduce this noise we only
    count diffs that are smaller than a cutoff which empirically seems to be
    more accurate.
    """
    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            CREATE TEMPORARY TABLE ordered1 (PRIMARY KEY (position)) AS (
                SELECT * FROM (
                    SELECT T.*, @rownum := @rownum + 1 AS position FROM (
                        (
                            SELECT * FROM printer_toner_public
                            WHERE printer = %s AND date > %s
                            ORDER BY date
                        ) AS T,
                        (SELECT @rownum := 0) AS r
                    )
                ) AS x
            )
            ''',
            (printer, since.strftime('%Y-%m-%d')),
        )
        cursor.execute('''
            CREATE TEMPORARY TABLE ordered2 (PRIMARY KEY (position)) AS
                (SELECT * FROM ordered1)
        ''')
        cursor.execute('''
            CREATE TEMPORARY TABLE diffs AS (
                SELECT B.date AS date, A.value/A.max - B.value/B.max as pct_diff
                FROM ordered1 as A, ordered2 as B
                WHERE B.position = A.position + 1
            )
        ''')
        cursor.execute(
            '''
            SELECT SUM(pct_diff) as toner_used FROM diffs
            WHERE ABS(pct_diff) < %s
            ''',
            (cutoff,),
        )
        result = cursor.fetchone()['toner_used']
        return float(result or 0.0)
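# A usage sketch (assumed; the printer names below are placeholders, not taken
# from the original code): total toner used this semester across several printers.
def _total_toner_used(printers=('papercut', 'pagefault')):
    return sum(_toner_used_by_printer(printer) for printer in printers)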