Пример #1
0
def _refresh_session(host: str, user: str) -> None:
    """Bump ``last_update`` on the open session for ``user`` on ``host``.

    Only rows whose ``end`` column is still NULL are updated.
    """
    statement = (
        'UPDATE `session` SET `last_update` = NOW() '
        'WHERE `host` = %s AND `user` = %s AND `end` IS NULL'
    )
    params = (host, user)

    with get_connection() as cursor:
        cursor.execute(statement, params)
Пример #2
0
def _bandwidth_by_dist(start):
    """Return total mirror bandwidth per dist for rows dated after ``start``.

    The query sums ``up + down`` from ``mirrors_public`` grouped by ``dist``
    and orders the groups by that sum, largest first.

    Returns a list of ``(dist, bandwidth)`` tuples where ``bandwidth`` is the
    summed value converted to ``float`` and passed through ``_humanize``.
    """
    # Each fragment ends with a space so the implicitly concatenated literals
    # cannot fuse two SQL tokens together (previously "%s" ran straight into
    # "GROUP BY").
    query = (
        'SELECT `dist`, SUM(`up` + `down`) as `bandwidth` '
        'FROM `mirrors_public` WHERE `date` > %s '
        'GROUP BY `dist` ORDER BY `bandwidth` DESC'
    )

    with get_connection() as c:
        # DB-API parameters are a sequence, so wrap the single value in a tuple.
        c.execute(query, (start,))
        # Consume the rows before the context manager exits.
        # NOTE(review): presumably the connection is released on exit -- confirm.
        return [(row['dist'], _humanize(float(row['bandwidth']))) for row in c]
Пример #3
0
def _pages_per_day() -> Dict[str, int]:
    """Run the per-day, per-printer maximum page-count query.

    NOTE(review): the body ends right after executing the query and never
    returns a value, so callers receive ``None`` despite the annotation.
    Confirm whether the row-processing part of this function is missing.
    """
    query = '''
            SELECT max(value) as value, cast(date as date) as date, printer
                FROM printer_pages_public
                GROUP BY cast(date as date), printer
                ORDER BY date ASC, printer ASC
        '''
    with stats.get_connection() as cursor:
        cursor.execute(query)
Пример #4
0
def _close_sessions(host: str) -> None:
    """Mark every still-open session on ``host`` as ended now.

    Both ``end`` and ``last_update`` are set to the database's ``NOW()``.
    """
    statement = (
        'UPDATE `session` SET `end` = NOW(), `last_update` = NOW() '
        'WHERE `host` = %s AND `end` IS NULL'
    )

    with get_connection() as cursor:
        cursor.execute(statement, (host,))
Пример #5
0
def _bandwidth_by_dist(start):
    """Return total mirror bandwidth per dist for rows dated after ``start``.

    The query sums ``up + down`` from ``mirrors_public`` grouped by ``dist``
    and orders the groups by that sum, largest first.

    Returns a list of ``(dist, bandwidth)`` tuples where ``bandwidth`` is the
    summed value converted to ``float`` and passed through ``_humanize``.
    """
    # Each fragment ends with a space so the implicitly concatenated literals
    # cannot fuse two SQL tokens together (previously "%s" ran straight into
    # "GROUP BY").
    query = (
        'SELECT `dist`, SUM(`up` + `down`) as `bandwidth` '
        'FROM `mirrors_public` WHERE `date` > %s '
        'GROUP BY `dist` ORDER BY `bandwidth` DESC'
    )

    with get_connection() as c:
        # DB-API parameters are a sequence, so wrap the single value in a tuple.
        c.execute(query, (start,))
        # Consume the rows before the context manager exits.
        # NOTE(review): presumably the connection is released on exit -- confirm.
        return [(row['dist'], _humanize(float(row['bandwidth']))) for row in c]
Пример #6
0
def get_sessions_plot(start_day: datetime, end_day: datetime) -> Figure:
    """Build a matplotlib figure of the mean session duration per day.

    One point is plotted for every step of ``ONE_DAY`` from ``start_day``
    through ``end_day`` inclusive.  A day with no row in the query result, or
    whose mean duration is above four hours, is plotted as zero.
    """
    query = '''
        SELECT
            CAST(start AS DATE) AS date,
            AVG(TIME_TO_SEC(duration)) as mean_duration_seconds
          FROM session_duration_public
          WHERE
            CAST(start AS DATE) BETWEEN %s AND %s
            AND end IS NOT NULL
          GROUP BY date
        '''
    with get_connection() as cursor:
        cursor.execute(query, (start_day, end_day))
        rows_by_date = {record['date']: record for record in cursor}

    figure = Figure(figsize=(10, 3))
    axes = figure.add_subplot(1, 1, 1)

    timestamps = []
    hours_per_day = []

    current = start_day
    while current <= end_day:
        timestamps.append(time.mktime(current.timetuple()))

        record = rows_by_date.get(current)
        hours = 0
        if record:
            candidate = record['mean_duration_seconds'] / 3600
            if candidate <= 4:
                hours = candidate
        hours_per_day.append(hours)

        current += ONE_DAY

    axes.grid(True)

    # "o" markers hint that the data points are discrete, but they clutter
    # the plot once the range covers more than 60 days
    if end_day - start_day <= timedelta(days=60):
        plot_options = {'marker': 'o'}
    else:
        plot_options = {}
    axes.plot(timestamps, hours_per_day, linewidth=2, **plot_options)

    axes.set_xlim(timestamps[0], timestamps[-1])

    stride = max(1, len(timestamps) // 5)  # aim for roughly five labels
    axes.set_xticks(timestamps[::stride])
    tick_labels = [date.fromtimestamp(stamp) for stamp in timestamps]
    axes.set_xticklabels(tick_labels[::stride])
    axes.set_ylim(bottom=0)
    axes.set_ylabel('Duration (hours)')

    title = 'Mean session duration {} to {}'.format(
        start_day.isoformat(),
        end_day.isoformat(),
    )
    axes.set_title(title)
    return figure
Пример #7
0
def _new_session(host: str, user: str) -> None:
    """Record a new session for ``user`` on ``host``.

    Any session still open on the host is closed first via
    ``_close_sessions``.
    """
    _close_sessions(host)

    statement = (
        'INSERT INTO `session` (`host`, `user`, `start`, `last_update`) '
        'VALUES (%s, %s, NOW(), NOW())'
    )
    with get_connection() as cursor:
        cursor.execute(statement, (host, user))
Пример #8
0
def _session_exists(host: str, user: str) -> bool:
    """Tell whether ``user`` has a session on ``host`` whose ``end`` is NULL."""
    statement = (
        'SELECT COUNT(*) AS `count` FROM `session` '
        'WHERE `host` = %s AND `user` = %s AND `end` IS NULL'
    )

    with get_connection() as cursor:
        cursor.execute(statement, (host, user))
        open_sessions = cursor.fetchone()['count']
        return open_sessions > 0
Пример #9
0
def _get_desktops_in_use():
    """Return the set of hostnames found in ``desktops_in_use_public``.

    Each row's ``host`` is converted with ``hostname_from_domain``.
    """
    # https://github.com/ocf/ocflib/blob/90f9268a89ac9d53c089ab819c1aa95bdc38823d/ocflib/lab/ocfstats.sql#L70
    # users_in_lab_count_public is not used here because we want desktops in
    # use, and that view does COUNT(DISTINCT users)
    with get_connection() as cursor:
        cursor.execute('SELECT * FROM `desktops_in_use_public`;')

    in_use = set()
    for session in cursor:
        in_use.add(hostname_from_domain(session['host']))
    return in_use
Пример #10
0
def get_sessions_plot(start_day, end_day):
    """Build a matplotlib figure of the mean session duration per day.

    One point is plotted for every step of ``ONE_DAY`` from ``start_day``
    through ``end_day`` inclusive; a day with no row in the query result is
    plotted as zero.
    """
    query = '''
        SELECT
            CAST(start AS DATE) AS date,
            AVG(TIME_TO_SEC(duration)) as mean_duration_seconds
          FROM session_duration_public
          WHERE
            CAST(start AS DATE) BETWEEN %s AND %s
            AND end IS NOT NULL
          GROUP BY date
        '''
    with get_connection() as cursor:
        cursor.execute(query, (start_day, end_day))
        rows_by_date = {record['date']: record for record in cursor}

    figure = Figure(figsize=(10, 3))
    axes = figure.add_subplot(1, 1, 1)

    timestamps = []
    hours_per_day = []

    current = start_day
    while current <= end_day:
        timestamps.append(time.mktime(current.timetuple()))

        record = rows_by_date.get(current)
        if record:
            hours_per_day.append(record['mean_duration_seconds'] / 3600)
        else:
            hours_per_day.append(0)

        current += ONE_DAY

    axes.grid(True)

    # "o" markers hint that the data points are discrete, but they clutter
    # the plot once the range covers more than 60 days
    if end_day - start_day <= timedelta(days=60):
        plot_options = {'marker': 'o'}
    else:
        plot_options = {}
    axes.plot(timestamps, hours_per_day, linewidth=2, **plot_options)

    axes.set_xlim(timestamps[0], timestamps[-1])

    stride = max(1, len(timestamps) // 5)  # aim for roughly five labels
    axes.set_xticks(timestamps[::stride])
    tick_labels = [date.fromtimestamp(stamp) for stamp in timestamps]
    axes.set_xticklabels(tick_labels[::stride])
    axes.set_ylim(ymin=0)
    axes.set_ylabel('Duration (hours)')

    title = 'Mean session duration {} to {}'.format(
        start_day.isoformat(),
        end_day.isoformat(),
    )
    axes.set_title(title)
    return figure
Пример #11
0
def collect_desktops():
    """Return the set of hosts whose session row has a NULL ``duration``.

    The last 17 characters of every ``host`` value are dropped before it is
    added to the set.
    """
    query = '''
            SELECT `host`
            FROM `session_duration_public`
            WHERE `duration` is NULL
        '''
    # Length of the domain suffix that gets cut off each host.
    # NOTE(review): assumes every host ends in ".ocf.berkeley.edu" -- confirm.
    suffix_length = len('.ocf.berkeley.edu')

    with get_connection() as cursor:
        cursor.execute(query)

    hosts = set()
    for record in cursor:
        hosts.add(record['host'][:-suffix_length])
    return hosts
Пример #12
0
async def lab_desktops():
    """Report which public desktops appear in ``desktops_in_use_public``.

    Returns a dict with the in-use hostnames that are also public desktops
    and the total number of public desktops.
    """
    public_desktops = list_desktops(public_only=True)

    with get_connection() as cursor:
        cursor.execute("SELECT * FROM `desktops_in_use_public`;")

    busy = set()
    for session in cursor:
        busy.add(hostname_from_domain(session["host"]))

    public_in_use = busy.intersection(public_desktops)
    return {
        "public_desktops_in_use": public_in_use,
        "public_desktops_num": len(public_desktops),
    }
Пример #13
0
def _toner_used_by_printer(printer, cutoff=.05, since=date(2017, 8, 20)):
    """Return the summed toner usage for ``printer`` after ``since``.

    Readings from ``printer_toner_public`` are numbered in date order, each
    reading is paired with the next one, and the difference in ``value/max``
    between the pair is computed.  Only differences whose absolute value is
    below ``cutoff`` are summed.

    Returns the sum as a ``float``; 0.0 when no difference qualifies (the
    SQL ``SUM`` is NULL in that case).
    """
    create_ordered1 = '''
            CREATE TEMPORARY TABLE ordered1
                (PRIMARY KEY (position))
                AS (
                    SELECT * FROM (
                        SELECT
                            T.*,
                            @rownum := @rownum + 1 AS position
                            FROM (
                                (
                                    SELECT * FROM printer_toner_public
                                    WHERE printer = %s AND
                                    date > %s
                                    ORDER BY date
                                ) AS T,
                                (SELECT @rownum := 0) AS r
                            )
                    ) AS x
                )
        '''
    # A temporary table cannot be referenced twice in one query, hence the copy.
    # NOTE(review): that is the presumed reason for ordered2 -- confirm.
    create_ordered2 = '''
            CREATE TEMPORARY TABLE ordered2
                (PRIMARY KEY (position))
                AS (SELECT * FROM ordered1)
        '''
    create_diffs = '''
            CREATE TEMPORARY TABLE diffs
            AS (SELECT
                B.date AS date,
                A.value/A.max - B.value/B.max as pct_diff
                FROM
                    ordered1 as A,
                    ordered2 as B
                WHERE
                    B.position = A.position + 1)
        '''
    select_total = '''
            SELECT SUM(pct_diff) as toner_used
            FROM
            diffs
            WHERE
            ABS(pct_diff)<%s
        '''

    with stats.get_connection() as cursor:
        cursor.execute(create_ordered1, (printer, since.strftime('%Y-%m-%d')))
        cursor.execute(create_ordered2)
        cursor.execute(create_diffs)
        # Parameters must be a sequence: "(cutoff)" is just a float, so use a
        # real one-element tuple.
        cursor.execute(select_total, (cutoff,))
        result = cursor.fetchone()['toner_used']

    # SUM() over zero rows yields NULL/None; float(None) would raise TypeError.
    return float(result or 0.0)
Пример #14
0
def get_sessions_plot(start_day: date, end_day: date) -> Figure:
    """Build a matplotlib figure of unique lab logins per day.

    One point is plotted for every step of ``ONE_DAY`` from ``start_day``
    through ``end_day`` inclusive; a day with no row in the query result is
    plotted as zero.
    """
    query = '''
            SELECT `date`, `unique_logins`
            FROM `daily_sessions_public`
            WHERE `date` BETWEEN %s AND %s
        '''
    with get_connection() as cursor:
        cursor.execute(query, (start_day, end_day))
        rows_by_date = {record['date']: record for record in cursor}

    figure = Figure(figsize=(10, 3))
    axes = figure.add_subplot(1, 1, 1)

    timestamps = []
    logins_per_day = []

    current = start_day
    while current <= end_day:
        timestamps.append(time.mktime(current.timetuple()))

        record = rows_by_date.get(current)
        if record:
            logins_per_day.append(record['unique_logins'])
        else:
            logins_per_day.append(0)

        current += ONE_DAY

    axes.grid(True)

    # "o" markers hint that the data points are discrete, but they clutter
    # the plot once the range covers more than 60 days
    if end_day - start_day <= timedelta(days=60):
        plot_options = {'marker': 'o'}
    else:
        plot_options = {}
    axes.plot(timestamps, logins_per_day, linewidth=2, **plot_options)

    axes.set_xlim(timestamps[0], timestamps[-1])

    stride = max(1, len(timestamps) // 5)  # aim for roughly five labels
    axes.set_xticks(timestamps[::stride])
    tick_labels = [date.fromtimestamp(stamp) for stamp in timestamps]
    axes.set_xticklabels(tick_labels[::stride])
    axes.set_ylim(bottom=0)

    title = 'Unique lab logins {} to {}'.format(
        start_day.isoformat(),
        end_day.isoformat(),
    )
    axes.set_title(title)
    return figure
Пример #15
0
def get_sessions_plot(start_day, end_day):
    """Build a matplotlib figure of unique lab logins per day.

    One point is plotted for every step of ``ONE_DAY`` from ``start_day``
    through ``end_day`` inclusive; a day with no row in the query result is
    plotted as zero.
    """
    query = '''
            SELECT `date`, `unique_logins`
            FROM `daily_sessions_public`
            WHERE `date` BETWEEN %s AND %s
        '''
    with get_connection() as cursor:
        cursor.execute(query, (start_day, end_day))
        rows_by_date = {record['date']: record for record in cursor}

    figure = Figure(figsize=(10, 3))
    axes = figure.add_subplot(1, 1, 1)

    timestamps = []
    logins_per_day = []

    current = start_day
    while current <= end_day:
        timestamps.append(time.mktime(current.timetuple()))

        record = rows_by_date.get(current)
        if record:
            logins_per_day.append(record['unique_logins'])
        else:
            logins_per_day.append(0)

        current += ONE_DAY

    axes.grid(True)

    # "o" markers hint that the data points are discrete, but they clutter
    # the plot once the range covers more than 60 days
    if end_day - start_day <= timedelta(days=60):
        plot_options = {'marker': 'o'}
    else:
        plot_options = {}
    axes.plot(timestamps, logins_per_day, linewidth=2, **plot_options)

    axes.set_xlim(timestamps[0], timestamps[-1])

    stride = max(1, len(timestamps) // 5)  # aim for roughly five labels
    axes.set_xticks(timestamps[::stride])
    tick_labels = [date.fromtimestamp(stamp) for stamp in timestamps]
    axes.set_xticklabels(tick_labels[::stride])
    axes.set_ylim(ymin=0)

    title = 'Unique lab logins {} to {}'.format(
        start_day.isoformat(),
        end_day.isoformat(),
    )
    axes.set_title(title)
    return figure
Пример #16
0
def _toner_changes_for_printer(printer):
    """Return the readings where a printer's toner value went up.

    Readings from ``printer_toner_public`` are numbered in date order and each
    is paired with the following one.  Pairs where the later value is greater
    than the earlier one, and the earlier one is above zero, are selected.

    Returns a reverse iterator over the selected rows.
    """
    create_ordered1 = """
            CREATE TEMPORARY TABLE ordered1
                (PRIMARY KEY (position))
                AS (
                    SELECT * FROM (
                        SELECT
                            T.*,
                            @rownum := @rownum + 1 AS position
                            FROM (
                                (
                                    SELECT * FROM printer_toner_public
                                    WHERE printer = %s
                                    ORDER BY date
                                ) AS T,
                                (SELECT @rownum := 0) AS r
                            )
                    ) AS x
                )
        """
    create_ordered2 = """
            CREATE TEMPORARY TABLE ordered2
                (PRIMARY KEY (position))
                AS (SELECT * FROM ordered1)
        """
    select_changes = """
            SELECT
                B.date AS date,
                A.value as pages_before,
                B.value as pages_after
                FROM
                    ordered1 as A,
                    ordered2 as B
                WHERE
                    B.position = A.position + 1 AND
                    B.value > A.value AND
                    A.value > 0
        """

    with stats.get_connection() as cursor:
        cursor.execute(create_ordered1, (printer,))
        cursor.execute(create_ordered2)
        cursor.execute(select_changes)
        rows = list(cursor)
        return reversed(rows)
Пример #17
0
def _toner_changes_for_printer(printer):
    """Return up to 20 readings where a printer's toner value went up.

    Readings from ``printer_toner_public`` are numbered in date order and each
    is paired with the following one.  Pairs where the later value is greater
    than the earlier one, and the earlier one is above zero, are selected.

    Returns a reverse iterator over the selected rows.

    NOTE(review): the final SELECT applies ``LIMIT 20`` without an
    ``ORDER BY``, so which 20 rows are kept is left to the server -- confirm
    that this is intended.
    """
    create_ordered1 = '''
            CREATE TEMPORARY TABLE ordered1
                (PRIMARY KEY (position))
                AS (
                    SELECT * FROM (
                        SELECT
                            T.*,
                            @rownum := @rownum + 1 AS position
                            FROM (
                                (
                                    SELECT * FROM printer_toner_public
                                    WHERE printer = %s
                                    ORDER BY date
                                ) AS T,
                                (SELECT @rownum := 0) AS r
                            )
                    ) AS x
                )
        '''
    create_ordered2 = '''
            CREATE TEMPORARY TABLE ordered2
                (PRIMARY KEY (position))
                AS (SELECT * FROM ordered1)
        '''
    select_changes = '''
            SELECT
                B.date AS date,
                A.value as pages_before,
                B.value as pages_after
                FROM
                    ordered1 as A,
                    ordered2 as B
                WHERE
                    B.position = A.position + 1 AND
                    B.value > A.value AND
                    A.value > 0
           LIMIT 20;
        '''

    with stats.get_connection() as cursor:
        cursor.execute(create_ordered1, (printer, ))
        cursor.execute(create_ordered2)
        cursor.execute(select_changes)
        rows = list(cursor)
        return reversed(rows)
Пример #18
0
def _pages_per_day():
    """Return pages printed per day and printer.

    The query yields the maximum ``value`` per (day, printer), ordered by day.
    For every row after a printer's first, the difference from that printer's
    previous value is stored under ``result[date][printer]``.
    """
    query = '''
            SELECT max(value) as value, cast(date as date) as date, printer
                FROM printer_pages_public
                GROUP BY cast(date as date), printer
                ORDER BY date ASC, printer ASC
        '''
    pages_printed = {}
    # most recent value seen so far for each printer
    previous_value = {}

    with stats.get_connection() as cursor:
        cursor.execute(query)

        for row in cursor:
            printer = row['printer']
            value = row['value']
            if printer in previous_value:
                per_printer = pages_printed.setdefault(row['date'], defaultdict(int))
                per_printer[printer] = value - previous_value[printer]
            previous_value[printer] = value

    return pages_printed
Пример #19
0
def _pages_per_day():
    """Return pages printed per day and printer.

    The query yields the maximum ``value`` per (day, printer), ordered by day.
    For every row after a printer's first, the difference from that printer's
    previous value is stored under ``result[date][printer]``.
    """
    query = """
            SELECT max(value) as value, cast(date as date) as date, printer
                FROM printer_pages_public
                GROUP BY cast(date as date), printer
                ORDER BY date ASC, printer ASC
        """
    pages_printed = {}
    # most recent value seen so far for each printer
    previous_value = {}

    with stats.get_connection() as cursor:
        cursor.execute(query)

        for row in cursor:
            printer = row["printer"]
            value = row["value"]
            if printer in previous_value:
                per_printer = pages_printed.setdefault(row["date"], defaultdict(int))
                per_printer[printer] = value - previous_value[printer]
            previous_value[printer] = value

    return pages_printed
Пример #20
0
def _pages_printed_for_printer(printer, resolution=100):
    """Return a sample of page-count readings for ``printer``.

    Readings are numbered in date order and only those whose position is a
    multiple of ``resolution`` are kept.

    Returns a list of ``(timestamp_ms, value)`` tuples, where the timestamp
    is ``time.mktime`` of the row's date multiplied by 1000.
    """
    query = '''
            SELECT Z.date, Z.value FROM (
                SELECT
                    T.*,
                    @rownum := @rownum + 1 AS position
                FROM (
                    (
                        SELECT * FROM printer_pages_public
                        WHERE printer = %s
                        ORDER BY date
                    ) AS T,
                    (SELECT @rownum := 0) AS r
                )
            ) as Z
            WHERE Z.position mod %s = 0
        '''
    with stats.get_connection() as cursor:
        cursor.execute(query, (printer, resolution))

        samples = []
        for row in cursor:
            millis = time.mktime(row['date'].timetuple()) * 1000
            samples.append((millis, row['value']))
        return samples
Пример #21
0
def _pages_per_day():
    """Return pages printed per day and printer.

    The query yields the maximum ``value`` per (day, printer), ordered by day.
    For every row after a printer's first, the difference from that printer's
    previous value is stored under ``result[date][printer]``.
    """
    query = '''
            SELECT max(value) as value, cast(date as date) as date, printer
                FROM printer_pages_public
                GROUP BY cast(date as date), printer
                ORDER BY date ASC, printer ASC
        '''
    pages_printed = {}
    # most recent value seen so far for each printer
    previous_value = {}

    with stats.get_connection() as cursor:
        cursor.execute(query)

        for row in cursor:
            printer = row['printer']
            value = row['value']
            if printer in previous_value:
                per_printer = pages_printed.setdefault(row['date'], defaultdict(int))
                per_printer[printer] = value - previous_value[printer]
            previous_value[printer] = value

    return pages_printed
Пример #22
0
def _pages_printed_for_printer(printer, resolution=100):
    """Return a sample of page-count readings for ``printer``.

    Readings are numbered in date order and only those whose position is a
    multiple of ``resolution`` are kept.

    Returns a list of ``(timestamp_ms, value)`` tuples, where the timestamp
    is ``time.mktime`` of the row's date multiplied by 1000.
    """
    query = '''
            SELECT Z.date, Z.value FROM (
                SELECT
                    T.*,
                    @rownum := @rownum + 1 AS position
                FROM (
                    (
                        SELECT * FROM printer_pages_public
                        WHERE printer = %s
                        ORDER BY date
                    ) AS T,
                    (SELECT @rownum := 0) AS r
                )
            ) as Z
            WHERE Z.position mod %s = 0
        '''
    with stats.get_connection() as cursor:
        cursor.execute(query, (printer, resolution))

        samples = []
        for row in cursor:
            millis = time.mktime(row['date'].timetuple()) * 1000
            samples.append((millis, row['value']))
        return samples
Пример #23
0
def _pages_per_day() -> Dict[Any, Dict[Any, int]]:
    """Return pages printed per day and printer.

    The query yields the maximum ``value`` per (day, printer), ordered by day.
    For every row after a printer's first, the difference from that printer's
    previous value is stored under ``result[date][printer]``.

    Returns a nested mapping ``{date: {printer: pages}}``.  Both levels are
    ``defaultdict``s, so looking up a missing date or printer yields an empty
    mapping or ``0`` instead of raising ``KeyError``.
    """
    with stats.get_connection() as cursor:
        cursor.execute('''
            SELECT max(value) as value, cast(date as date) as date, printer
                FROM printer_pages_public
                GROUP BY cast(date as date), printer
                ORDER BY date ASC, printer ASC
        ''')

        # Resolves the issue of possible missing dates.
        # defaultdict(lambda: defaultdict(int)) doesn't work due to inability to pickle local objects like lambdas;
        # this effectively does the same thing as that.
        pages_printed: Dict[Any, Dict[Any, int]] = defaultdict(partial(defaultdict, int))
        # most recent value seen so far for each printer
        last_seen: Dict[Any, Any] = {}

        for row in cursor:
            printer = row['printer']
            if printer in last_seen:
                # The outer defaultdict creates the per-date mapping on first
                # access, so no explicit setdefault is needed.
                pages_printed[row['date']][printer] = row['value'] - last_seen[printer]
            last_seen[printer] = row['value']

    return pages_printed
Пример #24
0
def _start_of_day(moment):
    """Return ``moment`` truncated to midnight of the same day."""
    return moment.replace(hour=0, minute=0, second=0, microsecond=0)


def get_data(user):
    """Generate a JSON-serializable blob of lab usage data for ``user``.

    Sessions with a non-NULL ``duration`` are read from
    ``staff_session_duration_public`` and their minutes are attributed to each
    calendar day they overlap.

    Returns a dict with a ``"datasets"`` list holding two entries: cumulative
    minutes and daily minutes, one value per day from the day of the first
    session through today.  When the user has no sessions, both ``"data"``
    values are ``None`` and the date fields refer to today.
    """
    # NOTE(review): the connection itself is never closed here -- confirm
    # whether stats.get_connection() hands out a shared connection.
    cnx = stats.get_connection()
    cursor = cnx.cursor()
    query = """
            SELECT `start`, `end`
            FROM `staff_session_duration_public`
            WHERE `user` = %s
                AND `duration` IS NOT NULL
            ORDER BY start ASC"""
    try:
        # Parameters must be a sequence: "(user)" is just the string itself.
        cursor.execute(query, (user,))
        cleaned = cursor.fetchall()
    finally:
        cursor.close()

    if cleaned:
        step = datetime.timedelta(days=1)
        start_date = _start_of_day(cleaned[0]['start'])
        end_date = _start_of_day(datetime.datetime.today())
        # minutes of usage keyed by the midnight that starts each day
        frequency = defaultdict(int)

        for row in cleaned:
            start = row['start']
            end = row['end']
            first_day = _start_of_day(start)
            last_day = _start_of_day(end)

            # A session may span midnight, so split it across every day it
            # touches and clamp it to that day's boundaries.
            for day_num in range((last_day - first_day).days + 1):
                day = first_day + datetime.timedelta(days=day_num)

                effective_start = max(start, day)
                effective_end = min(end, day + step)
                frequency[day] += (effective_end -
                                   effective_start).total_seconds() // 60

        # one value per day, including days without any session
        freqs = []
        cur_date = start_date
        while cur_date <= end_date:
            freqs.append(frequency[cur_date])
            cur_date += step

        # running total of the daily values
        cumu_freqs = []
        running_total = 0
        for minutes in freqs:
            running_total += minutes
            cumu_freqs.append(running_total)
    else:
        cumu_freqs = None
        freqs = None
        start_date = datetime.datetime.today()

    base_data = {
        "name": user,
        "unit": "minutes",
        "year": start_date.year,
        # month is emitted zero-based
        "month": start_date.month - 1,
        "day": start_date.day,
    }

    return {
        "datasets": [{
            "data": cumu_freqs,
            "title": 'Cumulative lab usage in minutes',
            "type": "line",
            **base_data,
        }, {
            "data": freqs,
            'title': 'Daily lab usage in minutes',
            "type": "line",
            **base_data,
        }]
    }
Пример #25
0
def _toner_used_by_printer(printer, cutoff=.05, since=date(2017, 8, 20)):
    """Return the summed toner usage for ``printer`` after ``since``.

    Toner readings can be very noisy: large diffs show up whenever a toner
    is taken out and put back in, for example after a jam, which makes it
    hard to tell whether a new toner was inserted.  To reduce this noise only
    diffs whose absolute value is below ``cutoff`` are counted, which
    empirically seems to be more accurate.

    Readings from ``printer_toner_public`` are numbered in date order and the
    difference in ``value/max`` between each reading and the next is summed.

    Returns the sum as a ``float``.

    Raises ``AssertionError`` when the query yields no sum for the printer.
    """
    create_ordered1 = '''
            CREATE TEMPORARY TABLE ordered1
                (PRIMARY KEY (position))
                AS (
                    SELECT * FROM (
                        SELECT
                            T.*,
                            @rownum := @rownum + 1 AS position
                            FROM (
                                (
                                    SELECT * FROM printer_toner_public
                                    WHERE printer = %s AND
                                    date > %s
                                    ORDER BY date
                                ) AS T,
                                (SELECT @rownum := 0) AS r
                            )
                    ) AS x
                )
        '''
    create_ordered2 = '''
            CREATE TEMPORARY TABLE ordered2
                (PRIMARY KEY (position))
                AS (SELECT * FROM ordered1)
        '''
    create_diffs = '''
            CREATE TEMPORARY TABLE diffs
            AS (SELECT
                B.date AS date,
                A.value/A.max - B.value/B.max as pct_diff
                FROM
                    ordered1 as A,
                    ordered2 as B
                WHERE
                    B.position = A.position + 1)
        '''
    select_total = '''
            SELECT SUM(pct_diff) as toner_used
            FROM
            diffs
            WHERE
            ABS(pct_diff)<%s
        '''

    with stats.get_connection() as cursor:
        cursor.execute(create_ordered1, (printer, since.strftime('%Y-%m-%d')))
        cursor.execute(create_ordered2)
        cursor.execute(create_diffs)
        cursor.execute(select_total, (cutoff, ))
        result = cursor.fetchone()['toner_used']

        # Raise explicitly instead of using ``assert``: assert statements are
        # removed under ``python -O``, which would turn this check into an
        # opaque TypeError from float(None).  The exception type is unchanged.
        if result is None:
            raise AssertionError(
                'No data exists for printer \'{}\''.format(printer),
            )
        return float(result)
Пример #26
0
def _toner_used_by_printer(
    printer: str,
    cutoff: float = .05,
    since: 'Optional[date]' = None,
) -> float:
    """Returns toner used for a printer since a given date (by default it
    returns toner used for this semester).

    Toner numbers can be significantly noisy, including significant diffs
    whenever toner gets taken out and put back in whenever there is a jam.
    Because of this it's hard to determine if a new toner is inserted into a
    printer or if it was the same toner again. To reduce this noise we only
    count diffs that are smaller than a cutoff which empirically seems to be
    more accurate.

    ``since`` defaults to ``None``, meaning ``stats.current_semester_start()``
    evaluated at call time.  Returns 0.0 when no diff qualifies.
    """
    # Resolve the default per call. A default written in the signature is
    # evaluated only once, when the function is defined, so it would go stale
    # in a long-running process.
    if since is None:
        since = stats.current_semester_start()

    with stats.get_connection() as cursor:
        cursor.execute(
            '''
            CREATE TEMPORARY TABLE ordered1
                (PRIMARY KEY (position))
                AS (
                    SELECT * FROM (
                        SELECT
                            T.*,
                            @rownum := @rownum + 1 AS position
                            FROM (
                                (
                                    SELECT * FROM printer_toner_public
                                    WHERE printer = %s AND
                                    date > %s
                                    ORDER BY date
                                ) AS T,
                                (SELECT @rownum := 0) AS r
                            )
                    ) AS x
                )
        ''', (printer, since.strftime('%Y-%m-%d')),
        )
        cursor.execute('''
            CREATE TEMPORARY TABLE ordered2
                (PRIMARY KEY (position))
                AS (SELECT * FROM ordered1)
        ''')
        cursor.execute('''
            CREATE TEMPORARY TABLE diffs
            AS (SELECT
                B.date AS date,
                A.value/A.max - B.value/B.max as pct_diff
                FROM
                    ordered1 as A,
                    ordered2 as B
                WHERE
                    B.position = A.position + 1)
        ''')
        cursor.execute(
            '''
            SELECT SUM(pct_diff) as toner_used
            FROM
            diffs
            WHERE
            ABS(pct_diff)<%s
        ''', (cutoff,),
        )
        result = cursor.fetchone()['toner_used']
        # SUM() over zero rows is NULL/None; report that as no toner used.
        return float(result or 0.0)