Пример #1
0
def main():
    """Compare each exercise's recent bug-report count to its history.

    Loads the previously saved summary from the ``exercise_reports`` state
    file (or starts from filler values when the file cannot be opened),
    asks ``get_errors`` for an updated summary, and for every exercise that
    already had errors on record calls ``util.probability`` to judge
    whether this period's count is abnormally high.  A Slack message is
    sent to ``#support`` when the probability exceeds 0.997 and more than
    one error was seen this period.  The updated summary is then saved.

    The state file is only opened for writing once the new summary is
    ready, so a failure in ``get_errors`` can neither leak a file handle
    nor leave behind an empty, unparseable state file.
    """
    reports_path = util.relative_path("exercise_reports")
    try:
        with open(reports_path) as exercise_file:
            ex_reports = json.loads(exercise_file.read())
    except IOError:
        # No readable state yet: start from scratch.
        ex_reports = {"elapsed_time": 1,  # Filler value
                      "max_id": -1,
                      "last_time": 0}

    # Hand get_errors a copy so ex_reports keeps the historical values
    # that the comparison below relies on.
    new_reports = get_errors(copy.deepcopy(ex_reports))

    period_len = new_reports["time_this_period"]

    for ex in new_reports:
        if ex in SPECIAL_VALUES:
            # Bookkeeping entries, not exercises.
            continue

        # Only exercises with prior errors have a history to compare with.
        if ex in ex_reports and ex_reports[ex]["num_errors"] > 0:
            errors_this_period = new_reports[ex]["this_period"]

            mean, probability = util.probability(ex_reports[ex]["num_errors"],
                                                 ex_reports["elapsed_time"],
                                                 errors_this_period,
                                                 period_len)

            print("%s] TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f"
                  % (time.strftime("%Y-%m-%d %H:%M:%S %Z"),
                     ex_reports[ex]["num_errors"], ex_reports["elapsed_time"],
                     ex_reports["last_time"],
                     errors_this_period, period_len,
                     mean, probability))

            if probability > 0.997 and errors_this_period > 1:
                # period_len is divided by 60 for the "minutes" wording,
                # so it is presumably in seconds -- confirm in get_errors.
                util.send_to_slack(
                    "*Elevated exercise bug report rate in exercise `%s`\n"
                    "Reports: %s.  We saw %s in the last %s minutes,"
                    " while the mean indicates we should see around %s."
                    " *Probability that this is abnormally elevated: %.4f.*"
                    % (ex,
                       generate_slack_links(new_reports[ex]["href"]),
                       util.thousand_commas(errors_this_period),
                       util.thousand_commas(int(period_len / 60)),
                       util.thousand_commas(round(mean, 2)),
                       probability),
                    channel="#support")

        # Don't need to keep the links around in the saved state.
        if "href" in new_reports[ex]:
            del new_reports[ex]["href"]

    del new_reports["time_this_period"]

    # Overwrite the state file with the new contents.
    with open(reports_path, 'w') as exercise_file:
        exercise_file.write(json.dumps(new_reports))
Пример #2
0
def main():
    """Alert Slack when an exercise's bug-report rate looks abnormal.

    Reads the saved summary from the ``exercise_reports`` state file,
    falling back to filler values when the file cannot be opened.  It then
    gets an updated summary from ``get_errors`` and, for each exercise with
    errors already on record, uses ``util.probability`` to decide whether
    this period's count is elevated.  A message goes to ``#support`` when
    the probability is above 0.997 and more than one error was seen.  The
    updated summary replaces the saved one at the end.

    Reading and writing use separate ``with`` blocks, so the file handle is
    always closed and the state file is only rewritten after the new
    summary has been computed successfully.
    """
    reports_path = util.relative_path("exercise_reports")
    try:
        with open(reports_path) as exercise_file:
            ex_reports = json.loads(exercise_file.read())
    except IOError:
        # No readable state yet: start from scratch.
        ex_reports = {
            "elapsed_time": 1,  # Filler value
            "max_id": -1,
            "last_time": 0
        }

    # get_errors receives a copy; ex_reports must keep the old totals.
    new_reports = get_errors(copy.deepcopy(ex_reports))

    period_len = new_reports["time_this_period"]

    for ex in new_reports:
        if ex in SPECIAL_VALUES:
            # Bookkeeping entries, not exercises.
            continue

        # Without prior errors there is no baseline to compare against.
        if ex in ex_reports and ex_reports[ex]["num_errors"] > 0:
            errors_this_period = new_reports[ex]["this_period"]

            mean, probability = util.probability(ex_reports[ex]["num_errors"],
                                                 ex_reports["elapsed_time"],
                                                 errors_this_period,
                                                 period_len)

            print("%s] TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f" %
                  (time.strftime("%Y-%m-%d %H:%M:%S %Z"),
                   ex_reports[ex]["num_errors"], ex_reports["elapsed_time"],
                   ex_reports["last_time"], errors_this_period, period_len,
                   mean, probability))

            if probability > 0.997 and errors_this_period > 1:
                util.send_to_slack(
                    "*Elevated exercise bug report rate in exercise `%s`\n"
                    "Reports: %s.  We saw %s in the last %s minutes,"
                    " while the mean indicates we should see around %s."
                    " *Probability that this is abnormally elevated: %.4f.*" %
                    (ex, generate_slack_links(new_reports[ex]["href"]),
                     util.thousand_commas(errors_this_period),
                     util.thousand_commas(int(period_len / 60)),
                     util.thousand_commas(round(mean, 2)), probability),
                    channel="#support")

        # Don't need to keep the links around in the saved state.
        if "href" in new_reports[ex]:
            del new_reports[ex]["href"]

    del new_reports["time_this_period"]

    # Overwrite the state file with the new contents.
    with open(reports_path, 'w') as exercise_file:
        exercise_file.write(json.dumps(new_reports))
Пример #3
0
def main():
    """Notify HipChat when an exercise's bug-report rate looks abnormal.

    Reads the saved summary from the ``exercise_reports`` state file (or
    uses filler values when the file cannot be opened), obtains an updated
    summary from ``get_errors``, and for each exercise that already had
    errors on record asks ``util.probability`` whether this period's count
    is elevated.  A message is sent to the "Exercises" room when the
    probability exceeds 0.997 and more than one error was seen this
    period.  The updated summary is then written back.

    The file is read and written in separate ``with`` blocks so the handle
    is always closed, and the state file is only touched once the new
    summary is available.
    """
    reports_path = util.relative_path("exercise_reports")
    try:
        with open(reports_path) as exercise_file:
            ex_reports = json.loads(exercise_file.read())
    except IOError:
        # No readable state yet: start from scratch.
        ex_reports = {"elapsed_time": 1,  # Filler value
                      "max_id": -1,
                      "last_time": 0}

    # get_errors receives a copy; ex_reports must keep the old totals.
    new_reports = get_errors(copy.deepcopy(ex_reports))

    period_len = new_reports["time_this_period"]

    for ex in new_reports:
        if ex in SPECIAL_VALUES:
            # Bookkeeping entries, not exercises.
            continue

        # Without prior errors there is no baseline to compare against.
        if ex in ex_reports and ex_reports[ex]["num_errors"] > 0:
            errors_this_period = new_reports[ex]["this_period"]

            mean, probability = util.probability(ex_reports[ex]["num_errors"],
                                                 ex_reports["elapsed_time"],
                                                 errors_this_period,
                                                 period_len)

            if probability > 0.997 and errors_this_period > 1:
                # Too many errors!
                hipchat_message.send_message(
                    "Elevated exercise bug report rate in exercise %s!"
                    " Reports: %s.  We saw %s in the last %s minutes,"
                    " while the mean indicates we should see around %s."
                    " Probability that this is abnormally elevated: %.4f."
                    % (ex,
                       generate_links(new_reports[ex]["href"]),
                       util.thousand_commas(errors_this_period),
                       util.thousand_commas(int(period_len / 60)),
                       util.thousand_commas(round(mean, 2)),
                       probability),
                    room_id="Exercises")

        # Don't need to keep the links around in the saved state.
        if "href" in new_reports[ex]:
            del new_reports[ex]["href"]

    del new_reports["time_this_period"]

    # Overwrite the state file with the new contents.
    with open(reports_path, 'w') as exercise_file:
        exercise_file.write(json.dumps(new_reports))
Пример #4
0
def handle_alerts(num_new_tickets,
                  time_this_period,
                  mean,
                  probability,
                  start_time,
                  end_time):
    """Send the notifications that match how unusual the ticket rate is.

    When the rate is judged elevated, the alert goes to two Slack channels
    and to Alerta at ERROR severity, and additionally to Pagerduty once
    the probability passes a stricter threshold.  Otherwise an INFO-level
    message is sent to Alerta so that earlier alerts can be resolved.

    ``start_time`` and ``end_time`` are accepted but not used here.
    """
    # TODO(jacqueline): Including SIGNIFICANT_TICKET_COUNT hard
    # threshold here so as to catch false positives, especially during
    # transition. Maybe consider removing this once change in mean
    # starts flattening out; August 2017?
    zendesk_filter = 'https://khanacademy.zendesk.com/agent/filters/37051364'
    stats = (util.thousand_commas(num_new_tickets),
             util.thousand_commas(int(time_this_period / 60)),
             util.thousand_commas(round(mean, 2)),
             probability)
    summary = ("We saw %s in the last %s minutes,"
               " while the mean indicates we should see around %s."
               " *Probability that this is abnormally elevated: %.4f.*"
               ) % stats

    rate_is_elevated = (mean != 0 and probability > 0.999 and
                        num_new_tickets >= SIGNIFICANT_TICKET_COUNT)

    if not rate_is_elevated:
        # If ticket rate is normal, still send alert to alerta to resolve
        # any prior existing alerts.
        headline = "Normal bug report rate on <%s|Zendesk>\n" % zendesk_filter
        util.send_to_alerta(headline + summary, severity=logging.INFO)
        return

    # Too many errors!  Point people to the 'all tickets' filter.
    headline = "*Elevated bug report rate on <%s|Zendesk>*\n" % zendesk_filter
    alert = headline + summary

    for slack_channel in ('#1s-and-0s', '#user-issues'):
        util.send_to_slack(alert, channel=slack_channel)
    util.send_to_alerta(alert, severity=logging.ERROR)

    # Before we start texting people, make sure we've hit higher threshold.
    # TODO(benkraft/jacqueline): Potentially could base this off more
    # historical data from analogous dow/time datapoints, but doesn't look
    # like Zendesk API has a good way of doing this, running into request
    # quota issues. Readdress this option if threshold is too noisy.
    if probability > 0.9995:
        util.send_to_pagerduty(alert, service='beep-boop')
Пример #5
0
def main():
    """Alert HipChat when the Google Code bug-report rate looks abnormal.

    Reads the saved summary from the ``google_code`` state file (or uses
    filler values when the file cannot be opened), gets an updated summary
    from ``get_errors``, and passes the historical and current counts to
    ``util.probability``.  A HipChat message is sent when the mean is
    non-zero and the probability exceeds 0.99.  The updated summary, minus
    its per-period fields, is then written back.

    The file is read and written in separate ``with`` blocks so the handle
    is always closed, and the state file is only rewritten once the new
    summary has been computed.
    """
    state_path = util.relative_path("google_code")
    try:
        with open(state_path) as google_code_file:
            old_reports = json.loads(google_code_file.read())
    except IOError:
        # elapsed_time is filler value: doesn't matter what it is
        # since issue_count is 0.
        old_reports = {"elapsed_time": 1,
                       "last_id": -1,
                       "issue_count": 0,
                       "last_time": 0}

    # get_errors receives a copy; old_reports must keep the old totals.
    new_reports = get_errors(copy.deepcopy(old_reports))

    time_this_period = new_reports["time_this_period"]

    mean, probability = util.probability(old_reports["issue_count"],
                                         old_reports["elapsed_time"],
                                         new_reports["issues_this_period"],
                                         time_this_period)

    if mean != 0 and probability > 0.99:
        # Too many errors!
        hipchat_message.send_message(
            "Elevated bug report rate on"
            " <a href='http://khanacademy.org/r/bugs'>Google"
            " code!</a>"
            " We saw %s in the last %s minutes,"
            " while the mean indicates we should see around %s."
            " Probability that this is abnormally elevated: %.4f."
            % (util.thousand_commas(new_reports["issues_this_period"]),
               util.thousand_commas(int(time_this_period / 60)),
               util.thousand_commas(round(mean, 2)),
               probability))

    # Delete fields we don't need anymore
    del new_reports["issues_this_period"]
    del new_reports["time_this_period"]

    # Overwrite the state file with the new contents.
    with open(state_path, 'w') as google_code_file:
        google_code_file.write(json.dumps(new_reports))
Пример #6
0
def handle_alerts(new_tickets,
                  time_this_period,
                  mean,
                  probability,
                  start_time,
                  end_time):
    """Send the notifications that match how unusual the ticket rate is.

    When the rate is judged elevated, the alert (followed by one line per
    new ticket) goes to two Slack channels, and the alert alone goes to
    Alerta at ERROR severity.  Pagerduty is notified only when both a
    stricter probability threshold and a minimum ticket count are met.
    Otherwise an INFO-level, mark-resolved message is sent to Alerta.

    ``start_time`` and ``end_time`` are accepted but not used here.
    """
    # TODO(jacqueline): Including SIGNIFICANT_TICKET_COUNT hard
    # threshold here so as to catch false positives, especially during
    # transition. Maybe consider removing this once change in mean
    # starts flattening out; August 2017?
    num_new_tickets = len(new_tickets)
    stats = (util.thousand_commas(num_new_tickets),
             util.thousand_commas(int(time_this_period / 60)),
             util.thousand_commas(round(mean, 2)),
             probability)
    summary = ("We saw %s in the last %s minutes,"
               " while the mean indicates we should see around %s."
               " *Probability that this is abnormally elevated: %.4f.*"
               ) % stats

    rate_is_elevated = (mean != 0 and probability > 0.999 and
                        num_new_tickets >= SIGNIFICANT_TICKET_COUNT)

    if not rate_is_elevated:
        # If ticket rate is normal, still send alert to alerta to resolve
        # any prior existing alerts.
        normal = "Normal Zendesk report rate (#zendesk-technical)\n" + summary
        util.send_to_alerta(normal, severity=logging.INFO, mark_resolved=True)
        return

    # Too many errors!  Point people to the slack channel.
    alert = "Elevated Zendesk report rate (#zendesk-technical)\n" + summary

    # Build one line per ticket to append to the Slack copy of the alert.
    ticket_lines = []
    for ticket in new_tickets:
        opened = datetime.datetime.fromtimestamp(
            _parse_time(ticket['created_at']))
        ticket_lines.append("\n*[%s][Ticket #%d]:* %s" % (
            opened.strftime("%I:%M %p"),
            ticket['id'],
            # Strip any non-safe characters from the subject line
            re.sub(r"[^\w\-\.'%&:,\[\]/\\\(\)\" ]", '', ticket['subject'])))
    slack_text = alert + ''.join(ticket_lines)

    for slack_channel in ('#1s-and-0s', '#user-issues'):
        util.send_to_slack(slack_text, channel=slack_channel)
    util.send_to_alerta(alert, severity=logging.ERROR)

    # Before we start texting people, make sure we've hit higher threshold.
    # TODO(benkraft/jacqueline): Potentially could base this off more
    # historical data from analogous dow/time datapoints, but doesn't look
    # like Zendesk API has a good way of doing this, running into request
    # quota issues. Readdress this option if threshold is too noisy.
    if (probability > 0.9995 and
            num_new_tickets >= MIN_TICKET_COUNT_TO_PAGE_SOMEONE):
        util.send_to_pagerduty(alert, service='beep-boop')
Пример #7
0
def main():
    """Alert Slack when an exercise's JIRA ticket rate looks abnormal.

    Loads the previous run's state (per-exercise elapsed times, ticket
    counts, and the time of the last run) from the pickled ``jira`` state
    file, or starts empty when the file cannot be opened.  It fetches the
    tickets created since the last run via ``num_tickets_between`` and,
    for each exercise with new tickets, asks ``util.probability`` whether
    the new count is elevated relative to that exercise's history.  A
    message is sent to ``#content-beep-boop`` when the mean is non-zero,
    the probability exceeds 0.9995, and the new count exceeds THRESHOLD.
    The merged state is then pickled back to the same file.
    """
    jira_status_file = util.relative_path('jira')
    try:
        # NOTE(review): unpickling is only safe if nothing but this script
        # writes the state file -- confirm.
        with open(jira_status_file) as f:
            old_data = cPickle.load(f)
    except IOError:
        old_data = {'elapsed_times': {},
                    'ticket_counts': collections.defaultdict(int),
                    'last_time_t': None,
                    }

    # We compare the number of tickets in the last few minutes against
    # the historical average for all time. But we don't start "all
    # time" at AD 1, we start it 100 days ago.
    # Note: this is a way wider window than we use for Zendesk, but we're
    # making exercise-specific recommendations, so we need more data.
    now = int(time.time())
    num_days_in_past = 100
    (num_new_tickets, oldest_ticket_time_t) = num_tickets_between(
        old_data['last_time_t'] or (now - 86400 * num_days_in_past), now)

    old_ticket_counts = old_data['ticket_counts']

    # Elapsed time is computed per-exercise, so store values as we go.
    # We use a copy so that exercises that don't appear as new tickets still
    # have their old elapsed times preserved.
    elapsed_times = copy.copy(old_data['elapsed_times'])
    for exercise in num_new_tickets:
        # If this is the first time we're running, we don't have a last_time_t,
        # so we take the oldest ticket for each exercise as its last_time_t
        last_time_t = old_data['last_time_t'] or oldest_ticket_time_t[exercise]
        time_this_period = now - last_time_t
        # Avoid divide-by-0 if this is the first time we've seen an exercise
        time_last_period = old_data['elapsed_times'].get(exercise, 0.0001)

        # Use .get() rather than indexing: the counts loaded from disk are
        # whatever util.merge_int_dicts returned last run, which may not be
        # a defaultdict, and a first-seen exercise must count as 0 rather
        # than raise KeyError.
        num_old_tickets_for_exercise = old_ticket_counts.get(exercise, 0)
        num_new_tickets_for_exercise = num_new_tickets[exercise]
        (mean, probability) = util.probability(num_old_tickets_for_exercise,
                                               time_last_period,
                                               num_new_tickets_for_exercise,
                                               time_this_period)

        print('%s] %s TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f'
              % (time.strftime('%Y-%m-%d %H:%M:%S %Z'),
                  exercise,
                  num_old_tickets_for_exercise, int(time_last_period),
                  last_time_t,
                  num_new_tickets_for_exercise, time_this_period,
                  mean, probability))

        if (mean != 0 and probability > 0.9995 and
                num_new_tickets_for_exercise > THRESHOLD):
            # The exercise name goes into both URLs, so percent-encode it.
            quoted = urllib.quote(exercise.encode("utf-8"))
            ka_url = "https://khanacademy.org/e/%s" % quoted
            jira_url = "https://khanacademy.atlassian.net/browse/AI-941528?jql=Exercise%%20%%3D%%20%s" % quoted
            util.send_to_slack(
                "*Elevated bug report rate on exercise `%s`*\n"
                "We saw %s in the last %s minutes,"
                " while the mean indicates we should see around %s."
                " *Probability that this is abnormally elevated: %.4f.*\n"
                " Links: <%s|exercise on Khan Academy>, <%s|JIRA tickets>."
                % (exercise,
                   util.thousand_commas(num_new_tickets_for_exercise),
                   util.thousand_commas(int(time_this_period / 60)),
                   util.thousand_commas(round(mean, 2)),
                   probability,
                   ka_url,
                   jira_url),
                channel='#content-beep-boop')
        elapsed_times[exercise] = time_last_period + time_this_period

    new_ticket_counts = util.merge_int_dicts(old_ticket_counts,
                                             num_new_tickets)
    new_data = {'elapsed_times': elapsed_times,
                'ticket_counts': new_ticket_counts,
                'last_time_t': now,
                }
    with open(jira_status_file, 'w') as f:
        cPickle.dump(new_data, f)