def _setup(self, test_data):
    uid = self.db.query("SELECT util.newid() uid FROM DUAL")[0].uid

    ## VERIFY THE alert_reason EXISTS
    exists = self.db.query("""
        SELECT
            count(1) num
        FROM
            reasons
        WHERE
            code={{reason}}
        """,
        {"reason": REASON}
    )[0].num
    if exists == 0:
        Log.error("Expecting the database to have an alert_reason={{reason}}", {"reason": REASON})

    ## MAKE A 'PAGE' TO TEST
    self.db.execute("DELETE FROM pages")
    self.db.insert("pages", {
        "test_id": 0,
        "url": self.url
    })
    self.page_id = self.db.query("SELECT id FROM pages")[0].id

    ## ENSURE THERE ARE NO ALERTS IN DB
    self.db.execute("DELETE FROM alerts WHERE reason={{reason}}", {"reason": REASON})

    self.insert_test_results(test_data)
def make_test_database(settings):
    try:
        settings.perftest.debug = True
        no_schema = settings.perftest.copy()
        no_schema.schema = ""

        Log.note("CLEAR DATABASE {{database}}", {"database": settings.perftest.schema})
        with DB(no_schema) as db:
            db.execute("DROP DATABASE IF EXISTS " + settings.perftest.schema)
            db.flush()
            db.execute("CREATE DATABASE " + settings.perftest.schema)

        #TEMPLATE HAS {engine} TAG THAT MUST BE REPLACED
        Log.note("BUILD NEW DATABASE {{database}}", {"database": settings.perftest.schema})
        DB.execute_file(settings.perftest, "tests/resources/sql/schema_perftest.sql")
        DB.execute_file(settings.perftest, "tests/resources/sql/Add test_data_all_dimensions.sql")

        Log.note("MIGRATE {{database}} TO NEW SCHEMA", {"database": settings.perftest.schema})
        DB.execute_file(settings.perftest, "resources/migration/alerts.sql")
        DB.execute_file(settings.perftest, "resources/migration/v1.2 email.sql")

        with DB(settings.perftest) as db:
            db.execute("ALTER TABLE test_data_all_dimensions DROP FOREIGN KEY `fk_test_run_id_tdad`")
            db.execute("ALTER TABLE pages DROP FOREIGN KEY `fk_pages_test`")
            db.execute("DELETE FROM mail.delivery")
            db.execute("DELETE FROM mail.attachment")
            db.execute("DELETE FROM mail.content")

        #ADD FUNCTIONS FOR TEST VERIFICATION
        DB.execute_file(settings.perftest, "tests/resources/sql/add_objectstore.sql")
        DB.execute_file(settings.perftest, "tests/resources/sql/json.sql")

        Log.note("DATABASE READY {{database}}", {"database": settings.perftest.schema})
    except Exception, e:
        Log.error("Database setup failed", e)
def help_send_alerts(self, to_list):
    try:
        self.setup(to_list)

        ########################################################################
        # TEST
        ########################################################################
        send_alerts(
            settings=struct.wrap({"param": {"debug": True}}),
            db=self.db
        )

        ########################################################################
        # VERIFY
        ########################################################################
        emails = self.get_new_emails()  # id, to, body
        if len(to_list) == 0:
            assert len(emails) == 0
            return

        #VERIFY ONE MAIL SENT
        assert len(emails) == 1
        #VERIFY to MATCHES WHAT WAS PASSED TO THIS FUNCTION
        assert set(emails[0].to) == set(to_list), "mail.delivery does not match what was sent"
        #VERIFY last_sent IS WRITTEN
        alert_state = self.db.query("""
            SELECT
                id
            FROM
                alerts
            WHERE
                reason={{reason}} AND
                last_sent>={{send_time}}
            """, {
            "reason": self.reason,
            "send_time": self.now
        })
        expected_marked = set([d.id for d in self.test_data if CNV.JSON2object(d.details).expect == 'pass'])
        actual_marked = set(Q.select(alert_state, "id"))
        assert expected_marked == actual_marked, expand_template(
            "Expecting only id in {{expected}}, but instead got {{actual}}", {
                "expected": str(expected_marked),
                "actual": str(actual_marked)
            })

        #VERIFY BODY HAS THE CORRECT ALERTS
        expecting_alerts = set([d.id for d in map(lambda d: CNV.JSON2object(d.details), self.test_data) if d.expect == 'pass'])
        actual_alerts_sent = set([
            CNV.value2int(between(b, ">>>>", "<<<<"))
            for b in emails[0].body.split(dzAlerts.daemons.alert.SEPARATOR)
            if CNV.value2int(between(b, ">>>>", "<<<<")) != None
        ])
        assert expecting_alerts == actual_alerts_sent
    except Exception, e:
        Log.error("Test failure", e)
def significant_difference(a, b):
    try:
        if a in (0.0, 1.0) or b in (0.0, 1.0):
            return True
        if a / b < (1 - SIGNIFICANT) or (1 + SIGNIFICANT) < a / b:
            return True
        b_diff = Math.bayesian_subtract(a, b)
        if 0.3 < b_diff < 0.7:
            return False
        return True
    except Exception, e:
        Log.error("Problem comparing {{a}} and {{b}}", {"a": a, "b": b}, e)
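# Hedged worked example (not in the original source; assumes SIGNIFICANT = 0.05
# for illustration, and that Math.bayesian_subtract(a, b) is the log-odds
# difference a*(1-b) / (a*(1-b) + (1-a)*b), which the near-even-odds window
# (0.3, 0.7) suggests):
#
#   significant_difference(0.90, 0.50)  # ratio 1.8 is outside [0.95, 1.05] -> True
#   significant_difference(0.52, 0.50)  # ratio 1.04; b_diff = 0.26/0.50 = 0.52 -> False
#   significant_difference(1.00, 0.50)  # boundary value (a == 1.0) -> True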
def __enter__(self):
    if self.server is not None:
        Log.error("Emailer is already connected")

    if self.settings.use_ssl:
        self.server = smtplib.SMTP_SSL(self.settings.host, self.settings.port)
    else:
        self.server = smtplib.SMTP(self.settings.host, self.settings.port)

    if self.settings.username and self.settings.password:
        self.server.login(self.settings.username, self.settings.password)

    return self
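# Hedged usage sketch (the class name, send() signature, and settings fields
# are assumptions, not confirmed by this excerpt): because __enter__ opens the
# SMTP session and logs in, callers are expected to use the object as a context
# manager, with the matching __exit__ presumably calling self.server.quit()
# and resetting self.server to None:
#
#   with Emailer(settings.email) as emailer:
#       emailer.send(to_addrs=..., subject=..., body=...)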
from math import sqrt

import scipy.stats


def single_ttest(point, stats, min_variance=0):
    n1 = stats.count
    m1 = stats.mean
    v1 = max(stats.variance, 1.0 / 12.0)  # VARIANCE OF STANDARD UNIFORM DISTRIBUTION

    if n1 < 2:
        return {"confidence": 0, "diff": 0}

    try:
        tt = (point - m1) / sqrt(v1)
        # THE `stats` PARAMETER SHADOWS THE scipy.stats MODULE, SO THE t
        # DISTRIBUTION MUST BE REACHED THROUGH THE EXPLICIT MODULE IMPORT ABOVE
        t_distribution = scipy.stats.distributions.t(n1 - 1)
        confidence = t_distribution.cdf(tt)
        return {"confidence": confidence, "diff": tt}
    except Exception, e:
        Log.error("error with t-test", e)
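# A hedged, self-contained check of the arithmetic above (assumes scipy, which
# the function already appears to rely on): 25 historical replicates with mean
# 100 and variance 16 put a new point of 110 at tt = (110 - 100) / 4 = 2.5
# standard deviations, roughly 0.99 confidence under Student's t with 24
# degrees of freedom.
if __name__ == "__main__":
    from scipy.stats import distributions

    tt = (110 - 100) / (16 ** 0.5)          # 2.5
    print(distributions.t(25 - 1).cdf(tt))  # ~0.990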
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        #REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repo.url}}, because {{problem}}", {
                            "repo": repo,
                            "problem": line
                        })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()
    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        #REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error("Problem pulling repo, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
                    subject=email.subject,
                    html_data=email.body
                )
                db.execute("UPDATE mail.content SET date_sent={{now}} WHERE id={{id}}", {"id": email.id, "now": datetime.utcnow()})
                db.flush()
                num_done += len(email.to.split(','))
            except Exception, e:
                Log.warning("Problem sending email", e)
                not_done = 1

        db.execute("UPDATE mail.notify SET new_mail={{not_done}}", {"not_done": not_done})
        Log.note(str(num_done) + " emails have been sent")
    except Exception, e:
        Log.error("Could not send emails", e)


def main():
    settings = startup.read_settings()
    Log.start(settings.debug)
    try:
        Log.note("Running email using schema {{schema}}", {"schema": settings.perftest.schema})
        with DB(settings.alerts) as db:
            email_send(
                db=db,
                emailer=Emailer(settings.email),
                debug=nvl(settings.debug, False)
            )
    except Exception, e:
        Log.warning("Failure to send emails", cause=e)
def _setup(self):
    uid = self.db.query("SELECT util.newid() uid FROM DUAL")[0].uid

    ## VERIFY THE alert_reason EXISTS
    exists = self.db.query("""
        SELECT
            count(1) num
        FROM
            reasons
        WHERE
            code={{reason}}
        """,
        {"reason": REASON}
    )[0].num
    if exists == 0:
        Log.error("Expecting the database to have an alert_reason={{reason}}", {"reason": REASON})

    ## MAKE A 'PAGE' TO TEST
    self.db.execute("DELETE FROM pages")
    self.db.insert("pages", {
        "test_id": 0,
        "url": self.url
    })
    self.page_id = self.db.query("SELECT id FROM pages")[0].id

    ## ADD A THRESHOLD TO TEST WITH
    self.db.execute("""
        INSERT INTO alert_page_thresholds (
            id,
            page,
            threshold,
            severity,
            reason,
            time_added,
            contact
        ) VALUES (
            {{uid}},
            {{page_id}},
            {{threshold}},
            {{severity}},
            concat("(", {{url}}, ") for test"),
            now(),
            "*****@*****.**"
        )
        """, {
        "uid": uid,
        "url": self.url,
        "page_id": self.page_id,
        "severity": self.severity,
        "threshold": 800
    })

    ## ENSURE THERE ARE NO ALERTS IN DB
    self.db.execute("DELETE FROM alerts WHERE reason={{reason}}", {"reason": REASON})

    ## diff_time IS REQUIRED TO TRANSLATE THE TEST DATES TO SOMETHING MORE CURRENT
    now_time = CNV.datetime2unix(datetime.utcnow())
    max_time = max([CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S")) for t in CNV.table2list(self.test_data.header, self.test_data.rows)])
    diff_time = now_time - max_time

    ## INSERT THE TEST RESULTS
    for t in CNV.table2list(self.test_data.header, self.test_data.rows):
        time = CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S"))
        time += diff_time

        self.db.insert("test_data_all_dimensions", {
            "id": SQL("util.newid()"),
            "test_run_id": SQL("util.newid()"),
            "product_id": 0,
            "operating_system_id": 0,
            "test_id": 0,
            "page_id": self.page_id,
            "date_received": time,
            "revision": "ba928cbd5191",
            "product": "Firefox",
            "branch": "Mozilla-Inbound",
            "branch_version": "23.0a1",
            "operating_system_name": "mac",
            "operating_system_version": "OS X 10.8",
            "processor": "x86_64",
            "build_type": "opt",
            "machine_name": "talos-mtnlion-r5-049",
            "pushlog_id": 19998363,
            "push_date": time,
            "test_name": "tp5o",
            "page_url": self.url,
            "mean": float(t.mean),
            "std": float(t["mean+std"]) - float(t.mean),
            "h0_rejected": 0,
            "p": None,
            "n_replicates": t.count,
            "fdr": 0,
            "trend_mean": None,
            "trend_std": None,
            "test_evaluation": 0,
            "status": 1
        })
def send_alerts(settings, db):
    """
    BLINDLY SENDS ALERTS FROM THE ALERTS TABLE, ASSUMING ALL HAVE THE SAME STRUCTURE.
    """
    debug = settings.param.debug
    db.debug = debug

    #TODO: REMOVE, LEAVE IN DB
    if db.debug:
        db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
            "template": CNV.object2JSON(TEMPLATE),
            "subject": CNV.object2JSON(SUBJECT),
            "reason": REASON
        })
        db.flush()

    try:
        new_alerts = db.query("""
            SELECT
                a.id alert_id,
                a.reason,
                r.description,
                a.details,
                a.severity,
                a.confidence,
                a.revision,
                r.email_template,
                r.email_subject
            FROM
                alerts a
            JOIN
                reasons r ON r.code = a.reason
            WHERE
                a.last_sent IS NULL AND
                a.status <> 'obsolete' AND
                math.bayesian_add(a.severity, a.confidence) > {{alert_limit}} AND
                a.solution IS NULL AND
                a.reason in {{reasons}} AND
                a.create_time > {{min_time}}
            ORDER BY
                math.bayesian_add(a.severity, a.confidence) DESC,
                json.number(left(details, 65000), "diff_percent") DESC
            LIMIT 10
            """, {
            "last_sent": datetime.utcnow() - RESEND_AFTER,
            "alert_limit": ALERT_LIMIT - EPSILON,
            "min_time": datetime.utcnow() - LOOK_BACK,
            "reasons": SQL("(" + ", ".join(db.quote_value(v) for v in SEND_REASONS) + ")")
        })

        if not new_alerts:
            if debug:
                Log.note("Nothing important to email")
            return

        for alert in new_alerts:
            #poor souls that signed up for emails
            listeners = ";".join(db.query("SELECT email FROM listeners WHERE reason={{reason}}", {"reason": alert.reason}).email)

            body = [HEADER]
            if alert.confidence >= 1:
                alert.confidence = 0.999999

            alert.details = CNV.JSON2object(alert.details)
            try:
                alert.revision = CNV.JSON2object(alert.revision)
            except Exception, e:
                pass
            alert.score = str(-log(1.0 - Math.bayesian_add(alert.severity, alert.confidence), 10))  #SHOW NUMBER OF NINES
            alert.details.url = alert.details.page_url
            example = alert.details.example
            for e in alert.details.tests.example + [example]:
                if e.push_date_min:
                    e.push_date_max = (2 * e.push_date) - e.push_date_min
                    e.date_range = (datetime.utcnow() - CNV.milli2datetime(e.push_date_min)).total_seconds() / (24 * 60 * 60)  #REQUIRED FOR DATAZILLA B2G CHART REFERENCE
                    e.date_range = nvl(nvl(*[v for v in (7, 30, 60) if v > e.date_range]), 90)  #PICK FIRST v > CURRENT VALUE

            subject = expand_template(CNV.JSON2object(alert.email_subject), alert)
            body.append(expand_template(CNV.JSON2object(alert.email_template), alert))
        body = "".join(body) + FOOTER

        if debug:
            Log.note("EMAIL: {{email}}", {"email": body})

        if len(body) > MAX_EMAIL_LENGTH:
            Log.note("Truncated the email body")
            suffix = "... (has been truncated)"
            body = body[0:MAX_EMAIL_LENGTH - len(suffix)] + suffix  #keep it reasonable

        db.call("mail.send", (
            listeners,  #to
            subject,
            body,  #body
            None
        ))

        #I HOPE I CAN SEND ARRAYS OF NUMBERS
        db.execute(
            "UPDATE alerts SET last_sent={{time}} WHERE {{where}}", {
                "time": datetime.utcnow(),
                "where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(new_alerts, "alert_id")}})
            }
        )
    except Exception, e:
        Log.error("Could not send alerts", e)
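# Hedged arithmetic note on the "number of nines" score above (the definition
# of bayesian_add is an assumption mirroring the SQL math.bayesian_add used in
# the query; it is not confirmed by this excerpt):
if __name__ == "__main__":
    from math import log

    def bayesian_add(a, b):
        # ASSUMED: COMBINE TWO (0,1) SCORES AS IF THEY WERE INDEPENDENT ODDS
        return a * b / (a * b + (1 - a) * (1 - b))

    score = bayesian_add(0.9, 0.99)  # 0.891 / 0.892 ~= 0.99888
    print(-log(1.0 - score, 10))     # ~2.95, i.e. nearly three nines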
def _normalize(esfilter):
    """
    DO NOT USE Structs, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING and AND or FILTERS
    """
    if esfilter is TRUE_FILTER or esfilter is FALSE_FILTER or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + CNV.object2JSON(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter["and"] != None:
            output = []
            for a in esfilter["and"]:
                if isinstance(a, (list, set)):
                    from dzAlerts.util.env.logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = normalize(a)
                if a_ is not a:
                    isDiff = True
                    a = a_
                if a == TRUE_FILTER:
                    isDiff = True
                    continue
                if a == FALSE_FILTER:
                    return FALSE_FILTER
                if a.get("and", None):
                    isDiff = True
                    a.isNormal = None
                    output.extend(a.get("and", None))
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return TRUE_FILTER
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"and": output})
            continue

        if esfilter["or"] != None:
            output = []
            for a in esfilter["or"]:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                    a = a_
                if a == TRUE_FILTER:
                    return TRUE_FILTER
                if a == FALSE_FILTER:
                    isDiff = True
                    continue
                if a.get("or", None):
                    a.isNormal = None
                    isDiff = True
                    output.extend(a["or"])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return FALSE_FILTER
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"or": output})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return TRUE_FILTER

        if esfilter.terms != None:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            return {
                                "or": [
                                    {"missing": {"field": k}},
                                    {"terms": {k: rest}}
                                ],
                                "isNormal": True
                            }
                        else:
                            return {"missing": {"field": k}, "isNormal": True}
                    else:
                        esfilter.isNormal = True
                        return esfilter
            return FALSE_FILTER

        if esfilter["not"] != None:
            _sub = esfilter["not"]
            sub = _normalize(_sub)
            if sub is FALSE_FILTER:
                return TRUE_FILTER
            elif sub is TRUE_FILTER:
                return FALSE_FILTER
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
def iterator():
    try:
        while True:
            try:
                line = proc.stdout.readline()
                if line == '':
                    proc.wait()
                    if proc.returncode:
                        Log.error("Unable to pull hg log: return code {{return_code}}", {
                            "return_code": proc.returncode
                        })
                    return
            except Exception, e:
                Log.error("Problem getting another line", e)

            if line.strip() == "":
                continue
            Log.note(line)

            # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
            # branch = "{branch}%0A"
            # parent = "{parent}%0A"
            # tag = "{tag}%0A"
            # child = "{child}%0A"
            (
                date,
                node,
                rev,
                author,
                branches,
                files,
                file_adds,
                file_dels,
                p1rev,
                p1node,
                parents,
                children,
                tags,
                desc
            ) = (CNV.latin12unicode(urllib.unquote(c)) for c in line.split("\t"))

            file_adds = set(file_adds.split("\n")) - {""}
            file_dels = set(file_dels.split("\n")) - {""}
            files = set(files.split("\n")) - set()

            doc = {
                "repo": repo.name,
                "date": CNV.unix2datetime(CNV.value2number(date.split(" ")[0])),
                "node": node,
                "revision": rev,
                "author": author,
                "branches": set(branches.split("\n")) - {""},
                "file_changes": files - file_adds - file_dels - {""},
                "file_adds": file_adds,
                "file_dels": file_dels,
                "parents": set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                "children": set(children.split("\n")) - {""},
                "tags": set(tags.split("\n")) - {""},
                "description": desc
            }
            doc = ElasticSearch.scrub(doc)
            yield doc
    except Exception, e:
        if isinstance(e, ValueError) and e.message.startswith("need more than "):
            Log.error("Problem iterating through log ({{message}})", {
                "message": line
            }, e)
        Log.error("Problem iterating through log", e)
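# Hedged note (the hg invocation itself is outside this excerpt): the
# commented-out templates above suggest each input line comes from something
# like
#
#   hg log --template "{date|hgdate|urlescape}\t{node}\t{rev}\t..."
#
# i.e. one URL-escaped, tab-separated changeset per line, which is why the
# tuple assignment expects exactly fourteen fields and unquotes each one.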
def _normalize(esfilter):
    """
    DO NOT USE Structs, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING and AND or FILTERS
    """
    if esfilter is TRUE_FILTER or esfilter is FALSE_FILTER or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + CNV.object2JSON(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter["and"]:
            output = []
            for a in esfilter["and"]:
                if isinstance(a, (list, set)):
                    from dzAlerts.util.env.logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = normalize(a)
                if a_ is not a:
                    isDiff = True
                    a = a_
                if a == TRUE_FILTER:
                    isDiff = True
                    continue
                if a == FALSE_FILTER:
                    return FALSE_FILTER
                if a.get("and", None):
                    isDiff = True
                    a.isNormal = None
                    output.extend(a.get("and", None))
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return TRUE_FILTER
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"and": output})
            continue

        if esfilter["or"]:
            output = []
            for a in esfilter["or"]:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                    a = a_
                if a == TRUE_FILTER:
                    return TRUE_FILTER
                if a == FALSE_FILTER:
                    isDiff = True
                    continue
                if a.get("or", None):
                    a.isNormal = None
                    isDiff = True
                    output.extend(a["or"])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return FALSE_FILTER
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"or": output})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return TRUE_FILTER

        if esfilter.terms != None:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    esfilter.isNormal = True
                    return esfilter
            return FALSE_FILTER

        if esfilter["not"] != None:
            _sub = esfilter["not"]
            sub = _normalize(_sub)
            if sub is FALSE_FILTER:
                return TRUE_FILTER
            elif sub is TRUE_FILTER:
                return FALSE_FILTER
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
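# A hedged example of the collapsing behavior (using the wrap and TRUE_FILTER
# names from above; not part of the original source): nested conjunctions
# flatten into a single "and", and clauses that are always true drop out:
#
#   f = wrap({"and": [
#       {"and": [{"term": {"a": 1}}, {"term": {"b": 2}}]},
#       TRUE_FILTER
#   ]})
#   _normalize(f)
#   # -> {"and": [{"term": {"a": 1}}, {"term": {"b": 2}}]}, with isNormal set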
if debug:
    Log.note("Update Alerts: ({{num_new}} new, {{num_change}} changed, {{num_delete}} obsoleted)", {
        "num_new": len(new_alerts),
        "num_change": len(changed_alerts),
        "num_delete": len(obsolete_alerts)
    })

if new_alerts:
    for a in new_alerts:
        a.id = SQL("util.newid()")
        a.last_updated = NOW
    try:
        alerts_db.insert_list("alerts", new_alerts)
    except Exception, e:
        Log.error("problem with insert", e)

for curr in changed_alerts:
    if len(nvl(curr.solution, "").strip()) != 0:
        continue  # DO NOT TOUCH SOLVED ALERTS

    a = found_alerts[(curr.tdad_id, )]
    if a == None:
        Log.error("Programmer error, changed_alerts must have {{key_value}}", {"key_value": curr.tdad_id})

    if significant_difference(curr.severity, a.severity) or \
            significant_difference(curr.confidence, a.confidence) or \
            curr.reason != a.reason:
        curr.last_updated = NOW
        alerts_db.update("alerts", {"id": curr.id}, a)
def page_threshold_limit(db, debug):
    """
    simplest of rules, to test the dataflow from test_run, to alert, to email;
    may prove slightly useful too: point out any pages that are breaking
    human-set threshold limits
    """
    db.debug = debug

    try:
        # CALCULATE HOW FAR BACK TO LOOK
        lasttime = db.query("SELECT last_run, description FROM reasons WHERE code={{type}}", {"type": REASON})[0]
        lasttime = nvl(lasttime.last_run, datetime.utcnow())
        min_date = lasttime + LOOK_BACK

        # FIND ALL PAGES THAT HAVE LIMITS TO TEST
        # BRING BACK ONES THAT BREAK LIMITS
        # BUT DO NOT ALREADY HAVE AN ALERT EXISTING
        pages = db.query("""
            SELECT
                t.id tdad_id,
                t.n_replicates,
                t.mean,
                t.std,
                h.threshold,
                h.severity,
                h.reason,
                m.id alert_id
            FROM
                alert_page_thresholds h
            JOIN
                test_data_all_dimensions t ON t.page_id = h.page
            LEFT JOIN
                alerts m ON m.tdad_id = t.test_run_id AND m.reason = {{type}}
            WHERE
                h.threshold < t.mean AND
                t.push_date > {{min_date}} AND
                (m.id IS NULL OR m.status = 'obsolete')
            """,
            {"type": REASON, "min_date": min_date}
        )

        # FOR EACH PAGE THAT BREAKS LIMITS
        for page in pages:
            if page.alert_id != None:
                continue  # ALERT ALREADY EXISTS, DO NOT ADD ANOTHER

            alert = {
                "id": SQL("util.newid()"),
                "status": "new",
                "create_time": datetime.utcnow(),
                "last_updated": datetime.utcnow(),
                "tdad_id": page.tdad_id,
                "reason": REASON,
                "details": CNV.object2JSON({
                    "expected": float(page.threshold),
                    "actual": float(page.mean),
                    "reason": page.reason
                }),
                "severity": page.severity,
                "confidence": 1.0  # USING NORMAL DIST ASSUMPTION WE CAN ADJUST
                                   # CONFIDENCE EVEN BEFORE THRESHOLD IS HIT!
                                   # FOR NOW WE KEEP IT SIMPLE
            }
            db.insert("alerts", alert)

        for page in pages:
            if page.alert_id == None:
                continue  # NOTHING EXISTING TO UPDATE
            db.update("alerts", None)  # ERROR FOR NOW

        # OBSOLETE THE ALERTS THAT SHOULD NO LONGER GET SENT
        obsolete = db.query("""
            SELECT
                m.id,
                m.tdad_id
            FROM
                alerts m
            JOIN
                test_data_all_dimensions t ON m.tdad_id = t.id
            JOIN
                alert_page_thresholds h ON t.page_id = h.page
            WHERE
                m.reason = {{reason}} AND
                h.threshold >= t.mean AND
                t.push_date > {{time}}
            """,
            {"reason": REASON, "time": min_date}
        )

        if obsolete:
            db.execute(
                "UPDATE alerts SET status='obsolete' WHERE {{where}}",
                {"where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(obsolete, "id")}})}
            )

        db.execute(
            "UPDATE reasons SET last_run={{now}} WHERE code={{reason}}",
            {"now": datetime.utcnow(), "reason": REASON}
        )
    except Exception, e:
        Log.error("Could not perform threshold comparisons", e)
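# Hedged walk-through of the rule above (values are illustrative only): a
# threshold row of 800 for a page whose latest mean is 850 yields one new
# alert with details {"expected": 800.0, "actual": 850.0, ...} at the
# threshold row's severity and confidence 1.0; when a later run drops the
# mean back to 780 (threshold >= mean), the alert is flipped to 'obsolete'
# and will no longer be emailed.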