def update_repo(repo, settings):
    with MySQL(settings.database) as db:
        try:
            pull_repo(repo)

            # GET LATEST DATE
            existing_range = db.query("""
                SELECT
                    max(`date`) `max`,
                    min(`date`) `min`,
                    min(revision) min_rev,
                    max(revision) max_rev
                FROM
                    changesets
                WHERE
                    repos={{repos}}
            """, {"repos": repo.name})[0]

            ranges = wrap([
                {"min": coalesce(existing_range.max, convert.milli2datetime(0)) + timedelta(days=1)},
                {"max": existing_range.min}
            ])

            for r in ranges:
                for g, docs in qb.groupby(get_changesets(date_range=r, repo=repo), size=100):
                    for doc in docs:
                        doc.file_changes = None
                        doc.file_adds = None
                        doc.file_dels = None
                        doc.description = doc.description[0:16000]
                    db.insert_list("changesets", docs)
                    db.flush()

            missing_revisions = find_holes(db, "changesets", "revision", {"min": 0, "max": existing_range.max_rev + 1}, {"term": {"repos": repo.name}})

            for _range in missing_revisions:
                for g, docs in qb.groupby(get_changesets(revision_range=_range, repo=repo), size=100):
                    for doc in docs:
                        doc.file_changes = None
                        doc.file_adds = None
                        doc.file_dels = None
                        doc.description = doc.description[0:16000]
                    db.insert_list("changesets", docs)
                    db.flush()
        except Exception, e:
            Log.warning("Failure to pull from {{repos.name}}", {"repos": repo}, e)
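# Hedged stand-in (not from the original source): every snippet in this collection
# leans on qb.groupby(iterable, size=N), which is assumed here to yield
# (batch_index, batch) pairs of at most N items each. A plain-Python sketch of that
# chunking contract, for readers without pyLibrary on hand:
def chunked(iterable, size):
    index = 0
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == size:
            yield index, batch
            index += 1
            batch = []
    if batch:
        yield index, batch    # final, possibly short, batch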
def log_loop(settings, synch, queue, bucket, please_stop):
    with aws.Queue(settings.work_queue) as work_queue:
        for i, g in qb.groupby(queue, size=settings.param.size):
            Log.note(
                "Preparing {{num}} pulse messages to bucket={{bucket}}",
                num=len(g),
                bucket=bucket.name
            )

            full_key = unicode(synch.next_key) + ":" + unicode(MIN(g.select("_meta.count")))
            try:
                output = [
                    set_default(
                        d,
                        {"etl": {
                            "name": "Pulse block",
                            "bucket": settings.destination.bucket,
                            "timestamp": Date.now().unix,
                            "id": synch.next_key,
                            "source": {
                                "name": "pulse.mozilla.org",
                                "id": d._meta.count,
                                "count": d._meta.count,
                                "message_id": d._meta.message_id,
                                "sent": Date(d._meta.sent),
                            },
                            "type": "aggregation"
                        }}
                    )
                    for i, d in enumerate(g)
                    if d != None  # HAPPENS WHEN PERSISTENT QUEUE FAILS TO LOG start
                ]
                bucket.write(full_key, "\n".join(convert.value2json(d) for d in output))
                synch.advance()
                synch.source_key = MAX(g.select("_meta.count")) + 1

                now = Date.now()
                work_queue.add({
                    "bucket": bucket.name,
                    "key": full_key,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })

                synch.ping()
                queue.commit()
                Log.note(
                    "Wrote {{num}} pulse messages to bucket={{bucket}}, key={{key}}",
                    num=len(g),
                    bucket=bucket.name,
                    key=full_key
                )
            except Exception, e:
                queue.rollback()
                if not queue.closed:
                    Log.warning("Problem writing {{key}} to S3", key=full_key, cause=e)

            if please_stop:
                break
def pull_from_es(settings, destq, all_parents, all_children, all_descendants, work_queue):
    # LOAD PARENTS FROM ES
    for g, r in qb.groupby(qb.sort(work_queue), size=100):
        result = destq.query({
            "from": settings.destination.index,
            "select": "*",
            "where": {"terms": {"bug_id": r}}
        })
        for r in result.data:
            all_parents.extend(r.bug_id, listwrap(r.parents))
            all_children.extend(r.bug_id, listwrap(r.children))
            all_descendants.extend(r.bug_id, listwrap(r.descendants))
def _insert_loop(self, please_stop=None):
    bad_count = 0
    while not please_stop:
        try:
            Thread.sleep(seconds=1)
            messages = wrap(self.queue.pop_all())
            if messages:
                for m in messages:
                    m.value.params = leafer(m.value.params)
                    m.value.error = leafer(m.value.error)
                for g, mm in qb.groupby(messages, size=self.batch_size):
                    self.es.extend(mm)
                bad_count = 0
        except Exception, e:
            Log.warning("Problem inserting logs into ES", cause=e)
            bad_count += 1
            if bad_count > 5:
                break
def encrypt(text, _key, salt=None):
    """
    RETURN JSON OF ENCRYPTED DATA   {"salt":s, "length":l, "data":d}
    """
    if not isinstance(text, unicode):
        Log.error("only unicode is encrypted")
    if _key is None:
        Log.error("Expecting a key")
    if salt is None:
        salt = Random.bytes(16)

    data = bytearray(text.encode("utf8"))

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(salt)

    output = Dict()
    output.type = "AES256"
    output.salt = convert.bytes2base64(salt)
    output.length = len(data)

    encrypted = bytearray()
    for _, d in qb.groupby(data, size=16):
        encrypted.extend(aes_cbc_256.encrypt_block(d))
    output.data = convert.bytes2base64(encrypted)
    json = convert.value2json(output)

    if DEBUG:
        test = decrypt(json, _key)
        if test != text:
            Log.error("problem with encryption")

    return json
def decrypt(data, _key):
    """
    ACCEPT JSON OF ENCRYPTED DATA   {"salt":s, "length":l, "data":d}
    """
    # Key and iv have not been generated or provided, bail out
    if _key is None:
        Log.error("Expecting a key")

    _input = convert.json2value(data)

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(convert.base642bytearray(_input.salt))

    raw = convert.base642bytearray(_input.data)
    out_data = bytearray()
    for _, e in qb.groupby(raw, size=16):
        out_data.extend(aes_cbc_256.decrypt_block(e))

    return str(out_data[:_input.length:]).decode("utf8")
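# Hedged usage sketch (not in the original module): a minimal encrypt()/decrypt()
# round trip using the two functions above. It assumes the key is a 32-byte value
# acceptable to KeyExpander(256).expand(), and reuses Random.bytes(), the same
# helper encrypt() already uses for its salt. _example_round_trip is hypothetical.
def _example_round_trip():
    key = bytearray(Random.bytes(32))             # assumed 32-byte key for AES-256
    cipher_json = encrypt(u"attack at dawn", key)
    if decrypt(cipher_json, key) != u"attack at dawn":
        Log.error("round trip failed")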
def _load_all_in_push(self, revision, locale=None):
    # http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c
    found_revision = copy(revision)
    if isinstance(found_revision.branch, basestring):
        lower_name = found_revision.branch.lower()
    else:
        lower_name = found_revision.branch.name.lower()

    if not lower_name:
        Log.error("Defective revision? {{rev|json}}", rev=found_revision.branch)

    b = found_revision.branch = self.branches[(lower_name, locale)]
    if not b:
        b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
        if not b:
            Log.error("can not find branch ({{branch}}, {{locale}})", branch=lower_name, locale=locale)

    if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH:
        self.branches = _hg_branches.get_branches(use_cache=True, settings=self.settings)

    url = found_revision.branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + found_revision.changeset.id
    Log.note(
        "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}}): {{url}}",
        branch=found_revision.branch.name,
        locale=locale,
        changeset=found_revision.changeset.id,
        url=url
    )

    try:
        data = self._get_and_retry(url, found_revision.branch)

        revs = []
        output = None
        for index, _push in data.items():
            push = Push(id=int(index), date=_push.date, user=_push.user)

            for _, ids in qb.groupby(_push.changesets.node, size=200):
                url_param = "&".join("node=" + c[0:12] for c in ids)

                url = found_revision.branch.url.rstrip("/") + "/json-info?" + url_param
                Log.note("Reading details from {{url}}", {"url": url})

                raw_revs = self._get_and_retry(url, found_revision.branch)
                for r in raw_revs.values():
                    rev = Revision(
                        branch=found_revision.branch,
                        index=r.rev,
                        changeset=Changeset(
                            id=r.node,
                            id12=r.node[0:12],
                            author=r.user,
                            description=r.description,
                            date=Date(r.date),
                            files=r.files
                        ),
                        parents=unwraplist(r.parents),
                        children=unwraplist(r.children),
                        push=push,
                        etl={"timestamp": Date.now().unix}
                    )
                    if r.node == found_revision.changeset.id:
                        output = rev
                    if r.node[0:12] == found_revision.changeset.id[0:12]:
                        output = rev
                    _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
                    revs.append({"id": _id, "value": rev})
        self.es.extend(revs)
        return output
    except Exception, e:
        Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)
def full_etl(settings, sink, bugs):
    with Timer("process block {{start}}", {"start": min(bugs)}):
        es = elasticsearch.Index(settings.source)
        with FromES(es) as esq:
            versions = esq.query({
                "from": "bugs",
                "select": "*",
                "where": {"terms": {"bug_id": bugs}}
            })

        starts = qb.run({
            "select": [
                "bug_id",
                "bug_status",
                {"name": "attach_id", "value": "attachments.attach_id"},
                {"name": "request_time", "value": "modified_ts"},
                {"name": "request_type", "value": "attachments.flags.request_type"},
                {"name": "reviewer", "value": "attachments.flags.requestee"},
                {"name": "created_by", "value": "attachments.created_by"},
                "product",
                "component"
            ],
            "from": versions,
            "where": {"and": [
                {"terms": {"attachments.flags.request_status": ["?"]}},
                {"terms": {"attachments.flags.request_type": TYPES}},
                {"equal": ["attachments.flags.modified_ts", "modified_ts"]},
                {"term": {"attachments.isobsolete": 0}}
            ]},
            "sort": ["bug_id", "attach_id", "created_by"]
        })

        ends = qb.run({
            "select": [
                {"name": "bug_id", "value": "bug_id"},
                "bug_status",
                {"name": "attach_id", "value": "attachments.attach_id"},
                {"name": "modified_ts", "value": lambda r: Math.max(r.modified_ts, r.attachments.modified_ts, r.attachments.flags.modified_ts)},
                {"name": "reviewer", "value": "attachments.flags.requestee"},
                {"name": "request_type", "value": "attachments.flags.request_type"},
                {"name": "modified_by", "value": "attachments.flags.modified_by"},
                {"name": "product", "value": "product"},
                {"name": "component", "value": "component"},
                {"name": "review_end_reason", "value": lambda r: 'done' if r.attachments.flags.request_status != '?' else ('obsolete' if r.attachments.isobsolete == 1 else 'closed')},
                {"name": "review_result", "value": lambda r: '+' if r.attachments.flags.request_status == '+' else ('-' if r.attachments.flags.request_status == '-' else '?')}
            ],
            "from": versions,
            "where": {"and": [
                {"terms": {"attachments.flags.request_type": TYPES}},
                {"or": [
                    {"and": [  # IF THE REQUESTEE SWITCHED THE ? FLAG, THEN IT IS DONE
                        {"term": {"attachments.flags.previous_status": "?"}},
                        {"not": {"term": {"attachments.flags.request_status": "?"}}},
                        {"equal": ["attachments.flags.modified_ts", "modified_ts"]}
                    ]},
                    {"and": [  # IF OBSOLETED THE ATTACHMENT, IT IS DONE
                        {"term": {"attachments.isobsolete": 1}},
                        {"term": {"previous_values.isobsolete_value": 0}}
                    ]},
                    {"and": [  # SOME BUGS ARE CLOSED WITHOUT REMOVING REVIEW
                        {"terms": {"bug_status": ["resolved", "verified", "closed"]}},
                        {"not": {"terms": {"previous_values.bug_status_value": ["resolved", "verified", "closed"]}}}
                    ]}
                ]}
            ]}
        })

        # SOME ATTACHMENTS GO MISSING, CLOSE THEM TOO
        closed_bugs = {
            b.bug_id: b
            for b in qb.filter(versions, {"and": [  # SOME BUGS ARE CLOSED WITHOUT REMOVING REVIEW
                {"terms": {"bug_status": ["resolved", "verified", "closed"]}},
                {"range": {"expires_on": {"gte": Date.now().milli}}}
            ]})
        }

        for s in starts:
            if s.bug_id in closed_bugs:
                e = closed_bugs[s.bug_id]
                ends.append({
                    "bug_id": e.bug_id,
                    "bug_status": e.bug_status,
                    "attach_id": s.attach_id,
                    "modified_ts": e.modified_ts,
                    "reviewer": s.reviewer,
                    "request_type": s.request_type,
                    "modified_by": e.modified_by,
                    "product": e.product,
                    "component": e.component,
                    "review_end_reason": 'closed',
                    "review_result": '?'
                })

        # REVIEWS END WHEN REASSIGNED TO SOMEONE ELSE
        changes = qb.run({
            "select": [
                "bug_id",
                {"name": "attach_id", "value": "changes.attach_id"},
                "modified_ts",
                {"name": "reviewer", "value": lambda r: r.changes.old_value.split("?")[1]},
                {"name": "request_type", "value": lambda r: r.changes.old_value.split("?")[0]},
                {"name": "modified_by", "value": "null"},
                "product",
                "component",
                {"name": "review_end_reason", "value": "'reassigned'"}
            ],
            "from": versions,
            "where": {"and": [  # ONLY LOOK FOR NAME CHANGES IN THE "review?" FIELD
                {"term": {"changes.field_name": "flags"}},
                {"or": [{"prefix": {"changes.old_value": t + "?"}} for t in TYPES]}
            ]}
        })

        ends.extend(changes)

        # PYTHON VERSION NOT CAPABLE OF THIS JOIN, YET
        # reviews = qb.run({
        #     "from": starts,
        #     "select": [
        #         {"name": "bug_status", "value": "bug_status", "aggregate": "one"},
        #         {"name": "review_time", "value": "doneReview.modified_ts", "aggregate": "minimum"},
        #         {"name": "review_result", "value": "doneReview.review_result", "aggregate": "minimum"},
        #         {"name": "product", "value": "coalesce(doneReview.product, product)", "aggregate": "minimum"},
        #         {"name": "component", "value": "coalesce(doneReview.component, component)", "aggregate": "minimum"},
        #         # {"name": "keywords", "value": "(coalesce(keywords, '')+' '+ETL.parseWhiteBoard(whiteboard)).trim()+' '+flags", "aggregate": "one"},
        #         {"name": "requester_review_num", "value": "-1", "aggregate": "one"}
        #     ],
        #     "analytic": [
        #         {"name": "is_first", "value": "rownum==0 ? 1 : 0", "sort": "request_time", "edges": ["bug_id"]}
        #     ],
        #     "edges": [
        #         "bug_id",
        #         "attach_id",
        #         {"name": "reviewer", "value": "requestee"},
        #         {"name": "requester", "value": "created_by"},
        #         {"name": "request_time", "value": "modified_ts"},
        #         {
        #             "name": "doneReview",
        #             "test":
        #                 "bug_id==doneReview.bug_id && " +
        #                 "attach_id==doneReview.attach_id && " +
        #                 "requestee==doneReview.requestee && " +
        #                 "!(bug_status=='closed' && doneReview.review_end_reason=='closed') && " +
        #                 "modified_ts<=doneReview.modified_ts",
        #             "allowNulls": True,
        #             "domain": {"type": "set", "key": ["bug_id", "attach_id", "requestee", "modified_ts"], "partitions": ends}
        #         }
        #     ]
        # })

        with Timer("match starts and ends for block {{start}}", {"start": min(*bugs)}):
            reviews = []
            ends = Index(data=ends, keys=["bug_id", "attach_id", "request_type", "reviewer"])
            for g, s in qb.groupby(starts, ["bug_id", "attach_id", "request_type", "reviewer"]):
                start_candidates = qb.sort(s, {"value": "request_time", "sort": 1})
                end_candidates = qb.sort(ends[g], {"value": "modified_ts", "sort": 1})

                # ZIP, BUT WITH ADDED CONSTRAINT s.modified_ts<=e.modified_ts
                if len(start_candidates) > 1:
                    Log.note("many reviews on one attachment")
                ei = 0
                for i, s in enumerate(start_candidates):
                    while ei < len(end_candidates) and end_candidates[ei].modified_ts < coalesce(s.request_time, convert.datetime2milli(Date.MAX)):
                        ei += 1

                    e = end_candidates[ei]

                    s.review_time = e.modified_ts
                    s.review_duration = e.modified_ts - s.request_time
                    s.review_result = e.review_result
                    s.review_end_reason = e.review_end_reason
                    s.product = coalesce(e.product, s.product)
                    s.component = coalesce(e.component, s.component)
                    s.requester_review_num = -1
                    ei += 1

                    if s.bug_status == 'closed' and e.review_end_reason == 'closed':
                        # reviews on closed bugs are ignored
                        continue
                    reviews.append(s)

        qb.run({
            "from": reviews,
            "window": [{
                "name": "is_first",
                "value": "rownum == 0",
                "edges": ["bug_id"],
                "sort": ["request_time"],
                "aggregate": "none"
            }]
        })

        with Timer("add {{num}} reviews to ES for block {{start}}", {"start": min(*bugs), "num": len(reviews)}):
            sink.extend({"json": convert.value2json(r)} for r in reviews)
def transform(self, uid, talos_test_result):
    try:
        r = talos_test_result

        def mainthread_transform(r):
            if r == None:
                return None

            output = Dict()

            for i in r.mainthread_readbytes:
                output[literal_field(i[1])].name = i[1]
                output[literal_field(i[1])].readbytes = i[0]
            r.mainthread_readbytes = None

            for i in r.mainthread_writebytes:
                output[literal_field(i[1])].name = i[1]
                output[literal_field(i[1])].writebytes = i[0]
            r.mainthread_writebytes = None

            for i in r.mainthread_readcount:
                output[literal_field(i[1])].name = i[1]
                output[literal_field(i[1])].readcount = i[0]
            r.mainthread_readcount = None

            for i in r.mainthread_writecount:
                output[literal_field(i[1])].name = i[1]
                output[literal_field(i[1])].writecount = i[0]
            r.mainthread_writecount = None

            r.mainthread = output.values()

        mainthread_transform(r.results_aux)
        mainthread_transform(r.results_xperf)

        branch = r.build.branch
        if branch.lower().endswith("-non-pgo"):
            branch = branch[0:-8]
            r.build.branch = branch
            r.build.pgo = False
        else:
            r.build.pgo = True

        if r.machine.osversion.endswith(".e"):
            r.machine.osversion = r.machine.osversion[:-2]
            r.machine.e10s = True

        # ADD PUSH LOG INFO
        try:
            with Profiler("get from pushlog"):
                revision = Revision(**{"branch": {"name": branch}, "changeset": {"id": r.build.revision}})
                with self.locker:
                    revision = self.repo.get_revision(revision)
                with self.locker:
                    push = self.repo.get_push(revision)
                r.build.push_date = push.date
        except Exception, e:
            Log.warning("{{build.branch}} @ {{build.revision}} (perf_id=={{treeherder.perf_id}}) has no pushlog", r, e)
            # TRY AGAIN LATER
            return []

        new_records = []

        # RECORD THE UNKNOWN PART OF THE TEST RESULTS
        remainder = r.copy()
        remainder.results = None
        if not r.results or len(remainder.keys()) > 4:
            new_records.append(remainder)

        # RECORD TEST RESULTS
        total = DictList()
        if r.run.suite in ["dromaeo_css", "dromaeo_dom"]:
            # dromaeo IS SPECIAL, REPLICATES ARE IN SETS OF FIVE
            # RECORD ALL RESULTS
            for i, (test_name, replicates) in enumerate(r.results.items()):
                for g, sub_results in qb.groupby(replicates, size=5):
                    new_record = Dict(
                        machine=r.machine,
                        treeherder=r.treeherder,
                        run=r.run,
                        build=r.build,
                        result={
                            "test_name": unicode(test_name) + "." + unicode(g),
                            "ordering": i,
                            "samples": sub_results
                        }
                    )
                    try:
                        s = stats(sub_results)
                        new_record.result.stats = s
                        total.append(s)
                    except Exception, e:
                        Log.warning("can not reduce series to moments", e)
                    new_records.append(new_record)
        if self.db.__module__.startswith("pymysql"):
            # BUG IN PYMYSQL: CAN NOT HANDLE MULTIPLE STATEMENTS
            # https://github.com/PyMySQL/PyMySQL/issues/157
            for b in backlog:
                sql = self.preamble + b
                try:
                    if self.debug:
                        Log.note("Execute SQL:\n{{sql|indent}}", sql=sql)
                    self.cursor.execute(b)
                except Exception, e:
                    Log.error("Can not execute sql:\n{{sql}}", sql=sql, cause=e)

            self.cursor.close()
            self.cursor = self.db.cursor()
        else:
            for i, g in qb.groupby(backlog, size=MAX_BATCH_SIZE):
                sql = self.preamble + ";\n".join(g)
                try:
                    if self.debug:
                        Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql)
                    self.cursor.execute(sql)
                    self.cursor.close()
                    self.cursor = self.db.cursor()
                except Exception, e:
                    Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)

    ## Insert dictionary of values into table
    def insert(self, table_name, record):
        keys = record.keys()
def _update_cardinality(self, c):
    """
    QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
    """
    if c.type in ["object", "nested"]:
        Log.error("not supported")
    try:
        if c.table == "meta.columns":
            with self.columns.locker:
                partitions = qb.sort([g[c.abs_name] for g, _ in qb.groupby(self.columns, c.abs_name) if g[c.abs_name] != None])
                self.columns.update({
                    "set": {
                        "partitions": partitions,
                        "count": len(self.columns),
                        "cardinality": len(partitions),
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"table": c.table, "abs_name": c.abs_name}}
                })
            return
        if c.table == "meta.tables":
            with self.columns.locker:
                partitions = qb.sort([g[c.abs_name] for g, _ in qb.groupby(self.tables, c.abs_name) if g[c.abs_name] != None])
                self.columns.update({
                    "set": {
                        "partitions": partitions,
                        "count": len(self.tables),
                        "cardinality": len(partitions),
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"table": c.table, "name": c.name}}
                })
            return

        es_index = c.table.split(".")[0]
        result = self.default_es.post("/" + es_index + "/_search", data={
            "aggs": {c.name: _counting_query(c)},
            "size": 0
        })
        r = result.aggregations.values()[0]
        count = result.hits.total
        cardinality = coalesce(r.value, r._nested.value)
        if cardinality == None:
            Log.error("logic error")

        query = Dict(size=0)
        if c.type in ["object", "nested"]:
            Log.note("{{field}} has {{num}} parts", field=c.name, num=cardinality)
            with self.columns.locker:
                self.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"table": c.table, "name": c.name}}
                })
            return
        elif cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
            Log.note("{{field}} has {{num}} parts", field=c.name, num=cardinality)
            with self.columns.locker:
                self.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"table": c.table, "name": c.name}}
                })
            return
        elif c.type in _elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
            Log.note("{{field}} has {{num}} parts", field=c.name, num=cardinality)
            with self.columns.locker:
                self.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"table": c.table, "name": c.name}}
                })
            return
        elif c.nested_path:
            query.aggs[literal_field(c.name)] = {
                "nested": {"path": listwrap(c.nested_path)[0]},
                "aggs": {"_nested": {"terms": {"field": c.abs_name, "size": 0}}}
            }
        else:
            query.aggs[literal_field(c.name)] = {"terms": {"field": c.abs_name, "size": 0}}

        result = self.default_es.post("/" + es_index + "/_search", data=query)
        aggs = result.aggregations.values()[0]
        if aggs._nested:
            parts = qb.sort(aggs._nested.buckets.key)
        else:
            parts = qb.sort(aggs.buckets.key)

        Log.note("{{field}} has {{parts}}", field=c.name, parts=parts)
        with self.columns.locker:
            self.columns.update({
                "set": {
                    "count": count,
                    "cardinality": cardinality,
                    "partitions": parts,
                    "last_updated": Date.now()
                },
                "where": {"eq": {"table": c.table, "abs_name": c.abs_name}}
            })
    except Exception, e:
        if "IndexMissingException" in e and c.table.startswith("testing"):
            Log.alert("{{col.table}} does not exist", col=c)
        else:
            self.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "partitions",
                ],
                "where": {"eq": {"table": c.table, "abs_name": c.abs_name}}
            })
            Log.warning("Could not get {{col.table}}.{{col.abs_name}} info", col=c, cause=e)
def replicate(source, destination, pending, last_updated):
    """
    COPY THE DEPENDENCY RECORDS TO THE destination
    NOTE THAT THE PUBLIC CLUSTER HAS HOLES, SO WE USE blocked TO FILL THEM
    """
    for g, bugs in qb.groupby(pending, max_size=BATCH_SIZE):
        with Timer("Replicate {{num_bugs}} bug versions", {"num_bugs": len(bugs)}):
            data = source.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"terms": {"bug_id": set(bugs)}},
                        {"range": {"expires_on": {"gte": convert.datetime2milli(last_updated)}}},
                        {"or": [
                            {"exists": {"field": "dependson"}},
                            {"exists": {"field": "blocked"}}
                        ]}
                    ]}
                }},
                "from": 0,
                "size": 200000,
                "sort": [],
                "fields": ["bug_id", "modified_ts", "expires_on", "dependson", "blocked"]
            })

            with Timer("Push to destination"):
                d2 = [
                    {
                        "id": str(x.bug_id) + "_" + str(x.modified_ts)[:-3],
                        "value": {
                            "bug_id": x.bug_id,
                            "modified_ts": x.modified_ts,
                            "expires_on": x.expires_on,
                            "dependson": x.dependson
                        }
                    }
                    for x in data.hits.hits.fields
                    if x.dependson
                ]
                destination.extend(d2)

            with Timer("filter"):
                d4 = qb.run({
                    "from": data.hits.hits.fields,
                    "where": {"exists": {"field": "blocked"}}
                })

            with Timer("select"):
                d3 = qb.run({
                    "from": d4,
                    "select": [
                        {"name": "bug_id", "value": "blocked."},  # SINCE blocked IS A LIST, WE ARE SELECTING THE LIST VALUES, EFFECTIVELY PERFORMING A JOIN
                        "modified_ts",
                        "expires_on",
                        {"name": "dependson", "value": "bug_id"}
                    ]
                })

            with Timer("Push to destination"):
                destination.extend([
                    {
                        "id": str(x.bug_id) + "_" + str(x.dependson) + "_" + str(x.modified_ts)[:-3],
                        "value": x
                    }
                    for x in d3
                    if x.dependson
                ])