def test_branch_count(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"aggregate": "count"},
        ],
        "edges": [
            "build.branch"
        ],
        "where": {"or": [
            {"missing": "build.id"}
            # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "table"
    }})

    query = value2json(test.query).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

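# A hedged helper sketch: the tests in this collection all repeat the same
# build-query / send / parse sequence, which could be extracted as below.
# `self.testing.query`, `error()`, and the mo-json helpers are taken from the
# surrounding tests; the helper name itself is hypothetical.
def _execute_query(self, query_dict):
    query = value2json(query_dict).encode('utf8')
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    return json2value(response.all_content.decode('utf8'))
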
def process(source_key, source, destination, resources, please_stop=None):
    for task in source:
        # pull net new tasks to process
        # for each task, get the optimized_tasks.list
        tasks = http.get(ACTIVEDATA_URL, json={
            "from": "tasks",
            "where": {"and": [
                {"gte": {"action.start_time": {"date": "today-month"}}},
                {"suffix": {"task.artifacts.name": "/optimized_tasks.list"}}
            ]},
            "sort": {"action.start_time": "desc"},
            "limit": 10000,
        })
        for t in tasks:
            destination.add(t)

def _open(self):
    """ DO NOT USE THIS UNLESS YOU close() FIRST"""
    if self.settings.host.startswith("mysql://"):
        # DECODE THE URI: mysql://username:password@host:optional_port/database_name
        up = strings.between(self.settings.host, "mysql://", "@")
        if ":" in up:
            # SPLIT BEFORE UNQUOTING, SO A PASSWORD MAY CONTAIN AN ENCODED ":"
            username, password = up.split(":", 1)
            self.settings.username = unquote(username)
            self.settings.password = unquote(password)
        else:
            self.settings.username = up

        url = strings.between(self.settings.host, "@", None)
        hp, self.settings.schema = url.split("/", 1)
        if ":" in hp:
            self.settings.host, self.settings.port = hp.split(":")
            self.settings.port = int(self.settings.port)
        else:
            self.settings.host = hp

    # SSL PEM
    if self.settings.host in ("localhost", "mysql", '127.0.0.1'):
        ssl_context = None
    else:
        if self.settings.ssl and not self.settings.ssl.pem:
            Log.error("Expecting 'pem' property in ssl")
        # ssl_context = ssl.create_default_context(**get_ssl_pem_file(self.settings.ssl.pem))
        filename = File(".pem") / URL(self.settings.ssl.pem).host
        filename.write_bytes(http.get(self.settings.ssl.pem).content)
        ssl_context = {"ca": filename.abspath}

    try:
        self.db = connect(
            host=self.settings.host,
            port=self.settings.port,
            user=coalesce(self.settings.username, self.settings.user),
            passwd=coalesce(self.settings.password, self.settings.passwd),
            db=coalesce(self.settings.schema, self.settings.db),
            read_timeout=coalesce(
                self.settings.read_timeout,
                (EXECUTE_TIMEOUT / 1000) - 10 if EXECUTE_TIMEOUT else None,
                5 * 60
            ),
            charset=u"utf8",
            use_unicode=True,
            ssl=ssl_context,
            cursorclass=cursors.SSCursor
        )
    except Exception as e:
        if self.settings.host.find("://") == -1:
            Log.error(
                u"Failure to connect to {{host}}:{{port}}",
                host=self.settings.host,
                port=self.settings.port,
                cause=e
            )
        else:
            Log.error(u"Failure to connect. PROTOCOL PREFIX IS PROBABLY BAD", e)

    self.cursor = None
    self.partial_rollback = False
    self.transaction_level = 0
    self.backlog = []  # accumulate the write commands so they are sent at once
    if self.readonly:
        self.begin()

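# A minimal standalone sketch of the same URI decoding, using only the standard
# library for comparison; urlparse performs the username/password/host/port/
# database split that _open() does by hand. The function name and example URI
# are hypothetical.
from urllib.parse import urlparse, unquote

def parse_mysql_uri(uri):
    # e.g. "mysql://user:p%40ss@db.example.com:3306/mydb"
    parts = urlparse(uri)
    return {
        "username": unquote(parts.username) if parts.username else None,
        "password": unquote(parts.password) if parts.password else None,
        "host": parts.hostname,
        "port": parts.port,  # None when the port is omitted
        "schema": parts.path.lstrip("/"),
    }
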
def more():
    xml = http.get(self.url + "?" + value2url_param(state)).content
    data = BeautifulSoup(xml, 'xml')

    state.get_more = data.find("istruncated").contents[0] == "true"
    contents = data.findAll("contents")
    if len(contents):
        state.marker = contents[-1].find("key").contents[0]
    return [
        {k: t(d.find(k).contents[0]) for k, t in content_keys.items()}
        for d in contents
    ]

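# A hedged driver sketch (names taken from the closure above): call more()
# until the S3-style listing reports it is no longer truncated. state.marker
# carries the continuation key between pages via value2url_param(state).
def list_all_keys():
    keys = []
    while True:
        keys.extend(more())
        if not state.get_more:
            break
    return keys
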
def test_coverage_parser(self):
    diff = http.get(
        'https://hg.mozilla.org/mozilla-central/raw-rev/14dc6342ec5'
    ).content.decode('utf8')
    moves = diff_to_moves(diff)
    Log.note("{{files}}", files=[
        m.old.name if m.new.name == 'dev/null' else m.new.name
        for m in moves
    ])

def _get_source_code_from_hg(self, revision, file_path):
    response = http.get(
        expand_template(
            FILE_URL,
            {
                "location": revision.branch.url,
                "rev": revision.changeset.id,
                "path": file_path,
            },
        )
    )
    return response.content.decode("utf8", "replace")

def _get_url(url, branch, **kwargs):
    with Explanation("get push from {{url}}", url=url, debug=DEBUG):
        response = http.get(url, **kwargs)
        data = json2value(response.content.decode("utf8"))
        if data.error.startswith("unknown revision"):
            Log.error(UNKNOWN_PUSH, revision=strings.between(data.error, "'", "'"))
        if is_text(data) and data.startswith("unknown revision"):
            Log.error(UNKNOWN_PUSH, revision=strings.between(data, "'", "'"))
        # branch.url = _trim(url)  # RECORD THIS SUCCESS IN THE BRANCH
        return data

def test_simple_query(self):
    if self.not_real_service():
        return

    query = value2json({"from": "unittest"}).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

def inner(changeset_id):
    # ALWAYS TRY ES FIRST
    moves = _get_changeset_from_es(self.moves, changeset_id).changeset.moves
    if moves:
        return moves

    url = URL(revision.branch.url) / "raw-rev" / changeset_id
    DEBUG and Log.note("get unified diff from {{url}}", url=url)
    try:
        # THE ENCODING DOES NOT MATTER BECAUSE WE ONLY USE THE '+', '-' PREFIXES IN THE DIFF
        moves = http.get(url).content.decode("latin1")
        return diff_to_moves(text(moves))
    except Exception as e:
        Log.warning("could not get unified diff from {{url}}", url=url, cause=e)

def _get_branches_from_hg(kwarg):
    # GET MAIN PAGE
    response = http.get(kwarg.url)
    doc = BeautifulSoup(response.all_content, "html.parser")

    all_repos = doc("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for i, r in enumerate(all_repos("tr")):
        dir, name = [v.text.strip() for v in r("td")]
        b = _get_single_branch_from_hg(kwarg, name, dir.lstrip("/"))
        branches.extend(b)

    # branches.add(set_default({"name": "release-mozilla-beta"}, branches["mozilla-beta", DEFAULT_LOCALE]))
    for b in list(branches["mozilla-beta", ]):
        branches.add(set_default({"name": "release-mozilla-beta"}, b))  # THIS IS THE l10n "name"
        b.url = "https://hg.mozilla.org/releases/mozilla-beta"  # THIS IS THE

    for b in list(branches["mozilla-release", ]):
        branches.add(set_default({"name": "release-mozilla-release"}, b))

    for b in list(branches["mozilla-aurora", ]):
        if b.locale == "en-US":
            continue
        branches.add(set_default({"name": "comm-aurora"}, b))
        # b.url = "https://hg.mozilla.org/releases/mozilla-aurora"

    for b in list(branches):
        if b.name.startswith("mozilla-esr"):
            branches.add(set_default({"name": "release-" + b.name}, b))  # THIS IS THE l10n "name"
            b.url = "https://hg.mozilla.org/releases/" + b.name

    # CHECKS
    for b in branches:
        if b.name != b.name.lower():
            Log.error("Expecting lowercase name")
        if not b.locale:
            Log.error("Not expected")
        if not b.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not b.etl.timestamp:
            Log.error("Expecting a timestamp")

    return branches

def test_timing(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"name": "count", "value": "run.duration", "aggregate": "count"},
            {"name": "total", "value": "run.duration", "aggregate": "sum"}
        ],
        "edges": [
            {"name": "chunk", "value": ["run.suite", "run.chunk"]},
            "result.ok"
        ],
        "where": {"and": [
            {"lt": {"timestamp": Date.floor(Date.now()).milli / 1000}},
            {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "cube",
        "samples": {
            "limit": 30
        }
    }})

    query = value2json(test.query).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

def test_failures_by_directory(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"aggregate": "count"}
        ],
        "edges": [
            "result.test",
            "result.ok"
        ],
        "where": {
            "prefix": {"result.test": "/"}
        },
        "format": "table"
    }})

    query = value2json(test.query).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

def inner(changeset_id):
    # ALWAYS TRY ES FIRST
    json_diff = _get_changeset_from_es(self.repo, changeset_id).changeset.diff
    if json_diff:
        return json_diff

    url = URL(revision.branch.url) / "raw-rev" / changeset_id
    DEBUG and Log.note("get unified diff from {{url}}", url=url)
    try:
        response = http.get(url)
        try:
            diff = response.content.decode("utf8")
        except Exception as e:
            diff = response.content.decode("latin1")
        # File("tests/resources/big.patch").write_bytes(response.content)
        json_diff = diff_to_json(diff)
        num_changes = _count(c for f in json_diff for c in f.changes)
        if json_diff:
            if IGNORE_MERGE_DIFFS and revision.changeset.description.startswith("merge "):
                return None  # IGNORE THE MERGE CHANGESETS
            elif num_changes < MAX_DIFF_SIZE:
                return json_diff
            else:
                Log.warning(
                    "Revision at {{url}} has a diff with {{num}} changes, ignored",
                    url=url,
                    num=num_changes,
                )
                for file in json_diff:
                    file.changes = None
                return json_diff
    except Exception as e:
        Log.warning("could not get unified diff from {{url}}", url=url, cause=e)

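# A minimal standalone sketch of the decode fallback used above: try UTF-8
# first and fall back to Latin-1, which cannot raise because every byte value
# maps to a Latin-1 code point. The function name is hypothetical.
def decode_diff(raw_bytes):
    try:
        return raw_bytes.decode("utf8")
    except UnicodeDecodeError:
        return raw_bytes.decode("latin1")
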
def test_longest_running_tests(self):
    test = wrap({"query": {
        "sort": {"sort": -1, "field": "avg"},
        "from": {
            "from": "unittest",
            "where": {"and": [{"gt": {"build.date": "1439337600"}}]},
            "groupby": ["build.platform", "build.type", "run.suite", "result.test"],
            "select": [{"aggregate": "avg", "name": "avg", "value": "result.duration"}],
            "format": "table",
            "limit": 100
        },
        "limit": 100,
        "format": "list"
    }})

    query = value2json(test.query).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

def test_multiple_agg_on_same_field(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {
                "name": "max_bytes",
                "value": "run.stats.bytes",
                "aggregate": "max"
            },
            {
                "name": "count",
                "value": "run.stats.bytes",
                "aggregate": "count"
            }
        ]
    }})

    query = value2json(test.query).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

def get_ssl_pem_file(url):
    filename = File(".pem") / URL(url).host
    filename.write_bytes(http.get(url).content)
    return {"cafile": filename.abspath}

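# Hedged usage sketch: the dict returned above matches the keyword arguments of
# the standard library's ssl.create_default_context(), which is how the
# commented-out line in _open() appears to consume it. The wrapper name and
# URL below are hypothetical.
import ssl

def make_ssl_context(pem_url):
    return ssl.create_default_context(**get_ssl_pem_file(pem_url))

# context = make_ssl_context("https://example.com/ca.pem")
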
def _get_source_code_from_hg(self, revision, file_path):
    response = http.get(
        URL(revision.branch.url) / "raw-file" / revision.changeset.id / file_path
    )
    return response.content.decode("utf8", "replace")

def test_call_google(self):
    http.get("https://google.com")

def inner(changeset_id):
    if self.moves.cluster.version.startswith("1.7."):
        query = {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"prefix": {"changeset.id": changeset_id}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}},
                ]},
            }},
            "size": 1,
        }
    else:
        query = {
            "query": {"bool": {"must": [
                {"prefix": {"changeset.id": changeset_id}},
                {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}},
            ]}},
            "size": 1,
        }

    try:
        # ALWAYS TRY ES FIRST
        with self.moves_locker:
            response = self.moves.search(query)
            moves = response.hits.hits[0]._source.changeset.moves
        if moves:
            return moves
    except Exception as e:
        pass

    url = URL(revision.branch.url) / "raw-rev" / changeset_id
    DEBUG and Log.note("get unified diff from {{url}}", url=url)
    try:
        # THE ENCODING DOES NOT MATTER BECAUSE WE ONLY USE THE '+', '-' PREFIXES IN THE DIFF
        moves = http.get(url).content.decode("latin1")
        return diff_to_moves(text(moves))
    except Exception as e:
        Log.warning("could not get unified diff from {{url}}", url=url, cause=e)

def inner(changeset_id):
    if self.repo.cluster.version.startswith("1.7."):
        query = {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"prefix": {"changeset.id": changeset_id}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}},
                ]},
            }},
            "size": 1,
        }
    else:
        query = {
            "query": {"bool": {"must": [
                {"prefix": {"changeset.id": changeset_id}},
                {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}},
            ]}},
            "size": 1,
        }

    try:
        # ALWAYS TRY ES FIRST
        with self.repo_locker:
            response = self.repo.search(query)
            json_diff = response.hits.hits[0]._source.changeset.diff
        if json_diff:
            return json_diff
    except Exception as e:
        pass

    url = URL(revision.branch.url) / "raw-rev" / changeset_id
    DEBUG and Log.note("get unified diff from {{url}}", url=url)
    try:
        response = http.get(url)
        try:
            diff = response.content.decode("utf8")
        except Exception as e:
            diff = response.content.decode("latin1")
        json_diff = diff_to_json(diff)
        num_changes = _count(c for f in json_diff for c in f.changes)
        if json_diff:
            if revision.changeset.description.startswith("merge "):
                return None  # IGNORE THE MERGE CHANGESETS
            elif num_changes < MAX_DIFF_SIZE:
                return json_diff
            else:
                Log.warning(
                    "Revision at {{url}} has a diff with {{num}} changes, ignored",
                    url=url,
                    num=num_changes,
                )
                for file in json_diff:
                    file.changes = None
                return json_diff
    except Exception as e:
        Log.warning("could not get unified diff from {{url}}", url=url, cause=e)

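# A hedged refactoring sketch: the two inner() variants above build the same
# changeset lookup, differing only in the ES 1.7 "filtered" wrapper. A shared
# builder (name hypothetical, constants from the surrounding code) would remove
# the duplication.
def _changeset_query(cluster_version, changeset_id):
    conditions = [
        {"prefix": {"changeset.id": changeset_id}},
        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}},
    ]
    if cluster_version.startswith("1.7."):
        return {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": conditions},
            }},
            "size": 1,
        }
    return {"query": {"bool": {"must": conditions}}, "size": 1}
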
def read_lines(self, key):
    url = self.url + "/" + key
    return http.get(url).all_lines

def _get_single_branch_from_hg(settings, description, dir):
    if dir == "users":
        return []
    response = http.get(settings.url + "/" + dir)
    doc = BeautifulSoup(response.all_content, "html.parser")

    output = []
    try:
        all_branches = doc("table")[0]
    except Exception:
        return []

    for i, b in enumerate(all_branches("tr")):
        if i == 0:
            continue  # IGNORE HEADER
        columns = b("td")

        try:
            path = columns[0].a.get("href")
            if path == "/":
                continue

            name, desc, last_used = [c.text.strip() for c in columns][0:3]

            if last_used.startswith("at"):
                last_used = last_used[2:]

            detail = Data(
                name=name.lower(),
                locale=DEFAULT_LOCALE,
                parent_name=description,
                url=settings.url + path,
                description=desc,
                last_used=Date(last_used),
                etl={"timestamp": Date.now()},
            )
            if detail.description == "unknown":
                detail.description = None

            # SOME BRANCHES HAVE NAME COLLISIONS, IGNORE LEAST POPULAR
            if path in [
                "/projects/dxr/",  # moved to webtools
                "/build/compare-locales/",  # ?build team likes to clone?
                "/build/puppet/",  # ?build team likes to clone?
                "/SeaMonkey/puppet/",  # loses the popularity contest
                "/releases/gaia-l10n/v1_2/en-US/",  # use default branch
                "/releases/gaia-l10n/v1_3/en-US/",  # use default branch
                "/releases/gaia-l10n/v1_4/en-US/",  # use default branch
                "/releases/gaia-l10n/v2_0/en-US/",  # use default branch
                "/releases/gaia-l10n/v2_1/en-US/",  # use default branch
                "/build/autoland/",
            ]:
                continue

            # MARKUP BRANCH IF LOCALE SPECIFIC
            if path.startswith("/l10n-central"):
                _path = path.strip("/").split("/")
                detail.locale = _path[-1]
                detail.name = "mozilla-central"
            elif path.startswith("/releases/l10n/"):
                _path = path.strip("/").split("/")
                detail.locale = _path[-1]
                detail.name = _path[-2].lower()
            elif path.startswith("/releases/gaia-l10n/"):
                _path = path.strip("/").split("/")
                detail.locale = _path[-1]
                detail.name = "gaia-" + _path[-2][1:]
            elif path.startswith("/weave-l10n"):
                _path = path.strip("/").split("/")
                detail.locale = _path[-1]
                detail.name = "weave"

            if BRANCH_WHITELIST is not None:
                found = False
                for br in BRANCH_WHITELIST:
                    if br in str(detail.name):
                        found = True
                        break
                if not found:
                    continue

            Log.note("Branch {{name}} {{locale}}", name=detail.name, locale=detail.locale)
            output.append(detail)
        except Exception as e:
            Log.warning("branch digestion problem", cause=e)

    return output
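
# Worked example of the locale-path parsing above (hypothetical repository path):
#   "/releases/l10n/mozilla-release/de/".strip("/").split("/")
#       -> ["releases", "l10n", "mozilla-release", "de"]
#   so detail.locale = "de" and detail.name = "mozilla-release".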