def json2value(json_string, params=None, flexible=False, leaves=False):
    """
    :param json_string: THE JSON (unicode; byte strings are rejected)
    :param params: STANDARD JSON PARAMS (expanded into {{name}} templates)
    :param flexible: REMOVE COMMENTS AND TRAILING COMMAS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    # NOTE: default changed from `params={}` (shared mutable default) to None;
    # `if params:` treats None and {} identically, so behavior is unchanged
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        value = wrap(json_decoder(json_string))

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if "Expecting '" in e and "' delimiter: line" in e:
            # POINT AT THE OFFENDING CHARACTER IN THE SOURCE LINE
            line_index = int(strings.between(e.message, " line ", " column ")) - 1
            column = int(strings.between(e.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n")

        base_str = unicode2utf8(strings.limit(json_string, 1000))
        hexx_str = bytes2hex(base_str, " ")
        try:
            # BUG FIX: the join must span ALL characters; the original joined each
            # single character, then added a str to a generator, which always raised
            # and fell into the blank fallback below
            char_str = " " + "  ".join(c.decode("latin1") if ord(c) >= 32 else "." for c in base_str)
        except Exception:
            char_str = " "
        Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n", e)
def fix(rownum, line, source, sample_only_filter, sample_size):
    """
    Convert one raw JSON line into a row for indexing.

    :param rownum: line number within the key (0 == first line)
    :param line: raw JSON text
    :param source: source wrapper; source.name selects special handling
    :param sample_only_filter: jx filter; when it matches row 0, stop early
    :param sample_size: inverse sampling probability (default 0.01)
    :return: (row, please_stop) — please_stop=True when sampling matched row 0
    """
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # EXAMPLE: "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                # REPLACE THE SUITE OBJECT WITH JUST ITS NAME
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            # OVERSIZED LINE: KEEP ONLY FAILING SUBTESTS
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST PATH: AVOID DECODING JSON
        _id = strings.between(line, "\"_id\": \"", "\"")
        row = {"id": _id, "json": line}

    return row, False
def _get_url(url, branch, **kwargs):
    """
    Fetch and decode JSON push data from `url`.

    Raises (via Log.error) when the server reports an unknown revision.
    On success, records the working URL back onto `branch`.
    """
    with Explanation("get push from {{url}}", url=url):
        response = http.get(url, **kwargs)
        data = convert.json2value(response.content.decode("utf8"))

        # SERVER RETURNS A PLAIN STRING FOR UNKNOWN CHANGESETS
        if isinstance(data, basestring) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))

        branch.url = _trim(url)  # RECORD THIS SUCCESS IN THE BRANCH
        return data
def pull_repo(repo):
    """
    Ensure a local mercurial clone of `repo` exists and is up to date.

    Clones when the .hg directory is missing; otherwise pulls. Known-bad pull
    states delete the local copy and retry by recursing.
    """
    if not File(os.path.join(repo.directory, ".hg")).exists:
        # REPO DOES NOT EXIST, CLONE IT
        File(repo.directory).delete()

        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1,
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repos.url}}, because {{problem}}", {"repos": repo, "problem": line})
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()
    else:
        # MAKE SURE A DEFAULT PATH EXISTS SO `hg pull` KNOWS WHERE TO GO
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1,
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                # BAD LOCAL STATE: START OVER WITH A FRESH CLONE
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                # NOTE(review): Log.error likely raises, making the lines below
                # unreachable — preserved as-is; confirm intent
                Log.error("Problem pulling repos, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
def fix(rownum, line, source, sample_only_filter, sample_size):
    """
    Normalize one JSON line into an indexable row.

    Returns (row, please_stop); please_stop is True only when the sampling
    filter matched the first row, meaning no further lines are needed.
    """
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = (
        line
        .replace('{"id": "bb"}', '{"code": "bb"}')
        .replace('{"id": "tc"}', '{"code": "tc"}')
    )

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    too_long = len(line) > 100000  # OVERSIZED LINES GET THEIR SUBTESTS TRIMMED

    if rownum == 0:
        value = convert.json2value(line)
        if too_long:
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        sampled = (
            sample_only_filter
            and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0
            and jx.filter([value], sample_only_filter)
        )
        if sampled:
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif too_long:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif "\"resource_usage\":" in line:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST: PULL THE ID OUT WITHOUT DECODING THE JSON
        _id = strings.between(line, "\"_id\": \"", "\"")
        row = {"id": _id, "json": line}

    return row, False
def replace_vars(text, params=None):
    """
    REPLACE {{vars}} WITH ENVIRONMENTAL VALUES

    Each {{var}} is interpreted as a Date expression and replaced with its
    unix timestamp; variables that fail to parse are skipped over.

    :param text: template text containing {{var}} markers
    :param params: unused (kept for interface compatibility)
    :return: text with all resolvable {{var}} markers replaced
    """
    start = 0
    var = strings.between(text, "{{", "}}", start)
    while var:
        replace = "{{" + var + "}}"
        index = text.find(replace, 0)
        if index == -1:
            Log.error("could not find {{var}} (including quotes)", var=replace)

        end = index + len(replace)
        try:
            replacement = unicode(Date(var).unix)
            text = text[:index] + replacement + text[end:]
            start = index + len(replacement)
        except Exception as _:
            # NOT A DATE EXPRESSION; SKIP PAST THIS MARKER AND KEEP SCANNING
            start += 1

        var = strings.between(text, "{{", "}}", start)

    # BUG FIX: the original never returned, so every caller received None
    return text
def query(self, query):
    """
    Run `query` against self.esq; return None on success.

    On failure, attempt to recover the translated ES query embedded in the
    error detail (between >>>> and <<<<) and return it instead.
    """
    try:
        with self.esq:
            self.esq.query(query)
        return None
    except Exception as e:
        f = Except(ERROR, unicode(e), trace=extract_tb(1))
        try:
            details = str(f)
            query = convert.json2value(strings.between(details, ">>>>", "<<<<"))
            return query
        except Exception as g:
            # EXTRACTION FAILED; REPORT THE ORIGINAL FAILURE, NOT `g`
            Log.error("problem", f)
def _load_all_in_push(self, revision, locale=None):
    """
    Load every changeset in the push containing `revision` into self.es.

    Example source:
    http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c
    """
    # BRANCH MAY ARRIVE AS A NAME OR AS A BRANCH OBJECT
    if isinstance(revision.branch, basestring):
        lower_name = revision.branch.lower()
    else:
        lower_name = revision.branch.name.lower()

    revision.branch = self.branches[lower_name, locale]
    if not revision.branch:
        Log.error("can not find branch ({{branch}}, {{locale}})", name=lower_name, locale=locale)

    Log.note(
        "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}})",
        branch=revision.branch.name,
        locale=locale,
        changeset=revision.changeset.id,
    )

    url = revision.branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + revision.changeset.id
    try:
        response = self._get_and_retry(url)
        data = convert.json2value(response.all_content.decode("utf8"))

        # SERVER RETURNS A PLAIN STRING FOR UNKNOWN CHANGESETS
        if isinstance(data, basestring) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))

        for index, _push in data.items():
            push = Push(id=int(index), date=_push.date, user=_push.user)
            self.current_push = push
            revs = []
            for c in _push.changesets:
                changeset = Changeset(id=c.node, **c)
                rev = self.get_revision(Revision(branch=revision.branch, changeset=changeset), locale)
                rev.push = push
                _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name
                revs.append({"id": _id, "value": rev})
            self.es.extend(revs)
    except Exception as e:
        Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)
def copy(self, keys, source, sample_only_filter=None, sample_size=None):
    """
    Read each key's lines from `source` and push rows onto self.queue.

    :param keys: keys to copy
    :param source: object exposing read_lines(key)
    :param sample_only_filter: qb filter; when it matches row 0, only that
        row is queued and the rest of the key is skipped
    :param sample_size: inverse sampling probability (default 0.01)
    """
    num_keys = 0
    for key in keys:
        try:
            for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                if rownum == 0:
                    value = convert.json2value(line)
                    if len(line) > 1000000:
                        # TOO MANY SUBTESTS TO KEEP; DROP THEM AND FLAG IT
                        value.result.subtests = None
                        value.result.missing_subtests = True
                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                    sampled = (
                        sample_only_filter
                        and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0
                        and qb.filter([value], sample_only_filter)
                    )
                    if sampled:
                        # INDEX etl.id==0, BUT NO MORE
                        if value.etl.id != 0:
                            Log.error("Expecting etl.id==0")
                        num_keys += 1
                        self.queue.add(row)
                        break
                elif len(line) > 1000000:
                    value = convert.json2value(line)
                    value.result.subtests = None
                    value.result.missing_subtests = True
                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                else:
                    # FAST: AVOID DECODING JSON
                    _id = strings.between(line, "_id\": \"", "\"")
                    row = {"id": _id, "json": line}

                num_keys += 1
                self.queue.add(row)
        except Exception as e:
            Log.warning("Could not get queue for {{key}}", key=key, cause=e)
def _load_all_in_push(self, revision, locale=None):
    """
    Index every changeset of the push that contains `revision`.

    Example source:
    http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c
    """
    # ACCEPT EITHER A BRANCH NAME OR A BRANCH OBJECT
    raw_branch = revision.branch
    lower_name = (raw_branch if isinstance(raw_branch, basestring) else raw_branch.name).lower()

    revision.branch = self.branches[lower_name, locale]
    if not revision.branch:
        Log.error("can not find branch ({{branch}}, {{locale}})", name=lower_name, locale=locale)

    Log.note(
        "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}})",
        branch=revision.branch.name,
        locale=locale,
        changeset=revision.changeset.id,
    )

    url = revision.branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + revision.changeset.id
    try:
        response = self._get_and_retry(url)
        data = convert.json2value(response.all_content.decode("utf8"))

        # UNKNOWN CHANGESETS COME BACK AS A PLAIN STRING, NOT JSON
        if isinstance(data, basestring) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))

        for index, _push in data.items():
            push = Push(id=int(index), date=_push.date, user=_push.user)
            self.current_push = push
            revs = []
            for c in _push.changesets:
                changeset = Changeset(id=c.node, **c)
                rev = self.get_revision(Revision(branch=revision.branch, changeset=changeset), locale)
                rev.push = push
                _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name
                revs.append({"id": _id, "value": rev})
            self.es.extend(revs)
    except Exception as e:
        Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)
def copy(self, keys, source, sample_only_filter=None, sample_size=None):
    """
    Copy rows for each key from `source` into self.queue.

    When `sample_only_filter` matches the first row, only that row is queued
    and the remainder of the key is skipped.
    """
    TOO_LONG = 1000000  # LINES OVER THIS SIZE HAVE SUBTESTS DROPPED

    num_keys = 0
    for key in keys:
        try:
            for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                oversized = len(line) > TOO_LONG

                if rownum == 0:
                    value = convert.json2value(line)
                    if oversized:
                        value.result.subtests = None
                        value.result.missing_subtests = True
                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                    if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and qb.filter([value], sample_only_filter):
                        # INDEX etl.id==0, BUT NO MORE
                        if value.etl.id != 0:
                            Log.error("Expecting etl.id==0")
                        num_keys += 1
                        self.queue.add(row)
                        break
                elif oversized:
                    value = convert.json2value(line)
                    value.result.subtests = None
                    value.result.missing_subtests = True
                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                else:
                    # FAST PATH: EXTRACT THE ID WITHOUT DECODING JSON
                    _id = strings.between(line, "_id\": \"", "\"")
                    row = {"id": _id, "json": line}

                num_keys += 1
                self.queue.add(row)
        except Exception as e:
            Log.warning("Could not get queue for {{key}}", key=key, cause=e)
value = wrap_leaves(value) return value except Exception, e: e = Except.wrap(e) if not json_string.strip(): Log.error("JSON string is only whitespace") c = e while "Expecting '" in c.cause and "' delimiter: line" in c.cause: c = c.cause if "Expecting '" in c and "' delimiter: line" in c: line_index = int(strings.between(c.message, " line ", " column ")) - 1 column = int(strings.between(c.message, " column ", " ")) - 1 line = json_string.split("\n")[line_index].replace("\t", " ") if column > 20: sample = "..." + line[column - 20:] pointer = " " + (" " * 20) + "^" else: sample = line pointer = (" " * column) + "^" if len(sample) > 43: sample = sample[:43] + "..." Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n") base_str = unicode2utf8(strings.limit(json_string, 1000))
def fix_locale(locale):
    """
    Strip stray quotes from a locale name.

    Compensates for bug https://bugzilla.mozilla.org/show_bug.cgi?id=1174979
    """
    if '"' not in locale:
        return locale
    return strings.between(locale, "\"", "\"")
def pull_repo(repo):
    """
    Bring the local mercurial copy of `repo` up to date.

    Missing .hg directory => clone from repo.url; otherwise pull. Broken
    local state is deleted and the operation retried recursively.
    """
    hg_marker = File(os.path.join(repo.directory, ".hg"))

    if not hg_marker.exists:
        # REPO DOES NOT EXIST, CLONE IT
        File(repo.directory).delete()

        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            clone = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1,
            )
            try:
                while True:
                    status = clone.stdout.readline()
                    if status.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}",
                            {"repos": repo, "problem": status},
                        )
                    if status == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": status})
            finally:
                clone.wait()
        return

    # ENSURE THE DEFAULT PATH IS CONFIGURED BEFORE PULLING
    hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
    if not hgrc_file.exists:
        hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

    # REPO EXISTS, PULL TO UPDATE
    with Timer("Pull hg log for {{name}}", {"name": repo.name}):
        pull = subprocess.Popen(
            ["hg", "pull", "--cwd", File(repo.directory).filename],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=-1,
        )
        (output, _) = pull.communicate()

        if output.find("abort: repository default not found!") >= 0:
            # CORRUPT CONFIG: WIPE AND RE-CLONE
            File(repo.directory).delete()
            pull_repo(repo)
            return
        if output.find("abort: abandoned transaction found") >= 0:
            # NOTE(review): Log.error likely raises, so the retry below may be
            # unreachable — preserved verbatim; confirm intent
            Log.error(
                "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                {"reason": output},
            )
            File(repo.directory).delete()
            pull_repo(repo)
            return
        if output.find("abort: ") >= 0:
            Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

        Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})