# process the total times for m in re.finditer(self._time_pat, inputstr): num = m.group('num') time = m.group('time') d = piperesults.get(num, {}) d['pipeline_time'] = time # process the starts for m in re.finditer(self._start_pat, inputstr): num = m.group('num') time = m.group('time') d = piperesults.get(num, {}) d['pipeline_start'] = time # process the ends for m in re.finditer(self._end_pat, inputstr): num = m.group('num') time = m.group('time') d = piperesults.get(num, {}) d['pipeline_end'] = time # combine into records for num, d in piperesults.iteritems(): d['pipeline_id'] = num cr = pdict.copy() cr.update(d) yield cr if __name__ == '__main__': cli(PipelineLogParser())
class ImpalaLogParser(Parser):
    """Parse Impala benchmark log output into per-query result records."""

    def recorditer(self, inputstr):
        """Yield one result dict per successful query run found in *inputstr*.

        Each record carries fixed run parameters (machine/system/nnode),
        the codegen flag and scale parsed from the log header, plus the
        query name and the three measured times (kept as strings, exactly
        as they appear in the log).  Runs followed by an
        'ABOVE QUERY FAILED:1' marker are reported and skipped.
        """
        # One match per query run: a header line, three "Time:" lines,
        # and an optional failure marker immediately after.
        querypat = re.compile(
            r"Running query: (?P<query>q\d+)[_a-z]+,"
            r" no_codegen: (?P<ncodegen>\d+), scale: (?P<scale>\d+)\n"
            r"Time:(?P<preptime>\d+[.]\d+)\n"
            r"Time:(?P<runtime1>\d+[.]\d+)\n"
            r"Time:(?P<runtime2>\d+[.]\d+)\n"
            r"(?P<failmsg>(ABOVE QUERY FAILED:1)?)"
        )
        for m in querypat.finditer(inputstr):
            # Fixed parameters of this benchmark run plus parsed knobs.
            r = {
                "machine": "bigdata",
                "system": "impala",
                "nnode": 16,
                # The log records "no_codegen"; invert to get codegen on/off.
                "codegen": 1 - int(m.group("ncodegen")),
                "scale": m.group("scale"),
            }
            # Measured values, copied straight from the named groups.
            for k in ["query", "runtime1", "runtime2", "preptime"]:
                r[k] = m.group(k)
            if m.group("failmsg") != "":
                # Parenthesized single argument: identical output under
                # Python 2's print statement, and valid under Python 3
                # (the original bare print statement was Python-2-only).
                print("failed query {0}; not saving".format(r["query"]))
                continue
            yield r


if __name__ == "__main__":
    cli(ImpalaLogParser())
self.includes_params = includes_params def recorditer(self, inputstr): jparams = JSONParamsParser(PARAMS_TAG) sparams = JSONParamsParser('STATS') if self.includes_params: assert jparams.count(inputstr) == sparams.count(inputstr), \ "different numbers of STATS and PARAMS; " \ "check your log file for errors" # concurrently search for adjacent pairs of PARAMS and STATS for pdict, sdict in itertools.izip( jparams.idict_from_json(inputstr), sparams.idict_from_json(inputstr)): result = {} result.update(pdict) result.update(sdict) yield result else: for sdict in itertools.izip( jparams.idict_from_json(inputstr)): result = {} result.update(sdict) yield result if __name__ == '__main__': cli(GrappaLogParser())