def parse_pylint(lines):
    severity_opts = {'E': 'error',
                     'F': 'fatal',
                     'W': 'warning',
                     'C': 'convention',
                     'R': 'refactor'}
    for line in lines:
        match = LINE_RE.match(line)
        if match is None:
            continue
        info = match.groupdict()
        severity = severity_opts[info['err']]
        yield Issue(cwe=None,
                    testid=info['id'],
                    location=Location(file=File(info['path'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['col']))),
                    severity=severity,
                    message=Message(text=info['msg']),
                    notes=None,
                    trace=None)
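# A minimal usage sketch for parse_pylint, assuming pylint output has been
# captured to the hypothetical file 'pylint.log' (LINE_RE and the firehose
# model classes are defined elsewhere in this module):
def _demo_parse_pylint():
    with open('pylint.log') as f:
        for issue in parse_pylint(f):
            print(issue.testid, issue.severity, issue.message.text)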
def parse_roodi(lines):
    dic = {'should': 'warning',
           'missing': 'error',
           "Don't": 'critical'}
    for line in lines:
        info = LINE_RE.match(line)
        if info is None:
            continue
        testid = str(id(line))
        # TODO find a pretty way to remove color escape codes
        path = info.group('path')[5:]
        message = info.group('msg')[:-5]
        # Fall back to no severity if no known keyword appears in the message
        severity = None
        for key in dic:
            if key in info.group('msg'):
                severity = dic[key]
        yield Issue(cwe=None,
                    testid=testid,
                    location=Location(file=File(path, None),
                                      function=None,
                                      point=Point(int(info.group('line')), 1)),
                    severity=severity,
                    message=Message(message),
                    notes=None,
                    trace=None)
def parse_plist(pathOrFile, analyzerversion=None, sut=None, file_=None,
                stats=None):
    """
    Given a .plist file emitted by clang-static-analyzer (e.g. via
    scan-build), parse it and return an Analysis instance
    """
    plist = plistlib.readPlist(pathOrFile)
    # We now have the .plist file as a hierarchy of dicts, lists, etc

    # Handy debug dump:
    if 0:
        pprint(plist)

    # A list of filenames, apparently referenced by index within
    # diagnostics:
    files = plist['files']

    generator = Generator(name='clang-analyzer',
                          version=analyzerversion)
    metadata = Metadata(generator, sut, file_, stats)
    analysis = Analysis(metadata, [])

    for diagnostic in plist['diagnostics']:
        if 0:
            pprint(diagnostic)

        cwe = None

        # TODO: we're not yet handling the following:
        #   diagnostic['category']
        #   diagnostic['type']
        message = Message(text=diagnostic['description'])

        loc = diagnostic['location']
        location = Location(file=File(givenpath=files[loc['file']],
                                      abspath=None),
                            # FIXME: doesn't tell us function name
                            # TODO: can we patch this upstream?
                            function=None,
                            point=Point(int(loc['line']),
                                        int(loc['col'])))

        notes = None

        trace = make_trace(files, diagnostic['path'])

        issue = Issue(cwe,
                      None,  # FIXME: can we get at the test id?
                      location, message, notes, trace)

        analysis.results.append(issue)

    return analysis
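# A minimal usage sketch for parse_plist, assuming a scan-build report exists
# at the hypothetical path 'report.plist' (the version string is also made up):
def _demo_parse_plist():
    analysis = parse_plist('report.plist', analyzerversion='3.4')
    for issue in analysis.results:
        print(issue.location.file.givenpath, issue.message.text)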
def parse_line(line):
    """
    :param line: current line read from file
    :type  line: str

    :return: Issue if match, else None
    """
    match = FINDBUGS_PATTERN.match(line)
    if match:
        if DEBUG:
            print(match.groupdict())
            print(match.groups())
        match2 = PATH_AND_FUNCTION_PATTERN.match(
            match.group("bug_path_and_function"))
        if match2:
            if DEBUG:
                print(match2.groupdict())
                print(match2.groups())
            message = Message(match.group("bug_message"))
            # findbugs has no column information
            point = Point(int(match.group("bug_line_number")), 0)
            function = Function(match2.group("bug_function_name"))
            path = match.group("bug_file_name")
            if match2.group("bug_class_path"):
                path = match2.group("bug_class_path").replace(".", "/") \
                    + "/" + path
            path = File(path, None)
            location = Location(path, function, point)
            return Issue(None, None, location, message, None, None)
def make_complex_analysis(self):
    """
    Construct an Analysis instance that uses all features
    """
    a = Analysis(
        metadata=Metadata(
            generator=Generator(name='cpychecker',
                                version='0.11'),
            sut=SourceRpm(name='python-ethtool',
                          version='0.7',
                          release='4.fc19',
                          buildarch='x86_64'),
            file_=File(givenpath='foo.c',
                       abspath='/home/david/coding/foo.c'),
            stats=Stats(wallclocktime=0.4)),
        results=[
            Issue(cwe=681,
                  testid='refcount-too-high',
                  location=Location(
                      file=File(givenpath='foo.c',
                                abspath='/home/david/coding/foo.c'),
                      function=Function('bar'),
                      point=Point(10, 15)),
                  message=Message(text='something bad involving pointers'),
                  notes=Notes('here is some explanatory text'),
                  trace=Trace([
                      State(location=Location(file=File('foo.c', None),
                                              function=Function('bar'),
                                              point=Point(7, 12)),
                            notes=Notes('first we do this')),
                      State(location=Location(file=File('foo.c', None),
                                              function=Function('bar'),
                                              point=Point(8, 10)),
                            notes=Notes('then we do that')),
                      State(location=Location(file=File('foo.c', None),
                                              function=Function('bar'),
                                              range_=Range(Point(10, 15),
                                                           Point(10, 25))),
                            notes=Notes('then it crashes here')),
                  ]),
                  severity='really bad',
                  customfields=CustomFields(foo='bar')),
        ],
        customfields=CustomFields(
            gccinvocation='gcc -I/usr/include/python2.7 -c foo.c'),
    )
    return a, a.results[0]
def parse_file(fileobj, sut=None, file_=None, stats=None):
    tree = ET.parse(fileobj)
    root = tree.getroot()
    node_cppcheck = root.find('cppcheck')
    version = node_cppcheck.get('version')
    node_errors = root.find('errors')

    generator = Generator(name='cppcheck',
                          version=version)
    metadata = Metadata(generator, sut, file_, stats)
    analysis = Analysis(metadata, [])

    for node_error in node_errors.findall('error'):
        # e.g.:
        # <error id="nullPointer" severity="error" msg="Possible null pointer dereference: end - otherwise it is redundant to check it against null." verbose="Possible null pointer dereference: end - otherwise it is redundant to check it against null.">
        #   <location file="python-ethtool/ethtool.c" line="139"/>
        #   <location file="python-ethtool/ethtool.c" line="141"/>
        # </error>
        testid = node_error.get('id')
        str_msg = node_error.get('msg')
        str_verbose = node_error.get('verbose')
        message = Message(text=str_msg)
        if str_verbose != str_msg:
            notes = Notes(str_verbose)
        else:
            notes = None

        location_nodes = list(node_error.findall('location'))
        for node_location in location_nodes:
            location = Location(
                file=File(node_location.get('file'), None),
                # FIXME: doesn't tell us function name
                # TODO: can we patch this upstream?
                function=None,
                # cppcheck doesn't emit column information
                point=Point(int(node_location.get('line')),
                            0))  # FIXME: bogus column
            issue = Issue(None, testid, location, message, notes, None,
                          severity=node_error.get('severity'))
            analysis.results.append(issue)

        if not location_nodes:
            customfields = CustomFields()
            if str_verbose != str_msg:
                customfields['verbose'] = str_verbose
            failure = Failure(failureid=testid,
                              location=None,
                              message=message,
                              customfields=customfields)
            analysis.results.append(failure)

    return analysis
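# A minimal usage sketch for the cppcheck parse_file above, assuming an XML
# report was produced with something like `cppcheck --xml-version=2 src/
# 2> cppcheck.xml` (the filename is hypothetical):
def _demo_parse_cppcheck_file():
    with open('cppcheck.xml') as f:
        analysis = parse_file(f)
    for result in analysis.results:
        print(result)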
def make_state(event):
    """
    Construct a State instance from an event within the JSON
    """
    loc = Location(file=File(givenpath=event['filePathname'],
                             abspath=None),
                   function=None,
                   point=Point(int(event['lineNumber']), 0))
    notes = Notes(text=event['eventDescription'])
    return State(loc, notes)
def parse_findbugs(payload):
    tree = lxml.etree.fromstring(payload)
    for result in tree.xpath("//BugCollection/BugInstance/Class/SourceLine"):
        keys = result.keys()
        p = None
        # Start is not always defined
        if 'start' in keys:
            line = result.attrib['start']
            p = Point(int(line), 0)
            if 'end' in keys:
                # findbugs reports an end line rather than a column; it is
                # stored as the second coordinate here
                lineEnd = result.attrib['end']
                p = Point(int(line), int(lineEnd))
        if 'sourcepath' in keys:
            path = result.attrib['sourcepath']
        else:
            path = "unknown"
        message, = result.xpath("../../LongMessage")
        message = message.text
        testid = result.getparent().getparent().attrib['instanceHash']
        rank = int(result.getparent().getparent().attrib['rank'])
        if rank <= 4:
            severity = "scariest"
        elif rank <= 9:
            severity = "scary"
        elif rank <= 14:
            severity = "troubling"
        else:
            severity = "concern"
        yield Issue(cwe=None,
                    testid=testid,
                    location=Location(file=File(path, None),
                                      function=None,
                                      point=p),
                    severity=severity,
                    message=Message(text=message),
                    notes=None,
                    trace=None)
def parse_json_v2(path):
    """
    Given a JSON file emitted by:
      cov-format-errors --json-output-v2=<filename>
    parse it and return an Analysis instance
    """
    with open(path) as f:
        js = json.load(f)
    if 0:
        pprint(js)

    generator = Generator(name='coverity')
    metadata = Metadata(generator, sut=None, file_=None, stats=None)
    analysis = Analysis(metadata, [])

    for issue in js['issues']:
        if 0:
            pprint(issue)

        cwe = None

        # Use checkerName (e.g. "RESOURCE_LEAK") for the testid:
        testid = issue['checkerName']

        # Use the eventDescription of the final event for the message:
        message = Message(text=issue['events'][-1]['eventDescription'])

        location = Location(
            file=File(givenpath=issue['mainEventFilePathname'],
                      abspath=None),
            function=Function(name=issue['functionDisplayName']),
            point=Point(int(issue['mainEventLineNumber']), 0))
        notes = None
        trace = make_trace(issue)

        customfields = CustomFields()
        for key in ['mergeKey', 'subcategory', 'domain']:
            if key in issue:
                customfields[key] = issue[key]

        issue = Issue(cwe, testid, location, message, notes, trace,
                      customfields=customfields)
        analysis.results.append(issue)

    return analysis
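# A minimal usage sketch for parse_json_v2, assuming the report was produced
# with `cov-format-errors --json-output-v2=coverity.json` (the filename is
# hypothetical):
def _demo_parse_json_v2():
    analysis = parse_json_v2('coverity.json')
    for issue in analysis.results:
        print(issue.testid, issue.location.file.givenpath, issue.message.text)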
def parse_BugInstance(bugInstance):
    message = Message(bugInstance.find("LongMessage").text)
    # findbugs has no column information
    sourceLine = bugInstance.find("SourceLine")
    point = Point(int(sourceLine.get("start")), 0)
    function = bugInstance.find("Method").find("Message").text
    tmpIndex = function.rfind("In method ") + len("In method ") - 1
    function = Function(function[tmpIndex + 1:])
    path = sourceLine.get("sourcepath")
    path = File(path, None)
    location = Location(path, function, point)
    if DEBUG:
        print(str(location) + " " + str(message))
    return Issue(None, None, location, message, None, None)
def parse_pep8(lines):
    for line in lines:
        match = LINE_RE.match(line)
        if match is None:
            continue
        info = match.groupdict()
        severity = "error" if info['err'].startswith("E") else "warning"
        yield Issue(cwe=None,
                    testid=info['err'],
                    location=Location(file=File(info['path'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['col']))),
                    severity=severity,
                    message=Message(text=line),
                    notes=None,
                    trace=None)
def parse_jshint(lines):
    for line in lines:
        file_match = FILE_RE.match(line)
        if file_match:
            path = file_match.groupdict()
        line_match = LINE_RE.match(line)
        if line_match is None:
            continue
        info = line_match.groupdict()
        yield Issue(cwe=None,
                    testid=info['testid'],
                    location=Location(file=File(path['file'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['col']))),
                    severity=info['severity'],
                    message=Message(text=info['msg']),
                    notes=None,
                    trace=None)
def make_info(self):
    a = Analysis(
        metadata=Metadata(generator=Generator(name='an-invented-checker'),
                          sut=None, file_=None, stats=None),
        results=[
            Info(infoid='gimple-stats',
                 location=Location(file=File('bar.c', None),
                                   function=Function('sample_function'),
                                   point=Point(10, 15)),
                 message=Message('sample message'),
                 customfields=CustomFields(num_stmts=57,
                                           num_basic_blocks=10))
        ])
    return a, a.results[0]
def parse_perlcritic(lines):
    for line in lines:
        info = LINE_EXPR.match(line)
        if info is None:
            continue
        info = info.groupdict()
        yield Issue(cwe=None,
                    testid=info['testid'],
                    location=Location(file=File(info['file'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['column']))),
                    severity=info['severity'],
                    message=Message(text=info['message']),
                    notes=None,
                    trace=None)
def to_issue(self):
    """
    Generate an Issue from this csv row.
    """
    location = Location(file=File(givenpath=self.file,
                                  abspath=None),
                        function=None,  # FIXME
                        point=Point(int(self.line),
                                    int(self.column)))
    return Issue(cwe=None,
                 testid=self.flag_name,
                 location=location,
                 message=Message(self.warning_text),
                 notes=Notes(self.additional_text),
                 trace=None,
                 severity=self.priority,
                 customfields=None)
def make_failed_analysis(self):
    a = Analysis(
        metadata=Metadata(generator=Generator(name='yet-another-checker'),
                          sut=None, file_=None, stats=None),
        results=[
            Failure(failureid='out-of-memory',
                    location=Location(
                        file=File('foo.c', None),
                        function=Function('something_complicated'),
                        point=Point(10, 15)),
                    message=Message('out of memory'),
                    customfields=CustomFields(stdout='sample stdout',
                                              stderr='sample stderr',
                                              returncode=-9))  # (killed)
        ])
    return a, a.results[0]
def parse_warning(match_warning):
    """
    :param match_warning: the matched object
    :type  match_warning: SRE_Match

    :return: Issue
    """
    message = Message(match_warning.group('message'))
    point = Point(int(match_warning.group('line')), 0)
    path = File(match_warning.group('path'), None)
    location = Location(file=path,
                        function=None,
                        point=point)
    return Issue(cwe=None, testid=None, location=location, message=message,
                 notes=None, trace=None)
def make_simple_analysis(self):
    """
    Construct a minimal Analysis instance
    """
    a = Analysis(
        metadata=Metadata(generator=Generator(name='cpychecker'),
                          sut=None, file_=None, stats=None),
        results=[
            Issue(cwe=None,
                  testid=None,
                  location=Location(file=File('foo.c', None),
                                    function=None,
                                    point=Point(10, 15)),
                  message=Message(text='something bad involving pointers'),
                  notes=None,
                  trace=None)
        ])
    return a, a.results[0]
def parse_cppcheck(payload):
    tree = lxml.etree.fromstring(payload)
    for result in tree.xpath("//results/error"):
        if 'file' not in result.attrib:
            continue
        path = result.attrib['file']
        line = result.attrib['line']
        severity = result.attrib['severity']
        message = result.attrib['msg']
        testid = result.attrib['id']
        yield Issue(cwe=None,
                    testid=testid,
                    location=Location(
                        file=File(path, None),
                        function=None,
                        point=Point(int(line), 0) if line else None),
                    severity=severity,
                    message=Message(text=message),
                    notes=None,
                    trace=None)
def parse_warning(line, func_name):
    """
    :param line: current line read from file
    :type  line: basestring
    :param func_name: name of the current function
    :type  func_name: basestring

    :return: Issue if match, else None
    """
    match = GCC_PATTERN.match(line)
    if match:
        message = Message(match.group('message'))
        func = Function(func_name)
        try:
            column = int(match.group('column'))
        except ValueError:
            if match.group('column') == '':
                column = 0
            else:
                raise
        except TypeError:
            column = None
        switch_match = SWITCH_SUB_PATTERN.match(match.group('switch') or '')
        if switch_match:
            switch = switch_match.group('name')
        else:
            switch = None
        point = Point(int(match.group('line')), column)
        path = File(match.group('path'), None)
        location = Location(path, func, point)
        return Issue(None, switch, location, message, None, None)
def parse_file(infile):
    """
    Parse flawfinder output.

    :infile: file-like object
    :returns: Firehose Analysis object, representing the final XML.

    Flawfinder can generate multiple CWEs for a single issue, but Firehose's
    model does not support multiple CWEs.  For now, when multiple CWEs occur,
    we keep only the first one.  An issue was created to track this bug:
    https://github.com/fedora-static-analysis/firehose/issues/35
    """
    line = infile.readline()
    generator = Generator(name='flawfinder',
                          version=get_flawfinder_version(line))
    metadata = Metadata(generator, None, None, None)
    analysis = Analysis(metadata, [])

    # A regex for "filename:linenum:"
    ISSUE_LINE_PATTERN = r"(\S.*)\:([0-9]+)\:"
    # A regex for the reported severity, e.g. "[2]"
    ISSUE_SEVERITY_PATTERN = r"\[([0-9]+)\]"
    # A regex for the reported testid, e.g. "(buffer)"
    ISSUE_TESTID_PATTERN = r"\(([a-z]+)\)"
    WHITESPACE = r"\s+"
    FIRST_LINE_PATTERN = (ISSUE_LINE_PATTERN + WHITESPACE
                          + ISSUE_SEVERITY_PATTERN + WHITESPACE
                          + ISSUE_TESTID_PATTERN)
    prog = re.compile(FIRST_LINE_PATTERN)

    while line:
        m = prog.match(line)
        if m:
            issue_path = m.group(1)
            issue_line = m.group(2)
            issue_severity = m.group(3)
            testid = m.group(4)
            location = Location(file=File(issue_path, None),
                                function=None,
                                point=Point(int(issue_line), 0))

            message_line = infile.readline()
            issue_message = ""
            # Stop at the next issue header, a blank line, or EOF
            while (message_line and not prog.search(message_line)
                   and message_line != "\n"):
                # Build up issue_message as one line, stripping out
                # extraneous whitespace.
                if issue_message:
                    issue_message += " " + message_line.strip()
                else:
                    issue_message = message_line.strip()
                message_line = infile.readline()
            line = message_line

            cwes = [int(cwe) for cwe in re.findall(r"CWE-([0-9]+)",
                                                   issue_message)]
            if cwes:
                first_cwe = cwes[0]
            else:
                first_cwe = None

            issue = Issue(first_cwe, testid, location,
                          Message(text=issue_message),
                          notes=None, trace=None,
                          severity=issue_severity, customfields=None)
            analysis.results.append(issue)
        else:
            line = infile.readline()

    return analysis
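# A minimal usage sketch for the flawfinder parse_file above, assuming a
# plain-text report was saved to the hypothetical file 'flawfinder.txt'
# (e.g. via `flawfinder src/ > flawfinder.txt`):
def _demo_parse_flawfinder():
    with open('flawfinder.txt') as f:
        analysis = parse_file(f)
    for issue in analysis.results:
        print(issue.cwe, issue.testid, issue.severity)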
def parse_plist(pathOrFile, analyzerversion=None, sut=None, file_=None,
                stats=None):
    """
    Given a .plist file emitted by clang-static-analyzer (e.g. via
    scan-build), parse it and return an Analysis instance
    """
    plist = plistlib.readPlist(pathOrFile)
    # We now have the .plist file as a hierarchy of dicts, lists, etc

    # Handy debug dump:
    if 0:
        pprint(plist)

    # A list of filenames, apparently referenced by index within
    # diagnostics:
    files = plist['files']

    generator = Generator(name='clang-analyzer',
                          version=analyzerversion)
    metadata = Metadata(generator, sut, file_, stats)
    analysis = Analysis(metadata, [])

    if 'clang_version' in plist:
        generator.version = plist['clang_version']

    for diagnostic in plist['diagnostics']:
        if 0:
            pprint(diagnostic)

        cwe = None

        customfields = CustomFields()
        for key in ['category', 'issue_context', 'issue_context_kind']:
            if key in diagnostic:
                customfields[key] = diagnostic[key]

        message = Message(text=diagnostic['description'])

        loc = diagnostic['location']
        location = Location(file=File(givenpath=files[loc['file']],
                                      abspath=None),
                            # FIXME: doesn't tell us function name
                            # TODO: can we patch this upstream?
                            function=None,
                            point=Point(int(loc['line']),
                                        int(loc['col'])))

        notes = None

        trace = make_trace(files, diagnostic['path'])

        issue = Issue(cwe,
                      # Use the 'type' field for the testid:
                      diagnostic['type'],
                      location, message, notes, trace,
                      customfields=customfields)

        analysis.results.append(issue)

    return analysis
def make_point_from_plist_point(loc):
    # point:
    #   e.g. {'col': 2, 'file': 0, 'line': 130}
    return Point(int(loc['line']), int(loc['col']))