def test_extract_file_contents_zip_no_file():
    """A zip archive with no ``setup.py`` entry is expected to yield ``None``."""
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode="w") as writer:
        # Only a differently named file is stored, so the lookup must miss.
        writer.writestr("foo/notsetup.py", b"these are some contents")
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted is None
def test_extract_file_contents_zip():
    """A zip archive holding ``foo/setup.py`` is expected to yield that file's bytes."""
    expected = b"these are some contents"
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode="w") as writer:
        # A top-level entry is written first; the assertion below expects the
        # nested setup.py to be the one returned.
        writer.writestr("toplevelgetsskipped", b"nothing to see here")
        writer.writestr("foo/setup.py", expected)
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted == expected
def test_extract_file_contents_tar_no_file():
    """A gzipped tar with no ``setup.py`` member is expected to yield ``None``."""
    # (member name, payload) pairs, added in this order.
    entries = (
        ("foo/notsetup.py", b"these are some contents"),
        ("toplevelgetsskipped", b"nothing to see here"),
    )

    archive = io.BytesIO()
    with tarfile.open(fileobj=archive, mode="w:gz") as writer:
        for member_name, payload in entries:
            info = tarfile.TarInfo(name=member_name)
            # The size must be set explicitly or the member is stored empty.
            info.size = len(payload)
            writer.addfile(info, fileobj=io.BytesIO(payload))
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted is None
def test_extract_file_contents_tar_empty():
    """A stream that is not a readable archive is expected to yield ``None``."""
    bogus_archive = io.BytesIO(b"invalid tar contents")

    extracted = utils.extract_file_content(bogus_archive, "setup.py")

    assert extracted is None
def scan(self, **kwargs):
    """Match an sdist's ``setup.py`` against the YARA rules and record a verdict.

    Keyword arguments read from ``kwargs``:
        obj: The release file under inspection; its ``packagetype`` and
            ``id`` attributes are used.
        file_url: Passed to ``fetch_url_content`` to obtain the archive.

    Exactly one verdict is added for an sdist: Indeterminate when no
    ``setup.py`` could be extracted, a combined verdict when rules match,
    or a low-confidence Benign verdict otherwise. Other package types
    return without adding a verdict.

    Raises:
        FatalCheckException: If ``obj`` or ``file_url`` is absent or ``None``.
    """
    target = kwargs.get("obj")
    url = kwargs.get("file_url")
    if target is None or url is None:
        raise FatalCheckException(
            "Release file or file url is None, indicating user error."
        )

    # Only sdists carry a setup.py (per PEP 491, bdists do not), so there is
    # nothing for this check to do on any other package type.
    if release_is_not_sdist := target.packagetype != "sdist":
        return

    setup_py = extract_file_content(fetch_url_content(url), "setup.py")
    if setup_py is None:
        self.add_verdict(
            file_id=target.id,
            classification=VerdictClassification.Indeterminate,
            confidence=VerdictConfidence.High,
            message="sdist does not contain a suitable setup.py for analysis",
        )
        return

    hits = self._yara_rules.match(data=setup_py)
    if len(hits) == 0:
        # Nothing matched: report a low-confidence benign verdict.
        self.add_verdict(
            file_id=target.id,
            classification=VerdictClassification.Benign,
            confidence=VerdictConfidence.Low,
            message="No malicious patterns found in setup.py",
        )
        return

    # Collapse all matches into one verdict by taking the highest
    # classification and the highest confidence among them.
    worst_classification = max(
        VerdictClassification(hit.meta["classification"]) for hit in hits
    )
    top_confidence = max(
        VerdictConfidence(hit.meta["confidence"]) for hit in hits
    )
    rule_names = ":".join(hit.rule for hit in hits)

    # NOTE: The raw matched bytes (third element of each string entry) are
    # left out because they would need encoding to be JSON-serializable;
    # the first two elements are kept for triage.
    per_rule = {
        hit.rule: {
            "classification": hit.meta["classification"],
            "confidence": hit.meta["confidence"],
            "strings": [[entry[0], entry[1]] for entry in hit.strings],
        }
        for hit in hits
    }

    self.add_verdict(
        file_id=target.id,
        classification=worst_classification,
        confidence=top_confidence,
        message=rule_names,
        details=per_rule,
    )