def get_javascript(self, obj, f, version):
    """Extract the Javascript attached to the "/JS" entry of a dictionary.

    The "/JS" value is used directly when it is a PDFString, or followed
    into ``f.body[version].objects`` when it is a PDFReference.

    :param obj: object whose ``.object`` is inspected for a "/JS" element.
    :param f: parsed PDF providing ``body[version].objects``.
    :param version: index into ``f.body`` used to resolve the reference.
    :return: dict with "orig_code", "beautified" and an empty "urls" list,
        or None when ``obj.object`` is not a PDFDictionary, has no "/JS"
        entry, the entry has an unsupported type, or the reference is not
        present in the given version.
    """
    container = obj.object
    if not isinstance(container, peepdf.PDFCore.PDFDictionary):
        return

    if "/JS" not in container.elements:
        return

    ref = container.elements["/JS"]
    if isinstance(ref, peepdf.PDFCore.PDFString):
        holder = ref
    elif isinstance(ref, peepdf.PDFCore.PDFReference):
        objects = f.body[version].objects
        if ref.id not in objects:
            log.warning("PDFObject: Reference is broken, can't follow")
            return
        holder = objects[ref.id].object
    else:
        log.warning("PDFObject: can't follow type %s", ref)
        return

    # Extract and join the code once; both result fields derive from it.
    code = "".join(holder.getJSCode())
    return {
        "orig_code": self._parse_string(code),
        "beautified": self._parse_string(jsbeautify(code)),
        "urls": [],
    }
def get_javascript(self, obj, f, version):
    """Extract the Javascript attached to the "/JS" entry of a dictionary.

    A PDFString value yields its joined ``getJSCode()`` output; a
    PDFReference is followed into ``f.body[version].objects`` and the
    ``decodedStream`` of the referenced object is used.

    :param obj: object whose ``.object`` is inspected for a "/JS" element.
    :param f: parsed PDF providing ``body[version].objects``.
    :param version: index into ``f.body`` used to resolve the reference.
    :return: dict with "orig_code", "beautified" and an empty "urls" list,
        or None when ``obj.object`` is not a PDFDictionary, has no "/JS"
        entry, the entry has an unsupported type, or the reference is not
        present in the given version.
    """
    container = obj.object
    if not isinstance(container, peepdf.PDFCore.PDFDictionary):
        return

    if "/JS" not in container.elements:
        return

    ref = container.elements["/JS"]
    if isinstance(ref, peepdf.PDFCore.PDFString):
        # Join once; the same text feeds both result fields.
        code = "".join(ref.getJSCode())
    elif isinstance(ref, peepdf.PDFCore.PDFReference):
        objects = f.body[version].objects
        if ref.id not in objects:
            log.warning("PDFObject: Reference is broken, can't follow")
            return
        code = objects[ref.id].object.decodedStream
    else:
        log.warning("PDFObject: can't follow type %s", ref)
        return

    return {
        "orig_code": code,
        "beautified": jsbeautify(code),
        "urls": [],
    }
def test_jsbeautify_packer(p, capsys): def beautify(s): print u"error: Unknown p.a.c.k.e.r. encoding.\n", p.beautify.side_effect = beautify utils.jsbeautify("thisisjavascript") out, err = capsys.readouterr() assert not out and not err
def run(self):
    """Parse the PDF at ``self.filepath`` and summarise each version.

    :return: list with one dict per version (0 .. ``updates``) holding
        the sanitized basic metadata, the Javascript streams found
        ("javascript") and the URLs found in dictionaries ("urls");
        None when the parser reports a non-zero error code.
    """
    parser = peepdf.PDFCore.PDFParser()
    error_code, pdf = parser.parse(
        self.filepath, forceMode=True, looseMode=True, manualAnalysis=False
    )
    if error_code:
        log.warning("Error parsing PDF file, error code %s", error_code)
        return

    metadata_keys = (
        "creator", "creation", "title", "subject",
        "producer", "author", "modification",
    )

    results = []
    for version in xrange(pdf.updates + 1):
        metadata = pdf.getBasicMetadata(version)

        summary = {"version": version}
        for key in metadata_keys:
            summary[key] = self._sanitize(metadata, key)
        summary["javascript"] = []
        summary["urls"] = []

        for item in pdf.body[version].objects.values():
            if item.object.type == "stream":
                stream = item.object.decodedStream

                # Only streams that peepdf identifies as Javascript are kept.
                if peepdf.JSAnalysis.isJavascript(stream):
                    code = stream.decode("latin-1")
                    summary["javascript"].append({
                        "orig_code": code,
                        "beautified": jsbeautify(code),
                        "urls": [],
                    })
            elif item.object.type == "dictionary":
                summary["urls"].extend(
                    self._parse_string(url) for url in item.object.urlsFound
                )
                summary["urls"].extend(
                    self._parse_string(url) for url in item.object.uriList
                )

        results.append(summary)

    return results
def get_javascript(self, obj, f, version):
    """Extract the Javascript attached to the "/JS" entry of a dictionary.

    A PDFString value yields its joined ``getJSCode()`` output; a
    PDFReference is followed into ``f.body[version].objects`` and the
    ``decodedStream`` of the referenced object is used. Any other value
    type is logged and skipped rather than dereferenced blindly.

    :param obj: object whose ``.object`` is inspected for a "/JS" element.
    :param f: parsed PDF providing ``body[version].objects``.
    :param version: index into ``f.body`` used to resolve the reference.
    :return: dict with "orig_code", "beautified" and an empty "urls" list,
        or None when ``obj.object`` is not a PDFDictionary, has no "/JS"
        entry, the entry has an unsupported type, or the reference is not
        present in the given version.
    """
    container = obj.object
    if not isinstance(container, peepdf.PDFCore.PDFDictionary):
        return

    if "/JS" not in container.elements:
        return

    ref = container.elements["/JS"]
    if isinstance(ref, peepdf.PDFCore.PDFString):
        # Inline script: there is no reference to follow.
        code = "".join(ref.getJSCode())
    elif isinstance(ref, peepdf.PDFCore.PDFReference):
        objects = f.body[version].objects
        if ref.id not in objects:
            log.warning("PDFObject: Reference is broken, can't follow")
            return
        code = objects[ref.id].object.decodedStream
    else:
        # Only references carry an ``id``; do not touch it on other types.
        log.warning("PDFObject: can't follow type %s", ref)
        return

    return {
        "orig_code": code,
        "beautified": jsbeautify(code),
        "urls": [],
    }
def walk_object(self, obj, entry):
    """Recursively collect Javascript and URLs from ``obj`` into ``entry``.

    :param obj: peepdf object to inspect; streams, dictionaries and arrays
        are handled, anything else is ignored.
    :param entry: dict whose "javascript" and "urls" lists are appended to.
    """
    # NOTE(review): the stream check must stay ahead of the dictionary
    # check, presumably because peepdf streams are also dictionaries -
    # confirm against peepdf.PDFCore.
    if isinstance(obj, peepdf.PDFCore.PDFStream):
        stream = obj.decodedStream

        # Only streams that peepdf identifies as Javascript are recorded.
        if peepdf.JSAnalysis.isJavascript(stream):
            code = stream.decode("latin-1")
            entry["javascript"].append({
                "orig_code": code,
                "beautified": jsbeautify(code),
                "urls": [],
            })
    elif isinstance(obj, peepdf.PDFCore.PDFDictionary):
        entry["urls"].extend(
            self._parse_string(url) for url in obj.urlsFound
        )
        entry["urls"].extend(
            self._parse_string(url) for url in obj.uriList
        )

        # TODO We should probably add some more criteria here.
        uri_obj = obj.elements.get("/URI")
        if uri_obj:
            if not isinstance(uri_obj, peepdf.PDFCore.PDFString):
                log.warning(
                    "Identified a potential URL, but its associated "
                    "type is not a string?"
                )
            else:
                # NOTE(review): unlike the URLs above, this value is
                # stored without going through self._parse_string -
                # confirm that is intended.
                entry["urls"].append(uri_obj.value)

        # Descend into every value held by the dictionary.
        for child in obj.elements.values():
            self.walk_object(child, entry)
    elif isinstance(obj, peepdf.PDFCore.PDFArray):
        for child in obj.elements:
            self.walk_object(child, entry)
def _api_CWindow_AddTimeoutCode(self, event):
    """Beautify the "code" argument of ``event`` in place.

    Also sets ``event["raw"]`` to the one-element tuple ``("code",)``.
    NOTE(review): presumably "raw" names the arguments to be shown
    unescaped - confirm against the consumer of this event.
    """
    event["raw"] = ("code",)
    arguments = event["arguments"]
    arguments["code"] = jsbeautify(arguments["code"])
def _api_COleScript_Compile(self, event):
    """Beautify the "script" argument of ``event`` in place.

    Also sets ``event["raw"]`` to the one-element tuple ``("script",)``.
    NOTE(review): presumably "raw" names the arguments to be shown
    unescaped - confirm against the consumer of this event.
    """
    event["raw"] = ("script",)
    beautified = jsbeautify(event["arguments"]["script"])
    event["arguments"]["script"] = beautified
def _api_pdf_eval(self, event):
    """Beautify the "script" argument of ``event`` in place.

    Also sets ``event["raw"]`` to the one-element tuple ``("script",)``.
    NOTE(review): presumably "raw" names the arguments to be shown
    unescaped - confirm against the consumer of this event.
    """
    event["raw"] = ("script",)
    script = event["arguments"]["script"]
    event["arguments"]["script"] = jsbeautify(script)
def test_jsbeautifier_exception():
    """Check that ``utils.jsbeautify`` returns this fixture unchanged.

    NOTE(review): going by the test name, the fixture presumably makes
    the underlying beautifier raise - confirm against utils.jsbeautify.
    """
    # Read through a context manager so the file handle is always closed.
    with open("tests/files/jsbeautifier1.js", "rb") as handle:
        buf = handle.read()

    assert utils.jsbeautify(buf) == buf
def test_jsbeautify():
    """Check ``utils.jsbeautify`` against known input/output pairs."""
    cases = (
        ("if(1){a(1,2,3);}", "if (1) {\n a(1, 2, 3);\n}"),
    )
    for source, expected in cases:
        assert utils.jsbeautify(source) == expected