Exemplo n.º 1
0
    def get_javascript(self, obj, f, version):
        """Extract the Javascript attached to a PDF dictionary through "/JS".

        Returns a dict with the keys "orig_code", "beautified" and "urls"
        (always an empty list) when `obj.object` is a PDF dictionary whose
        "/JS" entry is either an inline string or a reference that can be
        resolved in `f.body[version].objects`. Returns None in every other
        case; unsupported entry types and broken references are logged.
        """
        holder = obj.object
        if not isinstance(holder, peepdf.PDFCore.PDFDictionary):
            return None

        if "/JS" not in holder.elements:
            return None

        js_entry = holder.elements["/JS"]

        if isinstance(js_entry, peepdf.PDFCore.PDFString):
            # Inline script: the string object itself provides the code.
            code_source = js_entry
        elif not isinstance(js_entry, peepdf.PDFCore.PDFReference):
            log.warning("PDFObject: can't follow type %s", js_entry)
            return None
        elif js_entry.id not in f.body[version].objects:
            log.warning("PDFObject: Reference is broken, can't follow")
            return None
        else:
            # Indirect script: the code lives in the referenced object.
            code_source = f.body[version].objects[js_entry.id].object

        return {
            "orig_code": self._parse_string(
                "".join(code_source.getJSCode())
            ),
            "beautified": self._parse_string(
                jsbeautify("".join(code_source.getJSCode()))
            ),
            "urls": [],
        }
Exemplo n.º 2
0
    def get_javascript(self, obj, f, version):
        """Return the Javascript found under the "/JS" key of a dictionary.

        The result is a dict with "orig_code", "beautified" and an empty
        "urls" list. An inline string entry is read through getJSCode();
        a reference is resolved in `f.body[version].objects` and the
        target's decoded stream is used. None is returned when `obj.object`
        is not a dictionary, has no "/JS" key, or the entry cannot be
        followed (the latter two cases are logged).
        """
        pdf_dict = obj.object
        if (not isinstance(pdf_dict, peepdf.PDFCore.PDFDictionary) or
                "/JS" not in pdf_dict.elements):
            return None

        js_entry = pdf_dict.elements["/JS"]

        if isinstance(js_entry, peepdf.PDFCore.PDFString):
            # Inline script stored directly in the dictionary.
            result = {"urls": []}
            result["orig_code"] = "".join(js_entry.getJSCode())
            result["beautified"] = jsbeautify("".join(js_entry.getJSCode()))
            return result

        if not isinstance(js_entry, peepdf.PDFCore.PDFReference):
            log.warning("PDFObject: can't follow type %s", js_entry)
            return None

        known_objects = f.body[version].objects
        if js_entry.id not in known_objects:
            log.warning("PDFObject: Reference is broken, can't follow")
            return None

        # Indirect script: use the decoded stream of the referenced object.
        target = known_objects[js_entry.id].object
        result = {"urls": []}
        result["orig_code"] = target.decodedStream
        result["beautified"] = jsbeautify(target.decodedStream)
        return result
Exemplo n.º 3
0
def test_jsbeautify_packer(p, capsys):
    """utils.jsbeautify() must not leak what the beautifier prints.

    `p` is presumably a mock injected by a patch decorator that is outside
    this view (TODO confirm); its `beautify` is made to print an error
    line, and the test then checks that neither stdout nor stderr
    received anything.
    """
    def noisy_beautify(s):
        print u"error: Unknown p.a.c.k.e.r. encoding.\n",

    p.beautify.side_effect = noisy_beautify
    utils.jsbeautify("thisisjavascript")

    stdout, stderr = capsys.readouterr()
    assert not (stdout or stderr)
Exemplo n.º 4
0
def test_jsbeautify_packer(p, capsys):
    """Verify that output printed during beautification is swallowed.

    The `beautify` attribute of `p` (presumably a patched-in mock; verify
    against the decorator, which is not visible here) prints an error
    message when called. After running utils.jsbeautify(), the captured
    stdout and stderr must both be empty.
    """
    def printing_side_effect(s):
        print u"error: Unknown p.a.c.k.e.r. encoding.\n",

    p.beautify.side_effect = printing_side_effect
    utils.jsbeautify("thisisjavascript")

    captured_out, captured_err = capsys.readouterr()
    assert not (captured_out or captured_err)
Exemplo n.º 5
0
    def run(self):
        """Parse the PDF at `self.filepath` and summarise every version.

        Returns a list holding one dict per version (0 .. f.updates), each
        with the sanitized basic metadata fields, the Javascript streams
        found in that version ("javascript") and the URLs collected from
        its dictionaries ("urls"). Returns None, after logging a warning,
        when the parser reports a non-zero error code.
        """
        parser = peepdf.PDFCore.PDFParser()
        status, pdf = parser.parse(
            self.filepath, forceMode=True,
            looseMode=True, manualAnalysis=False
        )
        if status:
            log.warning("Error parsing PDF file, error code %s", status)
            return None

        # Metadata fields copied (sanitized) into every version row.
        metadata_keys = (
            "creator", "creation", "title", "subject",
            "producer", "author", "modification",
        )

        results = []
        for version in xrange(pdf.updates + 1):
            md = pdf.getBasicMetadata(version)

            row = dict(
                (key, self._sanitize(md, key)) for key in metadata_keys
            )
            row["version"] = version
            row["javascript"] = []
            row["urls"] = []

            for item in pdf.body[version].objects.values():
                content = item.object

                if content.type == "stream":
                    raw = content.decodedStream

                    # Only keep streams that actually look like Javascript.
                    if peepdf.JSAnalysis.isJavascript(raw):
                        code = raw.decode("latin-1")
                        row["javascript"].append({
                            "orig_code": code,
                            "beautified": jsbeautify(code),
                            "urls": [],
                        })
                elif content.type == "dictionary":
                    # Both URL collections are handled the same way.
                    for attr in ("urlsFound", "uriList"):
                        for url in getattr(content, attr):
                            row["urls"].append(self._parse_string(url))

            results.append(row)

        return results
Exemplo n.º 6
0
    def get_javascript(self, obj, f, version):
        """Return the Javascript attached to a PDF dictionary through "/JS".

        The "/JS" entry may hold the script inline as a string or point to
        another object through a reference. Both forms are handled; any
        other entry type is logged and skipped instead of raising an
        AttributeError on the missing `id` attribute.

        Returns a dict with "orig_code", "beautified" and an empty "urls"
        list, or None when `obj.object` is not a dictionary, has no "/JS"
        key, or the entry cannot be followed.
        """
        if not isinstance(obj.object, peepdf.PDFCore.PDFDictionary):
            return

        if "/JS" not in obj.object.elements:
            return

        ref = obj.object.elements["/JS"]

        # Inline script: the string object carries the code itself and has
        # no `id` to dereference.
        if isinstance(ref, peepdf.PDFCore.PDFString):
            code = "".join(ref.getJSCode())
            return {
                "orig_code": code,
                "beautified": jsbeautify(code),
                "urls": []
            }

        # Anything that is not a reference cannot be resolved below.
        if not isinstance(ref, peepdf.PDFCore.PDFReference):
            log.warning("PDFObject: can't follow type %s", ref)
            return

        if ref.id not in f.body[version].objects:
            log.warning("PDFObject: Reference is broken, can't follow")
            return

        obj = f.body[version].objects[ref.id]
        return {
            "orig_code": obj.object.decodedStream,
            "beautified": jsbeautify(obj.object.decodedStream),
            "urls": []
        }
Exemplo n.º 7
0
    def walk_object(self, obj, entry):
        """Recursively collect Javascript and URLs from `obj` into `entry`.

        Streams recognised as Javascript are appended to
        entry["javascript"]. For dictionaries, the URLs in `urlsFound` and
        `uriList` plus a string-valued "/URI" element are appended to
        entry["urls"], and every element is walked in turn. Array elements
        are walked as well. Any other object type is ignored.
        """
        core = peepdf.PDFCore

        if isinstance(obj, core.PDFStream):
            data = obj.decodedStream

            # Only record streams that actually look like Javascript.
            if peepdf.JSAnalysis.isJavascript(data):
                code = data.decode("latin-1")
                entry["javascript"].append({
                    "orig_code": code,
                    "beautified": jsbeautify(code),
                    "urls": [],
                })
        elif isinstance(obj, core.PDFDictionary):
            for attr in ("urlsFound", "uriList"):
                for url in getattr(obj, attr):
                    entry["urls"].append(self._parse_string(url))

            # TODO We should probably add some more criteria here.
            uri_value = obj.elements.get("/URI")
            if uri_value:
                if not isinstance(uri_value, core.PDFString):
                    log.warning(
                        "Identified a potential URL, but its associated "
                        "type is not a string?"
                    )
                else:
                    # NOTE(review): unlike the URLs above, this value is
                    # stored without going through _parse_string().
                    entry["urls"].append(uri_value.value)

            for child in obj.elements.values():
                self.walk_object(child, entry)
        elif isinstance(obj, core.PDFArray):
            for child in obj.elements:
                self.walk_object(child, entry)
Exemplo n.º 8
0
    def walk_object(self, obj, entry):
        """Walk a PDF object tree, filling `entry` with scripts and URLs.

        A stream is added to entry["javascript"] when it is detected as
        Javascript. A dictionary contributes its `urlsFound`, `uriList`
        and string-valued "/URI" element to entry["urls"], after which all
        of its elements are visited recursively. The elements of an array
        are visited recursively too. Other object types are skipped.
        """
        pdfcore = peepdf.PDFCore

        if isinstance(obj, pdfcore.PDFStream):
            decoded = obj.decodedStream

            # Skip streams that do not look like Javascript code.
            if peepdf.JSAnalysis.isJavascript(decoded):
                script = decoded.decode("latin-1")
                entry["javascript"].append({
                    "orig_code": script,
                    "beautified": jsbeautify(script),
                    "urls": [],
                })
        elif isinstance(obj, pdfcore.PDFDictionary):
            for source in ("urlsFound", "uriList"):
                for link in getattr(obj, source):
                    entry["urls"].append(self._parse_string(link))

            # TODO We should probably add some more criteria here.
            candidate = obj.elements.get("/URI")
            if candidate:
                if not isinstance(candidate, pdfcore.PDFString):
                    log.warning(
                        "Identified a potential URL, but its associated "
                        "type is not a string?"
                    )
                else:
                    # NOTE(review): stored as-is, without _parse_string().
                    entry["urls"].append(candidate.value)

            for member in obj.elements.values():
                self.walk_object(member, entry)
        elif isinstance(obj, pdfcore.PDFArray):
            for member in obj.elements:
                self.walk_object(member, entry)
Exemplo n.º 9
0
 def _api_CWindow_AddTimeoutCode(self, event):
     """Beautify the "code" argument of a CWindow_AddTimeoutCode event.

     Sets event["raw"] to the one-element tuple ("code",) and replaces
     event["arguments"]["code"] with the result of jsbeautify().
     """
     # The parentheses only make the one-element tuple explicit.
     event["raw"] = ("code",)
     arguments = event["arguments"]
     arguments["code"] = jsbeautify(arguments["code"])
Exemplo n.º 10
0
 def _api_COleScript_Compile(self, event):
     """Beautify the "script" argument of a COleScript_Compile event.

     Sets event["raw"] to the one-element tuple ("script",) and replaces
     event["arguments"]["script"] with the result of jsbeautify().
     """
     # The parentheses only make the one-element tuple explicit.
     event["raw"] = ("script",)
     arguments = event["arguments"]
     arguments["script"] = jsbeautify(arguments["script"])
Exemplo n.º 11
0
 def _api_pdf_eval(self, event):
     """Beautify the "script" argument of a pdf_eval event.

     Sets event["raw"] to the one-element tuple ("script",) and replaces
     event["arguments"]["script"] with the result of jsbeautify().
     """
     # The parentheses only make the one-element tuple explicit.
     event["raw"] = ("script",)
     arguments = event["arguments"]
     arguments["script"] = jsbeautify(arguments["script"])
Exemplo n.º 12
0
def test_jsbeautifier_exception():
    """utils.jsbeautify() must return this fixture's content unchanged.

    Judging by the test name, the fixture presumably makes the underlying
    beautifier raise (TODO confirm); the assertion only checks that the
    input comes back as-is.
    """
    # Context manager so the fixture's file handle is closed right away
    # instead of being left to the garbage collector.
    with open("tests/files/jsbeautifier1.js", "rb") as fixture:
        buf = fixture.read()
    assert utils.jsbeautify(buf) == buf
Exemplo n.º 13
0
def test_jsbeautify():
    """Check utils.jsbeautify() against known input/output pairs."""
    # Maps minified source to the formatting jsbeautify() should produce.
    expected_by_source = {
        "if(1){a(1,2,3);}": "if (1) {\n    a(1, 2, 3);\n}",
    }
    for source in expected_by_source:
        assert utils.jsbeautify(source) == expected_by_source[source]
Exemplo n.º 14
0
def test_jsbeautify():
    """Verify that utils.jsbeautify() reformats minified Javascript."""
    # Each key is a minified snippet, each value its expected pretty form.
    samples = {
        "if(1){a(1,2,3);}": "if (1) {\n    a(1, 2, 3);\n}",
    }
    for minified in samples:
        assert utils.jsbeautify(minified) == samples[minified]
Exemplo n.º 15
0
def test_jsbeautifier_exception():
    """Check that utils.jsbeautify() hands this fixture back unmodified.

    The test name suggests the fixture triggers an exception inside the
    beautifier (verify against utils.jsbeautify); what is asserted here is
    only that the returned value equals the input.
    """
    # Read through a context manager so the file handle does not leak.
    with open("tests/files/jsbeautifier1.js", "rb") as fixture:
        buf = fixture.read()
    assert utils.jsbeautify(buf) == buf