Exemplo n.º 1
0
    def handle_pdf(self, url, content):
        """Parse `content` as a PDF and log the resulting analysis.

        The content is written to a temporary file, parsed with `PDFParser`
        (force and loose modes enabled), turned into an analysis document by
        `self.getPeepXML`, logged through `log.ThugLogging.log_peepdf`, and
        finally handed to `self.swf_mastah`. The temporary file is removed
        on every exit path.

        Args:
            url: Location the content was retrieved from; forwarded to the
                sample builder, `getPeepXML` and `swf_mastah`.
            content: Raw bytes of the document to analyze.

        Returns:
            None if no sample could be built or the sample type is not
            'PDF', False if the parser raised, True otherwise.
        """
        sample = log.ThugLogging.build_sample(content, url)
        if sample is None or sample['type'] not in ('PDF', ):
            return

        fd, rfile = tempfile.mkstemp()
        try:
            # mkstemp() hands back an already-open OS-level descriptor. Wrap
            # it instead of re-opening the path, otherwise the descriptor is
            # never closed and leaks on every call.
            with os.fdopen(fd, 'wb') as tmp:
                tmp.write(content)

            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except Exception:  # pylint:disable=broad-except
                # Any parser failure is reported as "not analyzed"; the
                # temporary file is cleaned up by the outer finally.
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
            log.ThugLogging.log_peepdf(log_dir, sample, analysis)

            self.swf_mastah(pdf, statsDict, url)
            return True
        finally:
            # Runs on success, on parse failure and on unexpected errors in
            # the post-processing steps, so the temp file never lingers.
            os.remove(rfile)
Exemplo n.º 2
0
    def handle_pdf(self, url, content):
        """Parse `content` as a PDF and log the resulting analysis.

        The content is written to a temporary file, parsed with `PDFParser`
        (force and loose modes enabled), turned into an analysis document by
        `self.getPeepXML`, logged through `log.ThugLogging.log_peepdf`, and
        finally handed to `self.swf_mastah`. The temporary file is removed
        on every exit path.

        Args:
            url: Location the content was retrieved from; forwarded to the
                sample builder, `getPeepXML` and `swf_mastah`.
            content: Raw bytes of the document to analyze.

        Returns:
            None if no sample could be built or the sample type is not
            'PDF', False if the parser raised, True otherwise.
        """
        sample = log.ThugLogging.build_sample(content, url)
        if sample is None or sample['type'] not in ('PDF', ):
            return

        fd, rfile = tempfile.mkstemp()
        try:
            # Reuse the descriptor opened by mkstemp(); opening the path a
            # second time would leave this descriptor open forever.
            with os.fdopen(fd, 'wb') as tmp:
                tmp.write(content)

            pdfparser = PDFParser()

            try:
                # Only the parsed document is needed; the status code the
                # parser returns alongside it is ignored.
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except Exception:  # pylint:disable=broad-except
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
            log.ThugLogging.log_peepdf(log_dir, sample, analysis)

            self.swf_mastah(pdf, statsDict, url)
            return True
        finally:
            # Single cleanup point: also covers failures in the stats,
            # logging and SWF steps, which previously left the file behind.
            os.remove(rfile)
Exemplo n.º 3
0
    def handle_pdf(self, url, content):
        """Parse `content` as a PDF and store its analysis as an XML file.

        The content is stored under `log.ThugLogging.baseDir` using its MD5
        hex digest as the name, parsed with `PDFParser` (force and loose
        modes enabled), and converted to an analysis document by
        `self.getPeepXML`. The analysis is stored as "<MD5>.xml" in the
        "analysis/pdf" directory below the base directory, after which
        `self.swf_mastah` is invoked. The stored copy of the content is
        removed on every exit path.

        Args:
            url: Location the content was retrieved from; forwarded to
                `getPeepXML`.
            content: Raw bytes of the document to analyze.

        Returns:
            False if the parser raised, True otherwise.
        """
        md5sum = hashlib.md5(content).hexdigest()

        # NOTE(review): store_content is assumed to return the path of the
        # file it wrote, since the result is passed to the parser and to
        # os.remove -- confirm against its definition.
        rfile = log.ThugLogging.store_content(log.ThugLogging.baseDir, md5sum, content)

        try:
            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except Exception:
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            pdflogdir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")

            try:
                os.makedirs(pdflogdir)
            except OSError:
                # Best effort, typically "directory already exists". Only
                # filesystem errors are ignored here; a directory that is
                # genuinely unusable is presumably reported by the
                # store_content call below.
                pass

            log.ThugLogging.store_content(pdflogdir, "%s.xml" % (statsDict["MD5"], ), analysis)
            self.swf_mastah(pdf, statsDict)
            return True
        finally:
            # Remove the stored copy whether parsing failed, analysis
            # succeeded, or a later step raised unexpectedly.
            os.remove(rfile)