示例#1
0
    def _is_pdf(self, http_resp):
        """
        :param http_resp: A http response object that contains a document of
                          type HTML / PDF / WML / etc.

        :return: True if the document parameter is a string that contains a PDF
                 document.
        """
        if http_resp.content_type in ('application/x-pdf', 'application/pdf'):
            document = http_resp.body

            #   With the objective of avoiding this bug:
            #   https://sourceforge.net/tracker/?func=detail&atid=853652&aid=2954220&group_id=170274
            #   I perform this safety check:
            if not document:
                return False

            #   Some PDF files don't end with %%EOF, they end with
            #   things like %%EOF\n , or %%EOF\r, or %%EOF\r\n.
            #   So... just to be sure I search in the last 12 characters.
            if document.startswith('%PDF-') and '%%EOF' in document[-12:]:
                try:
                    text = pdf_to_text(document)
                except Exception:
                    return False
                else:
                    return text != u''

        return False
示例#2
0
文件: pdf.py 项目: intfrr/Tortazo
 def _get_pdf_content(self, document_str):
     """
     Extract the text of a PDF document and break it into words.

     :param document_str: The PDF document, handed unchanged to pdf_to_text()
     :return: The result of calling split() without arguments on the text
              returned by pdf_to_text()
     """
     return pdf_to_text(document_str).split()
示例#3
0
文件: pdf.py 项目: cathartic/w3af
 def _get_pdf_content(self, document_str):
     """
     Extract the text of a PDF document and break it into words.

     :param document_str: The PDF document, handed unchanged to pdf_to_text()
     :return: The result of calling split() without arguments on the text
              returned by pdf_to_text()
     """
     return pdf_to_text(document_str).split()
示例#4
0
文件: test_pdf.py 项目: RON313/w3af
 def test_pdf_to_text_no_pdf(self):
     # Input that is not a PDF document must yield an empty string
     not_a_pdf = 'hello world'
     extracted = pdf_to_text(not_a_pdf)

     self.assertEqual('', extracted)
示例#5
0
文件: test_pdf.py 项目: RON313/w3af
 def test_pdf_to_text(self):
     # PDF is a binary format: read the sample in binary mode so the bytes
     # reach pdf_to_text() untouched, and use a context manager so the file
     # handle is closed even if the read fails. open() is used instead of
     # the file() constructor, which only exists in Python 2.
     with open(self.SIMPLE_SAMPLE, 'rb') as sample_file:
         pdf_content = sample_file.read()

     text = pdf_to_text(pdf_content)

     # Both words are expected in the text extracted from the sample
     self.assertIn('Hello', text)
     self.assertIn('World', text)
示例#6
0
 def test_pdf_to_text_no_pdf(self):
     # Input that is not a PDF document must yield an empty string
     not_a_pdf = 'hello world'
     extracted = pdf_to_text(not_a_pdf)

     self.assertEqual('', extracted)
示例#7
0
 def test_pdf_to_text(self):
     # PDF is a binary format: read the sample in binary mode so the bytes
     # reach pdf_to_text() untouched, and use a context manager so the file
     # handle is closed even if the read fails. open() is used instead of
     # the file() constructor, which only exists in Python 2.
     with open(self.SIMPLE_SAMPLE, 'rb') as sample_file:
         pdf_content = sample_file.read()

     text = pdf_to_text(pdf_content)

     # Both words are expected in the text extracted from the sample
     self.assertIn('Hello', text)
     self.assertIn('World', text)