Example #1
    def test_extract_emails_mailto(self):
        body = u'<a href="mailto:[email protected]">test</a>'
        resp = build_http_response(self.url, body)
        p = SGMLParser(resp)
        p.parse()

        expected_res = {u'*****@*****.**'}
        self.assertEqual(p.get_emails(), expected_res)
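
All of these examples rely on a build_http_response() test helper that is not shown in the snippets. A minimal sketch of what such a helper could look like, assuming the w3af HTTPResponse and Headers classes that Examples #8 and #9 construct directly (the real helper in the w3af test suite may differ), is:

# Sketch only: the actual helper lives in the w3af test suite and may differ.
from w3af.core.data.dc.headers import Headers
from w3af.core.data.url.HTTPResponse import HTTPResponse


def build_http_response(url, body, headers=None):
    headers = headers if headers is not None else Headers()

    if 'content-type' not in headers:
        headers['content-type'] = 'text/html'

    # A plain 200 response whose request URL and final URL are the same,
    # decoded as UTF-8, matching the direct construction in Examples #8/#9.
    return HTTPResponse(200, body, headers, url, url, charset='utf-8')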
Example #2
    def test_mailto_ignored_in_links(self):
        body = u'<a href="mailto:[email protected]">a</a>'
        resp = build_http_response(self.url, body)
        p = SGMLParser(resp)
        p.parse()

        parsed, _ = p.references
        self.assertEqual(parsed, [])
Example #3
    def test_get_emails_filter(self):
        resp = build_http_response(self.url, '')
        p = SGMLParser(resp)
        p._emails = {'*****@*****.**', '*****@*****.**'}

        self.assertEqual(p.get_emails(), {'*****@*****.**', '*****@*****.**'})

        self.assertEqual(p.get_emails(domain='w3af.com'), ['*****@*****.**'])
        self.assertEqual(p.get_emails(domain='not.com'), ['*****@*****.**'])
Example #4
    def test_mailto_subject_body(self):
        body = u'<a href="mailto:[email protected]?subject=testing out mailto'\
               u'&body=Just testing">test</a>'
        resp = build_http_response(self.url, body)
        p = SGMLParser(resp)
        p.parse()

        expected_res = {u'*****@*****.**'}
        self.assertEqual(p.get_emails(), expected_res)
Example #5
    def test_get_clear_text_body(self):
        html = 'header <b>ABC</b>-<b>DEF</b>-<b>XYZ</b> footer'
        clear_text = 'header ABC-DEF-XYZ footer'
        headers = Headers([('Content-Type', 'text/html')])
        r = build_http_response(self.url, html, headers)

        p = SGMLParser(r)
        p.parse()

        self.assertEquals(clear_text, p.get_clear_text_body())
Example #6
    def test_meta_tags(self):
        body = HTML_DOC % \
            {'head': META_REFRESH + META_REFRESH_WITH_URL,
             'body': ''}
        resp = build_http_response(self.url, body)
        p = SGMLParser(resp)
        p.parse()

        self.assertEqual(2, len(p.meta_redirs))
        self.assertTrue("2;url=http://crawler.w3af.com/" in p.meta_redirs)
        self.assertTrue("600" in p.meta_redirs)
        self.assertEquals([URL('http://crawler.w3af.com/')], p.references[0])
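
Examples #6, #7, #11, #15 and #17 build their bodies from an HTML_DOC template plus META_REFRESH* fixtures that are not included in the snippets. A plausible reconstruction, inferred only from the assertions in these examples (the exact markup in the real test module may differ), is:

# Reconstructed fixtures; inferred from the assertions, not copied from w3af.
HTML_DOC = u'<html><head>%(head)s</head><body>%(body)s</body></html>'

# Refresh without a URL: only the delay ("600") shows up in meta_redirs.
META_REFRESH = u'<meta http-equiv="refresh" content="600">'

# Refresh with a URL: the content string is stored in meta_redirs and the
# URL itself is also extracted into p.references[0].
META_REFRESH_WITH_URL = u'<meta http-equiv="refresh" content="2;url=http://crawler.w3af.com/">'

# Single-quoted variant used by Example #7 below.
META_REFRESH_WITH_URL_AND_QUOTES = u'<meta http-equiv="refresh" content="2;url=\'http://crawler.w3af.com/\'">'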
Example #7
    def test_meta_tags_with_single_quotes(self):
        body = HTML_DOC % {'head': META_REFRESH + META_REFRESH_WITH_URL_AND_QUOTES,
                           'body': ''}
        resp = build_http_response(self.url, body)

        p = SGMLParser(resp)
        p.parse()

        self.assertEqual(2, len(p.meta_redirs))
        self.assertIn("2;url='http://crawler.w3af.com/'", p.meta_redirs)
        self.assertIn("600", p.meta_redirs)
        self.assertEqual([URL('http://crawler.w3af.com/')], p.references[0])
Example #8
    def test_nested_with_text(self):
        body = '<html><a href="/abc">foo<div>bar</div></a></html>'
        url = URL('http://www.w3af.com/')
        headers = Headers()
        headers['content-type'] = 'text/html'
        resp = HTTPResponse(200, body, headers, url, url, charset='utf-8')

        p = SGMLParser(resp)
        tags = p.get_tags_by_filter(('a', 'b'), yield_text=True)
        tags = list(tags)

        self.assertEqual([Tag('a', {'href': '/abc'}, 'foo')], tags)
Example #9
    def test_none(self):
        body = '<html><a href="/abc">foo<div>bar</div></a></html>'
        url = URL('http://www.w3af.com/')
        headers = Headers()
        headers['content-type'] = 'text/html'
        resp = HTTPResponse(200, body, headers, url, url, charset='utf-8')

        p = SGMLParser(resp)
        tags = p.get_tags_by_filter(None)
        tags = list(tags)
        tag_names = [tag.name for tag in tags]

        self.assertEqual(tag_names, ['html', 'body', 'a', 'div'])
Example #10
    def test_reference_with_colon(self):
        body = """
        <html>
            <a href="d:url.html?id=13&subid=3">foo</a>
        </html>"""
        r = build_http_response(self.url, body)
        p = SGMLParser(r)
        p.parse()
        parsed_refs = p.references[0]
        #
        #    Finding zero URLs is the correct behavior based on what
        #    I've seen in Opera and Chrome.
        #
        self.assertEquals(0, len(parsed_refs))
Example #11
    def test_parser_attrs(self):
        body_content = HTML_DOC % {'head': '', 'body': ''}
        p = SGMLParser(build_http_response(self.url, body_content))

        # Assert parser has these attrs correctly initialized
        self.assertFalse(getattr(p, '_inside_form'))
        self.assertFalse(getattr(p, '_inside_select'))
        self.assertFalse(getattr(p, '_inside_text_area'))
        self.assertFalse(getattr(p, '_inside_script'))

        self.assertEquals(set(), getattr(p, '_tag_and_url'))
        self.assertEquals([], getattr(p, '_forms'))
        self.assertEquals([], getattr(p, '_comments_in_doc'))
        self.assertEquals([], getattr(p, '_meta_redirs'))
        self.assertEquals([], getattr(p, '_meta_tags'))
Example #12
    def test_parsed_references(self):
        # The *parsed* urls *must* come both from valid tags and tag attributes
        # Invalid urls (like javascript: instructions) must be ignored
        body = """
        <html>
            <a href="/x.py?a=1" Invalid_Attr="/invalid_url.php">
            <form action="javascript:history.back(1)">
                <tagX href="/py.py"/>
            </form>
        </html>"""
        r = build_http_response(self.url, body)
        p = SGMLParser(r)
        p.parse()
        parsed_refs = p.references[0]
        self.assertEquals(1, len(parsed_refs))
        self.assertEquals(
            'http://w3af.com/x.py?a=1', parsed_refs[0].url_string)
Example #13
    def test_get_clear_text_body_encodings(self):

        raise SkipTest('Not sure why this one is failing :S')

        for lang_desc, (body, encoding) in TEST_RESPONSES.iteritems():
            encoding_header = 'text/html; charset=%s' % encoding
            headers = Headers([('Content-Type', encoding_header)])

            encoded_body = body.encode(encoding)
            r = build_http_response(self.url, encoded_body, headers)

            p = SGMLParser(r)
            p.parse()

            ct_body = p.get_clear_text_body()

            # These test strings don't really have tags, so they should be eq
            self.assertEqual(ct_body, body)
Example #14
    def test_get_clear_text_issue_4402(self):
        """
        :see: https://github.com/andresriancho/w3af/issues/4402
        """
        test_file_path = 'core/data/url/tests/data/encoding_4402.php'
        test_file = os.path.join(ROOT_PATH, test_file_path)
        body = file(test_file, 'rb').read()

        sample_encodings = [encoding for _, (_, encoding) in TEST_RESPONSES.iteritems()]
        sample_encodings.extend(['', 'utf-8'])

        for encoding in sample_encodings:
            encoding_header = 'text/html; charset=%s' % encoding
            headers = Headers([('Content-Type', encoding_header)])

            r = build_http_response(self.url, body, headers)

            p = SGMLParser(r)
            p.parse()

            p.get_clear_text_body()
Example #15
    def test_case_sensitivity(self):
        """
        Ensure handler methods are *always* called with lower-cased
        tag and attribute names
        """
        def islower(s):
            il = False
            if isinstance(s, basestring):
                il = s.islower()
            else:
                il = all(k.islower() for k in s)
            assert il, "'%s' is not lower-case" % s
            return il

        def start_wrapper(orig_start, tag):
            islower(tag.tag)
            islower(tag.attrib)
            return orig_start(tag)

        tags = (A_LINK_ABSOLUTE, INPUT_CHECKBOX_WITH_NAME, SELECT_WITH_NAME,
                TEXTAREA_WITH_ID_AND_DATA, INPUT_HIDDEN)
        ops = "lower", "upper", "title"

        for indexes in combinations(range(len(tags)), 2):

            body_elems = []

            for index, tag in enumerate(tags):
                ele = tag
                if index in indexes:
                    ele = getattr(tag, choice(ops))()
                body_elems.append(ele)

            body = HTML_DOC % {'head': '', 'body': ''.join(body_elems)}
            resp = build_http_response(self.url, body)
            p = SGMLParser(resp)
            orig_start = p.start
            wrapped_start = partial(start_wrapper, orig_start)
            p.start = wrapped_start
            p.parse()
Example #16
    def test_extract_emails_blank(self):
        resp = build_http_response(self.url, '')
        p = SGMLParser(resp)

        self.assertEqual(p.get_emails(), set())
Example #17
    def test_baseurl(self):
        body = HTML_DOC % {'head': BASE_TAG, 'body': ''}
        resp = build_http_response(self.url, body)
        p = SGMLParser(resp)
        p.parse()
        self.assertEquals(URL('http://www.w3afbase.com/'), p._base_url)
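
The BASE_TAG fixture used in Example #17 is also not shown in the snippets. Based solely on the asserted base URL, it is presumably something along the lines of:

# Assumed fixture, inferred from the assertion on p._base_url.
BASE_TAG = u'<base href="http://www.w3afbase.com/">'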