Пример #1
0
    def test_dash_issue(self):
        """Check how the ``&#151;`` entity is decoded with and without the fix.

        With default settings the text of ``<strong>`` is expected to be the
        character with code point 8212, not 151.  With
        ``fix_special_entities=False`` the expectation is reversed.
        """
        html = '<strong>&#151;</strong>'
        self.server.response['get.data'] = html
        grab = build_grab()
        grab.go(self.server.get_url())

        # By default &#[128-160]; are fixed
        text = grab.doc.select('//strong/text()').text()
        self.assertNotEqual(text, six.unichr(151))
        self.assertEqual(text, six.unichr(8212))

        # disable fix-behaviour
        grab.setup(fix_special_entities=False)
        grab.go(self.server.get_url())

        # With the fix disabled the entity keeps its raw code point
        text = grab.doc.select('//strong/text()').text()
        self.assertEqual(text, six.unichr(151))
        self.assertNotEqual(text, six.unichr(8212))

        # Explicitly use unicode_body func
        grab = build_grab()
        grab.go(self.server.get_url())
        self.assertIn('&#8212;', grab.doc.unicode_body())
Пример #2
0
    def test_unicode_post(self):
        """Verify how unicode POST data is encoded before it is sent.

        Three cases are covered: the default (utf-8), a raw string with an
        explicit ``charset`` option, and a dict payload with that option.
        """
        text = u'фыва'

        # Without a charset option the payload is expected in utf-8
        client = build_grab()
        client.setup(post=text, url=self.server.get_url())
        client.request()
        received = self.server.request['data']
        self.assertEqual(received, text.encode('utf-8'))

        # Raw string payload combined with charset='cp1251'
        self.server.request['charset'] = 'cp1251'
        client = build_grab()
        options = dict(post=text, url=self.server.get_url(),
                       charset='cp1251', debug=True)
        client.setup(**options)
        client.request()
        received = self.server.request['data']
        self.assertEqual(received, text.encode('cp1251'))

        # Dict payload with a unicode value combined with charset='cp1251'
        self.server.request['charset'] = 'cp1251'
        client = build_grab()
        options = dict(post={'foo': text}, url=self.server.get_url(),
                       charset='cp1251', debug=True)
        client.setup(**options)
        client.request()
        expected = 'foo=%s' % quote(text.encode('cp1251'))
        expected = expected.encode('utf-8')  # py3 hack
        self.assertEqual(self.server.request['data'], expected)
Пример #3
0
    def test_session(self):
        """Check which cookies are sent back depending on ``reuse_cookies``.

        Also checks that ``clear_cookies()`` stops previously received
        cookies from being sent.
        """
        # Cookies received from the server are sent back on later requests
        grab = build_grab()
        grab.setup(reuse_cookies=True)
        self.server.response['cookies'] = {'foo': 'bar'}.items()
        grab.go(self.server.get_url())
        self.assertEqual(grab.doc.cookies['foo'], 'bar')
        # Two follow-up requests: the cookie must be present in both
        for _ in range(2):
            grab.go(self.server.get_url())
            self.assertEqual(self.server.request['headers']['Cookie'],
                             'foo=bar')

        # Test reuse_cookies=False
        grab = build_grab()
        grab.setup(reuse_cookies=False)
        self.server.response['cookies'] = {'foo': 'baz'}.items()
        grab.go(self.server.get_url())
        self.assertEqual(grab.doc.cookies['foo'], 'baz')
        grab.go(self.server.get_url())
        self.assertEqual(len(self.server.request['cookies']), 0)

        # After clear_cookies() no cookies are expected in the next request
        grab = build_grab()
        grab.setup(reuse_cookies=True)
        self.server.response['cookies'] = {'foo': 'bar'}.items()
        grab.go(self.server.get_url())
        self.assertEqual(grab.doc.cookies['foo'], 'bar')
        grab.clear_cookies()
        grab.go(self.server.get_url())
        self.assertEqual(len(self.server.request['cookies']), 0)
Пример #4
0
    def test_dash_issue(self):
        """Verify decoding of ``&#151;`` depending on ``fix_special_entities``.

        By default the ``<strong>`` text must equal code point 8212 and not
        151; once the option is switched off the raw code point 151 is
        expected instead.
        """
        xpath = '//strong/text()'
        raw_char = six.unichr(151)
        fixed_char = six.unichr(8212)

        self.server.response['get.data'] = '<strong>&#151;</strong>'
        grab = build_grab()
        grab.go(self.server.get_url())

        # By default &#[128-160]; are fixed
        found = grab.doc.select(xpath).text()
        self.assertNotEqual(found, raw_char)
        self.assertEqual(found, fixed_char)

        # disable fix-behaviour
        grab.setup(fix_special_entities=False)
        grab.go(self.server.get_url())

        # Now the entity is expected to be left untouched
        found = grab.doc.select(xpath).text()
        self.assertEqual(found, raw_char)
        self.assertNotEqual(found, fixed_char)

        # Explicitly use unicode_body func
        grab = build_grab()
        grab.go(self.server.get_url())
        self.assertIn('&#8212;', grab.doc.unicode_body())
Пример #5
0
    def test_load_dump(self):
        """Round-trip cookies through ``save_to_file`` and ``load_from_file``."""
        with temp_file() as tmp_file:
            # Dumping: every configured cookie must show up in the JSON file
            for jar in ({'foo': 'bar', 'spam': 'ham'},
                        {'foo': 'bar', 'spam': u'begemot'}):
                grab = build_grab()
                grab.setup(cookies=jar)
                grab.go(self.server.get_url())
                grab.cookies.save_to_file(tmp_file)
                with open(tmp_file) as inp:
                    saved = set((item['name'], item['value'])
                                for item in json.load(inp))
                    self.assertEqual(set(jar.items()), saved)

            # Test load cookies
            grab = build_grab()
            records = [
                {'name': name, 'value': value, 'domain': self.server.address}
                for name, value in (('foo', 'bar'), ('spam', u'begemot'))
            ]
            with open(tmp_file, 'w') as out:
                json.dump(records, out)
            grab.cookies.load_from_file(tmp_file)
            expected = set((item['name'], item['value']) for item in records)
            self.assertEqual(set(grab.cookies.items()), expected)
Пример #6
0
    def test_load_dump(self):
        """Cookies survive being dumped to and loaded from a JSON file."""
        with temp_file() as tmp_file:

            def pairs(records):
                # Reduce cookie records to comparable (name, value) tuples
                return set((rec['name'], rec['value']) for rec in records)

            def dump_and_read(cookies):
                # Send a request with the given cookies, save the jar and
                # return the (name, value) pairs found in the saved file
                client = build_grab()
                client.setup(cookies=cookies)
                client.go(self.server.get_url())
                client.cookies.save_to_file(tmp_file)
                with open(tmp_file) as inp:
                    return pairs(json.load(inp))

            plain = {'foo': 'bar', 'spam': 'ham'}
            self.assertEqual(set(plain.items()), dump_and_read(plain))

            mixed = {'foo': 'bar', 'spam': u'begemot'}
            self.assertEqual(set(mixed.items()), dump_and_read(mixed))

            # Test load cookies
            client = build_grab()
            stored = [{'name': 'foo', 'value': 'bar',
                       'domain': self.server.address},
                      {'name': 'spam', 'value': u'begemot',
                       'domain': self.server.address}]
            with open(tmp_file, 'w') as out:
                json.dump(stored, out)
            client.cookies.load_from_file(tmp_file)
            self.assertEqual(set(client.cookies.items()), pairs(stored))
Пример #7
0
    def test_unicode_post(self):
        """Unicode POST payloads are encoded with utf-8 or the given charset."""
        payload = u'фыва'

        def body_seen_by_server(**options):
            # Send one request with a fresh Grab and return the request
            # body recorded by the test server
            grab = build_grab()
            grab.setup(url=self.server.get_url(), **options)
            grab.request()
            return self.server.request['data']

        # By default, unicode post should be converted into utf-8
        self.assertEqual(body_seen_by_server(post=payload),
                         payload.encode('utf-8'))

        # Now try cp1251 with charset option
        self.server.request['charset'] = 'cp1251'
        body = body_seen_by_server(post=payload, charset='cp1251', debug=True)
        self.assertEqual(body, payload.encode('cp1251'))

        # Now try dict with unicode value & charset option
        self.server.request['charset'] = 'cp1251'
        body = body_seen_by_server(post={'foo': payload}, charset='cp1251',
                                   debug=True)
        quoted = 'foo=%s' % quote(payload.encode('cp1251'))
        self.assertEqual(body, quoted.encode('utf-8'))  # py3 hack
Пример #8
0
    def test_session(self):
        """Exercise cookie reuse between requests.

        Covers ``reuse_cookies=True``, ``reuse_cookies=False`` and the
        effect of ``clear_cookies()`` on the next request.
        """
        url = self.server.get_url()

        # Test that if Grab gets some cookies from the server
        # then it sends it back
        grab = build_grab()
        grab.setup(reuse_cookies=True)
        self.server.response['cookies'] = {'foo': 'bar'}.items()
        grab.go(url)
        self.assertEqual(grab.doc.cookies['foo'], 'bar')
        grab.go(url)
        self.assertEqual(self.server.request['headers']['Cookie'], 'foo=bar')
        grab.go(url)
        self.assertEqual(self.server.request['headers']['Cookie'], 'foo=bar')

        # Test reuse_cookies=False
        grab = build_grab()
        grab.setup(reuse_cookies=False)
        self.server.response['cookies'] = {'foo': 'baz'}.items()
        grab.go(url)
        self.assertEqual(grab.doc.cookies['foo'], 'baz')
        grab.go(url)
        self.assertEqual(len(self.server.request['cookies']), 0)

        # Test that clear_cookies() prevents received cookies
        # from being sent with the next request
        grab = build_grab()
        grab.setup(reuse_cookies=True)
        self.server.response['cookies'] = {'foo': 'bar'}.items()
        grab.go(url)
        self.assertEqual(grab.doc.cookies['foo'], 'bar')
        grab.clear_cookies()
        grab.go(url)
        self.assertEqual(len(self.server.request['cookies']), 0)
Пример #9
0
 def test_adopt(self):
     """After ``adopt`` the target exposes the source's body and URL."""
     source = build_grab()
     self.server.response['get.data'] = 'Moon'
     source.go(self.server.get_url())

     target = build_grab()
     # A freshly built instance has no URL configured yet
     self.assertIsNone(target.config['url'])
     target.adopt(source)
     self.assertIn(b'Moon', target.doc.body)
     self.assertEqual(target.config['url'], self.server.get_url())
Пример #10
0
 def test_make_url_absolute(self):
     """Check ``make_url_absolute`` with and without a loaded document."""
     # Document with a <base> tag: the path is resolved against its host
     self.server.response['get.data'] = '<base href="http://foo/bar/">'
     loaded = build_grab()
     loaded.go(self.server.get_url())
     resolved = loaded.make_url_absolute('/foobar', resolve_base=True)
     self.assertEqual(resolved, 'http://foo/foobar')

     # Fresh instance without any request: the path comes back unchanged
     blank = build_grab()
     self.assertEqual(blank.make_url_absolute('/foobar'), '/foobar')
Пример #11
0
    def test_empty_document(self):
        """XPath queries on tag-less or empty markup must not raise."""
        for payload in ('oops', '<frameset></frameset>'):
            self.server.response['get.data'] = payload
            grab = build_grab()
            grab.go(self.server.get_url())
            # Only the absence of an exception is checked here
            grab.xpath_exists('//anytag')
Пример #12
0
    def test_empty_document(self):
        """Smoke test: ``xpath_exists`` works on bodies without real markup."""
        query = '//anytag'

        # Plain text body
        self.server.response['get.data'] = 'oops'
        text_client = build_grab()
        text_client.go(self.server.get_url())
        text_client.xpath_exists(query)

        # Body consisting of an empty frameset
        self.server.response['get.data'] = '<frameset></frameset>'
        frameset_client = build_grab()
        frameset_client.go(self.server.get_url())
        frameset_client.xpath_exists(query)
Пример #13
0
    def test_options_method(self):
        """OPTIONS requests are sent both with and without a request body."""
        # With a body: Content-Length must match the 3-byte payload
        grab = build_grab()
        grab.setup(method='options', post=b'abc')
        grab.go(self.server.get_url())
        self.assertEqual('OPTIONS', self.server.request['method'])
        self.assertEqual('3', self.server.request['headers']['Content-Length'])

        # Without a body: no Content-Length header is expected
        grab = build_grab()
        grab.setup(method='options')
        grab.go(self.server.get_url())
        self.assertEqual('OPTIONS', self.server.request['method'])
        self.assertNotIn('Content-Length', self.server.request['headers'])
Пример #14
0
    def test_useragent(self):
        """Check the User-Agent header for the different configuration modes.

        Covers ``user_agent=None``, the default configuration, an explicit
        value, and values loaded from ``user_agent_file``.
        """
        # Null value activates default random user-agent
        # For some transports it just allow them to send default user-agent
        # like in Kit transport case
        grab = build_grab()
        grab.setup(user_agent=None)
        grab.go(self.server.get_url())
        headers = self.server.request['headers']
        self.assertGreater(len(headers), 0)
        self.assertNotIn('PycURL', headers['user-agent'])

        # By default user_agent is None => random user agent is generated
        grab = build_grab()
        grab.go(self.server.get_url())
        headers = self.server.request['headers']
        self.assertGreater(len(headers), 0)
        self.assertNotIn('PycURL', headers['user-agent'])

        # Simple case: setup user agent manually
        grab.setup(user_agent='foo')
        grab.go(self.server.get_url())
        self.assertEqual(self.server.request['headers']['user-agent'], 'foo')

        with temp_file() as ua_file:
            # user agent from file should be loaded
            with open(ua_file, 'w') as out:
                out.write('GOD')
            grab.setup(user_agent=None, user_agent_file=ua_file)
            grab.go(self.server.get_url())
            self.assertEqual(self.server.request['headers']['user-agent'],
                             'GOD')

        with temp_file() as ua_file:
            # random user agent from file should be loaded
            with open(ua_file, 'w') as out:
                out.write('GOD1\nGOD2')
            grab.setup(user_agent=None, user_agent_file=ua_file)
            grab.go(self.server.get_url())
            self.assertIn(self.server.request['headers']['user-agent'],
                          ('GOD1', 'GOD2'))
            agent = grab.config['user_agent']

        # User-agent should not change over the next two requests
        for _ in range(2):
            grab.go(self.server.get_url())
            self.assertEqual(self.server.request['headers']['user-agent'],
                             agent)
Пример #15
0
    def test_useragent(self):
        """Exercise every way of configuring the User-Agent header.

        The header is inspected on the server side after each request:
        explicit ``None``, defaults, a fixed string, and file-based values.
        """

        def sent_agent():
            # User-Agent header of the most recent request seen by the server
            return self.server.request['headers']['user-agent']

        # Null value activates default random user-agent
        # For some transports it just allow them to send default user-agent
        # like in Kit transport case
        grab = build_grab()
        grab.setup(user_agent=None)
        grab.go(self.server.get_url())
        self.assertGreater(len(self.server.request['headers']), 0)
        self.assertNotIn('PycURL', sent_agent())

        # By default user_agent is None => random user agent is generated
        grab = build_grab()
        grab.go(self.server.get_url())
        self.assertGreater(len(self.server.request['headers']), 0)
        self.assertNotIn('PycURL', sent_agent())

        # Simple case: setup user agent manually
        grab.setup(user_agent='foo')
        grab.go(self.server.get_url())
        self.assertEqual(sent_agent(), 'foo')

        with temp_file() as ua_file:
            # user agent from file should be loaded
            with open(ua_file, 'w') as out:
                out.write('GOD')
            grab.setup(user_agent=None, user_agent_file=ua_file)
            grab.go(self.server.get_url())
            self.assertEqual(sent_agent(), 'GOD')

        with temp_file() as ua_file:
            # random user agent from file should be loaded
            with open(ua_file, 'w') as out:
                out.write('GOD1\nGOD2')
            grab.setup(user_agent=None, user_agent_file=ua_file)
            grab.go(self.server.get_url())
            self.assertIn(sent_agent(), ('GOD1', 'GOD2'))
            agent = grab.config['user_agent']

        # User-agent should not change
        grab.go(self.server.get_url())
        self.assertEqual(sent_agent(), agent)

        # User-agent should not change
        grab.go(self.server.get_url())
        self.assertEqual(sent_agent(), agent)
Пример #16
0
    def test_cdata_issue(self):
        """Compare CDATA handling of the default and the XML DOM builders."""
        self.server.response['data'] = XML

        # By default HTML DOM builder is used
        # It handles CDATA incorrectly
        html_grab = build_grab()
        html_grab.go(self.server.get_url())
        weight = html_grab.xpath_one('//weight')
        self.assertEqual(None, weight.text)
        weight_nodes = html_grab.doc.tree.xpath('//weight')
        self.assertEqual(None, weight_nodes[0].text)

        # But XML DOM builder produces valid result
        xml_grab = build_grab(content_type='xml')
        xml_grab.go(self.server.get_url())
        weight_nodes = xml_grab.doc.tree.xpath('//weight')
        self.assertEqual('30', weight_nodes[0].text)
Пример #17
0
    def test_cdata_issue(self):
        """The ``<weight>`` text is only available with ``content_type='xml'``."""
        query = '//weight'
        self.server.response['data'] = XML

        # Default (HTML) DOM builder: the element text is expected to be
        # None because CDATA is handled incorrectly
        client = build_grab()
        client.go(self.server.get_url())
        self.assertEqual(None, client.xpath_one(query).text)
        first_match = client.doc.tree.xpath(query)[0]
        self.assertEqual(None, first_match.text)

        # XML DOM builder: the element text is expected to be '30'
        client = build_grab(content_type='xml')
        client.go(self.server.get_url())
        first_match = client.doc.tree.xpath(query)[0]
        self.assertEqual('30', first_match.text)
Пример #18
0
    def test_useragent_simple(self):
        """An explicitly configured user agent is sent to the server as is."""
        client = build_grab()
        client.setup(user_agent='foo')
        client.go(self.server.get_url())

        request_headers = self.server.request['headers']
        self.assertEqual(request_headers['user-agent'], 'foo')
Пример #19
0
    def test_submit(self):
        """Submit forms and check which submit control ends up in the body."""
        grab = build_grab()
        self.server.response['get.data'] = POST_FORM % self.server.get_url()
        grab.go(self.server.get_url())
        grab.doc.set_input('name', 'Alex')
        grab.submit()
        self.assert_equal_qs(self.server.request['data'],
                             b'name=Alex&secret=123')

        # (arguments for submit(), expected request body) per scenario:
        #   1. default submit control
        #   2. explicitly selected submit control
        #   3. invalid control name falls back to the default control
        scenarios = (
            ({}, b'secret=123&submit1=submit1'),
            ({'submit_name': 'submit2'}, b'secret=123&submit2=submit2'),
            ({'submit_name': 'submit3'}, b'secret=123&submit1=submit1'),
        )
        for submit_kwargs, expected_body in scenarios:
            self.server.response['get.data'] = MULTIPLE_SUBMIT_FORM
            grab.go(self.server.get_url())
            grab.submit(**submit_kwargs)
            self.assert_equal_qs(self.server.request['data'], expected_body)
Пример #20
0
    def test_set_methods(self):
        """Check which form gets selected by the various ``set_input*`` calls."""
        # pylint: disable=no-member,protected-access
        self.server.response['get.data'] = FORMS_HTML
        grab = build_grab()
        grab.go(self.server.get_url())
        doc = grab.doc

        # No form is selected right after loading the page
        self.assertEqual(doc._lxml_form, None)

        doc.set_input('gender', '1')
        self.assertEqual('common_form', doc._lxml_form.get('id'))

        # Setting the 'query' field is expected to fail with KeyError here
        with self.assertRaises(KeyError):
            doc.set_input('query', 'asdf')

        # Reset the selection; lookup by id is expected to pick search_form
        doc._lxml_form = None
        doc.set_input_by_id('search_box', 'asdf')
        self.assertEqual('search_form', doc._lxml_form.get('id'))

        doc.choose_form(xpath='//form[@id="common_form"]')
        doc.set_input_by_number(0, 'asdf')

        # Reset again; lookup by xpath is expected to pick common_form
        doc._lxml_form = None
        doc.set_input_by_xpath('//*[@name="gender"]', '2')
        self.assertEqual('common_form', doc._lxml_form.get('id'))
Пример #21
0
    def test_assign_unicode_to_body(self):
        """Bytes may be assigned to ``doc.body``; unicode must be rejected."""
        grab = build_grab()
        for chunk in (b'abc', b'def'):
            grab.doc.body = chunk

        self.assertRaises(GrabMisuseError,
                          setattr, grab.doc, 'body', u'Спутник')
Пример #22
0
    def test_multipart_post(self):
        """The ``foo`` field must appear in the body of multipart requests.

        The ``multipart_post`` option is exercised with a dict, a tuple
        holding one pair and a tuple holding two pairs with the same name.
        """
        grab = build_grab(url=self.server.get_url(), debug_post=True)

        # TODO: understand and fix the case of a urlencoded `post` with
        # non-ascii data, e.g. {'foo': 'bar', 'gaz': u'Дельфин', 'abc': None}.
        # A draft assertion failed because the percent-encoded and the raw
        # utf-8 forms of the body did not compare equal.

        payloads = (
            {'foo': 'bar'},                    # dict
            (('foo', 'bar'), ),                # tuple with one pair
            (('foo', 'bar'), ('foo', 'baz')),  # tuple with two pairs
        )
        for payload in payloads:
            grab.setup(multipart_post=payload)
            grab.request()
            self.assertIn(b'name="foo"', self.server.request['data'])
Пример #23
0
 def test_xml_with_declaration(self):
     """A body starting with an XML declaration can still be queried."""
     self.server.response['get.data'] = (
         b'<?xml version="1.0" encoding="UTF-8"?>'
         b'<root><foo>foo</foo></root>'
     )
     grab = build_grab()
     grab.go(self.server.get_url())
     self.assertEqual(grab.doc.select('//foo').text(), 'foo')
Пример #24
0
 def test_invalid_charset(self):
     """Smoke test: a page declaring charset windows-874 can be fetched."""
     client = build_grab()
     markup = '''<head><meta http-equiv="Content-Type"
                 content="text/html; charset=windows-874">'
                 </head><body>test</body>'''
     self.server.response['get.data'] = markup
     # No assertion: the request itself must complete without an exception
     client.go(self.server.get_url())
Пример #25
0
 def test_task_clone_grab_config_and_url(self):
     """Cloning a task with ``url`` updates the task URL and grab config."""
     new_url = 'http://bar.com/'
     grab = build_grab()
     grab.setup(url='http://foo.com/')
     original = Task('foo', grab=grab)

     cloned = original.clone(url=new_url)
     self.assertEqual(cloned.url, new_url)
     self.assertEqual(cloned.grab_config['url'], new_url)
Пример #26
0
    def test_cookiefile(self):
        """Cookies from ``cookiefile`` and from the server are merged."""
        with temp_file() as tmp_file:
            grab = build_grab()

            file_cookies = [{'name': 'spam', 'value': 'ham',
                             'domain': self.server.address}]
            with open(tmp_file, 'w') as out:
                out.write(json.dumps(file_cookies))

            # One cookie is sent in the server response,
            # another one is passed via the `cookiefile` option
            self.server.response['cookies'] = {'godzilla': 'monkey'}.items()
            grab.setup(cookiefile=tmp_file, debug=True)
            grab.go(self.server.get_url())
            sent = self.server.request['cookies']
            self.assertEqual(sent['spam']['value'], 'ham')

            # Expected result of combining the two cookie sources
            expected = set([('godzilla', 'monkey'), ('spam', 'ham')])

            # grab.cookies should contain the merged cookies
            self.assertEqual(expected, set(grab.cookies.items()))

            # the `cookiefile` file should contain the merged cookies too
            with open(tmp_file) as inp:
                stored = set((rec['name'], rec['value'])
                             for rec in json.load(inp))
                self.assertEqual(expected, stored)

            # Just ensure it works
            grab.go(self.server.get_url())
Пример #27
0
    def test_useragent_simple(self):
        """Setting ``user_agent`` manually controls the header that is sent."""
        expected_agent = 'foo'
        grab = build_grab()
        grab.setup(user_agent=expected_agent)
        grab.go(self.server.get_url())
        sent_agent = self.server.request['headers']['user-agent']
        self.assertEqual(sent_agent, expected_agent)
Пример #28
0
 def test_grab_parse_defensedxml(self):
     """Querying XML that declares an external entity must be refused."""
     with temp_dir() as tmp_dir:
         # File the external entity points at
         injection_path = os.path.join(tmp_dir, 'injection')
         with open(injection_path, 'w') as out:
             out.write('Hey there!')

         # Prepare file:// URL valid for both linux and windows
         normalized = injection_path.lstrip('/').replace('\\', '/')
         injection_url = 'file:///%s' % normalized
         bad_xml = ''.join([
             '<!DOCTYPE external [',
             '<!ENTITY ee SYSTEM "', injection_url, '">',
             ']>',
             '<root>&ee;</root>',
         ]).encode()

         # On windows, use slashes instead of backslashes to avoid error:
         # Invalid file://hostname/, expected localhost or 127.0.0.1 or none
         # (replacing is a no-op when the path has no backslashes)
         xml_file = os.path.join(tmp_dir, 'bad.xml').replace('\\', '/')
         with open(xml_file, 'wb') as out:
             out.write(bad_xml)

         grab = build_grab(content_type='xml')
         grab.go('file://%s' % xml_file)
         document = grab.doc
         with self.assertRaises(EntitiesForbidden):
             document('//title')
Пример #29
0
 def test_nonascii_path(self):
     """A non-ascii URL path reaches the server percent-encoded."""
     self.server.response['data'] = 'medved'
     client = build_grab()
     target = self.server.get_url(u'/превед')
     client.go(target)

     self.assertEqual(b'medved', client.doc.body)
     requested_path = self.server.request['path']
     self.assertEqual('/%D0%BF%D1%80%D0%B5%D0%B2%D0%B5%D0%B4', requested_path)
Пример #30
0
 def test_invalid_charset(self):
     """Fetching a page with a windows-874 charset meta tag must not fail."""
     page = '''<head><meta http-equiv="Content-Type"
                 content="text/html; charset=windows-874">'
                 </head><body>test</body>'''
     self.server.response['get.data'] = page
     fetcher = build_grab()
     # Only the absence of an exception is verified
     fetcher.go(self.server.get_url())
Пример #31
0
 def test_submit(self):
     """``submit(make_request=False)`` stores the form data in the config."""
     data = b'''<form method="post">
         <input type="text" name="foo" value="val"></form>'''
     grab = build_grab(data)
     grab.choose_form(0)
     # No request is made; only the prepared `post` option is inspected
     grab.submit(make_request=False)
     self.assertIn('foo', dict(grab.config['post']))
Пример #32
0
 def test_nobody(self):
     """With ``nobody=True`` the body is empty while headers are present."""
     grab = build_grab()
     grab.setup(nobody=True)
     self.server.response['get.data'] = 'foo'
     grab.go(self.server.get_url())
     self.assertEqual(b'', grab.doc.body)
     self.assertGreater(len(grab.doc.head), 0)
Пример #33
0
 def test_choose_form_by_element_noform(self):
     """Choosing a form by a missing element raises ``DataNotFound``."""
     markup = b'''
     <div>test</div>
     '''
     chooser = build_grab(markup).choose_form_by_element
     with self.assertRaises(DataNotFound):
         chooser('//input[@name="bar"]')
Пример #34
0
 def test_form_fields(self):
     """``form_fields`` returns the expected name/value mapping.

     The expected mapping omits the unnamed input, the disabled input and
     the unchecked checkbox present in the markup.
     """
     expected = dict(
         sel='opt1',
         rad1='rad1',
         cb2='cb2',
         text1='text1',
         area1='area1',
     )
     markup = b'''
     <form>
         <input value="foo">
         <input name="dis" disabled="disabled" value="diz">
         <select name="sel">
             <option value="opt1">opt1</option)
             <option value="opt2">opt2</option)
         </select>
         <input type="radio" name="rad1" value="rad1">
         <input type="checkbox" name="cb1" value="cb1">
         <input type="checkbox" name="cb2" value="cb2" checked="checked">
         <input type="text" name="text1" value="text1">
         <textarea name="area1">area1</textarea>
     </form>
     '''
     parsed = build_grab(markup)
     self.assertEqual(expected, parsed.form_fields())
Пример #35
0
    def test_multipart_post(self):
        """Send ``multipart_post`` in several shapes and look for ``foo``.

        Each request body recorded by the server must contain the
        ``name="foo"`` part header.
        """
        grab = build_grab(url=self.server.get_url(), debug_post=True)
        marker = b'name="foo"'

        # Dict
        grab.setup(multipart_post={'foo': 'bar'})
        grab.request()
        self.assertIn(marker, self.server.request['data'])

        # TODO: understand and fix the urlencoded `post` case with a few
        # values including non-ascii data (u'Дельфин') and None; the
        # percent-encoded body did not compare equal to the raw utf-8 form.

        # tuple with one pair
        grab.setup(multipart_post=(('foo', 'bar'),))
        grab.request()
        self.assertIn(marker, self.server.request['data'])

        # tuple with two pairs
        grab.setup(multipart_post=(('foo', 'bar'), ('foo', 'baz')))
        grab.request()
        self.assertIn(marker, self.server.request['data'])
Пример #36
0
    def test_pyquery_handler(self):
        """``doc.pyquery`` selects elements of the fetched document."""
        markup = '<body><h1>Hello world</h1><footer>2014</footer>'
        self.server.response['get.data'] = markup
        client = build_grab()
        client.go(self.server.get_url())

        heading = client.doc.pyquery('h1')
        self.assertEqual(heading.text(), 'Hello world')
Пример #37
0
    def test_submit(self):
        """Check the request body produced by ``submit`` for several forms."""

        def load_multi_submit_form(client):
            # Serve the form with several submit controls and fetch it
            self.server.response['get.data'] = MULTIPLE_SUBMIT_FORM
            client.go(self.server.get_url())

        client = build_grab()
        self.server.response['get.data'] = POST_FORM % self.server.get_url()
        client.go(self.server.get_url())
        client.doc.set_input('name', 'Alex')
        client.submit()
        sent = self.server.request['data']
        self.assert_equal_qs(sent, b'name=Alex&secret=123')

        # Default submit control
        load_multi_submit_form(client)
        client.submit()
        sent = self.server.request['data']
        self.assert_equal_qs(sent, b'secret=123&submit1=submit1')

        # Selected submit control
        load_multi_submit_form(client)
        client.submit(submit_name='submit2')
        sent = self.server.request['data']
        self.assert_equal_qs(sent, b'secret=123&submit2=submit2')

        # Default submit control if submit control name is invalid
        load_multi_submit_form(client)
        client.submit(submit_name='submit3')
        sent = self.server.request['data']
        self.assert_equal_qs(sent, b'secret=123&submit1=submit1')
Пример #38
0
    def test_set_methods(self):
        """Verify the form chosen implicitly by each ``set_input*`` method."""
        # pylint: disable=no-member,protected-access
        grab = build_grab()
        self.server.response['get.data'] = FORMS_HTML
        grab.go(self.server.get_url())
        document = grab.doc

        def selected_form_id():
            # id attribute of the form currently remembered by the document
            return document._lxml_form.get('id')

        # Nothing is selected before the first set_* call
        self.assertEqual(document._lxml_form, None)

        document.set_input('gender', '1')
        self.assertEqual('common_form', selected_form_id())

        # 'query' cannot be set at this point: KeyError is expected
        self.assertRaises(KeyError, document.set_input, 'query', 'asdf')

        # Forget the selection, then set a field by its element id
        document._lxml_form = None
        document.set_input_by_id('search_box', 'asdf')
        self.assertEqual('search_form', selected_form_id())

        document.choose_form(xpath='//form[@id="common_form"]')
        document.set_input_by_number(0, 'asdf')

        # Forget the selection, then set a field located by xpath
        document._lxml_form = None
        document.set_input_by_xpath('//*[@name="gender"]', '2')
        self.assertEqual('common_form', selected_form_id())
Пример #39
0
    def test_assign_unicode_to_body(self):
        # Byte strings can be assigned to the document body repeatedly,
        # while a unicode string must be rejected with GrabMisuseError.
        grab = build_grab()
        for payload in (b'abc', b'def'):
            grab.doc.body = payload

        with self.assertRaises(GrabMisuseError):
            grab.doc.body = u'Спутник'
Пример #40
0
 def test_body_maxsize(self):
     # The server offers 1024 * 1024 characters while body_maxsize is
     # set to 100; the stored body must stay below the 50000 bound.
     grab = build_grab()
     grab.setup(body_maxsize=100)
     payload = 'x' * (1024 * 1024)
     self.server.response['get.data'] = payload
     grab.go(self.server.get_url())
     body_size = len(grab.doc.body)
     self.assertTrue(body_size < 50000)
Пример #41
0
    def test_redirect_session(self):
        # First request: the test server sets cookie foo=bar and it must
        # be readable from the cookies of the resulting document.
        grab = build_grab()
        self.server.response['cookies'] = {'foo': 'bar'}.items()
        grab.go(self.server.get_url())
        self.assertEqual(grab.doc.cookies['foo'], 'bar')

        # Second request with a fresh Grab object: a one-time 302 response
        # sets the cookie and points back to the same URL. The request
        # recorded by the server afterwards must carry that cookie.
        grab = build_grab()
        self.server.response['cookies'] = {}
        one_time_headers = [
            ('Location', self.server.get_url()),
            ('Set-Cookie', 'foo=bar'),
        ]
        self.server.response_once['headers'] = one_time_headers
        self.server.response_once['code'] = 302
        grab.go(self.server.get_url())
        received_cookie = self.server.request['cookies']['foo']
        self.assertEqual(received_cookie['value'], 'bar')
Пример #42
0
    def test_cookiefile(self):
        # Cookies read from the `cookiefile` option must be merged with the
        # cookies set by the server, both in grab.cookies and in the file.
        with temp_file() as tmp_file:
            grab = build_grab()

            # Seed the cookie file with one cookie for the test server.
            seed_cookies = [{
                'name': 'spam',
                'value': 'ham',
                'domain': self.server.address
            }]
            with open(tmp_file, 'w') as out:
                json.dump(seed_cookies, out)

            # One cookie is sent in the server response,
            # the other one is passed via the `cookiefile` option.
            self.server.response['cookies'] = {'godzilla': 'monkey'}.items()
            grab.setup(cookiefile=tmp_file, debug=True)
            grab.go(self.server.get_url())
            sent_spam = self.server.request['cookies']['spam']
            self.assertEqual(sent_spam['value'], 'ham')

            # The correct result of combining the two cookies.
            expected = {('godzilla', 'monkey'), ('spam', 'ham')}

            # grab.cookies should contain the merged cookies.
            self.assertEqual(expected, set(grab.cookies.items()))

            # The `cookiefile` file should contain the merged cookies too.
            with open(tmp_file) as inp:
                stored = {(item['name'], item['value'])
                          for item in json.load(inp)}
            self.assertEqual(expected, stored)

            # Just ensure one more request works.
            grab.go(self.server.get_url())
Пример #43
0
    def test_request_counter(self):
        # After a reset, the request counter must grow by one per request,
        # including requests made by other Grab objects in other threads.
        import threading

        reset_request_counter()
        grab = build_grab()
        for expected_count in (1, 2):
            grab.go(self.server.get_url())
            self.assertEqual(grab.request_counter, expected_count)

        def fetch_once():
            # Each worker uses its own Grab object for a single request.
            worker_grab = build_grab()
            worker_grab.go(self.server.get_url())

        # Make 10 requests in concurrent threads
        workers = [threading.Thread(target=fetch_once)
                   for _ in six.moves.range(10)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        # 2 requests above + 10 threaded ones + this one = 13.
        grab.go(self.server.get_url())
        self.assertEqual(grab.request_counter, 13)
Пример #44
0
 def test_find_link(self):
     # find_link with a bytes pattern must return the matching href,
     # None when nothing matches, and must reject a unicode pattern.
     html = b'''<a href="http://ya.ru/">ya.ru</a>'''
     grab = build_grab(html)

     found = grab.find_link(b'ya.ru', make_absolute=True)
     self.assertEqual('http://ya.ru/', found)

     missing = grab.find_link(b'google.ru', make_absolute=True)
     self.assertEqual(None, missing)

     with self.assertRaises(GrabMisuseError):
         grab.find_link(u'asdf')
Пример #45
0
    def test_original_exceptions_grab(self):
        # A request to a non-routable address must fail with a
        # GrabNetworkError whose `original_exc` is the pycurl error.
        import pycurl

        grab = build_grab()
        # assertRaises makes the test fail when no error is raised at all;
        # a plain try/except would let that case pass unnoticed.
        with self.assertRaises(GrabNetworkError) as ctx:
            grab.go('http://%s' % NON_ROUTABLE_IP)
        self.assertIsInstance(ctx.exception.original_exc, pycurl.error)
Пример #46
0
    def test_original_exceptions_urllib2(self):
        # A request to a non-routable address must fail with a
        # GrabNetworkError whose `original_exc` is urllib3's
        # ConnectTimeoutError.
        from urllib3.exceptions import ConnectTimeoutError

        grab = build_grab()
        # assertRaises makes the test fail when no error is raised at all;
        # a plain try/except would let that case pass unnoticed.
        with self.assertRaises(GrabNetworkError) as ctx:
            grab.go('http://%s' % NON_ROUTABLE_IP)
        self.assertIsInstance(ctx.exception.original_exc,
                              ConnectTimeoutError)
Пример #47
0
 def test_put(self):
     # A 3-byte body sent with method='put' must be recorded by the
     # server as a PUT request with content-length '3'.
     grab = build_grab()
     options = dict(post=b'abc', url=self.server.get_url(),
                    method='put', debug=True)
     grab.setup(**options)
     self.server.request['debug'] = True
     grab.request()

     recorded = self.server.request
     self.assertEqual(recorded['method'], 'PUT')
     self.assertEqual(recorded['headers']['content-length'], '3')
Пример #48
0
 def test_choose_form_by_element(self):
     # Choosing a form through the XPath of one of its elements must
     # make the second form (holding the "bar" input) the document form.
     grab = build_grab(b'''
     <form><input name="foo"></form>
     <form><input name="bar"></form>
     ''')
     grab.choose_form_by_element('//input[@name="bar"]')
     second_form = grab.doc('//form[2]').node()
     self.assertEqual(second_form, grab.doc.form)
Пример #49
0
    def test_empty_useragent_pycurl(self):
        # With user_agent set to an empty string the server must see no
        # User-Agent header, or an empty one.
        grab = build_grab()

        # Empty string disables the default pycurl user-agent
        grab.setup(user_agent='')
        grab.go(self.server.get_url())

        received_headers = self.server.request['headers']
        self.assertEqual(received_headers.get('user-agent', ''), '')
Пример #50
0
    def test_pyquery_handler(self):
        # The pyquery accessor of the document must return the text of
        # the <h1> element from the served page.
        page = '<body><h1>Hello world</h1><footer>2014</footer>'
        self.server.response['get.data'] = page
        grab = build_grab()
        grab.go(self.server.get_url())

        heading = grab.doc.pyquery('h1')
        self.assertEqual(heading.text(), 'Hello world')
Пример #51
0
    def test_redirect_session(self):
        # Step 1: a cookie configured on the test server must appear in
        # the cookies of the fetched document.
        grab = build_grab()
        server_cookies = {'foo': 'bar'}
        self.server.response['cookies'] = server_cookies.items()
        grab.go(self.server.get_url())
        self.assertEqual(grab.doc.cookies['foo'], 'bar')

        # Step 2: with a new Grab object, serve a one-time 302 that sets
        # the cookie and points to the same URL; the request recorded by
        # the server afterwards must include the cookie.
        grab = build_grab()
        self.server.response['cookies'] = {}
        self.server.response_once['headers'] = [
            ('Location', self.server.get_url()),
            ('Set-Cookie', 'foo=bar'),
        ]
        self.server.response_once['code'] = 302
        grab.go(self.server.get_url())
        request_cookies = self.server.request['cookies']
        self.assertEqual(request_cookies['foo']['value'], 'bar')
Пример #52
0
 def test_post_multivalue_key(self):
     # A list value in the post data must be sent as a repeated key.
     grab = build_grab()
     post_items = [('foo', [1, 2])]
     grab.setup(post=post_items)
     grab.go(self.server.get_url())
     sent_body = self.server.request['data']
     self.assertEqual(sent_body, b'foo=1&foo=2')
Пример #53
0
 def test_attribute_exception(self):
     # grab.exception starts as None and must hold a GrabNetworkError
     # after a request to a non-routable address.
     grab = build_grab()
     no_error_yet = grab.exception is None
     self.assertTrue(no_error_yet)

     unreachable_url = 'http://%s' % NON_ROUTABLE_IP
     try:
         grab.go(unreachable_url)
     except GrabNetworkError:
         # The error itself is not under test here, only the attribute.
         pass

     stored_error = grab.exception
     self.assertTrue(isinstance(stored_error, GrabNetworkError))
Пример #54
0
 def test_cookiefile_empty(self):
     # Pointing `cookiefile` at an empty file must not raise.
     with temp_file() as tmp_file:
         grab = build_grab()
         # Write nothing, leaving the cookie file empty.
         with open(tmp_file, 'w') as handle:
             handle.write('')
         options = {'cookiefile': tmp_file}
         grab.setup(**options)
         grab.go(self.server.get_url())