Пример #1
0
class ImpMakerParser(Parser):
    """Parser that requests a URL one or more times, optionally via a proxy.

    Each ``parse`` call builds a fresh ``MechanizeOpener`` with a random
    user agent, so an opener passed to the constructor is replaced as soon
    as ``parse`` runs.
    """

    def __init__(self, opener=None, url=None, **kw):
        """Forward all arguments to the base parser and keep the logger.

        ``logger`` is read from the keyword arguments and is ``None`` when
        the caller does not supply one.
        """
        super(ImpMakerParser, self).__init__(opener=opener, url=url, **kw)
        self.logger = kw.get('logger')

    def parse(self, url=None):
        """Request ``url`` (or ``self.url`` when ``url`` is falsy).

        URLs without the substring ``'click'`` are always requested, a
        random 2-4 times. URLs containing ``'click'`` are requested once,
        and only when a 0-100 random roll comes out at 5 or lower.

        Returns the URL that was (or would have been) requested.
        """
        url = url or self.url
        is_click = 'click' in url
        # The repeat count is drawn before the opener is built so the
        # sequence of random calls stays: randrange, user agent, randint.
        repeat = 1 if is_click else random.randrange(2, 5)

        self.opener = MechanizeOpener(user_agent=random_user_agent())
        roll = random.randint(0, 100)
        if is_click and roll > 5:
            # Most click URLs are skipped without any request being made.
            return url

        # Route through a proxy when one is available; otherwise go direct.
        proxy = get_ip_proxy()
        if proxy:
            self.opener.remove_proxy()
            self.opener.add_proxy(proxy)

        for _ in range(repeat):
            # The response body is not used; only the request matters here.
            self.opener.open(url)
            time.sleep(.1)

        return url
Пример #2
0
 def testMechanizeOpener(self):
     """Check MechanizeOpener.open and browse_open against the Baidu home page.

     NOTE(review): presumably performs live HTTP requests -- confirm before
     running in an offline environment.
     """
     target = 'http://www.baidu.com'
     fetcher = MechanizeOpener()

     # The value returned by open() must contain the site name.
     page = fetcher.open(target)
     assert 'baidu' in page

     # browse_open() yields an object exposing title() and response().
     browser = fetcher.browse_open(target)
     title = browser.title()
     assert u'百度' in title
     body = browser.response().read()
     assert 'baidu' in body
Пример #3
0
 def testMechanizeOpener(self):
     """Exercise both fetch paths of MechanizeOpener on the Baidu home page.

     NOTE(review): looks like this needs network access -- confirm.
     """
     home = 'http://www.baidu.com'
     mech = MechanizeOpener()

     # Direct fetch: the returned content should mention 'baidu'.
     content = mech.open(home)
     assert 'baidu' in content

     # Browser-style fetch: check the page title first, then the raw response.
     session = mech.browse_open(home)
     page_title = session.title()
     assert u'百度' in page_title
     raw = session.response().read()
     assert 'baidu' in raw
Пример #4
0
    def testMechanizeOpener(self):
        """Verify MechanizeOpener.open and browse_open using the Baidu home page.

        NOTE(review): presumably issues real HTTP requests -- confirm.
        """
        site = "http://www.baidu.com"
        client = MechanizeOpener()

        # open() must hand back content containing the site name.
        fetched = client.open(site)
        assert "baidu" in fetched

        # browse_open() must expose a title and a readable response.
        browser = client.browse_open(site)
        heading = browser.title()
        assert "百度" in heading
        payload = browser.response().read()
        assert "baidu" in payload