class ImpMakerParser(Parser):
    """Parser that requests a URL one or more times through a fresh opener.

    The fetched content is never inspected; ``parse`` only issues the
    requests and hands the URL back to the caller.
    """

    def __init__(self, opener=None, url=None, **kw):
        """Initialise the base parser and keep the optional ``logger`` kwarg.

        Args:
            opener: Forwarded to the base ``Parser`` constructor.
            url: Forwarded to the base ``Parser`` constructor.
            **kw: Extra keyword arguments forwarded to the base class; the
                ``'logger'`` entry, if any, is stored on ``self.logger``.
        """
        super(ImpMakerParser, self).__init__(opener=opener, url=url, **kw)
        self.logger = kw.get('logger')

    def parse(self, url=None):
        """Open ``url`` (falling back to ``self.url``) and return it.

        URLs that do not contain ``'click'`` are requested 2, 3 or 4 times
        (chosen at random); URLs containing ``'click'`` are requested once.

        Args:
            url: Address to request. When falsy, ``self.url`` is used.

        Returns:
            The URL that was requested.
        """
        url = url or self.url
        is_click = 'click' in url

        # randrange(2, 5) excludes the upper bound, so this yields 2..4.
        repeat = 1 if is_click else random.randrange(2, 5)

        # Every call replaces the current opener with a new one that carries
        # a randomly chosen user agent.
        self.opener = MechanizeOpener(user_agent=random_user_agent())

        # The roll is drawn unconditionally, even when the URL alone already
        # decides that a proxy lookup will happen.
        roll = random.randint(0, 100)
        if not is_click or roll <= 5:
            # Presumably get_ip_proxy() returns a proxy address or a falsy
            # value when none is available -- confirm against its definition.
            proxy = get_ip_proxy()
            if proxy:
                self.opener.remove_proxy()
                self.opener.add_proxy(proxy)

        for _ in range(repeat):
            # The response body is intentionally discarded.
            self.opener.open(url)
            time.sleep(.1)

        return url
def testMechanizeOpener(self):
    """Smoke-test ``MechanizeOpener`` against the Baidu home page.

    Checks both the plain ``open`` call and the browser object returned by
    ``browse_open``. Requires network access.
    """
    target = 'http://www.baidu.com'
    fetcher = MechanizeOpener()

    # Plain fetch: the returned content should mention the site name.
    content = fetcher.open(target)
    assert 'baidu' in content

    # Browser-style fetch: check the page title, then the raw response.
    browser = fetcher.browse_open(target)
    page_title = browser.title()
    assert u'百度' in page_title

    raw_body = browser.response().read()
    assert 'baidu' in raw_body
def testMechanizeOpener(self):
    """Smoke-test ``MechanizeOpener`` against the Baidu home page.

    Exercises ``open`` and ``browse_open`` on the same URL and checks that
    the expected site markers appear. Requires network access.
    """
    site = "http://www.baidu.com"
    mech = MechanizeOpener()

    # Direct open: the result should contain the site name.
    fetched = mech.open(site)
    assert "baidu" in fetched

    # Browser object: verify the title first, then the response payload.
    session = mech.browse_open(site)
    heading = session.title()
    assert "百度" in heading

    payload = session.response().read()
    assert "baidu" in payload