def test_constructor_meta_argument(self):
    """meta defaults to an empty dict and is stored verbatim when given."""
    request = Request()
    self.assertEqual(request.meta, {})

    request = Request(meta=1)
    self.assertEqual(request.meta, 1)

    request = Request(meta={'place': 'hell'})
    self.assertEqual(request.meta, {'place': 'hell'})
def task_generator(self):
    """Probe the same URL twice with different timeouts (ids 1 and 2)."""
    for req_id, timeout in ((1, 0.1), (2, 0.5)):
        yield Request('test', url=server.get_url(),
                      timeout=timeout, meta={'id': req_id})
def test_constructor_arguments(self):
    """Tag and URL may be given positionally or as keywords."""
    request = Request('TAG')
    self.assertEqual(request.tag, 'TAG')
    self.assertEqual(request.url, None)

    request = Request('TAG', 'URL')
    self.assertEqual(request.tag, 'TAG')
    self.assertEqual(request.url, 'URL')

    request = Request(url='URL', tag='TAG')
    self.assertEqual(request.tag, 'TAG')
    self.assertEqual(request.url, 'URL')
def click_button(self):
    """Start a search thread for every checked site and show its result tab.

    Reads the query from the search box, then walks the site checkboxes
    (capped by the number of known webs, at most 10) and spawns one search
    thread per checked box.
    """
    self.searchButton.setText(_translate("MainWindow", "搜索中..."))
    search_word = self.searchEdit.text().strip()
    print('点击了按钮,开始搜索:{}'.format(search_word))
    r = Request()
    tabtab = self.tabtab
    # Iterate the checkbox widgets directly instead of indexing by range();
    # the bound is the original min(len(self.webs), 10).
    limit = min(len(self.webs), 10)
    for box in self.checkBoxList[:limit]:
        if box.isChecked():
            print(box.text(), '开始搜索')
            queue = r.createSearchThread(box.text(), search_word)
            tabtab.add_ResultWidget_fuck(box.text(), queue)
def click_button(self):
    """Download the selected book, mirroring progress on the save button.

    Spawns the download via Request().createDownTxt and then busy-waits on
    the shared queue, pumping the Qt event loop so the UI stays responsive.
    Queue messages are (msg_id, chapter) pairs: 'over' ends the download,
    '404' marks failure, a value below -1 carries the remaining count.
    """
    self.saveButton.setText(_translate("MainWindow", "下载中..."))
    queue = Queue()
    print('aaaa' + self.web)
    Request().createDownTxt(self.web, self.book, queue)
    is_ok = True
    while True:
        if queue.empty():
            # Keep the GUI alive while the worker fills the queue.
            QApplication.processEvents()
            continue
        # Renamed from 'id', which shadowed the builtin id().
        msg_id, chapter = queue.get()
        if msg_id == 'over' or chapter == 'over':
            print(chapter)
            break
        if msg_id == '404':
            is_ok = False
            break
        if msg_id < -1:
            self.saveButton.setText(
                _translate("MainWindow", "剩余{}".format(-msg_id)))
        else:
            self.saveButton.setText(_translate("MainWindow", "正在保存.."))
    if is_ok:
        self.saveButton.setText(_translate("MainWindow", "下载完成"))
        QApplication.processEvents()
        time.sleep(5)
    else:
        self.saveButton.setText(_translate("MainWindow", "下载失败"))
        QApplication.processEvents()
        time.sleep(5)
        self.saveButton.setText(_translate("MainWindow", "重新下载"))
def task_generator(self):
    """Yield a 'page' request for every non-comment host in var/host.txt."""
    with open('var/host.txt') as inp:
        for line in inp:
            if line.startswith('#'):
                continue
            host = line.strip()
            yield Request('page', 'https://%s/' % host, meta={'host': host})
def test_single_request(self):
    """A single queued request delivers the server's body to the handler."""
    class SimpleCrawler(Crawler):
        data = {}

        def handler_test(self, req, res):
            self.data['response'] = res.body

    token = 'Python'
    self.server.response['data'] = token
    bot = SimpleCrawler()
    bot.add_task(Request('test', url=self.server.get_url()))
    bot.run()
    # assertEquals is a deprecated alias, removed in Python 3.12.
    self.assertEqual(token, bot.data['response'])
def test_required_data_not_found(self):
    """Raising RequiredDataNotFound retries the task; the retry succeeds."""
    class TestCrawler(Crawler):
        def init_hook(self):
            # First call fails, second succeeds.
            self.gen = iter([False, True])
            self.result = []

        def handler_test(self, req, res):
            if not next(self.gen):
                raise RequiredDataNotFound
            else:
                self.result.append('ok')

    bot = TestCrawler()
    bot.add_task(Request('test', url=self.server.get_url()))
    bot.run()
    # assertEquals is a deprecated alias, removed in Python 3.12.
    self.assertEqual('ok', bot.result[0])
def test_too_many_redirects(self):
    """An endless redirect loop is aborted and counted as a request error."""
    class TestCrawler(Crawler):
        def init_hook(self):
            self.data = None

        def handler_test(self, req, res):
            self.data = res.body

    # Every response redirects back to the same URL -> infinite loop.
    self.server.response['data'] = 'foo'
    self.server.response['code'] = 301
    self.server.response['headers'] = [('location', self.server.get_url())]
    bot = TestCrawler()
    bot.add_task(Request('test', url=self.server.get_url()))
    bot.run()
    # assertEquals is a deprecated alias, removed in Python 3.12.
    self.assertEqual(10, bot.stat.counters['network:request-error-test'])
def test_required_data_not_found_implicit(self):
    """A failed .require() retries the task; the second response satisfies it."""
    class TestCrawler(Crawler):
        def init_hook(self):
            self.gen = iter([False, True])
            self.result = []

        def handler_test(self, req, res):
            # No <h1> in the first response -> implicit retry.
            res.xpath('//h1').require()
            self.result.append('ok')

    self.server.response_once['data'] = '<h2>test</h2>'
    self.server.response['data'] = '<h1>test</h1>'
    bot = TestCrawler()
    bot.add_task(Request('test', url=self.server.get_url()))
    bot.run()
    # assertEquals is a deprecated alias, removed in Python 3.12.
    self.assertEqual('ok', bot.result[0])
    self.assertEqual(2, bot.stat.counters['network:request-ok-test'])
def test_handler_error(self):
    """An exception inside a handler is recorded in the 'handler_error' stat."""
    class SimpleCrawler(Crawler):
        data = {}

        def handler_test(self, req, res):
            1 / 0

    bot = SimpleCrawler()
    bot.add_task(Request('test', url=self.server.get_url()))
    bot.run()
    errors = bot.stat.items['handler_error']
    self.assertEqual(1, len(errors))
    expected = '%s|%s|%s|%s' % (
        'test',
        'ZeroDivisionError',
        'division by zero',
        self.server.get_url(),
    )
    self.assertEqual(errors[0], expected)
def test_redirect_by_default(self):
    """A single 301 is followed transparently; the handler sees the final body."""
    class TestCrawler(Crawler):
        def init_hook(self):
            self.data = None

        def handler_test(self, req, res):
            self.data = res.body

    # First response redirects once, then the regular response serves 'bar'.
    self.server.response_once['data'] = 'foo'
    self.server.response_once['code'] = 301
    self.server.response_once['headers'] = [('location', self.server.get_url())]
    self.server.response['data'] = 'bar'
    bot = TestCrawler()
    bot.add_task(Request('test', url=self.server.get_url()))
    bot.run()
    # assertEquals is a deprecated alias, removed in Python 3.12.
    self.assertEqual(b'bar', bot.data)
def task_generator(self):
    """Emit one 'test' request per URL still to be processed."""
    for pending in self._urls_todo:
        yield Request('test', url=pending)
def check_url(self, url):
    """Wrap the URL in a Request and delegate to check_request."""
    request = Request(url)
    yield from self.check_request(request)
def task_generator(self):
    """Schedule ten identical 'test' requests against the test server."""
    for _ in range(10):
        yield Request('test', url=server.get_url())
def handler_page(self, req, res):
    """Follow up the fetched page with a proxied 'page2' request."""
    proxied = Request('page2', url=server.get_url(),
                      proxy=proxy_server.address())
    yield proxied
def task_generator(self):
    """Rewrite the proxy-list file, let it be picked up, then fetch a page."""
    with open(pl_file, 'w') as out:
        out.write(proxy_server2)
    # Give the proxy-list reloader a moment to notice the new file content.
    time.sleep(0.5)
    yield Request('page', url=server.get_url())
def handler_test2(self, req, res):
    """Record this request's id, then chain a 'test3' request with id=3."""
    self.points.append(req.meta['id'])
    follow_up = Request('test3', url=server.get_url(), meta={'id': 3})
    yield follow_up
def task_generator(self):
    """Generate 2000 distinct page URLs against the local server."""
    url_template = 'http://127.0.0.1/awesome_python.html?%d'
    for seq in range(2000):
        yield Request('page', url=url_template % seq)
def task_generator(self):
    """Yield a 'page' request for each configured host (currently one)."""
    hosts = ['google.com']
    for raw in hosts:
        cleaned = raw.strip()
        if cleaned:
            yield Request('http://%s/' % cleaned, tag='page')
def task_generator(self):
    """Yield one request, then block until the test thread unfreezes us."""
    request = Request('page', url=server.get_url())
    yield request
    self.unfreeze.wait()
def task_generator(self):
    """Seed the crawler with a single 'page' request."""
    request = Request('page', url=server.get_url())
    yield request
def task_generator(self):
    """Emit 'page' requests forever, pausing 0.1s between yields."""
    delay = 0.1
    while True:
        yield Request('page', url=server.get_url())
        time.sleep(delay)
def task_generator(self):
    """Seed the crawler with a single 'test' request tagged id=1."""
    request = Request('test', url=server.get_url(), meta={'id': 1})
    yield request
def handler_page(self, req, res):
    """Re-enqueue another 'page' request after each response."""
    follow_up = Request('page', url=server.get_url())
    yield follow_up
def task_generator(self):
    """Queue one request whose callback is bound explicitly."""
    request = Request('test', url=server.get_url(),
                      callback=self.handler_test)
    yield request
def task_generator(self):
    """Yield a 'page' request for each of the first 100 non-empty hosts.

    Reads docs/domains.txt inside a context manager so the file handle is
    closed deterministically when the generator finishes (the original
    left the handle open for the GC to collect).
    """
    with open('docs/domains.txt') as fh:
        for line in islice(fh, 100):
            host = line.strip()
            if host:
                yield Request('http://%s/' % host, tag='page')
def task_generator(self):
    """Yield as many 'page' requests as the 'num_req' meta option asks for."""
    total = self._meta['num_req']
    for _ in range(total):
        yield Request('page', 'http://127.0.0.1/awesome_python.html')