async def main() -> None:
    """Run a StaticSpider on quotes.toscrape.com and print what it saved.

    The spider is configured to write its items to ``backup.mp`` next to this
    file, passing each item through ``date_processor``. After the run, the
    spider statistics are printed, followed by every item read back from the
    backup file with the decoder taken from the spider's configuration.
    """
    backup_path = Path(__file__).parent / 'backup.mp'
    settings = Configuration(
        backup_filename=str(backup_path),
        item_processors=[date_processor],
    )
    crawler = StaticSpider(
        urls=['http://quotes.toscrape.com'],
        parse=parse,
        config=settings,
    )

    await crawler.run()
    print(crawler.statistics())

    # From here on the saved items are available for any further processing.
    saved_items = read_mp(backup_path, decoder=crawler.config.msgpack_decoder)
    async for saved in saved_items:
        print(saved)
async def main() -> None:
    """Run a SeleniumSpider on quotes.toscrape.com and print what it saved.

    Items are written to ``backup.mp`` next to this file after going through
    ``date_processor``; the Selenium driver log file setting is ``None``.
    After the run, the spider statistics are printed, followed by every
    record read back from the backup file using ``datetime_decoder``.
    """
    backup_path = Path(__file__).parent / 'backup.mp'
    settings = Configuration(
        selenium_driver_log_file=None,
        backup_filename=str(backup_path),
        item_processors=[date_processor],
    )
    crawler = SeleniumSpider(
        urls=['http://quotes.toscrape.com'],
        parse=parse,
        config=settings,
    )

    await crawler.run()
    print(crawler.statistics())

    # From here on the saved items are available for any further processing.
    records = read_mp(filename=backup_path, decoder=datetime_decoder)
    async for record in records:
        print(record)
async def main() -> None:
    """Run a SeleniumSpider on httpbin.org and pretty-print what it saved.

    Items are written to ``backup.mp`` next to this file after going through
    ``date_processor``. After the run, the spider statistics are printed; then
    each record read back from the backup file is printed as a title, a
    description and the list of its operations (method, path, description).
    """
    backup_path = Path(__file__).parent / 'backup.mp'
    settings = Configuration(
        selenium_driver_log_file=None,
        backup_filename=str(backup_path),
        item_processors=[date_processor],
    )
    crawler = SeleniumSpider(
        urls=['http://httpbin.org/'],
        parse=parse,
        config=settings,
    )

    await crawler.run()
    print(crawler.statistics())

    # From here on the saved items are available for any further processing.
    records = read_mp(filename=backup_path, decoder=datetime_decoder)
    async for section in records:
        print('****', section['title'], '****')
        print(section['description'])
        print('== operations ==')
        for op in section['operations']:
            print('\tmethod:', op['method'])
            print('\tpath:', op['path'])
            # Blank line after each operation to keep the output readable.
            print('\tdescription:', op['description'], end='\n\n')
# NOTE(review): the statements down to `response.follow(next_link)` are the tail of a
# function whose header is outside this view; the indentation is presumed - confirm.
    next_link = None
    try:
        # Look for the anchor inside the "next" list item of the nav element.
        element = response.driver.find_element_by_xpath(
            '//nav/ul/li[@class="next"]/a')
        next_link = element.get_attribute('href')
    except NoSuchElementException:
        # No such anchor was found: next_link stays None and nothing is followed.
        pass

    if next_link is not None:
        response.follow(next_link)


def date_processor(item: dict) -> dict:
    """Store ``datetime.now()`` under the ``'date'`` key of *item*.

    The dict is modified in place and the same object is returned.
    """
    item['date'] = datetime.now()
    return item


if __name__ == '__main__':
    # The backup file is placed in the same directory as this script.
    backup = Path(__file__).parent / 'backup.mp'
    config = Configuration(selenium_driver_log_file=None, backup_filename=f'{backup}', item_processors=[date_processor])
    sel_spider = SeleniumSpider(urls=['http://quotes.toscrape.com'], parse=parse, config=config)
    # NOTE(review): run() and read_mp() are used without await / async for here;
    # presumably this targets a synchronous variant of the API - confirm.
    sel_spider.run()
    print(sel_spider.statistics())
    # you can do whatever you want with the results
    for quote_data in read_mp(filename=backup, decoder=datetime_decoder):
        print(quote_data)
# NOTE(review): everything down to `response.follow(next_link)` is the tail of a dict
# literal and of a function opened outside this view; the indentation is presumed - confirm.
            'message': quote.xpath('./span[@class="text"]/text()').get(),
            'author': quote.xpath('./span/small/text()').get(),
            'tags': quote.xpath('./div/a/text()').getall(),
        }
        spider.save_item(data)

    # Read the href of the anchor inside the "next" list item of the nav element.
    next_link = response.xpath('//nav/ul/li[@class="next"]/a').xpath(
        '@href').get()
    # Only follow a link when one was found.
    if next_link is not None:
        response.follow(next_link)


def date_processor(item: dict) -> dict:
    """Store ``datetime.now()`` under the ``'date'`` key of *item*.

    The dict is modified in place and the same object is returned.
    """
    item['date'] = datetime.now()
    return item


if __name__ == '__main__':
    # The backup file is placed in the same directory as this script.
    backup = Path(__file__).parent / 'backup.mp'
    config = Configuration(backup_filename=f'{backup}', item_processors=[date_processor])
    spider = StaticSpider(urls=['http://quotes.toscrape.com'], parse=parse, config=config)
    # NOTE(review): run() and read_mp() are used without await / async for here;
    # presumably this targets a synchronous variant of the API - confirm.
    spider.run()
    print(spider.statistics())
    # you can do whatever you want with the results
    for quote_data in read_mp(filename=backup, decoder=spider.config.msgpack_decoder):
        print(quote_data)