async def on_start(self):
    """Start the browser client together with a fresh cookies manager.

    ``launch`` and ``CookiesManager`` are resolved at call time through
    ``check_import`` from ``aninja.browser`` and ``aninja.cookies``.
    Launch options come from the crawler config key ``LAUNCH_OPTIONS``
    (an empty dict when the key is absent).

    Sets ``self.cookies_manager`` and ``self.client``.
    """
    start_browser = check_import("aninja.browser").launch
    manager_cls = check_import("aninja.cookies").CookiesManager

    launch_options = self.crawler.config.get("LAUNCH_OPTIONS", {})

    # The manager is stored on the instance before launching so the same
    # object is the one handed to the client.
    self.cookies_manager = manager_cls()
    self.client = await start_browser(
        cookies_manager=self.cookies_manager,
        options=launch_options,
    )
async def on_start(self):
    """Set up the optional Redis pool and web runner for the crawler.

    Copies the crawler's ``redis_enable`` / ``web_enable`` flags onto
    ``self.do_redis`` / ``self.do_web``.

    * With Redis enabled, a pool is created at the ``REDIS_ADDRESS``
      config value, stored on both this object and the crawler, and the
      crawler's counter is replaced by a ``RedisCounter`` bound to it.
    * With web enabled, ``acrawler.web.runweb`` is awaited with the
      crawler and its result is kept in ``self.web_runner``.

    ``self.redis`` stays ``None`` when Redis is disabled.
    """
    crawler = self.crawler

    self.redis = None
    self.do_redis = crawler.redis_enable
    self.do_web = crawler.web_enable

    if self.do_redis:
        redis_lib = check_import("aioredis")
        redis_address = crawler.config.get("REDIS_ADDRESS")
        self.redis = await redis_lib.create_redis_pool(address=redis_address)

        # Share the pool with the crawler and switch it to a Redis counter.
        crawler.redis = self.redis
        crawler.counter = RedisCounter(crawler)
        crawler.counter.redis = self.redis

    if self.do_web:
        web_module = check_import("acrawler.web")
        self.web_runner = await web_module.runweb(crawler)
async def on_start(self):
    """Create the Redis pool used by this object.

    ``self.items_key`` is refreshed from the crawler config key
    ``REDIS_ITEMS_KEY``, keeping its current value when the key is
    absent. The pool is created for ``self.address`` with
    ``self.maxsize`` and the crawler's event loop, stored in
    ``self.redis``, and then logged.
    """
    redis_lib = check_import("aioredis")
    crawler = self.crawler

    self.items_key = crawler.config.get("REDIS_ITEMS_KEY", self.items_key)

    pool_options = {"maxsize": self.maxsize, "loop": crawler.loop}
    self.redis = await redis_lib.create_redis_pool(self.address, **pool_options)

    message = f"Connecting to Redis... {self.redis}"
    logger.info(message)
def to_mongo(
    self,
    db,
    col,
    key=None,
    priority=100,
    address="mongodb://localhost:27017",
):
    """Register a callback that writes items into a MongoDB collection.

    Args:
        db: Name of the database to index on the Motor client.
        col: Name of the collection to index on that database.
        key: Optional item field. When truthy, documents matching
            ``{key: item[key]}`` are updated with ``$set`` of the item's
            content (``upsert=True``); otherwise the content is inserted.
        priority: Passed through to ``register`` as ``priority``.
        address: MongoDB connection string for ``AsyncIOMotorClient``.

    Returns:
        ``self``, so calls can be chained.
    """
    motor = check_import("motor.motor_asyncio")
    collection = motor.AsyncIOMotorClient(address)[db][col]

    async def quick_to_mongo(item):
        # No key configured: every item becomes a new document.
        if not key:
            await collection.insert_one(item.content)
            return

        selector = {key: item[key]}
        await collection.update_many(
            selector, {"$set": item.content}, upsert=True
        )

    attach = register(self.primary_family, priority=priority)
    attach(quick_to_mongo)
    return self
import traceback import aiohttp from multidict import CIMultiDict from parselx import SelectorX from yarl import URL from acrawler.task import Task from acrawler.utils import ( check_import, make_text_links_absolute, open_html, to_asyncgen, ) aiofiles = check_import("aiofiles", allow_import_error=True) pyquery = check_import("pyquery", allow_import_error=True) _Function = Callable _Functions = Union[_Function, List[_Function]] _History = List["aiohttp.ClientResponse"] _TaskGenerator = AsyncGenerator["Task", None] _LooseURL = Union[URL, str] logger = logging.getLogger(__name__) class Request(Task): """Request is a Task that execute :meth:`fetch` method.
async def on_start(self):
    """Create the Motor client and resolve the target collection.

    Builds an ``AsyncIOMotorClient`` for ``self.address``, then indexes
    it by ``self.db_name`` and ``self.col_name``. The client, database
    and collection are stored in ``self.client``, ``self.db`` and
    ``self.col``, and the collection is logged.
    """
    motor = check_import("motor.motor_asyncio")

    # Each attribute is assigned as soon as its value exists, so a
    # failure on a later step leaves the earlier ones populated.
    self.client = client = motor.AsyncIOMotorClient(self.address)
    self.db = database = client[self.db_name]
    self.col = database[self.col_name]

    message = f"Connecting to MongoDB... {self.col}"
    logger.info(message)
async def start(self):
    """Create a Redis pool for ``self.address`` and keep it in ``self.redis``."""
    open_pool = check_import("aioredis").create_redis_pool
    self.redis = await open_pool(self.address)