Пример #1
0
    async def on_start(self):
        """Create a cookies manager and launch the browser client.

        Both ``aninja`` entry points are looked up through ``check_import``
        inside this method rather than at module level. Launch options come
        from the crawler config key ``LAUNCH_OPTIONS`` (empty dict when the
        key is absent).

        Sets ``self.cookies_manager`` and ``self.client``.
        """
        launch_browser = check_import("aninja.browser").launch
        cookies_manager_cls = check_import("aninja.cookies").CookiesManager
        launch_options = self.crawler.config.get("LAUNCH_OPTIONS", {})

        manager = cookies_manager_cls()
        self.cookies_manager = manager
        # The same manager instance is shared with the launched client.
        self.client = await launch_browser(
            cookies_manager=manager,
            options=launch_options,
        )
Пример #2
0
    async def on_start(self):
        """Start the optional Redis and web components of the crawler.

        Reads the ``redis_enable`` / ``web_enable`` flags from the crawler
        and stores them as ``self.do_redis`` / ``self.do_web``.

        When Redis is enabled, a pool is created from the ``REDIS_ADDRESS``
        config value, shared with the crawler as ``crawler.redis``, and the
        crawler's counter is replaced by a ``RedisCounter`` bound to the
        same pool. When the web service is enabled, it is started and its
        runner is kept in ``self.web_runner``.
        """
        self.redis = None
        crawler = self.crawler
        use_redis = crawler.redis_enable
        use_web = crawler.web_enable
        self.do_redis = use_redis
        self.do_web = use_web

        if use_redis:
            redis_lib = check_import("aioredis")
            redis_address = crawler.config.get("REDIS_ADDRESS")
            pool = await redis_lib.create_redis_pool(address=redis_address)
            self.redis = pool
            crawler.redis = pool
            # The counter is attached to the crawler first, then given the pool.
            counter = RedisCounter(crawler)
            crawler.counter = counter
            counter.redis = pool

        if use_web:
            web_module = check_import("acrawler.web")
            self.web_runner = await web_module.runweb(crawler)
Пример #3
0
 async def on_start(self):
     """Open the Redis connection pool used by this handler.

     ``self.items_key`` is overridden by the crawler config value
     ``REDIS_ITEMS_KEY`` when present; otherwise it keeps its current
     value. The pool is created for ``self.address`` with
     ``self.maxsize`` connections on the crawler's event loop and is
     stored in ``self.redis``.
     """
     redis_lib = check_import("aioredis")
     crawler = self.crawler
     self.items_key = crawler.config.get("REDIS_ITEMS_KEY", self.items_key)

     address = self.address
     pool_options = {"maxsize": self.maxsize, "loop": crawler.loop}
     self.redis = await redis_lib.create_redis_pool(address, **pool_options)
     # Logged after the pool exists so the message can include it.
     logger.info(f"Connecting to Redis... {self.redis}")
Пример #4
0
    def to_mongo(
        self,
        db,
        col,
        key=None,
        priority=100,
        address="mongodb://localhost:27017",
    ):
        """Attach a callback that stores items in a MongoDB collection.

        Args:
            db: Name of the database to write to.
            col: Name of the collection inside ``db``.
            key: Optional item field. When truthy, documents whose ``key``
                field equals ``item[key]`` are updated with the item
                content (upserting if none match). When falsy, the item
                content is inserted as a new document.
            priority: Passed through to ``register`` as ``priority``.
            address: MongoDB connection URI.

        Returns:
            ``self``, so calls can be chained.
        """
        motor_asyncio = check_import("motor.motor_asyncio")
        collection = motor_asyncio.AsyncIOMotorClient(address)[db][col]

        async def quick_to_mongo(item):
            # Without a key there is nothing to match on: plain insert.
            if not key:
                await collection.insert_one(item.content)
                return
            selector = {key: item[key]}
            changes = {"$set": item.content}
            await collection.update_many(selector, changes, upsert=True)

        # Register the callback for this object's primary family.
        bind = register(self.primary_family, priority=priority)
        bind(quick_to_mongo)
        return self
Пример #5
0
import traceback

import aiohttp
from multidict import CIMultiDict
from parselx import SelectorX
from yarl import URL

from acrawler.task import Task
from acrawler.utils import (
    check_import,
    make_text_links_absolute,
    open_html,
    to_asyncgen,
)

# Optional third-party packages, resolved through check_import with
# allow_import_error=True.
# NOTE(review): presumably a missing package is tolerated at import time and
# only matters when the feature is used — confirm in acrawler.utils.
aiofiles = check_import("aiofiles", allow_import_error=True)
pyquery = check_import("pyquery", allow_import_error=True)


# Private type aliases.
# _Function: any callable.
_Function = Callable
# _Functions: a single callable or a list of callables.
_Functions = Union[_Function, List[_Function]]
# _History: a list of aiohttp client responses.
_History = List["aiohttp.ClientResponse"]
# _TaskGenerator: an async generator that yields Task objects.
_TaskGenerator = AsyncGenerator["Task", None]
# _LooseURL: either a yarl URL or a plain string.
_LooseURL = Union[URL, str]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class Request(Task):
    """Request is a Task that execute :meth:`fetch` method.
Пример #6
0
 async def on_start(self):
     """Create the MongoDB client and resolve the target collection.

     Builds an ``AsyncIOMotorClient`` for ``self.address`` and stores the
     client, the database named ``self.db_name`` and the collection named
     ``self.col_name`` in ``self.client``, ``self.db`` and ``self.col``.
     """
     motor_asyncio = check_import("motor.motor_asyncio")

     client = motor_asyncio.AsyncIOMotorClient(self.address)
     self.client = client

     database = client[self.db_name]
     self.db = database

     self.col = database[self.col_name]
     logger.info(f"Connecting to MongoDB... {self.col}")
Пример #7
0
 async def start(self):
     """Create a Redis connection pool for ``self.address``.

     The pool is stored in ``self.redis``.
     """
     redis_lib = check_import("aioredis")
     pool = await redis_lib.create_redis_pool(self.address)
     self.redis = pool