Example #1
0
File: irc.py — Project: JarryShaw/darc
    def loader(timestamp: 'datetime', driver: 'Driver', link: 'darc_link.Link') -> 'NoReturn':  # pylint: disable=unused-argument
        """Placeholder loader hook (not implemented).

        IRC links are not loaded through a web driver; the hook aborts
        processing unconditionally.

        Raises:
            LinkNoReturn: This hook is not implemented.

        """
        raise LinkNoReturn(link=link)
Example #2
0
    def loader(timestamp: typing.Datetime, driver: typing.Driver, link: Link) -> typing.NoReturn:  # pylint: disable=unused-argument
        """Placeholder loader hook (not implemented).

        The hook performs no loading work; it drops the link from
        further processing unconditionally.

        Raises:
            LinkNoReturn: This hook is not implemented.

        """
        raise LinkNoReturn(link=link)
Example #3
0
    def loader(cls, timestamp: typing.Datetime, driver: typing.Driver,
               link: Link) -> typing.NoReturn:
        """Market loader hook.

        The hook first looks up cached cookies for the link's host. When
        no record exists, the link is considered not applicable and is
        removed from the task queue.

        With a cached record available, every cookie is installed into the
        web driver before :meth:`~MarketSite.process_loader` is called with
        the :class:`WebDriver <selenium.webdriver.Chrome>` object to
        perform further processing.

        Afterwards, it pushes the homepage URL into the task queues and
        raises :exc:`LinkNoReturn` to stop any further processing.

        Args:
            timestamp: Timestamp of the worker node reference.
            driver (selenium.webdriver.Chrome): Web driver object with proxy settings.
            link: Link object to be loaded.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        cached = cls.get_cookies(link.host)
        if cached is None:
            # no cookies on record -> link not applicable, drop it
            raise LinkNoReturn(link=link)

        for name, value in cached['cookies'].items():
            driver.add_cookie({
                'name': name,
                'value': value,
            })

        cls.process_loader(timestamp, driver, link, cached)
        # drop=False keeps the link in the task queues for revisiting
        raise LinkNoReturn(link=link, drop=False)
Example #4
0
    def loader(timestamp: 'datetime', driver: 'Driver', link: 'darc_link.Link') -> 'Union[NoReturn, Driver]':  # pylint: disable=unused-argument
        """Loader hook for my site.

        This sample hook never returns a web driver object; it drops the
        link from further processing unconditionally.

        Args:
            timestamp: Timestamp of the worker node reference.
            driver (selenium.webdriver.Chrome): Web driver object with proxy settings.
            link: Link object to be loaded.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        raise LinkNoReturn(link=link)
Example #5
0
    def crawler(timestamp: 'datetime', session: 'Session', link: 'darc_link.Link') -> 'Union[NoReturn, Response]':  # pylint: disable=unused-argument
        """Crawler hook for my site.

        This sample hook never returns a response object; it drops the
        link from further processing unconditionally.

        Args:
            timestamp: Timestamp of the worker node reference.
            session: Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        raise LinkNoReturn(link=link)
Example #6
0
    def loader(timestamp: typing.Datetime, driver: typing.Driver, link: Link) -> typing.Union[typing.NoReturn, typing.Driver]:  # pylint: disable=unused-argument,line-too-long
        """Loader hook for my site.

        This sample hook never returns a web driver object; it drops the
        link from further processing unconditionally.

        Args:
            timestamp: Timestamp of the worker node reference.
            driver (selenium.webdriver.Chrome): Web driver object with proxy settings.
            link: Link object to be loaded.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        raise LinkNoReturn(link=link)
Example #7
0
    def crawler(timestamp: typing.Datetime, session: typing.Session, link: Link) -> typing.Union[typing.NoReturn, typing.Response]:  # pylint: disable=unused-argument,line-too-long
        """Crawler hook for my site.

        This sample hook never returns a response object; it drops the
        link from further processing unconditionally.

        Args:
            timestamp: Timestamp of the worker node reference.
            session: Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        raise LinkNoReturn(link=link)
Example #8
0
    def crawler(cls, timestamp: typing.Datetime, session: typing.Session,
                link: Link) -> typing.Response:
        """Default crawler hook.

        The hook first looks up cached cookies for the link's host. When
        no record exists, the link is considered not applicable and is
        removed from the task queue.

        With a cached record available, every cookie is installed into the
        session before :meth:`~MarketSite.process_crawler` is called with
        the :class:`~requests.Response` object to perform further
        processing.

        Afterwards, it pushes the homepage URL into the task queues and
        raises :exc:`LinkNoReturn` to stop any further processing.

        Args:
            timestamp: Timestamp of the worker node reference.
            session (requests.Session): Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        See Also:
            * :func:`darc.crawl.crawler`

        """
        cached = cls.get_cookies(link.host)
        if cached is None:
            # no cookies on record -> link not applicable, drop it
            raise LinkNoReturn(link=link)

        for name, value in cached['cookies'].items():
            session.cookies.set(name, value)

        cls.process_crawler(timestamp, session, link, cached)
        # drop=False keeps the link in the task queues for revisiting
        raise LinkNoReturn(link=link, drop=False)
Example #9
0
File: irc.py — Project: JarryShaw/darc
    def crawler(timestamp: 'datetime', session: 'Session', link: 'darc_link.Link') -> 'NoReturn':  # pylint: disable=unused-argument
        """Crawler hook for IRC addresses.

        The IRC address is recorded through :func:`save_irc`; the link is
        then dropped from further processing, as it yields no HTTP
        response.

        Args:
            timestamp: Timestamp of the worker node reference.
            session (:class:`requests.Session`): Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        save_irc(link)
        raise LinkNoReturn(link=link)
Example #10
0
    def crawler(timestamp: typing.Datetime, session: typing.Session, link: Link) -> typing.NoReturn:  # pylint: disable=unused-argument
        """Crawler hook for ED2K magnet links.

        The ED2K magnet link is recorded through :func:`save_ed2k`; the
        link is then dropped from further processing, as it yields no
        HTTP response.

        Args:
            timestamp: Timestamp of the worker node reference.
            session (:class:`requests.Session`): Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        save_ed2k(link)
        raise LinkNoReturn(link=link)
Example #11
0
    def crawler(
        timestamp: 'datetime', session: 'Session',
        link: 'darc_link.Link') -> 'NoReturn':  # pylint: disable=unused-argument
        """Crawler hook for data URIs.

        The data URI payload is persisted through :func:`save_data`;
        failures to decode the URI are logged rather than raised. The
        link is always dropped from further processing afterwards.

        Args:
            timestamp: Timestamp of the worker node reference.
            session (:class:`requests.Session`): Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        try:
            save_data(link)
        except ValueError:
            # best effort: log the failure and drop the link regardless
            logger.pexc(message=f'[REQUESTS] Failed to save data URI from {link.url}')
        raise LinkNoReturn(link)
Example #12
0
    def crawler(
            timestamp: typing.Datetime, session: typing.Session,
            link: Link) -> typing.NoReturn:  # pylint: disable=unused-argument
        """Crawler hook for data URIs.

        The data URI payload is persisted through :func:`save_data`;
        failures to decode the URI are reported on ``stderr`` rather than
        raised. The link is always dropped from further processing
        afterwards.

        Args:
            timestamp: Timestamp of the worker node reference.
            session (:class:`requests.Session`): Session object with proxy settings.
            link: Link object to be crawled.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        try:
            save_data(link)
        except ValueError as error:
            # best effort: report the failure and drop the link regardless
            message = render_error(
                f'[REQUESTS] Failed to save data URI from {link.url} <{error}>',
                stem.util.term.Color.RED)  # pylint: disable=no-member
            print(message, file=sys.stderr)
        raise LinkNoReturn(link)