import collections
import gettext
import logging
import re

import wpull.util
from wpull.backport.logging import StyleAdapter
from wpull.document.html import HTMLReader
from wpull.document.htmlparse.element import Element
from wpull.document.util import detect_response_encoding
from wpull.pipeline.item import LinkType
from wpull.scraper.base import BaseHTMLScraper, ScrapeResult, LinkContext
from wpull.scraper.util import (
    urljoin_safe, clean_link_soup, parse_refresh,
    is_likely_inline, is_likely_link, is_unlikely_link, identify_link_type,
)
from wpull.url import percent_decode

# Translation hook for user-facing messages.
_ = gettext.gettext
_logger = StyleAdapter(logging.getLogger(__name__))

# Plain record type backing LinkInfo. The type name string is
# 'LinkInfoType' (not 'LinkInfo'); it is what shows up in the repr.
_BaseLinkInfo = collections.namedtuple(
    'LinkInfoType',
    [
        'element', 'tag', 'attrib', 'link',
        'inline', 'linked', 'base_link', 'value_type',
        'link_type'
    ]
)


class LinkInfo(_BaseLinkInfo):
    '''Named tuple describing one link, hashed by its ``link`` field only.

    Fields, in order: ``element``, ``tag``, ``attrib``, ``link``,
    ``inline``, ``linked``, ``base_link``, ``value_type``, ``link_type``.

    Equality is the inherited tuple comparison over all fields, so two
    records that compare equal always share the same hash. Records that
    differ only in fields other than ``link`` hash alike but stay unequal.

    NOTE(review): the meaning and types of the individual fields are not
    established in this section of the file -- confirm against the code
    that constructs ``LinkInfo`` before relying on them.
    '''

    def __hash__(self):
        '''Return the hash of the ``link`` field.

        Raises:
            TypeError: If ``link`` is not hashable.
        '''
        # Only ``link`` is hashed; the other fields are ignored here.
        return hash(self.link)
'''Delegation to other processor.'''
import asyncio
import gettext
import logging

from wpull.backport.logging import StyleAdapter
from wpull.pipeline.session import ItemSession
from wpull.processor.base import BaseProcessor

# Translation hook for user-facing messages.
_ = gettext.gettext
# Note: this is the root logger (no name is passed to getLogger).
_logger = StyleAdapter(logging.getLogger())


class DelegateProcessor(BaseProcessor):
    '''Delegate to Web or FTP processor.

    Processors are looked up by URL scheme in an internal mapping that
    starts out empty.

    NOTE(review): nothing in this section of the file adds entries to
    that mapping -- confirm where processors get registered.
    '''

    def __init__(self):
        # URL scheme -> processor that handles items with that scheme.
        self._processors = dict()

    @asyncio.coroutine
    def process(self, item_session: ItemSession):
        '''Hand the item over to the processor for its URL scheme.

        Args:
            item_session: Session whose ``url_record.url_info.scheme``
                selects the processor.

        Returns:
            Whatever the selected processor's ``process`` coroutine
            returns. If no (truthy) processor is stored for the scheme,
            a warning is logged and ``None`` is returned implicitly.

        Coroutine.
        '''
        url_info = item_session.url_record.url_info
        url_scheme = url_info.scheme
        handler = self._processors.get(url_scheme)

        if handler:
            outcome = yield from handler.process(item_session)
            return outcome
        else:
            # The message is translated first; the logger adapter is
            # given ``scheme`` as a keyword for the ``{scheme}`` field.
            message = _('No processor available to handle {scheme} scheme.')
            _logger.warning(message, scheme=repr(url_scheme))