Пример #1
0
 def __init__(self, settings, stats):
     """Set up the cache policy and storage backends named in ``settings``.

     Raises ``NotConfigured`` when ``HTTPCACHE_ENABLED`` is falsy. The
     policy and storage classes are resolved from their dotted paths
     (``HTTPCACHE_POLICY`` / ``HTTPCACHE_STORAGE``) and each is
     instantiated with the settings object.
     """
     if not settings['HTTPCACHE_ENABLED']:
         raise NotConfigured

     policy_cls = load_object(settings['HTTPCACHE_POLICY'])
     self.policy = policy_cls(settings)

     storage_cls = load_object(settings['HTTPCACHE_STORAGE'])
     self.storage = storage_cls(settings)

     self.ignore_missing = settings['HTTPCACHE_IGNORE_MISSING']
     self.stats = stats
Пример #2
0
 def __init__(self):
     """Build the MIME database and the mimetype -> class lookup table.

     The MIME database is populated from the ``mime.types`` resource of
     the ``scrapy`` package. Every dotted path in ``self.CLASSES`` is
     resolved to the object it names and stored in ``self.classes``
     under the same mimetype key.
     """
     self.classes = {}
     self.mimetypes = MimeTypes()

     # The packaged resource is bytes; decode it and feed it through a
     # file-like wrapper because readfp() expects a file object.
     raw_types = get_data('scrapy', 'mime.types')
     self.mimetypes.readfp(StringIO(raw_types.decode('utf8')))

     for mime_key, dotted_path in six.iteritems(self.CLASSES):
         self.classes[mime_key] = load_object(dotted_path)
Пример #3
0
 def from_settings(cls, settings, starter=None):
     """Instantiate every configured middleware and wrap them in ``cls``.

     Each dotted path returned by ``_get_middleware_list_from_settings``
     is loaded and built with the first available constructor:
     ``from_starter(starter)`` (only when ``starter`` is truthy),
     ``from_settings(settings)``, or a plain no-argument call.

     A middleware that raises ``NotConfigured`` is skipped; a warning is
     logged only when the exception carries arguments. The list of
     enabled paths is logged at INFO level before returning.
     """
     instances = []
     active_paths = []

     for path in cls._get_middleware_list_from_settings(settings):
         try:
             factory = load_object(path)
             if starter and hasattr(factory, 'from_starter'):
                 instance = factory.from_starter(starter)
             elif hasattr(factory, 'from_settings'):
                 instance = factory.from_settings(settings)
             else:
                 instance = factory()
         except NotConfigured as exc:
             # An argument-less NotConfigured means "disabled on purpose";
             # stay silent in that case.
             if exc.args:
                 logger.warning(
                     "Disabled %(clsname)s: %(eargs)s",
                     {
                         'clsname': path.rpartition('.')[2],
                         'eargs': exc.args[0],
                     },
                     extra={'starter': starter},
                 )
         else:
             instances.append(instance)
             active_paths.append(path)

     logger.info(
         "Enabled %(componentname)ss:\n%(enabledlist)s",
         {
             'componentname': cls.component_name,
             'enabledlist': pprint.pformat(active_paths),
         },
         extra={'starter': starter},
     )
     return cls(*instances)
Пример #4
0
 def __init__(self, starter):
     """Wire up the spider and item middleware chains for ``starter``.

     The item processor class is resolved from the ``ITEM_PROCESSOR``
     setting and built through its ``from_starter`` constructor.
     """
     self.starter = starter
     self.slot = None

     self.spidermw = SpiderMiddlewareManager.from_starter(starter)

     item_processor_cls = load_object(starter.settings['ITEM_PROCESSOR'])
     self.itemmw = item_processor_cls.from_starter(starter)

     self.concurrent_items = starter.settings['CONCURRENT_ITEMS']
Пример #5
0
 def __init__(self, starter):
     """Initialise engine state and build its scraper and downloader.

     The scheduler class is only resolved here (from the ``SCHEDULER``
     setting) and stored; the downloader class (``DOWNLOADER`` setting)
     is resolved and instantiated immediately with ``starter``.
     """
     # References borrowed from the starter.
     self.starter = starter
     self.settings = starter.settings
     self.signals = starter.signals

     # Runtime slots that start out empty.
     self.loop = self.spider = self.scheduler = self.heart = None

     # Bookkeeping defaults.
     self.running = False
     self.paused = False
     self.crawling = []
     self.max = 5
     self.start_time = 0

     # Collaborators, created in the same order as before: scraper first,
     # then scheduler class lookup, then the downloader.
     self.scraper = Scraper(starter)
     self.scheduler_class = load_object(self.settings['SCHEDULER'])
     self.downloader = load_object(self.settings['DOWNLOADER'])(starter)
Пример #6
0
def _get_spider_loader(settings):
    """Get SpiderLoader instance from settings.

    Resolves the class named by the ``SPIDER_LOADER_CLASS`` setting and
    checks it against the ``ISpiderLoader`` interface. A class that does
    not fully implement the interface is still used, but a
    ``QuixoteDeprecationWarning`` is issued so the gap is visible.

    Returns whatever ``loader_cls.from_settings(settings)`` returns.
    """
    # Local import keeps this block self-contained regardless of the
    # module's import header.
    import warnings

    cls_path = settings['SPIDER_LOADER_CLASS']
    loader_cls = load_object(cls_path)
    try:
        verifyClass(ISpiderLoader, loader_cls)
    except DoesNotImplement:
        # ``category`` and ``stacklevel`` here are arguments of
        # warnings.warn(); a standard logging.Logger does not accept
        # ``category`` and would raise TypeError instead of warning.
        warnings.warn(
            'SPIDER_LOADER_CLASS (previously named SPIDER_MANAGER_CLASS) does not fully implement '
            'quixote.spider.spiderloader.ISpiderLoader interface. Please add all missing methods to avoid '
            'unexpected runtime errors.',
            category=QuixoteDeprecationWarning,
            stacklevel=2)
    return loader_cls.from_settings(settings)
Пример #7
0
 def __init__(self,
              spider_name,
              project_settings=None,
              is_check_emmory=False):
     """Assemble settings and core components for running one spider.

     Default settings come from ``quixote.settings`` and are then
     overridden key by key with the values obtained from
     ``Settings.get_dict_from_settings_file(project_settings)``. The
     spider class is looked up by ``spider_name`` through the spider
     loader. ``is_check_emmory`` is stored unchanged on the instance.
     """
     from quixote.settings import settings as default_settings

     self.settings = Settings(default_settings).get_settings()
     overrides = Settings.get_dict_from_settings_file(project_settings)
     for key, value in overrides.items():
         self.settings[key] = value

     self.spider_loader = _get_spider_loader(self.settings)
     # NOTE(review): these prints dump the loader and the full settings to
     # stdout; they look like leftover debugging - confirm before removing.
     print(self.spider_loader)
     print(self.settings)

     self.engine_class = load_object(self.settings['ENGINE'])
     self.spider_class = self.spider_loader.get_spider_by_name(spider_name)

     # Order matters below: the stats collector and the extension manager
     # both receive this instance while it is still being built.
     self.signals = SignalManager()
     stats_cls = load_object(self.settings['STATS_CLASS'])
     self.stats = stats_cls(self)
     self.extensions = ExtensionManager.from_starter(self)

     self.engine = None
     self.spider = None
     self.crawling = False
     self.is_check_emmory = is_check_emmory
     self.start_time = None
Пример #8
0
 def _get_handler(self, scheme):
     """Return the download handler for ``scheme``, creating it on demand.

     Handlers already built are served from ``self._handlers``. A scheme
     that previously failed, or has no entry in ``self._schemes``, yields
     ``None``; the reason is recorded in ``self._not_configured`` so the
     load is not attempted again.
     """
     handlers = self._handlers
     failures = self._not_configured

     if scheme in handlers:
         return handlers[scheme]
     if scheme in failures:
         return None
     if scheme not in self._schemes:
         failures[scheme] = 'no handler available for that scheme'
         return None

     class_path = self._schemes[scheme]
     try:
         handler_cls = load_object(class_path)
         handler = handler_cls(self._starter.settings)
     except NotConfigured as exc:
         # Deliberately disabled handler: remember why, without logging.
         failures[scheme] = str(exc)
         return None
     except Exception as exc:
         logger.error('Loading "%(class_path)s" for scheme "%(scheme)s"',
                      {"class_path": class_path, "scheme": scheme},
                      exc_info=True, extra={'crawler': self._starter})
         failures[scheme] = str(exc)
         return None

     handlers[scheme] = handler
     return handlers[scheme]