def __init__(self, settings=settings):
    """Read the HTTP cache "ignore" options from ``settings``.

    Stores on the instance:

    * ``ignore_missing`` -- ``HTTPCACHE_IGNORE_MISSING`` as a bool
      (default ``False``).
    * ``ignore_schemes`` -- ``HTTPCACHE_IGNORE_SCHEMES`` as a list
      (default ``['file']``).
    * ``ignore_http_codes`` -- ``HTTPCACHE_IGNORE_HTTP_CODES`` converted
      to a list of ints (default empty).
    """
    self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING', False)
    self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES', ['file'])
    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # so the codes could only be scanned once.
    self.ignore_http_codes = [
        int(code)
        for code in settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES', [])
    ]
def _print_setting(self, opts):
    """Print the value of the setting selected by ``opts``.

    The first truthy attribute of ``opts`` among ``get``, ``getbool``,
    ``getint``, ``getfloat`` and ``getlist`` (checked in that order) names
    the setting; it is read through the ``settings_`` accessor of the same
    name and printed. Nothing is printed when none of them is set.
    """
    # Accessor names double as the option attribute names on ``opts``.
    for accessor in ('get', 'getbool', 'getint', 'getfloat', 'getlist'):
        setting_name = getattr(opts, accessor)
        if setting_name:
            # ``print(x)`` with a single argument behaves the same as the
            # Python 2 statement form and is valid syntax on Python 3.
            print(getattr(settings_, accessor)(setting_name))
            return
def __init__(self, *args, **kwargs):
    """Initialise the exporter with options taken from settings.

    ``fields_to_export`` is read from ``EXPORT_FIELDS`` (``None`` when the
    list is empty) and ``encoding`` from ``EXPORT_ENCODING`` (default
    ``'utf-8'``). Both replace any value the caller passed in ``kwargs``.
    """
    kwargs.update(
        fields_to_export=settings.getlist('EXPORT_FIELDS') or None,
        encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
    )
    super(CSVkwItemExporter, self).__init__(*args, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialise the product CSV exporter from settings.

    Overrides the following keyword arguments before delegating to the
    parent initialiser: ``fields_to_export`` (``EXPORT_FIELDS`` or
    ``None``), ``encoding`` (``EXPORT_ENCODING``, default ``'utf-8'``),
    ``delimiter`` (``CSV_DELIMITER``, default ``'|'``) and
    ``include_headers_line`` (always ``False``).
    """
    overrides = {
        'fields_to_export': settings.getlist('EXPORT_FIELDS') or None,
        'encoding': settings.get('EXPORT_ENCODING', 'utf-8'),
        'delimiter': settings.get('CSV_DELIMITER', '|'),
        'include_headers_line': False,
    }
    kwargs.update(overrides)
    super(ProductCSVExporter, self).__init__(*args, **kwargs)
def get_exporter_and_file(self):
    """Build the item exporter selected by the export settings.

    Reads ``EXPORT_FORMAT`` and ``EXPORT_FILE`` from settings, opens the
    target file for binary writing and wraps it in the matching exporter
    from the ``exporter`` module.

    Returns:
        An ``(exporter, file)`` tuple; the caller owns the open file.

    Raises:
        NotConfigured: if the format or file name is unset, or if the
            format is not one of the supported names.
    """
    export_format = settings['EXPORT_FORMAT']
    filename = settings['EXPORT_FILE']
    if not export_format or not filename:
        raise NotConfigured

    # format name -> (exporter class name, extra constructor kwargs).
    # Class names are resolved with getattr below so that only the selected
    # exporter has to exist on the ``exporter`` module.
    supported = {
        'xml': ('XmlItemExporter', {}),
        'csv': ('CsvItemExporter', {}),
        'csv_headers': ('CsvItemExporter', {'include_headers_line': True}),
        'pprint': ('PprintItemExporter', {}),
        'pickle': ('PickleItemExporter', {}),
        # Each JSON format now maps to the exporter carrying its name; the
        # two were previously swapped.
        'json': ('JsonItemExporter', {}),
        'jsonlines': ('JsonLinesItemExporter', {}),
    }

    # Reject unknown formats before touching the filesystem, so a bad
    # setting neither truncates the target file nor leaks an open handle.
    if export_format not in supported:
        raise NotConfigured("Unsupported export format: %s" % export_format)
    class_name, extra_kwargs = supported[export_format]

    exp_kwargs = {
        'fields_to_export': settings.getlist('EXPORT_FIELDS') or None,
        'export_empty_fields': settings.getbool('EXPORT_EMPTY', False),
        'encoding': settings.get('EXPORT_ENCODING', 'utf-8'),
    }
    exp_kwargs.update(extra_kwargs)

    out = open(filename, 'wb')
    try:
        exp = getattr(exporter, class_name)(out, **exp_kwargs)
    except Exception:
        # Do not leave the handle open if the exporter cannot be built.
        out.close()
        raise
    return exp, out
def __init__(self):
    """Configure the telnet console from settings and hook engine signals.

    Reads ``TELNETCONSOLE_PORT`` (converted to a list of ints) and
    ``TELNETCONSOLE_HOST``, then connects ``start_listening`` and
    ``stop_listening`` to the engine started/stopped signals.

    Raises:
        NotConfigured: if ``TELNETCONSOLE_ENABLED`` is false.
    """
    if not settings.getbool('TELNETCONSOLE_ENABLED'):
        raise NotConfigured
    self.noisy = False
    # A concrete list rather than ``map``: on Python 3 ``map`` is a
    # one-shot iterator and could not be indexed or re-read.
    self.portrange = [int(port) for port in settings.getlist('TELNETCONSOLE_PORT')]
    self.host = settings['TELNETCONSOLE_HOST']
    dispatcher.connect(self.start_listening, signals.engine_started)
    dispatcher.connect(self.stop_listening, signals.engine_stopped)
def __init__(self, *args, **kwargs):
    """Initialise the CSV exporter with dialect options from settings.

    Overrides these keyword arguments before calling the parent
    initialiser:

    * ``delimiter`` -- ``CSV_DELIMITER``, or a tab when unset/empty.
    * ``quotechar`` -- ``CSV_QUOTECHAR``, or a double quote when unset/empty.
    * ``quoting`` -- always ``csv.QUOTE_ALL``.
    * ``fields_to_export`` -- ``EXPORT_FIELDS``, or ``None`` when empty.
    * ``encoding`` -- ``EXPORT_ENCODING`` (default ``'utf-8'``).
    """
    kwargs.update(
        delimiter=settings.get('CSV_DELIMITER') or "\t",
        quotechar=settings.get('CSV_QUOTECHAR') or "\"",
        quoting=csv.QUOTE_ALL,
        fields_to_export=settings.getlist('EXPORT_FIELDS') or None,
        encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
    )
    super(CSVkwItemExporter, self).__init__(*args, **kwargs)
def __init__(self):
    """Set up the memory debugger when ``MEMDEBUG_ENABLED`` is on.

    Creates a ``MailSender``, stores the ``MEMDEBUG_NOTIFY`` list as
    ``rcpts`` and connects the engine started/stopped handlers.

    Raises:
        NotConfigured: if ``MEMDEBUG_ENABLED`` is false.
    """
    enabled = settings.getbool('MEMDEBUG_ENABLED')
    if not enabled:
        raise NotConfigured

    self.mail = MailSender()
    self.rcpts = settings.getlist('MEMDEBUG_NOTIFY')

    hooks = (
        (self.engine_started, signals.engine_started),
        (self.engine_stopped, signals.engine_stopped),
    )
    for handler, engine_signal in hooks:
        dispatcher.connect(handler, engine_signal)
def __init__(self, file, include_headers_line=True, join_multivalued=",", **kwargs):
    """Create a CSV exporter writing to ``file``, configured from settings.

    The field list is read from the setting named ``"EXPORT_FIELDS"`` plus
    the ``EXPORT_ITEM`` suffix (empty by default); the encoding comes from
    ``EXPORT_ENCODING`` and the delimiter from ``CSV_DELIMITER``.

    Args:
        file: Object handed to ``csv.writer``.
        include_headers_line: Stored as ``self.include_headers_line``.
        join_multivalued: Stored as ``self._join_multivalued``.
        **kwargs: Remaining options; whatever is left after
            ``_configure`` is forwarded to ``csv.writer``.
    """
    fields_setting = "EXPORT_FIELDS" + settings.get("EXPORT_ITEM", "")
    kwargs.update(
        fields_to_export=settings.getlist(fields_setting) or None,
        encoding=settings.get("EXPORT_ENCODING", "utf-8"),
    )
    # Must run before the delimiter is added: presumably ``_configure``
    # consumes the exporter options from kwargs so that only csv.writer
    # arguments remain -- TODO confirm against the base class.
    self._configure(kwargs, dont_fail=True)
    self.include_headers_line = include_headers_line

    kwargs["delimiter"] = settings.get("CSV_DELIMITER", ",")
    self.csv_writer = csv.writer(file, **kwargs)

    # Any EXPORT_HEADLINE value other than the string "False" leaves the
    # flag set.
    headline = settings.get("EXPORT_HEADLINE", "True")
    self._headers_not_written = headline != "False"
    self._join_multivalued = join_multivalued
def spider_opened(self, spider):
    """Open the spider's CSV output file and start exporting to it.

    The file at ``spider.output_file`` is opened in ``'w+b'`` mode and
    remembered in ``self.files`` under the spider. If it cannot be opened,
    the spider is closed through the engine with an error reason and
    nothing else happens.
    """
    try:
        csv_file = open(spider.output_file, 'w+b')
    except IOError as err:
        reason = "ERROR: Can't create CSV file: " + str(err)
        spider.crawler.engine.close_spider(spider, reason)
        return

    self.files[spider] = csv_file
    self.exporter = CsvItemExporter(csv_file)
    self.exporter.fields_to_export = settings.getlist("EXPORT_FIELDS")
    self.exporter.start_exporting()
def load(self, spider_modules=None):
    """Import the spider modules and index their spiders by name.

    Args:
        spider_modules: Iterable of module paths to import. When ``None``
            the ``SPIDER_MODULES`` setting is used.

    Every spider found via ``self._getspiders`` is validated against
    ``ISpider`` and stored in ``self._spiders`` keyed by ``spider.name``.
    ``self.loaded`` is set to ``True`` at the end.
    """
    if spider_modules is None:
        spider_modules = settings.getlist('SPIDER_MODULES')
    self.spider_modules = spider_modules
    self._spiders = {}

    # Import every module up front, before any spider is registered.
    imported = []
    for module_path in self.spider_modules:
        # A non-empty fromlist makes __import__ return the leaf module.
        imported.append(__import__(module_path, {}, {}, ['']))

    for module in imported:
        for spider in self._getspiders(ISpider, module):
            ISpider.validateInvariants(spider)
            self._spiders[spider.name] = spider

    self.loaded = True
def load(self, spider_modules=None):
    """Import the spider modules and index their spiders by domain name.

    Args:
        spider_modules: Iterable of module paths to import. When ``None``
            the ``SPIDER_MODULES`` setting is used.

    Resets ``self._invaliddict`` and ``self._spiders``, then stores every
    spider found via ``self._getspiders`` (after ``ISpider`` validation)
    under ``spider.domain_name``. ``self.loaded`` is set to ``True`` at
    the end.
    """
    if spider_modules is None:
        spider_modules = settings.getlist('SPIDER_MODULES')
    self.spider_modules = spider_modules
    self._invaliddict = {}
    self._spiders = {}

    # Import every module up front, before any spider is registered.
    imported = []
    for module_path in self.spider_modules:
        # A non-empty fromlist makes __import__ return the leaf module.
        imported.append(__import__(module_path, {}, {}, ['']))

    for module in imported:
        for spider in self._getspiders(ISpider, module):
            ISpider.validateInvariants(spider)
            self._spiders[spider.domain_name] = spider

    self.loaded = True
def __init__(self):
    """Set up memory-usage monitoring from the ``MEMUSAGE_*`` settings.

    Stores the notification addresses, the limit and warning thresholds
    (settings are given in MB and stored multiplied by 1024 * 1024), the
    report flag and a ``MailSender``, then connects the engine
    started/stopped handlers.

    Raises:
        NotConfigured: if ``MEMUSAGE_ENABLED`` is false or
            ``procfs_supported()`` returns a false value.
    """
    if not (settings.getbool("MEMUSAGE_ENABLED") and procfs_supported()):
        raise NotConfigured

    bytes_per_mb = 1024 * 1024
    self.warned = False
    self.notify_mails = settings.getlist("MEMUSAGE_NOTIFY")
    self.limit = settings.getint("MEMUSAGE_LIMIT_MB") * bytes_per_mb
    self.warning = settings.getint("MEMUSAGE_WARNING_MB") * bytes_per_mb
    self.report = settings.getbool("MEMUSAGE_REPORT")
    self.mail = MailSender()

    dispatcher.connect(self.engine_started, signal=signals.engine_started)
    dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
def __init__(self):
    """Set up the memory debugger, with optional libxml2 support.

    ``self.libxml2`` holds the ``libxml2`` module when it can be imported
    and ``None`` otherwise. This is assigned before the enabled check, so
    it is set even when ``NotConfigured`` is raised.

    Raises:
        NotConfigured: if ``MEMDEBUG_ENABLED`` is false.
    """
    try:
        import libxml2
    except ImportError:
        libxml2 = None
    self.libxml2 = libxml2

    enabled = settings.getbool('MEMDEBUG_ENABLED')
    if not enabled:
        raise NotConfigured

    self.mail = MailSender()
    self.rcpts = settings.getlist('MEMDEBUG_NOTIFY')
    dispatcher.connect(self.engine_started, signals.engine_started)
    dispatcher.connect(self.engine_stopped, signals.engine_stopped)
def __init__(self):
    """Set up memory-usage monitoring from the ``MEMUSAGE_*`` settings.

    Thresholds are read in MB and stored multiplied by 1024 * 1024.

    Raises:
        NotConfigured: if ``MEMUSAGE_ENABLED`` is false or the ``/proc``
            path does not exist.
    """
    monitoring_enabled = settings.getbool('MEMUSAGE_ENABLED')
    if not monitoring_enabled:
        raise NotConfigured
    has_proc = os.path.exists('/proc')
    if not has_proc:
        raise NotConfigured

    mb = 1024 * 1024
    self.warned = False
    self.notify_mails = settings.getlist('MEMUSAGE_NOTIFY')
    self.limit = settings.getint('MEMUSAGE_LIMIT_MB') * mb
    self.warning = settings.getint('MEMUSAGE_WARNING_MB') * mb
    self.report = settings.getbool('MEMUSAGE_REPORT')
    self.mail = MailSender()

    dispatcher.connect(self.engine_started, signal=signals.engine_started)
    dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
def __init__(self):
    """Configure the memory-usage extension and hook the engine signals.

    Reads ``MEMUSAGE_NOTIFY``, ``MEMUSAGE_LIMIT_MB``,
    ``MEMUSAGE_WARNING_MB`` and ``MEMUSAGE_REPORT``; the two MB values are
    stored multiplied by 1024 * 1024.

    Raises:
        NotConfigured: if ``MEMUSAGE_ENABLED`` is false or
            ``procfs_supported()`` returns a false value.
    """
    if not settings.getbool('MEMUSAGE_ENABLED'):
        raise NotConfigured
    if not procfs_supported():
        raise NotConfigured

    self.warned = False
    self.notify_mails = settings.getlist('MEMUSAGE_NOTIFY')

    limit_mb = settings.getint('MEMUSAGE_LIMIT_MB')
    self.limit = limit_mb * 1024 * 1024
    warning_mb = settings.getint('MEMUSAGE_WARNING_MB')
    self.warning = warning_mb * 1024 * 1024

    self.report = settings.getbool('MEMUSAGE_REPORT')
    self.mail = MailSender()

    dispatcher.connect(self.engine_started, signal=signals.engine_started)
    dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
def __init__(self):
    """Build the web service site from the ``WEBSERVICE_*`` settings.

    Every class listed by ``build_component_list`` for
    ``WEBSERVICE_RESOURCES_BASE`` / ``WEBSERVICE_RESOURCES`` is
    instantiated and attached to a ``RootResource`` under its ``ws_name``.
    The site is then initialised with that root and the configured log
    file, and the listening handlers are connected to the engine
    started/stopped signals.

    Raises:
        NotConfigured: if ``WEBSERVICE_ENABLED`` is false.
    """
    if not settings.getbool('WEBSERVICE_ENABLED'):
        raise NotConfigured
    logfile = settings['WEBSERVICE_LOGFILE']
    # A concrete list rather than ``map``: on Python 3 ``map`` is a
    # one-shot iterator and could not be indexed or re-read.
    self.portrange = [int(port) for port in settings.getlist('WEBSERVICE_PORT')]
    self.host = settings['WEBSERVICE_HOST']

    root = RootResource()
    reslist = build_component_list(
        settings['WEBSERVICE_RESOURCES_BASE'],
        settings['WEBSERVICE_RESOURCES'],
    )
    for res_path in reslist:
        res = load_object(res_path)()
        root.putChild(res.ws_name, res)

    server.Site.__init__(self, root, logPath=logfile)
    self.noisy = False
    dispatcher.connect(self.start_listening, signals.engine_started)
    dispatcher.connect(self.stop_listening, signals.engine_stopped)
def __init__(self, settings):
    """Configure the feed exporter from ``settings``.

    Args:
        settings: Settings object supporting item access, ``getbool`` and
            ``getlist``.

    Raises:
        NotConfigured: if ``FEED_URI`` is empty, or if the URI or the
            lower-cased ``FEED_FORMAT`` is rejected by
            ``_storage_supported`` / ``_exporter_supported``.
    """
    self.settings = settings

    self.urifmt = settings['FEED_URI']
    if not self.urifmt:
        raise NotConfigured

    self.format = settings['FEED_FORMAT'].lower()
    self.storages = self._load_components('FEED_STORAGES')
    self.exporters = self._load_components('FEED_EXPORTERS')

    # The storage is checked first; the exporter only if that passes.
    if not (self._storage_supported(self.urifmt)
            and self._exporter_supported(self.format)):
        raise NotConfigured

    self.store_empty = settings.getbool('FEED_STORE_EMPTY')
    self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None

    uripar_path = settings['FEED_URI_PARAMS']
    if uripar_path:
        self._uripar = load_object(uripar_path)
    else:
        # No hook configured: use a two-argument no-op.
        self._uripar = lambda x, y: None
def load(self):
    """Load the pipeline stages listed in the ``ITEM_PIPELINES`` setting.

    Clears ``self.enabled`` and ``self.disabled``, then instantiates each
    pipeline class. Instances are appended to ``self.pipeline`` and
    recorded in ``self.enabled`` by class name. A class whose constructor
    raises ``NotConfigured`` is recorded in ``self.disabled`` (class name
    to path), and the exception is logged when it carries arguments.
    """
    self.enabled.clear()
    self.disabled.clear()
    for pipepath in settings.getlist('ITEM_PIPELINES'):
        cls = load_object(pipepath)
        if not cls:
            continue
        try:
            # Only instantiation is expected to raise NotConfigured.
            pipe = cls()
        except NotConfigured as e:  # "as" form works on Python 2.6+ and 3
            self.disabled[cls.__name__] = pipepath
            if e.args:
                log.msg(e)
        else:
            self.pipeline.append(pipe)
            self.enabled[cls.__name__] = pipe
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
    """Initialise the exporter, then rebuild its output stream and writer.

    ``fields_to_export`` (``EXPORT_FIELDS`` or ``None``) and ``encoding``
    (``EXPORT_ENCODING``, default ``'utf-8'``) are forced from settings
    before the parent initialiser runs. Afterwards the current
    ``self.stream`` is closed, the target is reopened through
    ``FixedFileFeedStorage`` and a new ``csv.writer`` is attached.

    Args:
        file: Output file object; its ``name`` attribute is used to
            reopen the target.
        include_headers_line: Passed through to the parent initialiser.
        join_multivalued: Passed through to the parent initialiser.
        **kwargs: Exporter options; whatever remains after
            ``_configure`` is forwarded to ``csv.writer``.
    """
    kwargs.update(
        fields_to_export=settings.getlist('EXPORT_FIELDS') or None,
        encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
    )
    super(CSVkwItemExporter, self).__init__(
        file, include_headers_line, join_multivalued, **kwargs)
    self._configure(kwargs, dont_fail=True)

    # Drop the stream that is in place at this point (presumably created
    # by the parent initialiser) and reopen the same path via the storage.
    self.stream.close()
    feed_storage = FixedFileFeedStorage(file.name)
    reopened = feed_storage.open(file.name)

    if six.PY3:
        # Wrap the reopened file in a text layer using the exporter's
        # encoding and no newline translation.
        self.stream = io.TextIOWrapper(
            reopened,
            line_buffering=False,
            write_through=True,
            encoding=self.encoding,
            newline='',
        )
    else:
        self.stream = reopened
    self.csv_writer = csv.writer(self.stream, **kwargs)
def __init__(self):
    """Set up the stats mailer for the ``STATSMAILER_RCPTS`` recipients.

    Connects ``stats_spider_closed`` to the signal of the same name.

    Raises:
        NotConfigured: if the recipient list is empty.
    """
    recipients = settings.getlist("STATSMAILER_RCPTS")
    self.recipients = recipients
    if not recipients:
        raise NotConfigured
    dispatcher.connect(
        self.stats_spider_closed,
        signal=signals.stats_spider_closed,
    )
def __init__(self):
    """Open and authenticate a controller connection and load retry codes.

    ``self.codes`` holds the ``RETRY_HTTP_CODES`` setting as a set of ints
    and ``self.count`` starts at zero.
    """
    # NOTE(review): the control port and the authentication secret are
    # hard-coded here; consider moving them to settings.
    controller = Controller.from_port(port=9151)
    self.controller = controller
    controller.authenticate('931005')

    self.codes = set(map(int, settings.getlist('RETRY_HTTP_CODES')))
    self.count = 0
def __init__(self):
    """Load the proxy configuration from settings.

    ``self.proxy`` is the result of ``parse_proxy`` applied to the
    ``HTTP_PROXY`` setting with scheme ``'http'``; ``self.proxy_spiders``
    is the ``PROXY_SPIDERS`` list (default empty) as a set.
    """
    proxy_url = settings.get('HTTP_PROXY')
    self.proxy = self.parse_proxy(proxy_url, 'http')

    spider_names = settings.getlist('PROXY_SPIDERS', [])
    self.proxy_spiders = set(spider_names)
def __init__(self, *args, **kwargs):
    """Initialise the CSV exporter without a header line.

    Forces ``fields_to_export`` (``EXPORT_FIELDS`` or ``None``),
    ``encoding`` (``EXPORT_ENCODING``, default ``'utf-8'``) and
    ``include_headers_line=False`` before delegating to the parent.
    """
    forced_options = (
        ('fields_to_export', settings.getlist('EXPORT_FIELDS') or None),
        ('encoding', settings.get('EXPORT_ENCODING', 'utf-8')),
        ('include_headers_line', False),
    )
    for option, value in forced_options:
        kwargs[option] = value
    super(CSVkwItemExporter, self).__init__(*args, **kwargs)
def __init__(self, settings=settings):
    """Load the HTTP cache "ignore" rules from ``settings``.

    Sets ``ignore_missing`` (``HTTPCACHE_IGNORE_MISSING``, default
    ``False``), ``ignore_schemes`` (``HTTPCACHE_IGNORE_SCHEMES``, default
    ``['file']``) and ``ignore_http_codes``
    (``HTTPCACHE_IGNORE_HTTP_CODES`` as a list of ints).
    """
    self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING', False)
    self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES', ['file'])
    # Materialise the codes: ``map`` is a single-use iterator on Python 3,
    # so it could not be scanned more than once.
    raw_codes = settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES', [])
    self.ignore_http_codes = [int(code) for code in raw_codes]
def __init__(self, *args, **kwargs):
    """Initialise the exporter and set the multi-value join string.

    ``fields_to_export`` (``EXPORT_FIELDS`` or ``None``) and ``encoding``
    (``EXPORT_ENCODING``, default ``'utf-8'``) are forced from settings.
    After the parent initialiser runs, ``_join_multivalued`` is replaced
    by the ``MY_CSV_DELIMITER`` setting (default ``','``).
    """
    export_fields = settings.getlist('EXPORT_FIELDS') or None
    export_encoding = settings.get('EXPORT_ENCODING', 'utf-8')
    kwargs['fields_to_export'] = export_fields
    kwargs['encoding'] = export_encoding
    super(CSVColripItemExporter, self).__init__(*args, **kwargs)

    join_with = settings.get('MY_CSV_DELIMITER', ',')
    self._join_multivalued = join_with
def __init__(self, *args, **kwargs):
    """Initialise the exporter with the ``CSV_EXPORT_FIELDS`` field list.

    ``fields_to_export`` is overwritten with that setting, or with
    ``None`` when the list is empty, before the parent initialiser runs.
    """
    csv_fields = settings.getlist('CSV_EXPORT_FIELDS')
    kwargs['fields_to_export'] = csv_fields if csv_fields else None
    super(SlybotCSVItemExporter, self).__init__(*args, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialise the exporter with field list and encoding from settings.

    ``fields_to_export`` is ``EXPORT_FIELDS`` (``None`` when empty) and
    ``encoding`` is ``EXPORT_ENCODING`` (default ``'utf-8'``); both
    overwrite caller-supplied values.
    """
    selected_fields = settings.getlist('EXPORT_FIELDS')
    if not selected_fields:
        selected_fields = None
    kwargs['fields_to_export'] = selected_fields
    kwargs['encoding'] = settings.get('EXPORT_ENCODING', 'utf-8')
    super(CsvItemExporter2, self).__init__(*args, **kwargs)
def __init__(self):
    """Load the retry configuration from settings.

    Sets ``max_retry_times`` (``RETRY_TIMES``), ``retry_http_codes``
    (``RETRY_HTTP_CODES`` as a list of ints) and ``priority_adjust``
    (``RETRY_PRIORITY_ADJUST``).
    """
    self.max_retry_times = settings.getint('RETRY_TIMES')
    # Materialise the codes: on Python 3 ``map`` yields a one-shot
    # iterator, so it could not be scanned more than once.
    self.retry_http_codes = [
        int(code) for code in settings.getlist('RETRY_HTTP_CODES')
    ]
    self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
def __init__(self):
    """Load the retry configuration when ``RETRY_ENABLED`` is on.

    Sets ``max_retry_times`` (``RETRY_TIMES``), ``retry_http_codes``
    (``RETRY_HTTP_CODES`` as a set of ints) and ``priority_adjust``
    (``RETRY_PRIORITY_ADJUST``).

    Raises:
        NotConfigured: if ``RETRY_ENABLED`` is false.
    """
    retry_enabled = settings.getbool('RETRY_ENABLED')
    if not retry_enabled:
        raise NotConfigured

    self.max_retry_times = settings.getint('RETRY_TIMES')
    raw_codes = settings.getlist('RETRY_HTTP_CODES')
    self.retry_http_codes = set(map(int, raw_codes))
    self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
def __init__(self, *args, **kwargs):
    """Initialise the exporter, taking the fields from ``CSV_EXPORT_FIELDS``.

    An empty setting is passed to the parent as ``None``; any
    ``fields_to_export`` supplied by the caller is replaced.
    """
    kwargs.update(
        fields_to_export=settings.getlist('CSV_EXPORT_FIELDS') or None,
    )
    super(SlybotCSVItemExporter, self).__init__(*args, **kwargs)
def open_spider(self, spider):
    """Open the CSV output file for ``spider`` and create its exporter.

    The path is the ``FEED_URI`` setting when it is set; otherwise it is
    built from ``spider.name``, ``spider.tipprunde`` and
    ``spider.matchday``. The file is opened in ``'w'`` mode and wrapped in
    a ``CsvBiertotoItemExporter`` limited to ``FEED_EXPORT_FIELDS``.
    """
    feed_uri = settings.get('FEED_URI')
    if feed_uri:
        output_csv_file = feed_uri
    else:
        output_csv_file = "{}-{}-{}.csv".format(
            spider.name, spider.tipprunde, spider.matchday)

    self.output_csv = open(output_csv_file, 'w')
    export_fields = settings.getlist('FEED_EXPORT_FIELDS')
    self.exporter = CsvBiertotoItemExporter(
        self.output_csv,
        fields_to_export=export_fields,
    )
def __init__(self, *args, **kwargs):
    """Initialise the app-info CSV exporter from settings.

    Forces ``delimiter`` (``EXPORT_CSV_DELIMITER``, default ``'\\001'``),
    ``fields_to_export`` (``EXPORT_FIELDS`` or ``None``) and ``encoding``
    (``EXPORT_ENCODING``, default ``'utf-8'``) before calling the parent
    initialiser, then sets ``include_headers_line`` from the
    ``export_csv_headers`` setting (default ``True``).
    """
    kwargs['delimiter'] = settings.get('EXPORT_CSV_DELIMITER', '\001')
    kwargs['fields_to_export'] = settings.getlist('EXPORT_FIELDS') or None
    # The encoding is a single name, so read it with ``get`` rather than
    # ``getlist``, which would hand the parent a list.
    kwargs['encoding'] = settings.get('EXPORT_ENCODING', 'utf-8')
    super(AppinfoCsvExporter, self).__init__(*args, **kwargs)
    # ``True`` is the Python literal; the former lowercase ``true`` was an
    # undefined name and raised NameError here.
    self.include_headers_line = settings.getbool('export_csv_headers', True)
def __init__(self):
    """Open the output file and keep it as ``self.file``.

    NOTE(review): ``settings.getlist`` is called without a setting name
    and its result is passed straight to ``codecs.open`` as the file
    name. This looks unfinished -- confirm which setting and open mode
    were intended.
    """
    target = settings.getlist()
    self.file = codecs.open(target)
def start_requests(self):
    """Yield one request per URL listed in the ``URL`` setting.

    Each request uses ``self.parse_item`` as its callback, is created with
    ``dont_filter=True`` and carries ``meta["flow"] == "test"``.
    """
    for target_url in settings.getlist("URL"):
        req = Request(
            url=target_url,
            callback=self.parse_item,
            dont_filter=True,
        )
        req.meta["flow"] = "test"
        yield req
def from_crawler(cls, crawler):
    """Build an instance from the crawler's ``USER_AGENT_LIST`` setting.

    Args:
        cls: Callable invoked with the list of user agents.
        crawler: Object exposing a ``settings`` attribute with ``getlist``.

    Returns:
        The result of ``cls(<USER_AGENT_LIST as a list>)``.
    """
    return cls(crawler.settings.getlist("USER_AGENT_LIST"))