示例#1
0
 def __init__(self, settings=settings):
     """Read the HTTP cache policy options from ``settings``.

     Stores ``HTTPCACHE_IGNORE_MISSING`` (bool, default False),
     ``HTTPCACHE_IGNORE_SCHEMES`` (list, default ``['file']``) and
     ``HTTPCACHE_IGNORE_HTTP_CODES`` (list converted to ints, default empty)
     on the instance.
     """
     self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING', False)
     self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES', ['file'])
     raw_codes = settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES', [])
     # Build a real list: on Python 3 map() returns a one-shot iterator, so a
     # second membership test against it would silently find nothing.
     self.ignore_http_codes = [int(code) for code in raw_codes]
示例#2
0
 def _print_setting(self, opts):
     """Print the single setting requested through the parsed options.

     The option attributes ``get``, ``getbool``, ``getint``, ``getfloat`` and
     ``getlist`` are checked in that order; the first truthy one names the
     setting, which is read from ``settings_`` with the accessor of the same
     name. Nothing is printed when none of them is set.
     """
     for accessor in ('get', 'getbool', 'getint', 'getfloat', 'getlist'):
         setting_name = getattr(opts, accessor)
         if setting_name:
             # Parenthesised single-argument print behaves the same as a
             # Python 2 statement and as a Python 3 function call.
             print(getattr(settings_, accessor)(setting_name))
             return
示例#3
0
    def __init__(self, *args, **kwargs):
        """Inject export fields and encoding from settings, then defer to the parent.

        ``fields_to_export`` comes from ``EXPORT_FIELDS`` (None when the list is
        empty) and ``encoding`` from ``EXPORT_ENCODING`` (default ``'utf-8'``).
        """
        export_fields = settings.getlist('EXPORT_FIELDS')
        kwargs.update(
            fields_to_export=export_fields or None,
            encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
        )
        super(CSVkwItemExporter, self).__init__(*args, **kwargs)
示例#4
0
 def __init__(self, *args, **kwargs):
     """Configure the exporter from settings and disable the header line.

     Overrides ``fields_to_export`` (``EXPORT_FIELDS`` or None), ``encoding``
     (``EXPORT_ENCODING``, default ``'utf-8'``) and ``delimiter``
     (``CSV_DELIMITER``, default ``'|'``) before calling the parent.
     """
     export_fields = settings.getlist('EXPORT_FIELDS')
     encoding = settings.get('EXPORT_ENCODING', 'utf-8')
     kwargs.update({
         'fields_to_export': export_fields or None,
         'encoding': encoding,
         'delimiter': settings.get('CSV_DELIMITER', '|'),
         'include_headers_line': False,
     })
     super(ProductCSVExporter, self).__init__(*args, **kwargs)
示例#5
0
 def get_exporter_and_file(self):
     """Build the item exporter selected by the export settings.

     Reads ``EXPORT_FORMAT`` and ``EXPORT_FILE`` from settings, picks the
     exporter class for the format, opens the output file in binary write
     mode and instantiates the exporter on it.

     Returns:
         A ``(exporter, file)`` tuple.

     Raises:
         NotConfigured: if either setting is empty, or the format is not one
             of xml, csv, csv_headers, pprint, pickle, json, jsonlines.
     """
     export_format = settings['EXPORT_FORMAT']
     filename = settings['EXPORT_FILE']
     if not export_format or not filename:
         raise NotConfigured
     exp_kwargs = {
         'fields_to_export': settings.getlist('EXPORT_FIELDS') or None,
         'export_empty_fields': settings.getbool('EXPORT_EMPTY', False),
         'encoding': settings.get('EXPORT_ENCODING', 'utf-8'),
     }
     # Resolve the exporter class BEFORE touching the filesystem, so an
     # unsupported format neither truncates the target nor leaks a handle.
     if export_format == 'xml':
         exporter_cls = exporter.XmlItemExporter
     elif export_format == 'csv':
         exporter_cls = exporter.CsvItemExporter
     elif export_format == 'csv_headers':
         exporter_cls = exporter.CsvItemExporter
         exp_kwargs['include_headers_line'] = True
     elif export_format == 'pprint':
         exporter_cls = exporter.PprintItemExporter
     elif export_format == 'pickle':
         exporter_cls = exporter.PickleItemExporter
     elif export_format == 'json':
         # NOTE(review): 'json' maps to the JSON-lines exporter and
         # 'jsonlines' to the JSON exporter; this looks swapped but is kept
         # as-is because existing output may depend on it - confirm.
         exporter_cls = exporter.JsonLinesItemExporter
     elif export_format == 'jsonlines':
         exporter_cls = exporter.JsonItemExporter
     else:
         raise NotConfigured("Unsupported export format: %s" % export_format)
     output = open(filename, 'wb')
     try:
         exp = exporter_cls(output, **exp_kwargs)
     except Exception:
         # Do not leave the file open if the exporter cannot be built.
         output.close()
         raise
     return exp, output
示例#6
0
 def __init__(self):
     """Configure the telnet console and hook it to the engine signals.

     Raises:
         NotConfigured: if ``TELNETCONSOLE_ENABLED`` is false.
     """
     if not settings.getbool('TELNETCONSOLE_ENABLED'):
         raise NotConfigured
     self.noisy = False
     # Materialise the ports: on Python 3 map() yields a one-shot iterator
     # that can be neither indexed nor reused.
     self.portrange = [int(port)
                       for port in settings.getlist('TELNETCONSOLE_PORT')]
     self.host = settings['TELNETCONSOLE_HOST']
     dispatcher.connect(self.start_listening, signals.engine_started)
     dispatcher.connect(self.stop_listening, signals.engine_stopped)
示例#7
0
    def __init__(self, *args, **kwargs):
        """Apply CSV dialect and export options from settings, then call the parent.

        Falls back to a tab delimiter and a double-quote quote character when
        ``CSV_DELIMITER`` / ``CSV_QUOTECHAR`` are unset or empty; every field
        is quoted (``csv.QUOTE_ALL``).
        """
        delimiter = settings.get('CSV_DELIMITER')
        if not delimiter:
            delimiter = "\t"
        quotechar = settings.get('CSV_QUOTECHAR')
        if not quotechar:
            quotechar = "\""
        kwargs.update(
            delimiter=delimiter,
            quotechar=quotechar,
            quoting=csv.QUOTE_ALL,
            fields_to_export=settings.getlist('EXPORT_FIELDS') or None,
            encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
        )
        super(CSVkwItemExporter, self).__init__(*args, **kwargs)
示例#8
0
    def __init__(self):
        """Set up the mail sender and recipients, and subscribe to engine signals.

        Raises:
            NotConfigured: if ``MEMDEBUG_ENABLED`` is false.
        """
        enabled = settings.getbool('MEMDEBUG_ENABLED')
        if not enabled:
            raise NotConfigured

        self.mail = MailSender()
        recipients = settings.getlist('MEMDEBUG_NOTIFY')
        self.rcpts = recipients

        dispatcher.connect(self.engine_started, signals.engine_started)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
示例#9
0
 def __init__(self, file, include_headers_line=True, join_multivalued=",", **kwargs):
     """Create a CSV writer on ``file`` configured from the project settings.

     The field list is read from the setting named ``EXPORT_FIELDS`` plus the
     ``EXPORT_ITEM`` suffix; the delimiter comes from ``CSV_DELIMITER``
     (default ``","``). Headers are marked as not yet written unless
     ``EXPORT_HEADLINE`` equals the string ``"False"``.
     """
     item_suffix = settings.get("EXPORT_ITEM", "")
     fields_setting = "EXPORT_FIELDS" + item_suffix
     kwargs.update(
         fields_to_export=settings.getlist(fields_setting) or None,
         encoding=settings.get("EXPORT_ENCODING", "utf-8"),
     )
     self._configure(kwargs, dont_fail=True)
     self.include_headers_line = include_headers_line
     kwargs["delimiter"] = settings.get("CSV_DELIMITER", ",")
     self.csv_writer = csv.writer(file, **kwargs)
     headline_flag = settings.get("EXPORT_HEADLINE", "True")
     self._headers_not_written = headline_flag != "False"
     self._join_multivalued = join_multivalued
示例#10
0
    def spider_opened(self, spider):
        """Open the spider's CSV output file and start a CSV exporter on it.

        If the file cannot be opened, the spider is closed through the engine
        with an error reason and nothing else is set up.
        """
        try:
            csv_file = open(spider.output_file, 'w+b')
        except IOError as err:
            reason = "ERROR: Can't create CSV file: " + str(err)
            spider.crawler.engine.close_spider(spider, reason)
            return

        self.files[spider] = csv_file
        self.exporter = CsvItemExporter(csv_file)
        export_fields = settings.getlist("EXPORT_FIELDS")
        self.exporter.fields_to_export = export_fields
        self.exporter.start_exporting()
示例#11
0
    def load(self, spider_modules=None):
        """Import the spider modules and index every spider found by name.

        ``spider_modules`` defaults to the ``SPIDER_MODULES`` setting. Each
        spider is validated against ``ISpider`` before being registered.
        """
        self.spider_modules = (settings.getlist('SPIDER_MODULES')
                               if spider_modules is None else spider_modules)
        self._spiders = {}

        imported = [__import__(name, {}, {}, [''])
                    for name in self.spider_modules]
        for mod in imported:
            for found in self._getspiders(ISpider, mod):
                ISpider.validateInvariants(found)
                self._spiders[found.name] = found
        self.loaded = True
示例#12
0
    def load(self, spider_modules=None):
        """Import the spider modules and index every spider by ``domain_name``.

        ``spider_modules`` defaults to the ``SPIDER_MODULES`` setting. Each
        spider is validated against ``ISpider`` before being registered.
        """
        self.spider_modules = (settings.getlist('SPIDER_MODULES')
                               if spider_modules is None else spider_modules)
        self._invaliddict = {}
        self._spiders = {}

        imported = [__import__(name, {}, {}, [''])
                    for name in self.spider_modules]
        for mod in imported:
            for found in self._getspiders(ISpider, mod):
                ISpider.validateInvariants(found)
                self._spiders[found.domain_name] = found
        self.loaded = True
示例#13
0
    def __init__(self):
        """Read the memory-usage settings and subscribe to engine signals.

        The limit and warning thresholds are read in megabytes and stored
        in bytes.

        Raises:
            NotConfigured: if ``MEMUSAGE_ENABLED`` is false or
                ``procfs_supported()`` returns a false value.
        """
        if not settings.getbool("MEMUSAGE_ENABLED") or not procfs_supported():
            raise NotConfigured

        self.warned = False
        self.notify_mails = settings.getlist("MEMUSAGE_NOTIFY")
        limit_mb = settings.getint("MEMUSAGE_LIMIT_MB")
        self.limit = limit_mb * 1024 * 1024
        warning_mb = settings.getint("MEMUSAGE_WARNING_MB")
        self.warning = warning_mb * 1024 * 1024
        self.report = settings.getbool("MEMUSAGE_REPORT")
        self.mail = MailSender()
        dispatcher.connect(self.engine_started, signal=signals.engine_started)
        dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
示例#14
0
    def __init__(self):
        """Try to load libxml2, then set up mail notification and engine signals.

        ``self.libxml2`` holds the module, or None when it cannot be imported.

        Raises:
            NotConfigured: if ``MEMDEBUG_ENABLED`` is false.
        """
        try:
            import libxml2
        except ImportError:
            self.libxml2 = None
        else:
            self.libxml2 = libxml2
        enabled = settings.getbool('MEMDEBUG_ENABLED')
        if not enabled:
            raise NotConfigured

        self.mail = MailSender()
        self.rcpts = settings.getlist('MEMDEBUG_NOTIFY')

        dispatcher.connect(self.engine_started, signals.engine_started)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
示例#15
0
    def __init__(self):
        """Read the memory-usage settings and subscribe to engine signals.

        The limit and warning thresholds are read in megabytes and stored
        in bytes.

        Raises:
            NotConfigured: if ``MEMUSAGE_ENABLED`` is false or the ``/proc``
                path does not exist.
        """
        if not settings.getbool('MEMUSAGE_ENABLED') or not os.path.exists('/proc'):
            raise NotConfigured

        self.warned = False
        self.notify_mails = settings.getlist('MEMUSAGE_NOTIFY')
        limit_mb = settings.getint('MEMUSAGE_LIMIT_MB')
        self.limit = limit_mb * 1024 * 1024
        warning_mb = settings.getint('MEMUSAGE_WARNING_MB')
        self.warning = warning_mb * 1024 * 1024
        self.report = settings.getbool('MEMUSAGE_REPORT')
        self.mail = MailSender()
        dispatcher.connect(self.engine_started, signal=signals.engine_started)
        dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
示例#16
0
    def __init__(self):
        """Read the memory-usage settings and subscribe to engine signals.

        The limit and warning thresholds are read in megabytes and stored
        in bytes.

        Raises:
            NotConfigured: if ``MEMUSAGE_ENABLED`` is false or
                ``procfs_supported()`` returns a false value.
        """
        if not settings.getbool('MEMUSAGE_ENABLED') or not procfs_supported():
            raise NotConfigured

        self.warned = False
        self.notify_mails = settings.getlist('MEMUSAGE_NOTIFY')
        limit_mb = settings.getint('MEMUSAGE_LIMIT_MB')
        self.limit = limit_mb * 1024 * 1024
        warning_mb = settings.getint('MEMUSAGE_WARNING_MB')
        self.warning = warning_mb * 1024 * 1024
        self.report = settings.getbool('MEMUSAGE_REPORT')
        self.mail = MailSender()
        dispatcher.connect(self.engine_started, signal=signals.engine_started)
        dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
示例#17
0
 def __init__(self):
     """Build the web service site from the configured resources.

     Every resource class listed by ``WEBSERVICE_RESOURCES_BASE`` /
     ``WEBSERVICE_RESOURCES`` is instantiated and mounted on a fresh
     ``RootResource`` under its ``ws_name``; listening start/stop is bound
     to the engine started/stopped signals.

     Raises:
         NotConfigured: if ``WEBSERVICE_ENABLED`` is false.
     """
     if not settings.getbool('WEBSERVICE_ENABLED'):
         raise NotConfigured
     logfile = settings['WEBSERVICE_LOGFILE']
     # Materialise the ports: on Python 3 map() yields a one-shot iterator
     # that can be neither indexed nor reused.
     self.portrange = [int(port)
                       for port in settings.getlist('WEBSERVICE_PORT')]
     self.host = settings['WEBSERVICE_HOST']
     root = RootResource()
     reslist = build_component_list(settings['WEBSERVICE_RESOURCES_BASE'],
                                    settings['WEBSERVICE_RESOURCES'])
     # Load every class up front so a bad path fails before any resource
     # is instantiated, regardless of Python version.
     resource_classes = [load_object(path) for path in reslist]
     for res_cls in resource_classes:
         res = res_cls()
         root.putChild(res.ws_name, res)
     server.Site.__init__(self, root, logPath=logfile)
     self.noisy = False
     dispatcher.connect(self.start_listening, signals.engine_started)
     dispatcher.connect(self.stop_listening, signals.engine_stopped)
示例#18
0
 def __init__(self):
     """Build the web service site from the configured resources.

     Every resource class listed by ``WEBSERVICE_RESOURCES_BASE`` /
     ``WEBSERVICE_RESOURCES`` is instantiated and mounted on a fresh
     ``RootResource`` under its ``ws_name``; listening start/stop is bound
     to the engine started/stopped signals.

     Raises:
         NotConfigured: if ``WEBSERVICE_ENABLED`` is false.
     """
     if not settings.getbool('WEBSERVICE_ENABLED'):
         raise NotConfigured
     logfile = settings['WEBSERVICE_LOGFILE']
     # Materialise the ports: on Python 3 map() yields a one-shot iterator
     # that can be neither indexed nor reused.
     self.portrange = [int(port)
                       for port in settings.getlist('WEBSERVICE_PORT')]
     self.host = settings['WEBSERVICE_HOST']
     root = RootResource()
     reslist = build_component_list(settings['WEBSERVICE_RESOURCES_BASE'],
                                    settings['WEBSERVICE_RESOURCES'])
     # Load every class up front so a bad path fails before any resource
     # is instantiated, regardless of Python version.
     resource_classes = [load_object(path) for path in reslist]
     for res_cls in resource_classes:
         res = res_cls()
         root.putChild(res.ws_name, res)
     server.Site.__init__(self, root, logPath=logfile)
     self.noisy = False
     dispatcher.connect(self.start_listening, signals.engine_started)
     dispatcher.connect(self.stop_listening, signals.engine_stopped)
示例#19
0
 def __init__(self, settings):
     """Configure the feed exporter from ``settings``.

     Loads the storage and exporter components, and resolves the optional
     ``FEED_URI_PARAMS`` callable (a no-op taking two arguments when unset).

     Raises:
         NotConfigured: if ``FEED_URI`` is empty, or the URI's storage or the
             lower-cased ``FEED_FORMAT`` is not supported.
     """
     self.settings = settings
     self.urifmt = settings['FEED_URI']
     if not self.urifmt:
         raise NotConfigured
     self.format = settings['FEED_FORMAT'].lower()
     self.storages = self._load_components('FEED_STORAGES')
     self.exporters = self._load_components('FEED_EXPORTERS')
     # Short-circuits exactly like two consecutive guards: the exporter
     # check only runs when the storage check passed.
     supported = (self._storage_supported(self.urifmt)
                  and self._exporter_supported(self.format))
     if not supported:
         raise NotConfigured
     self.store_empty = settings.getbool('FEED_STORE_EMPTY')
     self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None
     uripar_path = settings['FEED_URI_PARAMS']
     if uripar_path:
         self._uripar = load_object(uripar_path)
     else:
         self._uripar = lambda x, y: None
示例#20
0
 def load(self):
     """Load the pipeline stages listed in the ``ITEM_PIPELINES`` setting.

     Each path is resolved with ``load_object`` and instantiated. Instances
     are appended to ``self.pipeline`` and recorded in ``self.enabled`` by
     class name. A class raising ``NotConfigured`` is recorded in
     ``self.disabled`` instead, and the exception is logged when it carries
     arguments.
     """
     self.enabled.clear()
     self.disabled.clear()
     for pipepath in settings.getlist('ITEM_PIPELINES'):
         cls = load_object(pipepath)
         if not cls:
             continue
         # Only the constructor can signal NotConfigured, so keep the try
         # body down to that single call.
         try:
             pipe = cls()
         except NotConfigured as e:
             self.disabled[cls.__name__] = pipepath
             if e.args:
                 log.msg(e)
         else:
             self.pipeline.append(pipe)
             self.enabled[cls.__name__] = pipe
示例#21
0
    def __init__(self,
                 file,
                 include_headers_line=True,
                 join_multivalued=',',
                 **kwargs):
        """Initialise the exporter, then replace its stream and CSV writer.

        ``fields_to_export`` and ``encoding`` are overridden from the
        ``EXPORT_FIELDS`` / ``EXPORT_ENCODING`` settings before the parent
        initializer runs. Afterwards the stream set up by the parent is
        closed and a new one is obtained from ``FixedFileFeedStorage`` for
        ``file.name``.

        Args:
            file: output file object; its ``name`` attribute is used to
                reopen the target through ``FixedFileFeedStorage``.
            include_headers_line: forwarded to the parent initializer.
            join_multivalued: forwarded to the parent initializer.
            **kwargs: extra options; what remains after ``_configure`` is
                passed to ``csv.writer``.
        """
        kwargs['fields_to_export'] = settings.getlist('EXPORT_FIELDS') or None
        kwargs['encoding'] = settings.get('EXPORT_ENCODING', 'utf-8')

        super(CSVkwItemExporter, self).__init__(file, include_headers_line,
                                                join_multivalued, **kwargs)
        # NOTE(review): presumably strips the exporter-level options out of
        # kwargs so only csv.writer options remain - confirm against the
        # parent class.
        self._configure(kwargs, dont_fail=True)
        # Drop the stream created by the parent before reopening the target.
        self.stream.close()
        storage = FixedFileFeedStorage(file.name)
        file = storage.open(file.name)
        # On Python 3 the reopened file is wrapped in a text layer using the
        # exporter's encoding; on Python 2 it is used directly.
        self.stream = io.TextIOWrapper(
            file,
            line_buffering=False,
            write_through=True,
            encoding=self.encoding,
            newline='',
        ) if six.PY3 else file
        # Rebuild the writer so it targets the new stream.
        self.csv_writer = csv.writer(self.stream, **kwargs)
示例#22
0
 def __init__(self):
     """Store the stats recipients and subscribe to ``stats_spider_closed``.

     Raises:
         NotConfigured: if ``STATSMAILER_RCPTS`` is empty.
     """
     recipients = settings.getlist("STATSMAILER_RCPTS")
     self.recipients = recipients
     if not recipients:
         raise NotConfigured
     closed_signal = signals.stats_spider_closed
     dispatcher.connect(self.stats_spider_closed, signal=closed_signal)
示例#23
0
 def __init__(self):
     """Store the stats recipients and subscribe to ``stats_spider_closed``.

     Raises:
         NotConfigured: if ``STATSMAILER_RCPTS`` is empty.
     """
     recipients = settings.getlist("STATSMAILER_RCPTS")
     self.recipients = recipients
     if not recipients:
         raise NotConfigured
     closed_signal = signals.stats_spider_closed
     dispatcher.connect(self.stats_spider_closed, signal=closed_signal)
示例#24
0
 def __init__(self):
     """Connect and authenticate to the controller on port 9151, and load the retry codes.

     ``self.codes`` is the set of integer codes from ``RETRY_HTTP_CODES``;
     ``self.count`` starts at zero.
     """
     controller = Controller.from_port(port=9151)
     self.controller = controller
     # NOTE(review): the controller password is hard-coded in source;
     # consider moving it to configuration.
     controller.authenticate('931005')
     retry_codes = settings.getlist('RETRY_HTTP_CODES')
     self.codes = set(map(int, retry_codes))
     self.count = 0
示例#25
0
 def __init__(self):
     """Parse the ``HTTP_PROXY`` setting and collect the ``PROXY_SPIDERS`` names as a set."""
     http_proxy = settings.get('HTTP_PROXY')
     self.proxy = self.parse_proxy(http_proxy, 'http')
     spider_names = settings.getlist('PROXY_SPIDERS', [])
     self.proxy_spiders = set(spider_names)
示例#26
0
 def __init__(self, *args, **kwargs):
     """Take export fields and encoding from settings and disable the header line."""
     export_fields = settings.getlist('EXPORT_FIELDS')
     kwargs.update(
         fields_to_export=export_fields or None,
         encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
         include_headers_line=False,
     )
     super(CSVkwItemExporter, self).__init__(*args, **kwargs)
 def __init__(self, settings=settings):
     """Read the HTTP cache policy options from ``settings``.

     Stores ``HTTPCACHE_IGNORE_MISSING`` (bool, default False),
     ``HTTPCACHE_IGNORE_SCHEMES`` (list, default ``['file']``) and
     ``HTTPCACHE_IGNORE_HTTP_CODES`` (list converted to ints, default empty)
     on the instance.
     """
     self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING', False)
     self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES', ['file'])
     raw_codes = settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES', [])
     # Build a real list: on Python 3 map() returns a one-shot iterator, so a
     # second membership test against it would silently find nothing.
     self.ignore_http_codes = [int(code) for code in raw_codes]
 def __init__(self, *args, **kwargs):
     """Configure fields and encoding from settings, then set the multi-value joiner.

     After the parent initializer runs, ``_join_multivalued`` is set from
     ``MY_CSV_DELIMITER`` (default ``','``).
     """
     export_fields = settings.getlist('EXPORT_FIELDS')
     kwargs.update(
         fields_to_export=export_fields or None,
         encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
     )
     super(CSVColripItemExporter, self).__init__(*args, **kwargs)
     joiner = settings.get('MY_CSV_DELIMITER', ',')
     self._join_multivalued = joiner
示例#29
0
 def __init__(self, *args, **kwargs):
     """Set ``fields_to_export`` from ``CSV_EXPORT_FIELDS`` (None when empty) and call the parent."""
     csv_fields = settings.getlist('CSV_EXPORT_FIELDS')
     kwargs.update(fields_to_export=csv_fields or None)
     super(SlybotCSVItemExporter, self).__init__(*args, **kwargs)
示例#30
0
 def __init__(self, *args, **kwargs):
     """Take export fields and encoding from settings and disable the header line."""
     export_fields = settings.getlist('EXPORT_FIELDS')
     kwargs.update(
         fields_to_export=export_fields or None,
         encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
         include_headers_line=False,
     )
     super(CSVkwItemExporter, self).__init__(*args, **kwargs)
示例#31
0
    def __init__(self, *args, **kwargs):
        """Override ``fields_to_export`` and ``encoding`` from settings, then call the parent."""
        export_fields = settings.getlist('EXPORT_FIELDS')
        overrides = {
            'fields_to_export': export_fields or None,
            'encoding': settings.get('EXPORT_ENCODING', 'utf-8'),
        }
        kwargs.update(overrides)
        super(CsvItemExporter2, self).__init__(*args, **kwargs)
示例#32
0
 def __init__(self):
     """Read the retry settings.

     Stores ``RETRY_TIMES`` and ``RETRY_PRIORITY_ADJUST`` as ints and
     ``RETRY_HTTP_CODES`` as a list of ints.
     """
     self.max_retry_times = settings.getint('RETRY_TIMES')
     # Build a real list: on Python 3 map() returns a one-shot iterator, so
     # only the first membership test against it would work.
     self.retry_http_codes = [int(code)
                              for code in settings.getlist('RETRY_HTTP_CODES')]
     self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
示例#33
0
 def __init__(self):
     """Read the retry settings.

     Raises:
         NotConfigured: if ``RETRY_ENABLED`` is false.
     """
     enabled = settings.getbool('RETRY_ENABLED')
     if not enabled:
         raise NotConfigured
     self.max_retry_times = settings.getint('RETRY_TIMES')
     raw_codes = settings.getlist('RETRY_HTTP_CODES')
     self.retry_http_codes = set(map(int, raw_codes))
     self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
示例#34
0
 def __init__(self, *args, **kwargs):
     """Set ``fields_to_export`` from ``CSV_EXPORT_FIELDS`` (None when empty) and call the parent."""
     csv_fields = settings.getlist('CSV_EXPORT_FIELDS')
     kwargs.update(fields_to_export=csv_fields or None)
     super(SlybotCSVItemExporter, self).__init__(*args, **kwargs)
示例#35
0
 def open_spider(self, spider):
     """Open the CSV output file and create the exporter on it.

     The path is the ``FEED_URI`` setting when set; otherwise it is built
     from the spider's ``name``, ``tipprunde`` and ``matchday`` attributes.
     """
     feed_uri = settings.get('FEED_URI')
     if feed_uri:
         output_csv_file = feed_uri
     else:
         output_csv_file = "{}-{}-{}.csv".format(
             spider.name, spider.tipprunde, spider.matchday)
     self.output_csv = open(output_csv_file, 'w')
     export_fields = settings.getlist('FEED_EXPORT_FIELDS')
     self.exporter = CsvBiertotoItemExporter(
         self.output_csv, fields_to_export=export_fields)
示例#36
0
 def __init__(self, *args, **kwargs):
     """Configure the CSV exporter from settings, then set the header flag.

     ``delimiter`` comes from ``EXPORT_CSV_DELIMITER`` (default ``'\\001'``),
     ``fields_to_export`` from ``EXPORT_FIELDS`` (None when empty) and
     ``encoding`` from ``EXPORT_ENCODING`` (default ``'utf-8'``). After the
     parent initializer runs, ``include_headers_line`` is read from the
     ``export_csv_headers`` setting (default True).
     """
     kwargs['delimiter'] = settings.get('EXPORT_CSV_DELIMITER', '\001')
     kwargs['fields_to_export'] = settings.getlist('EXPORT_FIELDS') or None
     # The encoding is a single value, so read it with get(); getlist()
     # would hand the parent a list instead of a codec name.
     kwargs['encoding'] = settings.get('EXPORT_ENCODING', 'utf-8')
     super(AppinfoCsvExporter, self).__init__(*args, **kwargs)
     # ``True`` must be capitalised; the lowercase name is undefined.
     self.include_headers_line = settings.getbool('export_csv_headers', True)
示例#37
0
 def __init__(self):
     """Open the output file through ``codecs.open`` and keep it on ``self.file``."""
     # NOTE(review): getlist() is called without a setting name, which looks
     # incomplete; the intended setting cannot be determined from this code.
     target = settings.getlist()
     self.file = codecs.open(target)
示例#38
0
文件: retry.py 项目: mattd/scrapy
 def __init__(self):
     """Read the retry settings.

     Raises:
         NotConfigured: if ``RETRY_ENABLED`` is false.
     """
     enabled = settings.getbool('RETRY_ENABLED')
     if not enabled:
         raise NotConfigured
     self.max_retry_times = settings.getint('RETRY_TIMES')
     raw_codes = settings.getlist('RETRY_HTTP_CODES')
     self.retry_http_codes = set(map(int, raw_codes))
     self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
示例#39
0
文件: retry.py 项目: chzealot/scrapy
 def __init__(self):
     """Read the retry settings.

     Stores ``RETRY_TIMES`` and ``RETRY_PRIORITY_ADJUST`` as ints and
     ``RETRY_HTTP_CODES`` as a list of ints.
     """
     self.max_retry_times = settings.getint('RETRY_TIMES')
     # Build a real list: on Python 3 map() returns a one-shot iterator, so
     # only the first membership test against it would work.
     self.retry_http_codes = [int(code)
                              for code in settings.getlist('RETRY_HTTP_CODES')]
     self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
示例#40
0
文件: test.py 项目: yidun55/crawler
 def start_requests(self):
     """Yield one unfiltered request per URL listed in the ``URL`` setting.

     Each request calls back into ``self.parse_item`` and carries
     ``meta["flow"] = "test"``.
     """
     for target_url in settings.getlist("URL"):
         req = Request(url=target_url, callback=self.parse_item,
                       dont_filter=True)
         req.meta["flow"] = "test"
         yield req
示例#41
0
 def __init__(self, *args, **kwargs):
     """Configure fields and encoding from settings, then set the multi-value joiner.

     After the parent initializer runs, ``_join_multivalued`` is set from
     ``MY_CSV_DELIMITER`` (default ``','``).
     """
     export_fields = settings.getlist('EXPORT_FIELDS')
     kwargs.update(
         fields_to_export=export_fields or None,
         encoding=settings.get('EXPORT_ENCODING', 'utf-8'),
     )
     super(CSVColripItemExporter, self).__init__(*args, **kwargs)
     joiner = settings.get('MY_CSV_DELIMITER', ',')
     self._join_multivalued = joiner
示例#42
0
 def from_crawler(cls, crawler):
     """Build an instance from the crawler's ``USER_AGENT_LIST`` setting.

     Returns:
         ``cls`` called with the list read from ``crawler.settings``.
     """
     return cls(crawler.settings.getlist("USER_AGENT_LIST"))