def process_options(self, args, opts):
    """Apply the spider-argument and feed-output command line options.

    Runs the base ``ScrapyCommand`` option handling, converts
    ``opts.spargs`` with ``arglist_to_dict`` and, when ``opts.output`` is
    set, stores ``FEED_URI`` and ``FEED_FORMAT`` in the settings at
    ``cmdline`` priority.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError`` for the
            ``-a`` values, or if the output format is not among the
            exporters obtained from the ``FEED_EXPORTERS`` setting.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    else:
        opts.spargs = spider_kwargs

    if not opts.output:
        return

    # '-' selects the 'stdout:' feed URI; anything else is used verbatim.
    feed_uri = 'stdout:' if opts.output == '-' else opts.output
    self.settings.set('FEED_URI', feed_uri, priority='cmdline')

    exporters = without_none_values(self.settings.getwithbase('FEED_EXPORTERS'))
    valid_output_formats = exporters.keys()
    if not opts.output_format:
        # No explicit format given: derive it from the output file extension.
        _, extension = os.path.splitext(opts.output)
        opts.output_format = extension.replace(".", "")
    if opts.output_format not in valid_output_formats:
        raise UsageError(
            "Unrecognized output format '%s', set one"
            " using the '-t' switch or as a file extension"
            " from the supported list %s"
            % (opts.output_format, tuple(valid_output_formats))
        )
    self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        parsed_arguments = arglist_to_dict(opts.spargs)
    except ValueError:
        message = "Invalid -a value, use -a NAME=VALUE"
        raise UsageError(message, print_help=False)
    else:
        opts.spargs = parsed_arguments
def process_spider_arguments(self, opts):
    """Replace ``opts.spargs`` with the dict built by ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError(
            "Invalid -a value, use -a NAME=VALUE",
            print_help=False,
        )
    else:
        opts.spargs = spider_kwargs
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # The keyword must be spelled ``print_help``: a misspelled keyword is
        # forwarded to the exception constructor and fails with TypeError
        # instead of reporting the usage problem.
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
def process_options(self, args, opts):
    """Apply spider-argument, output and overwrite-output options.

    Runs the base ``ScrapyCommand`` option handling, converts
    ``opts.spargs`` with ``arglist_to_dict`` and configures the feed
    settings (``FEED_OVERWRITE``, ``FEED_URI``, ``FEED_FORMAT``) at
    ``cmdline`` priority.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``, if both
            ``--output`` and ``--overwrite-output`` are given, or if the
            output format is not a key of ``FEED_EXPORTERS`` or
            ``FEED_EXPORTERS_BASE``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    if opts.overwrite_output:
        if opts.output:
            raise UsageError("Please use only one of --output and --overwrite-output")
        # From here on the overwrite target is handled like a regular output.
        opts.output = opts.overwrite_output
        # Use the same settings API as the rest of this method instead of
        # the deprecated ``settings.overrides`` mapping.
        self.settings.set('FEED_OVERWRITE', True, priority='cmdline')

    if opts.output:
        if opts.output == '-':
            self.settings.set('FEED_URI', 'stdout:', priority='cmdline')
        else:
            self.settings.set('FEED_URI', opts.output, priority='cmdline')
        valid_output_formats = (
            list(self.settings.getdict('FEED_EXPORTERS').keys())
            + list(self.settings.getdict('FEED_EXPORTERS_BASE').keys())
        )
        if not opts.output_format:
            # No explicit format given: derive it from the file extension.
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError("Unrecognized output format '%s', set one"
                             " using the '-t' switch or as a file extension"
                             " from the supported list %s" % (opts.output_format,
                                                              tuple(valid_output_formats)))
        self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Built-in exceptions such as RuntimeError accept no keyword
        # arguments, so passing ``print_help`` to one fails with TypeError.
        # UsageError is the exception the other commands raise here.
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
def process_options(self, args, opts):
    """Translate the common command line options into settings.

    Applies the ``-s`` NAME=VALUE pairs, the logging options and the
    pidfile/pdb options found on ``opts``. Settings are written at
    ``cmdline`` priority.

    Raises:
        UsageError: if handling the ``-s`` values raises ``ValueError``.
    """
    try:
        cli_settings = arglist_to_dict(opts.set)
        self.settings.setdict(cli_settings, priority='cmdline')
    except ValueError:
        raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False)

    # Giving a log file or a log level switches logging on ...
    log_options = (
        (opts.logfile, 'LOG_FILE'),
        (opts.loglevel, 'LOG_LEVEL'),
    )
    for option_value, setting_name in log_options:
        if option_value:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set(setting_name, option_value, priority='cmdline')

    # ... but the nolog option is applied last and turns it off again.
    if opts.nolog:
        self.settings.set('LOG_ENABLED', False, priority='cmdline')

    if opts.pidfile:
        pid_line = str(os.getpid()) + os.linesep
        with open(opts.pidfile, "w") as pid_file:
            pid_file.write(pid_line)

    if opts.pdb:
        failure.startDebugMode()
def run(self, args, opts):
    """Configure the settings from the CLI and config file, then crawl.

    ``args`` may hold up to two positional values: ``args[0]`` overrides the
    default tieba name and ``args[1]`` overrides (and records in the config)
    the database name for that tieba. ``bytes`` entries of ``args`` are
    decoded as UTF-8 in place.

    Raises:
        UsageError: for malformed ``-a`` values, an invalid filter name,
            more than two positional arguments, or a missing database name.
    """
    self.set_pages(opts.pages)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    self.settings.set('GOOD_ONLY', opts.good_only)
    self.settings.set('SEE_LZ', opts.see_lz)

    if opts.filter:
        try:
            # NOTE(review): eval() runs whatever expression follows
            # 'filter.' on the command line; consider resolving the name
            # with getattr() if only plain attribute names are expected.
            opts.filter = eval('filter.' + opts.filter)
        except Exception:
            # Not a bare ``except:`` so that SystemExit and
            # KeyboardInterrupt are not turned into a usage error.
            raise UsageError("Invalid filter function name!")
        self.settings.set("FILTER", opts.filter)

    cfg = config.config()
    if len(args) >= 3:
        raise UsageError("Too many arguments!")

    for index, argument in enumerate(args):
        if isinstance(argument, bytes):
            args[index] = argument.decode("utf8")

    # Fall back to port 3306 when the config does not specify one.
    if 'MYSQL_PORT' not in cfg.config.keys():
        cfg.config['MYSQL_PORT'] = 3306
    self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
    self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
    self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])
    self.settings.set('MYSQL_PORT', cfg.config['MYSQL_PORT'])

    tbname = cfg.config['DEFAULT_TIEBA']
    if len(args) >= 1:
        tbname = args[0]

    dbname = None
    if tbname in cfg.config['MYSQL_DBNAME'].keys():
        dbname = cfg.config['MYSQL_DBNAME'][tbname]
    if len(args) >= 2:
        # An explicit database name wins and is stored in the config mapping.
        dbname = args[1]
        cfg.config['MYSQL_DBNAME'][tbname] = dbname
    if not dbname:
        raise UsageError("Please input database name!")

    self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
    self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

    config.init_database(cfg.config['MYSQL_HOST'], cfg.config['MYSQL_USER'],
                         cfg.config['MYSQL_PASSWD'], cfg.config['MYSQL_PORT'],
                         dbname)

    log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'],
                     opts.good_only, opts.see_lz)
    self.settings.set('SIMPLE_LOG', log)

    self.crawler_process.crawl('tieba', **opts.spargs)
    self.crawler_process.start()
    # Called only after crawler_process.start() has returned.
    cfg.save()
def test_arglist_to_dict(self):
    """``arglist_to_dict`` maps ``NAME=VALUE`` strings to a dict."""
    expected = {"arg1": "val1", "arg2": "val2"}
    actual = arglist_to_dict(["arg1=val1", "arg2=val2"])
    self.assertEqual(actual, expected)
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and apply the nofollow option.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.nofollow`` is set, overrides ``CRAWLSPIDER_FOLLOW_LINKS`` to
    ``False``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    else:
        opts.spargs = spider_kwargs

    if not opts.nofollow:
        return
    # NOTE(review): this writes through the module-level ``settings`` name,
    # not ``self.settings`` - confirm that is intended.
    settings.overrides['CRAWLSPIDER_FOLLOW_LINKS'] = False
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and configure ``FEEDS``.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.output`` is set, stores the value returned by
    ``feed_process_params_from_cli`` as the ``FEEDS`` setting at
    ``cmdline`` priority.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    else:
        opts.spargs = spider_kwargs

    if not opts.output:
        return
    feed_config = feed_process_params_from_cli(
        self.settings,
        opts.output,
        opts.output_format,
    )
    self.settings.set('FEEDS', feed_config, priority='cmdline')
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and apply the urlqueue option.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.urlqueue`` is set, stores it as the ``Redis_key`` setting at
    ``cmdline`` priority.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    # Kept outside the try block so that a ValueError raised while storing
    # the setting is not misreported as an invalid -a value.
    if opts.urlqueue:
        self.settings.set('Redis_key', opts.urlqueue, priority='cmdline')
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and apply the output options.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.output`` is set, overrides ``FEED_URI`` and ``FEED_FORMAT``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    else:
        opts.spargs = spider_kwargs

    if not opts.output:
        return
    # '-' selects the 'stdout:' feed URI; anything else is used verbatim.
    feed_uri = 'stdout:' if opts.output == '-' else opts.output
    self.settings.overrides['FEED_URI'] = feed_uri
    self.settings.overrides['FEED_FORMAT'] = opts.output_format
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and validate the output options.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.output`` is set, overrides ``FEED_URI`` and ``FEED_FORMAT``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if
            ``opts.output_format`` is not a key of ``FEED_EXPORTERS`` or
            ``FEED_EXPORTERS_BASE``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        # list() makes the concatenation work when keys() returns a view
        # (Python 3) as well as when it returns a list (Python 2).
        valid_output_formats = (
            list(self.settings['FEED_EXPORTERS'].keys())
            + list(self.settings['FEED_EXPORTERS_BASE'].keys())
        )
        if opts.output_format not in valid_output_formats:
            raise UsageError('Invalid/unrecognized output format: %s, Expected %s'
                             % (opts.output_format, valid_output_formats))
        self.settings.overrides['FEED_FORMAT'] = opts.output_format
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and validate the output options.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.output`` is set, overrides ``FEED_URI`` and ``FEED_FORMAT``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if
            ``opts.output_format`` is not a key of ``FEED_EXPORTERS`` or
            ``FEED_EXPORTERS_BASE``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        # keys() may return a view rather than a list; views do not support
        # ``+``, so materialise both before concatenating.
        custom_formats = list(self.settings['FEED_EXPORTERS'].keys())
        base_formats = list(self.settings['FEED_EXPORTERS_BASE'].keys())
        valid_output_formats = custom_formats + base_formats
        if opts.output_format not in valid_output_formats:
            raise UsageError('Invalid/unrecognized output format: %s, Expected %s'
                             % (opts.output_format, valid_output_formats))
        self.settings.overrides['FEED_FORMAT'] = opts.output_format
def process_options(self, args, opts):
    """Translate the common command line options into settings overrides.

    Applies the ``-s`` NAME=VALUE pairs and the logging and pidfile options
    found on ``opts``.

    Raises:
        UsageError: if handling the ``-s`` values raises ``ValueError``.
    """
    try:
        cli_settings = arglist_to_dict(opts.set)
        self.settings.overrides.update(cli_settings)
    except ValueError:
        raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False)

    # Giving a log file or a log level switches logging on ...
    log_options = (
        (opts.logfile, 'LOG_FILE'),
        (opts.loglevel, 'LOG_LEVEL'),
    )
    for option_value, setting_name in log_options:
        if option_value:
            self.settings.overrides['LOG_ENABLED'] = True
            self.settings.overrides[setting_name] = option_value

    # ... but the nolog option is applied last and turns it off again.
    if opts.nolog:
        self.settings.overrides['LOG_ENABLED'] = False

    if opts.pidfile:
        pid_line = str(os.getpid()) + os.linesep
        with open(opts.pidfile, "w") as pid_file:
            pid_file.write(pid_line)
def process_options(self, args, opts):
    """Parse the ``-a`` spider arguments and apply the feed-output options.

    Runs the base ``ScrapyCommand`` option handling, replaces
    ``opts.spargs`` with the result of ``arglist_to_dict`` and, when
    ``opts.output`` is set, overrides ``FEED_URI`` and ``FEED_FORMAT``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
            output format is not a key of ``FEED_EXPORTERS`` or
            ``FEED_EXPORTERS_BASE``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        # list() makes the concatenation work when keys() returns a view
        # (Python 3) as well as when it returns a list (Python 2).
        valid_output_formats = (
            list(self.settings['FEED_EXPORTERS'].keys())
            + list(self.settings['FEED_EXPORTERS_BASE'].keys())
        )
        if not opts.output_format:
            # No explicit format given: derive it from the file extension.
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError("Unrecognized output format '%s', set one"
                             " using the '-t' switch or as a file extension"
                             " from the supported list %s" % (opts.output_format,
                                                              tuple(valid_output_formats)))
        self.settings.overrides['FEED_FORMAT'] = opts.output_format
def process_options(self, args, opts):
    """Apply the spider-argument and feed-output command line options.

    Runs the base ``ScrapyCommand`` option handling, converts
    ``opts.spargs`` with ``arglist_to_dict`` and, when ``opts.output`` is
    set, stores ``FEED_URI`` and ``FEED_FORMAT`` in the settings at
    ``cmdline`` priority.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
            output format is not among the exporters obtained from the
            ``FEED_EXPORTERS`` setting.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    else:
        opts.spargs = spider_kwargs

    if not opts.output:
        return

    # '-' selects the 'stdout:' feed URI; anything else is used verbatim.
    feed_uri = 'stdout:' if opts.output == '-' else opts.output
    self.settings.set('FEED_URI', feed_uri, priority='cmdline')

    composite_exporters = self.settings._getcomposite('FEED_EXPORTERS')
    valid_output_formats = without_none_values(composite_exporters).keys()
    if not opts.output_format:
        # No explicit format given: derive it from the output file extension.
        _, extension = os.path.splitext(opts.output)
        opts.output_format = extension.replace(".", "")
    if opts.output_format not in valid_output_formats:
        raise UsageError(
            "Unrecognized output format '%s', set one"
            " using the '-t' switch or as a file extension"
            " from the supported list %s"
            % (opts.output_format, tuple(valid_output_formats))
        )
    self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
def process_options(self, args, opts):
    """Apply the spider-argument and feed-output command line options.

    Runs the base ``ScrapyCommand`` option handling, converts
    ``opts.spargs`` with ``arglist_to_dict`` and, when ``opts.output`` is
    set, stores ``FEED_URI`` and ``FEED_FORMAT`` in the settings at
    ``cmdline`` priority.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``, or if the
            output format is not a key of ``FEED_EXPORTERS`` or
            ``FEED_EXPORTERS_BASE``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        spider_kwargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    else:
        opts.spargs = spider_kwargs

    if not opts.output:
        return

    # "-" selects the "stdout:" feed URI; anything else is used verbatim.
    feed_uri = "stdout:" if opts.output == "-" else opts.output
    self.settings.set("FEED_URI", feed_uri, priority="cmdline")

    custom_formats = list(self.settings.getdict("FEED_EXPORTERS").keys())
    base_formats = list(self.settings.getdict("FEED_EXPORTERS_BASE").keys())
    valid_output_formats = custom_formats + base_formats
    if not opts.output_format:
        # No explicit format given: derive it from the output file extension.
        _, extension = os.path.splitext(opts.output)
        opts.output_format = extension.replace(".", "")
    if opts.output_format not in valid_output_formats:
        raise UsageError(
            "Unrecognized output format '%s', set one"
            " using the '-t' switch or as a file extension"
            " from the supported list %s"
            % (opts.output_format, tuple(valid_output_formats))
        )
    self.settings.set("FEED_FORMAT", opts.output_format, priority="cmdline")
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Report the problem as a usage error instead of printing and
        # carrying on with an unparsed ``opts.spargs``; this also avoids the
        # Python 2-only ``print`` statement.
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
def test_arglist_to_dict(self):
    """Two ``NAME=VALUE`` items are turned into a two-entry dict."""
    arglist = ['arg1=val1', 'arg2=val2']
    result = arglist_to_dict(arglist)
    self.assertEqual(result, {'arg1': 'val1', 'arg2': 'val2'})
def test_arglist_to_dict(self):
    """Each ``NAME=VALUE`` string contributes one key/value pair."""
    expected_mapping = dict(arg1='val1', arg2='val2')
    self.assertEqual(
        arglist_to_dict(['arg1=val1', 'arg2=val2']),
        expected_mapping,
    )
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Printing and continuing would leave ``opts.spargs`` unparsed;
        # raise the usage error the other commands use for this case.
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
def process_options(self, args, opts):
    """Run the base option handling, then try to parse the spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``. If
    that call raises ``ValueError`` a marker line is printed and
    ``opts.spargs`` is left unchanged.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Call form of print: valid on Python 3 and, for a single string,
        # prints the same text on Python 2.
        # NOTE(review): opts.spargs stays unparsed on this path - confirm
        # that callers can cope with that.
        print("*************** Multitask exception *******************")
def process_options(self, args, opts):
    """Run the base option handling, then parse the spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``; any
    exception raised by that call propagates to the caller unchanged.
    """
    ScrapyCommand.process_options(self, args, opts)
    spider_kwargs = arglist_to_dict(opts.spargs)
    opts.spargs = spider_kwargs
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Silently ignoring the error would leave ``opts.spargs`` unparsed
        # with no hint to the user; report it as a usage error instead.
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
def process_options(self, args, opts):
    """Run the base option handling, then parse the spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``; the
    call is not guarded, so any exception it raises propagates unchanged.
    """
    ScrapyCommand.process_options(self, args, opts)
    parsed_arguments = arglist_to_dict(opts.spargs)
    opts.spargs = parsed_arguments
def process_options(self, args, opts):
    """Run the base option handling, then parse the ``-a`` spider arguments.

    ``opts.spargs`` is replaced by the result of ``arglist_to_dict``.

    Raises:
        UsageError: if ``arglist_to_dict`` raises ``ValueError``.
    """
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        # Catch only the parsing failure and say what went wrong, rather
        # than replacing every error with a bare, message-less Exception.
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)