示例#1
0
class CommandSettings(unittest.TestCase):
    """Tests for ``ScrapyCommand`` option processing and ``ScrapyHelpFormatter``."""

    def setUp(self):
        # A bare command with fresh settings, plus an argparse parser on which
        # the command has registered its options.
        command = ScrapyCommand()
        command.settings = Settings()
        parser = argparse.ArgumentParser(
            formatter_class=ScrapyHelpFormatter,
            conflict_handler='resolve',
        )
        command.add_options(parser)
        self.command = command
        self.parser = parser

    def test_settings_json_string(self):
        """A JSON object given through ``-s FEEDS=...`` is stored as a settings object."""
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        cli_args = ['-s', f'FEEDS={feeds_json}', 'spider.py']
        opts, args = self.parser.parse_known_args(args=cli_args)
        self.command.process_options(args, opts)

        feeds = self.command.settings['FEEDS']
        self.assertIsInstance(feeds, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds), json.loads(feeds_json))

    def test_help_formatter(self):
        """``_join_parts`` turns the section headings into underlined titles."""
        part_strings = [
            'usage: scrapy genspider [options] <name> <domain>\n\n',
            '\n',
            'optional arguments:\n',
            '\n',
            'Global Options:\n',
        ]
        expected = (
            'Usage\n=====\n  scrapy genspider [options] <name> <domain>\n\n\n'
            'Optional Arguments\n==================\n\n'
            'Global Options\n--------------\n'
        )
        formatter = ScrapyHelpFormatter(prog='scrapy')
        self.assertEqual(formatter._join_parts(part_strings), expected)
示例#2
0
 def process_options(self, args, opts):
     """Run the base option processing, then handle ``-a`` and output options.

     ``opts.spargs`` is converted with ``arglist_to_dict``. When an output
     is given, ``FEED_URI`` and ``FEED_FORMAT`` are set at ``cmdline``
     priority.

     Raises ``UsageError`` if the ``-a`` values cannot be converted or if
     the output format is not one of the configured feed exporters.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     if not opts.output:
         return

     # A lone dash is mapped to the 'stdout:' feed URI.
     feed_uri = 'stdout:' if opts.output == '-' else opts.output
     self.settings.set('FEED_URI', feed_uri, priority='cmdline')

     known_formats = []
     for setting_name in ('FEED_EXPORTERS', 'FEED_EXPORTERS_BASE'):
         known_formats.extend(self.settings.getdict(setting_name).keys())

     if not opts.output_format:
         # No explicit format: fall back to the output's file extension
         # with every dot removed.
         extension = os.path.splitext(opts.output)[1]
         opts.output_format = extension.replace(".", "")

     if opts.output_format not in known_formats:
         message = ("Unrecognized output format '%s', set one"
                    " using the '-t' switch or as a file extension"
                    " from the supported list %s")
         raise UsageError(
             message % (opts.output_format, tuple(known_formats)))

     self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
示例#3
0
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     Raises ``UsageError`` when ``arglist_to_dict`` rejects the ``-a`` values.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     opts.spargs = spider_args
示例#4
0
 def process_options(self, args, opts):
     """Delegate to ``ScrapyCommand`` and replace ``opts.spargs`` by its dict form.

     A ``ValueError`` from ``arglist_to_dict`` is reported as a ``UsageError``
     without printing the help text.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         converted = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError(
             "Invalid -a value, use -a NAME=VALUE", print_help=False)
     else:
         opts.spargs = converted
示例#5
0
    def process_options(self, args, opts):
        """Validate the travel-related command-line options and store them.

        Every option that is present is stripped of surrounding whitespace,
        validated, and then written to the settings at ``cmdline`` priority.
        Station names are also looked up in ``CityCode`` and the resulting
        code is stored next to the name.

        Raises:
            UsageError: if the travel mode, the travel time or one of the
                station names is rejected.
        """
        ScrapyCommand.process_options(self, args, opts)

        if opts.travelmode:
            travelmode = opts.travelmode.strip()
            if travelmode not in ("飞机", "火车"):
                raise UsageError("you must input correct travelmode")
            self.settings.set("travelmode", travelmode, priority="cmdline")

        if opts.traveltime:
            traveltime = opts.traveltime.strip()
            if not self.verify_date_str_lawyer(traveltime):
                raise UsageError("you must input correct traveltime")
            self.settings.set("traveltime", traveltime, priority="cmdline")

        if opts.startstation:
            startstation = opts.startstation.strip()
            # Validate first so a rejected name is never written to the
            # settings; membership is tested on the mapping itself instead of
            # a freshly built list of its keys.
            if startstation not in CityCode:
                raise UsageError("you must input correct startstation")
            self.settings.set("startstation", startstation, priority="cmdline")
            self.settings.set("startcitycode", CityCode[startstation],
                              priority="cmdline")

        if opts.endstation:
            endstation = opts.endstation.strip()
            if endstation not in CityCode:
                raise UsageError("you must input correct endstation")
            self.settings.set("endstation", endstation, priority="cmdline")
            self.settings.set("endcitycode", CityCode[endstation],
                              priority="cmdline")
示例#6
0
    def process_options(self, args, opts):
        """Delegate option processing to ``ScrapyCommand``.

        This command performs no option handling of its own. In particular
        ``opts.spargs`` is not converted here, so there is nothing that could
        raise the ``ValueError`` an ``-a`` usage error would be built from.
        """
        ScrapyCommand.process_options(self, args, opts)
示例#7
0
文件: run.py 项目: leitianya/p_spider
    def process_options(self, args, opts):
        """Handle the option arguments passed in from the command line."""
        ScrapyCommand.process_options(self, args, opts)

        # NOTE: two earlier steps are currently disabled here: creating the
        # directory of the LOG_FILE setting when it does not exist, and
        # loading the default configuration through parse_default_args().
        self.parse_from_cmdline(args, opts)
示例#8
0
 def process_options(self, args, opts):
     """Run the base option processing and work out the locations mapping.

     With a positional argument, the first one is mapped to
     ``opts.output_filename`` in ``self._locations``. Without one, the
     ``SPIDERDOCS_LOCATIONS`` setting replaces ``self._locations``.

     Raises ``UsageError`` when neither source is available.
     """
     ScrapyCommand.process_options(self, args, opts)

     if args:
         self._locations[args[0]] = opts.output_filename
         return

     configured = self.settings.get('SPIDERDOCS_LOCATIONS', None)
     if not configured:
         raise UsageError("Module name is required.", print_help=False)
     self._locations = configured
示例#9
0
 def process_options(self, args, opts):
     """Run the base option processing, convert ``-a`` values and set ``FEEDS``.

     Raises ``UsageError`` when ``arglist_to_dict`` rejects the ``-a`` values.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         spider_args = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     opts.spargs = spider_args

     if not opts.output:
         return
     # The feeds are computed by feed_process_params_from_cli from the
     # current settings, the output option and the output format option.
     feeds = feed_process_params_from_cli(
         self.settings, opts.output, opts.output_format)
     self.settings.set('FEEDS', feeds, priority='cmdline')
示例#10
0
 def process_options(self, args, opts):
     """Run the base option processing, convert ``-a`` values, set ``Redis_key``.

     ``opts.spargs`` is converted with ``arglist_to_dict``. When
     ``opts.urlqueue`` is set, its value is stored in the ``Redis_key``
     setting at ``cmdline`` priority.

     Raises:
         UsageError: if the ``-a`` values cannot be converted.
     """
     ScrapyCommand.process_options(self, args, opts)
     # Only the conversion is guarded: a ValueError coming from anything
     # else must not be reported as an invalid -a value.
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE",
                          print_help=False)
     if opts.urlqueue:
         self.settings.set('Redis_key',
                           opts.urlqueue,
                           priority='cmdline')
示例#11
0
class CommandSettings(unittest.TestCase):
    """Tests for ``ScrapyCommand`` option processing with an optparse parser."""

    def setUp(self):
        # A bare command with fresh settings, plus an optparse parser on which
        # the command has registered its options.
        command = ScrapyCommand()
        command.settings = Settings()
        parser = optparse.OptionParser(
            formatter=optparse.TitledHelpFormatter(),
            conflict_handler='resolve',
        )
        command.add_options(parser)
        self.command = command
        self.parser = parser

    def test_settings_json_string(self):
        """A JSON object given through ``-s FEEDS=...`` is stored as a settings object."""
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        cli_args = ['-s', f'FEEDS={feeds_json}', 'spider.py']
        opts, args = self.parser.parse_args(args=cli_args)
        self.command.process_options(args, opts)

        feeds = self.command.settings['FEEDS']
        self.assertIsInstance(feeds, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds), json.loads(feeds_json))
示例#12
0
    def process_options(self, args, opts):
        """Process the command-line options and record them in the settings.

        Stores an optional second positional argument as ``RSS``, the first
        element of ``opts.output`` as ``OUTPUT``, and the remaining arguments
        together with all options as ``CMDLINE_ARGS``. When ``opts.verbose``
        is set, the verbose-related settings are switched on as well.
        """
        ScrapyCommand.process_options(self, args, opts)

        # Spider arguments are reset to an empty dict instead of being parsed.
        opts.spargs = {}

        if len(args) == 2:
            # pop() removes the last positional argument from ``args``, so it
            # is no longer part of the CMDLINE_ARGS snapshot taken below.
            self.settings['RSS'] = args.pop()

        # NOTE(review): helper defined elsewhere; its effect and whether its
        # position in this sequence matters cannot be told from here.
        self._takeover_logging()

        if opts.output:
            # presumably opts.output is a list of output values and only the
            # first one is kept -- TODO confirm
            self.settings['OUTPUT'] = opts.output[0]
        # Runs whether or not an output was given.
        # NOTE(review): assumes a FEEDS entry exists in the settings -- confirm
        # how pop() behaves on this settings object when the key is missing.
        self.settings.pop('FEEDS')

        # vars(opts) exposes the option object's attributes as a dict.
        self.settings['CMDLINE_ARGS'] = {'args': args, 'opts': vars(opts)}

        if opts.verbose:
            self.settings['VERBOSE'] = True
            self.settings.set('LOG_VIOLATIONS', True, priority='cmdline')
            self.settings.set('STATS_DUMP', True, priority='cmdline')
示例#13
0
 def process_options(self, args, opts):
     """Run the base option processing, then handle ``-a`` and output options.

     ``opts.spargs`` is converted with ``arglist_to_dict``. When an output
     is given, ``FEED_URI`` and ``FEED_FORMAT`` are set at ``cmdline``
     priority.

     Raises ``UsageError`` if the ``-a`` values cannot be converted or if
     the output format is not a key of the ``FEED_EXPORTERS`` composite
     setting (entries with a ``None`` value removed).
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
     if not opts.output:
         return

     # A lone dash is mapped to the 'stdout:' feed URI.
     feed_uri = 'stdout:' if opts.output == '-' else opts.output
     self.settings.set('FEED_URI', feed_uri, priority='cmdline')

     exporters = without_none_values(
         self.settings._getcomposite('FEED_EXPORTERS'))
     known_formats = exporters.keys()

     if not opts.output_format:
         # No explicit format: fall back to the output's file extension
         # with every dot removed.
         extension = os.path.splitext(opts.output)[1]
         opts.output_format = extension.replace(".", "")

     if opts.output_format not in known_formats:
         message = ("Unrecognized output format '%s', set one"
                    " using the '-t' switch or as a file extension"
                    " from the supported list %s")
         raise UsageError(
             message % (opts.output_format, tuple(known_formats)))

     self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
示例#14
0
    def process_options(self, args, opts):
        """Run the base option processing, then this command's own steps.

        After ``ScrapyCommand.process_options``, the spider arguments, the
        request meta and the request callback keyword arguments are each
        handled by a dedicated method that receives ``opts``.
        """
        ScrapyCommand.process_options(self, args, opts)

        # The three steps run in this fixed order.
        self.process_spider_arguments(opts)
        self.process_request_meta(opts)
        self.process_request_cb_kwargs(opts)
示例#15
0
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     When ``arglist_to_dict`` raises ``ValueError`` a hint is printed and
     processing continues; ``opts.spargs`` then keeps its original value.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # Call form of print: with a single string argument it produces the
         # same output on Python 2 and is valid syntax on Python 3.
         print("Invalid -a value, use -a NAME=VALUE")
示例#16
0
 def process_options(self, args, opts):
     """Delegate option processing to ``ScrapyCommand`` without any extras."""
     ScrapyCommand.process_options(self, args, opts)
示例#17
0
    def process_options(self, args, opts):
        """Run the base option processing, then this command's own steps.

        After ``ScrapyCommand.process_options``, the spider arguments and the
        request meta are each handled by a dedicated method that receives
        ``opts``.
        """
        ScrapyCommand.process_options(self, args, opts)

        # Spider arguments first, request meta second.
        self.process_spider_arguments(opts)
        self.process_request_meta(opts)
示例#18
0
    def process_options(self, args, opts):
        """Run the base option processing and convert ``opts.spargs`` to a dict.

        Nothing is caught here: any exception raised by ``arglist_to_dict``
        propagates to the caller.
        """
        ScrapyCommand.process_options(self, args, opts)

        spider_args = arglist_to_dict(opts.spargs)
        opts.spargs = spider_args
示例#19
0
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     When ``arglist_to_dict`` raises ``ValueError`` a hint is printed and
     processing continues; ``opts.spargs`` then keeps its original value.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         converted = arglist_to_dict(opts.spargs)
     except ValueError:
         print("Invalid -a value, use -a NAME=VALUE")
     else:
         opts.spargs = converted
示例#20
0
文件: parser.py 项目: wanliqun/Gerapy
 def process_options(self, args, opts):
     """Run ``BaseParser`` option processing, then this parser's own steps.

     The spider arguments and the request meta are each handled by a
     dedicated method that receives ``opts``.
     """
     BaseParser.process_options(self, args, opts)

     # Spider arguments first, request meta second.
     self.process_spider_arguments(opts)
     self.process_request_meta(opts)
示例#21
0
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     When ``arglist_to_dict`` raises ``ValueError`` a banner is printed and
     processing continues; ``opts.spargs`` then keeps its original value.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # Call form of print: with a single string argument it produces the
         # same output on Python 2 and is valid syntax on Python 3.
         print("*************** Multitask exception *******************")
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     The conversion is deliberately left unguarded: any exception raised by
     ``arglist_to_dict`` propagates to the caller.
     """
     ScrapyCommand.process_options(self, args, opts)
     parsed_args = arglist_to_dict(opts.spargs)
     opts.spargs = parsed_args
示例#23
0
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     Raises:
         Exception: if ``arglist_to_dict`` fails for any reason. The
             exception type is kept as plain ``Exception`` for existing
             callers, but it now carries the description of the original
             failure instead of being empty.
     """
     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except Exception as exc:
         # Raising a bare ``Exception`` hid what actually went wrong; keep
         # the type but pass the cause along in the message.
         raise Exception(
             "Failed to process -a arguments (expected NAME=VALUE): %s" % exc)
示例#24
0
 def process_options(self, args, opts):
     """Run the base option processing and convert ``opts.spargs`` to a dict.

     A ``ValueError`` from ``arglist_to_dict`` does not abort the command:
     ``opts.spargs`` keeps its original value and a warning is logged so the
     failure is no longer silent.
     """
     # Local import: the file's own import block is not visible from here.
     import logging

     ScrapyCommand.process_options(self, args, opts)
     try:
         opts.spargs = arglist_to_dict(opts.spargs)
     except ValueError:
         # Still best-effort, but leave a trace of the rejected input.
         logging.getLogger(__name__).warning(
             "Invalid -a value, use -a NAME=VALUE; "
             "spider arguments were left unconverted")