def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

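# Illustrative sketch: scrapy.utils.conf.arglist_to_dict turns the repeated
# -a NAME=VALUE options into a dict and raises ValueError on malformed input,
# which the except clause above converts into a UsageError. A minimal
# standalone equivalent, assuming each argument is a 'NAME=VALUE' string:
def arglist_to_dict_sketch(arglist):
    result = {}
    for arg in arglist:
        # partition splits on the first '='; a missing separator is an error,
        # mirroring the UsageError path in process_options above
        name, sep, value = arg.partition('=')
        if not sep:
            raise ValueError("expected NAME=VALUE, got %r" % (arg,))
        result[name] = value
    return result

# e.g. ['category=books', 'depth=2'] -> {'category': 'books', 'depth': '2'}
assert arglist_to_dict_sketch(['category=books', 'depth=2']) == \
    {'category': 'books', 'depth': '2'}
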
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.output:
        if opts.output == '-':
            self.settings.set('FEED_URI', 'stdout:', priority='cmdline')
        else:
            self.settings.set('FEED_URI', opts.output, priority='cmdline')
        valid_output_formats = (
            list(self.settings.getdict('FEED_EXPORTERS').keys())
            + list(self.settings.getdict('FEED_EXPORTERS_BASE').keys())
        )
        if not opts.output_format:
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError(
                "Unrecognized output format '%s', set one"
                " using the '-t' switch or as a file extension"
                " from the supported list %s"
                % (opts.output_format, tuple(valid_output_formats)))
        self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')

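# Hedged illustration of the extension fallback above: when -t/--output-format
# is not given, the format is inferred from the -o FILE extension via
# os.path.splitext, and an extensionless FILE yields '' -- which then fails
# the valid_output_formats membership check.
import os

assert os.path.splitext('items.csv')[1].replace(".", "") == 'csv'
assert os.path.splitext('dump/items.json')[1].replace(".", "") == 'json'
assert os.path.splitext('items')[1].replace(".", "") == ''
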
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-u", "--unreviewed", dest="unreviewed", action="store_true", default=False, help="train and test with unreviewed files") parser.add_option("-r", "--reviewed", dest="reviewed", action="store_true", default=True, help="train and test with reviewed files") parser.add_option("--report", action="store_true", dest="print_report", default=False, help="Print a detailed classification report.") parser.add_option("--confusion_matrix", action="store_true", dest="print_cm", default=False, help="Print the confusion matrix.") #Not supported: parser.add_option("-t", "--top-n-features", dest="topn", type="int", default=0, help="number of top features to reveal")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option( "-c", dest="code", help="evaluate the code in the shell, print the result and exit") parser.add_option("--spider", dest="spider", help="use this spider")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.overwrite_output:
        if opts.output:
            raise UsageError("Please use only one of --output and --overwrite-output")
        opts.output = opts.overwrite_output
        self.settings.overrides['FEED_OVERWRITE'] = True
    if opts.output:
        if opts.output == '-':
            self.settings.set('FEED_URI', 'stdout:', priority='cmdline')
        else:
            self.settings.set('FEED_URI', opts.output, priority='cmdline')
        valid_output_formats = (
            list(self.settings.getdict('FEED_EXPORTERS').keys())
            + list(self.settings.getdict('FEED_EXPORTERS_BASE').keys())
        )
        if not opts.output_format:
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError("Unrecognized output format '%s', set one"
                             " using the '-t' switch or as a file extension"
                             " from the supported list %s"
                             % (opts.output_format, tuple(valid_output_formats)))
        self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')

def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-p", "--project", help="the project name in the target") parser.add_option("-v", "--version", help="the version to deploy. Defaults to current timestamp") parser.add_option("-l", "--list-targets", action="store_true", help="list available targets") parser.add_option("-L", "--list-projects", metavar="TARGET", help="list available projects on TARGET") parser.add_option("--egg", metavar="FILE", help="use the given egg, instead of building it")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", default=None, help="use this spider without looking for one") parser.add_option( "--nolinks", dest="nolinks", action="store_true", help="don't show links to follow (extracted requests)" ) parser.add_option("--noitems", dest="noitems", action="store_true", help="don't show scraped items") parser.add_option( "--nocolour", dest="nocolour", action="store_true", help="avoid using pygments to colorize the output" ) parser.add_option( "-r", "--rules", dest="rules", action="store_true", help="use CrawlSpider rules to discover the callback" ) parser.add_option( "-c", "--callback", dest="callback", help="use this callback for parsing, instead looking for a callback" ) parser.add_option( "-d", "--depth", dest="depth", type="int", default=1, help="maximum depth for parsing requests [default: %default]", ) parser.add_option( "-v", "--verbose", dest="verbose", action="store_true", help="print each depth level one by one" )
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-l", "--list", dest="list", action="store_true", help="only list contracts, without checking them")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-l", "--list", dest="list", action="store_true", help="List available templates") parser.add_option("-e", "--edit", dest="edit", action="store_true", help="Edit spider after creating it") parser.add_option("-d", "--dump", dest="dump", metavar="TEMPLATE", help="Dump template to standard output") parser.add_option("-t", "--template", dest="template", default="basic", help="Uses a custom template.") parser.add_option( "--force", dest="force", action="store_true", help="If the spider already exists, overwrite it with the template" )
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", default=None, \ help="always use this spider when arguments are urls") parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE", \ help="set spider argument (may be repeated)") parser.add_option("-n", "--nofollow", dest="nofollow", action="store_true", \ help="don't follow links (for use with URLs only)")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE", \ help="set spider argument (may be repeated)") parser.add_option("-o", "--output", metavar="FILE", \ help="dump scraped items into FILE (use - for stdout)") parser.add_option("-t", "--output-format", metavar="FORMAT", default="jsonlines", \ help="format to use for dumping items with -o (default: %default)")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.nofollow:
        settings.overrides['CRAWLSPIDER_FOLLOW_LINKS'] = False

def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE", help="set spider argument (may be repeated)") parser.add_option("-o", "--output", metavar="FILE", help="dump scraped items into FILE (use - for stdout)") parser.add_option("-t", "--output-format", metavar="FORMAT", default="jsonlines", help="format to use for dumping items with -o (default: %default)")
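# Standalone sketch of the optparse pattern used throughout these commands:
# action="append" collects every -a occurrence into a list, which a matching
# process_options later converts into a dict of spider arguments. The command
# line below is a made-up example.
import optparse

parser = optparse.OptionParser()
parser.add_option("-a", dest="spargs", action="append", default=[],
                  metavar="NAME=VALUE", help="set spider argument (may be repeated)")
opts, args = parser.parse_args(["-a", "category=books", "-a", "depth=2"])
assert opts.spargs == ["category=books", "depth=2"]
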
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--list", dest="list", action="store_true") parser.add_option("--dump", dest="dump", action="store_true") parser.add_option("-t", "--template", dest="template", default="crawl", help="Uses a custom template.") parser.add_option("--force", dest="force", action="store_true", help="If the spider already exists, overwrite it with the template")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option( "--verbose", "-v", dest="verbose", action="store_true", help="also display twisted/python/platform info (useful for bug reports)", )
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-d", "--date", help="Print data for given date (in format yyyy-mm-dd, by default is last stored data)") parser.add_option("-c", "--compare", metavar="DATE", help="Compare with data on given date (in format yyyy-mm-dd)") parser.add_option("-D", "--day", action="store_true", help="Compare with previous day") parser.add_option("-W", "--week", action="store_true", help="Compare with previous week") parser.add_option("-M", "--month", action="store_true", help="Compare with previous month") parser.add_option("-4", "--four_month", action="store_true", help="Compare with previous 4 month") parser.add_option("-Y", "--year", action="store_true", help="Compare with previous year") parser.add_option("--show_year", action="store_true", help="Show year of comparing date in report")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option( "--verbose", "-v", dest="verbose", action="store_true", help= "also display twisted/python/platform info (useful for bug reports)" )
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-l", "--list", dest="list", action="store_true", help="List available templates") parser.add_option("-d", "--dump", dest="dump", metavar="TEMPLATE", help="Dump template to standard output") parser.add_option("-t", "--template", dest="template", default="crawl", help="Uses a custom template.") parser.add_option("--force", dest="force", action="store_true", help="If the spider already exists, overwrite it with the template")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--server", dest="server", action="store_true", help="Run scraper server that polls for tasks and runs them") parser.add_option("--demonize", dest="demonize", action="store_true", help="Run scrapy as a demon process continiously processing the spider's tasks.") parser.add_option("--all", dest="all", action="store_true", help="Run all available tasks for the spider.") parser.add_option('--interval', dest='interval', help="Polling interval for executing the spider in demon mode.") parser.add_option('--priority', dest='priority', help="Priority of task that you want to execute." ) parser.add_option('--task-name', dest='task_name', help="Name of task that you would like to execute.", default=None ) parser.add_option('--task-id', dest='task_id', help="Id of task that you would like to execute.", default=None ) parser.add_option('--child-logfile', dest='child_logfile', help="Pass this parameter if you want to log output of child processes.", default=None) parser.add_option("--child", dest="child", action="store_true", help="Make this process a child.")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-s", action='store_true', dest='skip_start_urls', default=False, help="Skip start URLs and work on queue") parser.add_option("-w", action='store_true', dest='dont_exit', default=False, help="Don't exit when idle, wait around for more work") parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE", \ help="set spider argument (may be repeated)") parser.add_option("-o", "--output", metavar="FILE", \ help="dump scraped items into FILE (use - for stdout)") parser.add_option("-t", "--output-format", metavar="FORMAT", default="jsonlines", \ help="format to use for dumping items with -o (default: %default)")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--get", dest="get", metavar="SETTING", \ help="print raw setting value") parser.add_option("--getbool", dest="getbool", metavar="SETTING", \ help="print setting value, intepreted as a boolean") parser.add_option("--getint", dest="getint", metavar="SETTING", \ help="print setting value, intepreted as an integer") parser.add_option("--getfloat", dest="getfloat", metavar="SETTING", \ help="print setting value, intepreted as an float") parser.add_option("--getlist", dest="getlist", metavar="SETTING", \ help="print setting value, intepreted as an float")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        self.settings.overrides['FEED_FORMAT'] = opts.output_format

def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-l", "--list", dest="list", action="store_true", help="only list contracts, without checking them") parser.add_option("-v", "--verbose", dest="verbose", default=False, action='store_true', help="print contract tests for all spiders")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-l", "--list", dest="list", action="store_true", help="only list contracts, without checking them") parser.add_option("-v", "--verbose", dest="verbose", default=1, action="count", help="print all contract hooks")
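# Sketch of the action="count" idiom above: starting from default=1, each -v
# on the command line bumps the verbosity by one, so -v -v yields 3.
import optparse

parser = optparse.OptionParser()
parser.add_option("-v", "--verbose", dest="verbose", default=1, action="count")
opts, _ = parser.parse_args(["-v", "-v"])
assert opts.verbose == 3  # default 1 + two -v flags
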
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", default=None, \ help="use this spider without looking for one") parser.add_option("--nolinks", dest="nolinks", action="store_true", \ help="don't show links to follow (extracted requests)") parser.add_option("--noitems", dest="noitems", action="store_true", \ help="don't show scraped items") parser.add_option("--nocolour", dest="nocolour", action="store_true", \ help="avoid using pygments to colorize the output") parser.add_option("-r", "--rules", dest="rules", action="store_true", \ help="use CrawlSpider rules to discover the callback") parser.add_option("-c", "--callback", dest="callback", \ help="use this callback for parsing, instead looking for a callback")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--get", dest="get", metavar="SETTING", \ help="print raw setting value") parser.add_option("--getbool", dest="getbool", metavar="SETTING", \ help="print setting value, intepreted as a boolean") parser.add_option("--getint", dest="getint", metavar="SETTING", \ help="print setting value, intepreted as an integer") parser.add_option("--getfloat", dest="getfloat", metavar="SETTING", \ help="print setting value, intepreted as an float") parser.add_option("--getlist", dest="getlist", metavar="SETTING", \ help="print setting value, intepreted as an float") parser.add_option("--init", dest="init", action="store_true", \ help="print initial setting value (before loading extensions and spiders)")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option( "-c", "--classifiers", dest="classifiers", action="append", default=[], help="list classifiers by which the file will be reviewed") parser.add_option("-r", "--resume", dest="i_no", type="int", default=0, help="resume review of a file at a given item")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        valid_output_formats = (self.settings['FEED_EXPORTERS'].keys()
                                + self.settings['FEED_EXPORTERS_BASE'].keys())
        if opts.output_format not in valid_output_formats:
            raise UsageError('Invalid/unrecognized output format: %s, Expected %s'
                             % (opts.output_format, valid_output_formats))
        self.settings.overrides['FEED_FORMAT'] = opts.output_format

def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        valid_output_formats = (self.settings['FEED_EXPORTERS'].keys()
                                + self.settings['FEED_EXPORTERS_BASE'].keys())
        if opts.output_format not in valid_output_formats:
            raise UsageError('Invalid/unrecognized output format: %s, Expected %s'
                             % (opts.output_format, valid_output_formats))
        self.settings.overrides['FEED_FORMAT'] = opts.output_format

def execute(argv=None):
    if argv is None:
        argv = sys.argv
    cmds = _get_commands_dict()
    cmdname = _get_command_name(argv)
    _update_default_settings("scrapy.conf.commands", cmdname)
    _update_default_settings(settings["COMMANDS_SETTINGS_MODULE"], cmdname)
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(),
                                   conflict_handler="resolve", add_help_option=False)
    if cmdname in cmds:
        cmd = cmds[cmdname]
        cmd.add_options(parser)
        opts, args = parser.parse_args(args=argv[1:])
        cmd.process_options(args, opts)
        parser.usage = "%%prog %s %s" % (cmdname, cmd.syntax())
        parser.description = cmd.long_desc()
        if cmd.requires_project and not settings.settings_module:
            print "Error running: scrapy-ctl.py %s\n" % cmdname
            print "Cannot find project settings module in python path: %s" % \
                settings.settings_module_path
            sys.exit(1)
        if opts.help:
            parser.print_help()
            sys.exit()
    elif not cmdname:
        cmd = ScrapyCommand()
        cmd.add_options(parser)
        opts, args = parser.parse_args(args=argv)
        cmd.process_options(args, opts)
        _print_usage(settings.settings_module)
        sys.exit(2)
    else:
        print "Unknown command: %s\n" % cmdname
        print 'Use "scrapy-ctl.py -h" for help'
        sys.exit(2)
    del args[0]  # remove command name from args
    send_catch_log(signal=command_executed, cmdname=cmdname, cmdobj=cmd,
                   args=args, opts=opts)
    from scrapy.core.manager import scrapymanager
    scrapymanager.configure(control_reactor=True)
    ret = _run_command(cmd, args, opts)
    if ret is False:
        parser.print_help()

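# Minimal sketch of the dispatch flow in execute() above, with dispatch_sketch
# and the cmds mapping as hypothetical stand-ins for _get_commands_dict() and
# _get_command_name(): look up the command object by argv[1], let it register
# its options, parse, then run.
import optparse
import sys

def dispatch_sketch(argv, cmds):
    cmdname = argv[1] if len(argv) > 1 else None
    if cmdname not in cmds:
        sys.stderr.write("Unknown command: %s\n" % cmdname)
        return 2
    cmd = cmds[cmdname]
    parser = optparse.OptionParser(conflict_handler="resolve")
    cmd.add_options(parser)
    opts, args = parser.parse_args(args=argv[1:])
    cmd.process_options(args, opts)
    del args[0]  # drop the command name, as execute() does
    return cmd.run(args, opts)
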
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-p", "--project", help="the project name in the target") parser.add_option("-v", "--version", help="the version to deploy. Defaults to current timestamp") parser.add_option("-l", "--list-targets", action="store_true", \ help="list available targets") parser.add_option("-L", "--list-projects", metavar="TARGET", \ help="list available projects on TARGET") parser.add_option("--egg", metavar="FILE", help="use the given egg, instead of building it") parser.add_option("--build-egg", metavar="FILE", help="only build the egg, don't deploy it") parser.add_option("--keep-build", action="store_true", help="set it to keep the build files, otherwise they are removed")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-p", "--project", help="the project name in the target") parser.add_option( "-v", "--version", help="the version to deploy. Defaults to current timestamp") parser.add_option("-l", "--list-targets", action="store_true", \ help="list available targets") parser.add_option("-L", "--list-projects", metavar="TARGET", \ help="list available projects on TARGET") parser.add_option("--egg", metavar="FILE", help="use the given egg, instead of building it")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", default=None, \ help="use this spider without looking for one") parser.add_option("--nolinks", dest="nolinks", action="store_true", \ help="don't show links to follow (extracted requests)") parser.add_option("--noitems", dest="noitems", action="store_true", \ help="don't show scraped items") parser.add_option("--nocolour", dest="nocolour", action="store_true", \ help="avoid using pygments to colorize the output") parser.add_option("-r", "--rules", dest="rules", action="store_true", \ help="use CrawlSpider rules to discover the callback") parser.add_option("-c", "--callback", dest="callback", \ help="use this callback for parsing, instead looking for a callback") parser.add_option("-d", "--depth", dest="depth", type="int", default=1, \ help="maximum depth for parsing requests [default: %default]") parser.add_option("-v", "--verbose", dest="verbose", action="store_true", \ help="print each depth level one by one")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.output:
        if opts.output == '-':
            self.settings.overrides['FEED_URI'] = 'stdout:'
        else:
            self.settings.overrides['FEED_URI'] = opts.output
        valid_output_formats = (self.settings['FEED_EXPORTERS'].keys()
                                + self.settings['FEED_EXPORTERS_BASE'].keys())
        if not opts.output_format:
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError("Unrecognized output format '%s', set one"
                             " using the '-t' switch or as a file extension"
                             " from the supported list %s"
                             % (opts.output_format, tuple(valid_output_formats)))
        self.settings.overrides['FEED_FORMAT'] = opts.output_format

def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--nolinks", dest="nolinks", action="store_true", help="don't show extracted links") parser.add_option("--noitems", dest="noitems", action="store_true", help="don't show scraped items") parser.add_option( "--nocolour", dest="nocolour", action="store_true", help="avoid using pygments to colorize the output" ) parser.add_option( "-r", "--rules", dest="rules", action="store_true", help="try to match and parse the url with the defined rules (if any)", ) parser.add_option( "-c", "--callbacks", dest="callbacks", action="store", help="use the provided callback(s) for parsing the url (separated with commas)", )
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", default=None, \ help="use this spider without looking for one") parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE", \ help="set spider argument (may be repeated)") parser.add_option("--pipelines", action="store_true", \ help="process items through pipelines") parser.add_option("--nolinks", dest="nolinks", action="store_true", \ help="don't show links to follow (extracted requests)") parser.add_option("--noitems", dest="noitems", action="store_true", \ help="don't show scraped items") parser.add_option("--nocolour", dest="nocolour", action="store_true", \ help="avoid using pygments to colorize the output") parser.add_option("-r", "--rules", dest="rules", action="store_true", \ help="use CrawlSpider rules to discover the callback") parser.add_option("-c", "--callback", dest="callback", \ help="use this callback for parsing, instead looking for a callback") parser.add_option("-d", "--depth", dest="depth", type="int", default=1, \ help="maximum depth for parsing requests [default: %default]") parser.add_option("-v", "--verbose", dest="verbose", action="store_true", \ help="print each depth level one by one")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    if opts.output:
        if opts.output == "-":
            self.settings.set("FEED_URI", "stdout:", priority="cmdline")
        else:
            self.settings.set("FEED_URI", opts.output, priority="cmdline")
        valid_output_formats = (
            list(self.settings.getdict("FEED_EXPORTERS").keys())
            + list(self.settings.getdict("FEED_EXPORTERS_BASE").keys())
        )
        if not opts.output_format:
            opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
        if opts.output_format not in valid_output_formats:
            raise UsageError(
                "Unrecognized output format '%s', set one"
                " using the '-t' switch or as a file extension"
                " from the supported list %s"
                % (opts.output_format, tuple(valid_output_formats))
            )
        self.settings.set("FEED_FORMAT", opts.output_format, priority="cmdline")

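# Hedged sketch of why these snippets pass priority='cmdline': in Scrapy's
# Settings, 'cmdline' outranks 'project', so values set from command-line
# options win over project settings regardless of assignment order. Assumes
# a standard Scrapy installation.
from scrapy.settings import Settings

settings = Settings()
settings.set('FEED_FORMAT', 'csv', priority='project')
settings.set('FEED_FORMAT', 'jsonlines', priority='cmdline')
assert settings.get('FEED_FORMAT') == 'jsonlines'
# a later, lower-priority set() does not override the cmdline value:
settings.set('FEED_FORMAT', 'xml', priority='project')
assert settings.get('FEED_FORMAT') == 'jsonlines'
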
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option( "-d", "--date", help= "Print data for given date (in format yyyy-mm-dd, by default is last stored data)" ) parser.add_option( "-c", "--compare", metavar="DATE", help="Compare with data on given date (in format yyyy-mm-dd)") parser.add_option("-D", "--day", action="store_true", help="Compare with previous day") parser.add_option("-W", "--week", action="store_true", help="Compare with previous week") parser.add_option("-M", "--month", action="store_true", help="Compare with previous month") parser.add_option("-4", "--four_month", action="store_true", help="Compare with previous 4 month") parser.add_option("-Y", "--year", action="store_true", help="Compare with previous year") parser.add_option("--show_year", action="store_true", help="Show year of comparing date in report")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", help="use this spider") parser.add_option("--headers", dest="headers", action="store_true", help="print response HTTP headers instead of body")
def add_options(self, parser):
    ScrapyCommand.add_options(self, parser)

def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)
    if opts.nofollow:
        settings.overrides['CRAWLSPIDER_FOLLOW_LINKS'] = False

def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-n", "--nofollow", dest="nofollow", action="store_true", \ help="don't follow links (for use with URLs only)")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", help="use this spider")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("--spider", dest="spider", help="use this spider") parser.add_option("--headers", dest="headers", action="store_true", \ help="print response HTTP headers instead of body")
def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-a", "--arg", dest="spargs", action="append", default=[], \ help="set spider argument (may be repeated)")
def process_options(self, args, opts):
    ScrapyCommand.process_options(self, args, opts)

def add_options(self, parser): ScrapyCommand.add_options(self, parser) parser.add_option("-c", dest="code", help="evaluate the code in the shell, print the result and exit") parser.add_option("--spider", dest="spider", help="use this spider")