def add_options(self, parser):
    """Register command-line options for this command.

    Adds PostgreSQL connection, spider-argument, and crawl-start-point
    options on top of the base ScrapyCommand options.
    """
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--postgres_uri", dest="postgres_uri", metavar="URI",
        help="connection string for PostgreSQL to put data into",
        default="postgresql:///strava")
    # BUG FIX: action="append" needs a list default; the original
    # default={} would raise AttributeError (dict has no .append) the
    # first time -a was passed. Sibling commands already use default=[].
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option(
        "-m", "--max", action="store_true", dest="max",
        help="start crawling from the largest ID found in the DB")
    parser.add_option(
        "-s", "--start", dest="start_val", default=None, metavar="ID",
        type="int",
        help="start crawling from the ID equal to this value")
def add_options(self, parser):
    """Add the interactive-shell specific options to *parser*."""
    ScrapyCommand.add_options(self, parser)
    opt = parser.add_option
    opt("-c", dest="code",
        help="evaluate the code in the shell, print the result and exit")
    opt("--spider", dest="spider", help="use this spider")
def add_options(self, parser):
    """Register crawl options plus city and sign-date filters."""
    ScrapyCommand.add_options(self, parser)
    opt = parser.add_option
    opt("-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    opt("-o", "--output", metavar="FILE",
        help="dump scraped items into FILE (use - for stdout)")
    opt("-t", "--output-format", metavar="FORMAT",
        help="format to use for dumping items with -o")
    # City to scrape (help text intentionally left in Chinese for its users).
    opt("-c", "--city", type="str", dest="city", default="",
        help="设置要爬取的城市名,如 北京市")
    # Two values: start and end date of the signing period to scrape.
    opt("-d", "--dates", nargs=2, type="str", dest="sign_date",
        default=["", ""], help="设置要爬取的签约起始日期和结束如期")
def add_options(self, parser):
    """Options controlling how a new spider is generated from a template."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add("-l", "--list", dest="list", action="store_true",
        help="List available templates")
    add("-e", "--edit", dest="edit", action="store_true",
        help="Edit spider after creating it")
    add("-d", "--dump", dest="dump", metavar="TEMPLATE",
        help="Dump template to standard output")
    add("-t", "--template", dest="template", default="basic",
        help="Uses a custom template.")
    add("--force", dest="force", action="store_true",
        help="If the spider already exists, overwrite it with the template")
def add_options(self, parser):
    """Options for fetching a URL and printing its response."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider", help="use this spider")
    parser.add_option(
        "--headers", action="store_true", dest="headers",
        help="print response HTTP headers instead of body")
    parser.add_option(
        "--no-redirect", action="store_true", dest="no_redirect",
        default=False,
        help="do not handle HTTP 3xx status codes and print response as-is")
def add_options(self, parser):
    """Queue-consumer options: queue name, run mode and prefetch count."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    # Queue name is routed through a callback so it can be
    # validated/normalised as it is parsed.
    add("-q", "--queue", dest="queue_name", type="str",
        action="callback", callback=self.queue_option_callback,
        help="Queue name to consume messages")
    add("-m", "--mode", dest="mode", type="choice",
        choices=self.action_modes, default="action",
        help="Command run mode: action for one time execution and exit or worker")
    add("-p", "--prefetch_count", dest="prefetch_count", type="int",
        default=None, help="RabbitMQ consumer prefetch count setting")
def add_options(self, parser):
    """Flags selecting which subprogram (validator/crawler/RPC) to run."""
    ScrapyCommand.add_options(self, parser)
    flag = parser.add_option
    # Run the one-shot validator.
    flag("-r", dest="validate", default=False, action="store_true",
         help="run validate")
    # Run the looping validator.
    flag("-l", dest="circle", default=False, action="store_true",
         help="run circle validate")
    # Run the crawler.
    flag("-c", dest="crawl", default=False, action="store_true",
         help="run spider crawl")
    # Run the RPC server.
    flag("--rpc", dest="rpc", default=False, action="store_true",
         help="run rpc server")
    # Exercise the RPC server.
    flag("--rpc-test", dest="rpc_test", default=False, action="store_true",
         help="test rpc server")
def add_options(self, parser):
    """Booking.com crawl options: location, dates, proxy and concurrency."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--city", dest="city",
        help="City code. Must be an integer, according to internal Booking.com id for location")
    parser.add_option("--checkin", dest="checkin",
                      help="Checkin date in ISO (YYYY-MM-DD) format")
    parser.add_option("--checkout", dest="checkout",
                      help="Checkout date in ISO (YYYY-MM-DD) format")
    parser.add_option("-p", "--proxy", dest="proxy", default=False,
                      action='store_true', help="Use proxy servers")
    # BUG FIX: these options default to the int 16 but, lacking
    # type="int", parsed command-line values arrived as str — so the
    # option value's type depended on whether the flag was passed.
    # Declaring type="int" makes the value an int either way.
    parser.add_option("--cr", dest="concurrent_requests", type="int",
                      default=16,
                      help="Use maximum concurrent requests (default: 16)")
    parser.add_option("--crpd", dest="concurrent_requests_per_domain",
                      type="int", default=16,
                      help="Use concurrent requests per domain (default: 16)")
    parser.add_option("--crpip", dest="concurrent_requests_per_ip",
                      type="int", default=16,
                      help="Use concurrent requests per ip (default: 16)")
def add_options(self, parser):
    """Options for the parse command: spider selection, output filtering
    and request construction."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add("--spider", dest="spider", default=None,
        help="use this spider without looking for one")
    add("-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    add("--pipelines", action="store_true",
        help="process items through pipelines")
    add("--nolinks", dest="nolinks", action="store_true",
        help="don't show links to follow (extracted requests)")
    add("--noitems", dest="noitems", action="store_true",
        help="don't show scraped items")
    add("--nocolour", dest="nocolour", action="store_true",
        help="avoid using pygments to colorize the output")
    add("-r", "--rules", dest="rules", action="store_true",
        help="use CrawlSpider rules to discover the callback")
    add("-c", "--callback", dest="callback",
        help="use this callback for parsing, instead looking for a callback")
    add("-m", "--meta", dest="meta",
        help="inject extra meta into the Request, it must be a valid raw json string")
    add("--cbkwargs", dest="cbkwargs",
        help="inject extra callback kwargs into the Request, it must be a valid raw json string")
    add("-d", "--depth", dest="depth", type="int", default=1,
        help="maximum depth for parsing requests [default: %default]")
    add("-v", "--verbose", dest="verbose", action="store_true",
        help="print each depth level one by one")
class CommandSettings(unittest.TestCase):
    """Tests for ScrapyCommand settings handling and help formatting."""

    def setUp(self):
        self.command = ScrapyCommand()
        self.command.settings = Settings()
        self.parser = argparse.ArgumentParser(
            formatter_class=ScrapyHelpFormatter,
            conflict_handler='resolve',
        )
        self.command.add_options(self.parser)

    def test_settings_json_string(self):
        # A JSON value passed via -s must be parsed into BaseSettings.
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        opts, args = self.parser.parse_known_args(
            args=['-s', f'FEEDS={feeds_json}', 'spider.py'])
        self.command.process_options(args, opts)
        feeds = self.command.settings['FEEDS']
        self.assertIsInstance(feeds, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds), json.loads(feeds_json))

    def test_help_formatter(self):
        formatter = ScrapyHelpFormatter(prog='scrapy')
        part_strings = [
            'usage: scrapy genspider [options] <name> <domain>\n\n',
            '\n',
            'optional arguments:\n',
            '\n',
            'Global Options:\n',
        ]
        expected = (
            'Usage\n=====\n scrapy genspider [options] <name> <domain>\n\n\n'
            'Optional Arguments\n==================\n\n'
            'Global Options\n--------------\n'
        )
        self.assertEqual(formatter._join_parts(part_strings), expected)
def add_options(self, parser):
    """Crawl options plus keyword and link selectors."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add("-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    add("-o", "--output", metavar="FILE",
        help="dump scraped items into FILE (use - for stdout)")
    add("-t", "--output-format", metavar="FORMAT",
        help="format to use for dumping items with -o")
    add("-k", "--keyword", metavar="KEYWORD", dest="keyword", type="str",
        default="", help="set keyword")
    add("-l", "--link", metavar="LINK", dest="link", type="str",
        default="", help="set link")
def add_options(self, parser):
    """Feed-export options plus page-range and thread-filter controls."""
    ScrapyCommand.add_options(self, parser)
    opt = parser.add_option
    opt("-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    opt("-o", "--output", metavar="FILE",
        help="dump scraped items into FILE (use - for stdout)")
    opt("-t", "--output-format", metavar="FORMAT",
        help="format to use for dumping items with -o")
    # Takes two ints: first and last page to crawl.
    opt("-p", "--pages", dest="pages", nargs=2, type="int", default=[],
        help="set the range of pages you want to crawl")
    opt("-f", "--filter", dest="filter", type="str", default="",
        help='set function name in "filter.py" to filter threads')
def add_options(self, parser):
    """Forum crawl options: page range, thread filtering and view mode."""
    ScrapyCommand.add_options(self, parser)
    option = parser.add_option
    option("-a", dest="spargs", action="append", default=[],
           metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    # Takes two ints: first and last page to crawl.
    option("-p", "--pages", dest="pages", nargs=2, type="int", default=[],
           help="set the range of pages you want to crawl")
    option("-g", "--good", dest="good_only", action="store_true",
           default=False,
           help="only crawl good threads and their posts and comments")
    option("-f", "--filter", dest="filter", type="str", default="",
           help='set function name in "filter.py" to filter threads')
    option("-s", "--see_lz", dest="see_lz", action="store_true",
           default=False, help='enable "only see lz" mode')
def add_options(self, parser):
    """Options that print a single setting value in a chosen type."""
    ScrapyCommand.add_options(self, parser)
    get = parser.add_option
    get("--get", dest="get", metavar="SETTING",
        help="print raw setting value")
    get("--getbool", dest="getbool", metavar="SETTING",
        help="print setting value, interpreted as a boolean")
    get("--getint", dest="getint", metavar="SETTING",
        help="print setting value, interpreted as an integer")
    get("--getfloat", dest="getfloat", metavar="SETTING",
        help="print setting value, interpreted as a float")
    get("--getlist", dest="getlist", metavar="SETTING",
        help="print setting value, interpreted as a list")
def add_options(self, parser):
    """Register the --get* options, one per value interpretation."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--get", dest="get", metavar="SETTING",
                      help="print raw setting value")
    # The typed variants differ only in flag name and help text.
    for flag, text in (
        ("getbool", "print setting value, interpreted as a boolean"),
        ("getint", "print setting value, interpreted as an integer"),
        ("getfloat", "print setting value, interpreted as a float"),
        ("getlist", "print setting value, interpreted as a list"),
    ):
        parser.add_option("--" + flag, dest=flag, metavar="SETTING",
                          help=text)
def add_options(self, parser):
    """Add the PostgreSQL connection-string option."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--postgres_uri",
        dest="postgres_uri",
        metavar="URI",
        default="postgresql:///strava",
        help="connection string for PostgreSQL to put Strava data into",
    )
def add_options(self, parser):
    """Shell options: inline code, spider override, redirect handling."""
    ScrapyCommand.add_options(self, parser)
    opt = parser.add_option
    opt("-c", dest="code",
        help="evaluate the code in the shell, print the result and exit")
    opt("--spider", dest="spider", help="use this spider")
    opt("--no-redirect", dest="no_redirect", default=False,
        action="store_true",
        help="do not handle HTTP 3xx status codes and print response as-is")
def add_options(self, parser):
    """Set a custom usage line, then add the output-file option."""
    parser.usage = "usage: scrapy spiderdocs [<module.name>] [-o <filename.md>]"
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "-o", "--output",
        dest="output_filename",
        metavar="FILE",
        help="Output file name.",
    )
def add_options(self, parser):
    """Register options for printing a fetched response."""
    ScrapyCommand.add_options(self, parser)
    option = parser.add_option
    option("--spider", dest="spider", help="use this spider")
    option("--headers", dest="headers", action="store_true",
           help="print response HTTP headers instead of body")
    option("--no-redirect", dest="no_redirect", action="store_true",
           default=False,
           help="do not handle HTTP 3xx status codes and print response as-is")
def add_options(self, parser):
    """Standard crawl options: spider arguments and feed export."""
    ScrapyCommand.add_options(self, parser)
    opt = parser.add_option
    opt("-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    opt("-o", "--output", metavar="FILE",
        help="dump scraped items into FILE (use - for stdout)")
    opt("-t", "--output-format", metavar="FORMAT",
        help="format to use for dumping items with -o")
def add_options(self, parser):
    """Add the --all flag for validating every scraper."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--all", dest="all", action="store_true",
                      help="Run validation on all scrapers")
def add_options(self, parser):
    """Shell options including redirect suppression."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "-c", dest="code",
        help="evaluate the code in the shell, print the result and exit")
    parser.add_option("--spider", dest="spider", help="use this spider")
    parser.add_option(
        "--no-redirect", action="store_true", dest="no_redirect",
        default=False,
        help="do not handle HTTP 3xx status codes and print response as-is")
def add_options(self, parser):
    """Chart-generation options: output prefix, sampling and display."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option('-o', '--output',
                      help='prefix for charts (without ".html")')
    parser.add_option('--step', type=float, default=30,
                      help='time step, s')
    parser.add_option('--smooth', type=int, default=50,
                      help='smooth span')
    parser.add_option('--top', type=int, default=30,
                      help='top domains to show')
    parser.add_option('--no-show', action='store_true',
                      help='don\'t show charts')
def add_options(self, parser):
    """Dry-run and sampling flags for the crawl runner."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        '--dry-run', action='store_true',
        help='Runs the spiders without writing any files')
    parser.add_option(
        '--sample', type=int,
        help='The number of files to write')
def add_options(self, parser):
    """Add a verbosity switch for extended version information."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--verbose", "-v",
        dest="verbose",
        action="store_true",
        help="also display twisted/python/platform info (useful for bug reports)",
    )
def add_options(self, parser):
    """Crawl options plus a page-count limit."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add("-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    add("-o", "--output", metavar="FILE",
        help="dump scraped items into FILE (use - for stdout)")
    add("-t", "--output-format", metavar="FORMAT",
        help="format to use for dumping items with -o")
    # Default is effectively "no limit" unless the user lowers it.
    add("-p", "--page", dest="page", type="int", default=9999999,
        help="set the num of pages you want to crawl")
def add_options(self, parser):
    """Options for parsing a URL with a chosen spider and callback."""
    ScrapyCommand.add_options(self, parser)
    # Spider selection and arguments.
    parser.add_option("--spider", dest="spider", default=None,
                      help="use this spider without looking for one")
    parser.add_option("-a", dest="spargs", action="append", default=[],
                      metavar="NAME=VALUE",
                      help="set spider argument (may be repeated)")
    parser.add_option("--pipelines", action="store_true",
                      help="process items through pipelines")
    # Output filtering.
    parser.add_option("--nolinks", dest="nolinks", action="store_true",
                      help="don't show links to follow (extracted requests)")
    parser.add_option("--noitems", dest="noitems", action="store_true",
                      help="don't show scraped items")
    parser.add_option("--nocolour", dest="nocolour", action="store_true",
                      help="avoid using pygments to colorize the output")
    # Callback discovery and crawl depth.
    parser.add_option("-r", "--rules", dest="rules", action="store_true",
                      help="use CrawlSpider rules to discover the callback")
    parser.add_option("-c", "--callback", dest="callback",
                      help="use this callback for parsing, instead looking for a callback")
    parser.add_option("-d", "--depth", dest="depth", type="int", default=1,
                      help="maximum depth for parsing requests [default: %default]")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="print each depth level one by one")
def add_options(self, parser):
    """Register travel-query options: mode, date, origin and destination."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add("-m", "--travelmode", dest="travelmode", type="str", action="store",
        default="飞机", help="travel mode")
    # NOTE: the default date is evaluated once, when the option is defined,
    # not at parse time.
    add("-t", "--traveltime", dest="traveltime", type="str", action="store",
        default=time.strftime("%Y-%m-%d", time.localtime()),
        help="travel time")
    add("-s", "--startstation", dest="startstation", type="str",
        action="store", default="北京", help="start city")
    add("-e", "--endstation", dest="endstation", type="str", action="store",
        default="上海", help="end city")
def add_options(self, parser):
    """Template-management options for generating spiders."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option("-l", "--list", action="store_true", dest="list",
                      help="List available templates")
    parser.add_option("-e", "--edit", action="store_true", dest="edit",
                      help="Edit spider after creating it")
    parser.add_option("-d", "--dump", metavar="TEMPLATE", dest="dump",
                      help="Dump template to standard output")
    parser.add_option("-t", "--template", default="basic", dest="template",
                      help="Uses a custom template.")
    parser.add_option("--force", action="store_true", dest="force",
                      help="If the spider already exists, overwrite it with the template")
def add_options(self, parser):
    """JSON Pointer selection and truncation options."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        '-p', '--package-pointer',
        help='The JSON Pointer to the value in the package')
    parser.add_option(
        '-r', '--release-pointer',
        help='The JSON Pointer to the value in the release')
    parser.add_option(
        '-t', '--truncate', type=int,
        help='Truncate the value to this number of characters')
def add_options(self, parser):
    """Spider-argument and incremental-crawl options."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "-a", dest="spargs", action="append", default=[],
        metavar="NAME=VALUE", help="set spider argument (may be repeated)")
    parser.add_option(
        "-i", dest="incremental", action="store_true", default=False,
        help="enable incremental crawl")
def add_options(self, parser):
    """Contract-check options: listing and verbosity."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add("-l", "--list", action="store_true", dest="list",
        help="only list contracts, without checking them")
    add("-v", "--verbose", action='store_true', default=False, dest="verbose",
        help="print contract tests for all spiders")
def add_options(self, parser):
    """Database connection options for moving Strava data."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--mongo_uri",
        dest="mongo_uri",
        metavar="URI",
        default="mongodb://localhost:27017/strava",
        help="connection string for MongoDB containing Strava data",
    )
    parser.add_option(
        "--postgres_uri",
        dest="postgres_uri",
        metavar="URI",
        default="postgresql:///strava",
        help="connection string for PostgreSQL to put Strava data into",
    )
def add_options(self, parser):
    """Spider-argument and feed-export options."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option('-a', dest='spargs', action='append', default=[],
                      metavar='NAME=VALUE',
                      help='set spider argument (may be repeated)')
    # BUG FIX: the help text read "FILE(user - for stdout)"; it meant
    # "use - for stdout" (the wording every sibling command uses).
    parser.add_option(
        '-o', '--output', metavar='FILE',
        help='dump scraped items into FILE (use - for stdout)')
    parser.add_option('-t', '--output-format', metavar='FORMAT',
                      help='format to use for dumping items with -o')
class CommandSettings(unittest.TestCase):
    """Check that -s JSON setting values are parsed into BaseSettings."""

    def setUp(self):
        self.command = ScrapyCommand()
        self.command.settings = Settings()
        self.parser = optparse.OptionParser(
            formatter=optparse.TitledHelpFormatter(),
            conflict_handler='resolve',
        )
        self.command.add_options(self.parser)

    def test_settings_json_string(self):
        feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}'
        opts, args = self.parser.parse_args(
            args=['-s', f'FEEDS={feeds_json}', 'spider.py'])
        self.command.process_options(args, opts)
        feeds = self.command.settings['FEEDS']
        self.assertIsInstance(feeds, scrapy.settings.BaseSettings)
        self.assertEqual(dict(feeds), json.loads(feeds_json))
def add_options(self, parser):
    """Pointer, truncation and download-limit options."""
    ScrapyCommand.add_options(self, parser)
    add = parser.add_option
    add('-p', '--package-pointer',
        help='The JSON Pointer to the value in the package')
    add('-r', '--release-pointer',
        help='The JSON Pointer to the value in the release')
    add('-t', '--truncate', type=int,
        help='Truncate the value to this number of characters')
    add('--max-bytes', type=int,
        help='Stop downloading an OCDS file after reading this many bytes')
def add_options(self, parser):
    """Add shell-specific options on top of the base command options."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "-c",
        dest="code",
        help="evaluate the code in the shell, print the result and exit",
    )
    parser.add_option("--spider", dest="spider", help="use this spider")
def add_options(self, parser):
    """Spider selector plus a headers-only print switch."""
    ScrapyCommand.add_options(self, parser)
    opt = parser.add_option
    opt("--spider", dest="spider", help="use this spider")
    opt("--headers", dest="headers", action="store_true",
        help="print response HTTP headers instead of body")
def add_options(self, parser):
    """List and verbose switches for the contracts check command."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "-l", "--list", action="store_true", dest="list",
        help="only list contracts, without checking them")
    parser.add_option(
        "-v", "--verbose", action='store_true', default=False,
        dest="verbose",
        help="print contract tests for all spiders")
def add_options(self, parser):
    """Expose only the base ScrapyCommand options; no extras are needed."""
    ScrapyCommand.add_options(self, parser)
def add_options(self, parser):
    """Base options plus an explicit spider selector."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option("--spider", dest="spider", help="use this spider")
def add_options(self, parser):
    """Add -v/--verbose for extended platform and version details."""
    ScrapyCommand.add_options(self, parser)
    parser.add_option(
        "--verbose", "-v", action="store_true", dest="verbose",
        help="also display twisted/python/platform info (useful for bug reports)")