def run(self, args, opts):
    """Create a new Scrapy project in a fresh directory named ``args[0]``.

    Raises ``UsageError`` unless exactly one argument is supplied. Exits the
    process with status 1 when the name fails the pattern check or when a
    path with that name already exists.
    """
    if len(args) != 1:
        raise UsageError()

    project_name = args[0]
    name_match = re.search(r'^[_a-zA-Z]\w*$', project_name)
    if name_match is None:
        print('Error: Project names must begin with a letter and contain only\n'
              'letters, numbers and underscores')
        sys.exit(1)
    if exists(project_name):
        print("Error: directory %r already exists" % project_name)
        sys.exit(1)

    # Copy the module skeleton and the project-level config file.
    module_template = join(TEMPLATES_PATH, 'module')
    module_target = join(project_name, project_name)
    copytree(module_template, module_target, ignore=IGNORE)
    shutil.copy(join(TEMPLATES_PATH, 'scrapy.cfg'), project_name)

    # Fill in the placeholders of every template that needs rendering.
    for parts in TEMPLATES_TO_RENDER:
        relative = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_name, relative),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    print("New Scrapy project %r created in:" % project_name)
    print(" %s\n" % abspath(project_name))
    print("You can start your first spider with:")
    print(" cd %s" % project_name)
    print(" scrapy genspider example example.com")
def run(self, project, opt=None):
    """Generate a web-walker project skeleton in the directory ``project``.

    ``project`` serves as both the project name and the target directory.
    Sets ``self.exitcode`` to 1 and returns early when a ``scrapy.cfg``
    already exists there or when ``self._is_valid_name`` rejects the name.
    """
    project_dir = project
    project_name = project

    cfg_file = join(project_dir, 'scrapy.cfg')
    if exists(cfg_file):
        self.exitcode = 1
        print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
        return

    name_ok = self._is_valid_name(project_name)
    if not name_ok:
        self.exitcode = 1
        return

    # Copy the template tree, then rename the placeholder package.
    self._copytree(self.templates_dir, abspath(project_dir))
    move(join(project_dir, 'module'), join(project_dir, project_name))

    for parts in TEMPLATES_TO_RENDER:
        relative = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_dir, relative),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    summary = "New web-walker project %r, using template directory %r, created in:"
    print(summary % (project_name, self.templates_dir))
    print(" %s\n" % abspath(project_dir))
    print("You can start the demo spider with:")
    print(" custom-redis-server --host 127.0.0.1 -p 6379")
    print(" cd %s" % project_dir)
    print(" scrapy crawl bluefly")
def run(self, args, opts):
    """Create a Scrapy project named ``args[0]`` inside a target directory.

    The target directory is ``args[1]`` when given, otherwise ``args[0]``.
    Raises ``UsageError`` for any other argument count. Sets
    ``self.exitcode`` to 1 and returns early when the directory already
    holds a ``scrapy.cfg`` or ``self._is_valid_name`` rejects the name.
    """
    if len(args) not in (1, 2):
        raise UsageError()

    project_name = args[0]
    project_dir = args[1] if len(args) == 2 else args[0]

    if exists(join(project_dir, 'scrapy.cfg')):
        self.exitcode = 1
        print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
        return

    if not self._is_valid_name(project_name):
        self.exitcode = 1
        return

    # Lay down the template tree and give the package its real name.
    self._copytree(self.templates_dir, abspath(project_dir))
    placeholder_pkg = join(project_dir, 'module')
    move(placeholder_pkg, join(project_dir, project_name))

    for parts in TEMPLATES_TO_RENDER:
        template_rel = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_dir, template_rel),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    print("New Scrapy project '%s', using template directory '%s', "
          "created in:" % (project_name, self.templates_dir))
    print(" %s\n" % abspath(project_dir))
    print("You can start your first spider with:")
    print(" cd %s" % project_dir)
    print(" scrapy genspider example example.com")
def run(self, args, opts):
    """Create a Scrapy project from the command's template directory.

    ``args`` is ``[project_name]`` or ``[project_name, project_dir]``; the
    directory defaults to the project name. Any other length raises
    ``UsageError``. On an existing ``scrapy.cfg`` or a name rejected by
    ``self._is_valid_name`` the method sets ``self.exitcode = 1`` and returns.
    """
    arg_count = len(args)
    if arg_count not in (1, 2):
        raise UsageError()

    project_name = args[0]
    if arg_count == 2:
        project_dir = args[1]
    else:
        project_dir = args[0]

    existing_cfg = join(project_dir, 'scrapy.cfg')
    if exists(existing_cfg):
        self.exitcode = 1
        print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
        return

    if not self._is_valid_name(project_name):
        self.exitcode = 1
        return

    self._copytree(self.templates_dir, abspath(project_dir))
    # The template ships a package called "module"; rename it to the project.
    move(join(project_dir, 'module'), join(project_dir, project_name))

    for parts in TEMPLATES_TO_RENDER:
        rendered_rel = string.Template(join(*parts)).substitute(
            project_name=project_name)
        target = join(project_dir, rendered_rel)
        render_templatefile(target, project_name=project_name,
                            ProjectName=string_camelcase(project_name))

    header = "New Scrapy project %r, using template directory %r, created in:"
    print(header % (project_name, self.templates_dir))
    print(" %s\n" % abspath(project_dir))
    print("You can start your first spider with:")
    print(" cd %s" % project_dir)
    print(" scrapy genspider example example.com")
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    The template is copied to ``<module>.py`` in the directory of the
    package named by the ``NEWSPIDER_MODULE`` setting, or in the current
    directory when that setting is empty, and then handed to
    ``render_templatefile`` together with the spider variables.
    """
    tvars = dict(
        project_name=self.settings.get('BOT_NAME'),
        ProjectName=string_camelcase(self.settings.get('BOT_NAME')),
        module=module,
        name=name,
        domain=domain,
        classname='%sSpider' % ''.join(s.capitalize() for s in module.split('_')),
    )

    spiders_module = None
    spiders_dir = "."
    if self.settings.get('NEWSPIDER_MODULE'):
        spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
        spiders_dir = abspath(dirname(spiders_module.__file__))

    spider_file = "%s.py" % join(spiders_dir, module)
    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    created_msg = "Created spider %r using template %r " % (name, template_name)
    if spiders_module:
        # Keep the cursor on the same line so the module path follows on.
        print(created_msg, end='')
        print("in module:\n %s.%s" % (spiders_module.__name__, module))
    else:
        print(created_msg, end='\n')
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    Writes ``<module>.py`` next to the package named by ``NEWSPIDER_MODULE``
    (or into the current directory when the setting is empty), passes the
    file to ``render_templatefile``, and prints what was created.
    """
    class_prefix = "".join(map(str.capitalize, module.split("_")))
    tvars = dict(
        project_name=self.settings.get("BOT_NAME"),
        ProjectName=string_camelcase(self.settings.get("BOT_NAME")),
        module=module,
        name=name,
        domain=domain,
        classname=f"{class_prefix}Spider",
    )

    spiders_module = None
    spiders_dir = "."
    if self.settings.get("NEWSPIDER_MODULE"):
        spiders_module = import_module(self.settings["NEWSPIDER_MODULE"])
        spiders_dir = abspath(dirname(spiders_module.__file__))

    spider_file = f"{join(spiders_dir, module)}.py"
    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    created_msg = f"Created spider {name!r} using template {template_name!r} "
    if not spiders_module:
        print(created_msg, end="\n")
        return
    # No newline yet: the module location continues the same sentence.
    print(created_msg, end="")
    print(f"in module:\n {spiders_module.__name__}.{module}")
def run(self, args, opts):
    """Create a new Scrapy project in a directory named after ``args[0]``.

    Raises ``UsageError`` unless exactly one argument is given. When
    ``self._is_valid_name`` rejects the name, sets ``self.exitcode = 1`` and
    returns without creating anything.
    """
    if len(args) != 1:
        raise UsageError()

    project_name = args[0]
    name_ok = self._is_valid_name(project_name)
    if not name_ok:
        self.exitcode = 1
        return

    # Module skeleton first, then the top-level scrapy.cfg.
    package_dir = join(project_name, project_name)
    copytree(join(TEMPLATES_PATH, 'module'), package_dir, ignore=IGNORE)
    shutil.copy(join(TEMPLATES_PATH, 'scrapy.cfg'), project_name)

    for parts in TEMPLATES_TO_RENDER:
        relative = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_name, relative),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    print("New Scrapy project %r created in:" % project_name)
    print(" %s\n" % abspath(project_name))
    print("You can start your first spider with:")
    print(" cd %s" % project_name)
    print(" scrapy genspider example example.com")
def run(self, project, opt=None):
    """Generate a structure-spider project skeleton in ``project``.

    ``project`` is used as both the project name and the target directory.
    Sets ``self.exitcode = 1`` and returns early when a ``scrapy.cfg`` is
    already present or ``self._is_valid_name`` rejects the name.
    """
    project_dir = project
    project_name = project

    if exists(join(project_dir, 'scrapy.cfg')):
        self.exitcode = 1
        print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
        return

    if not self._is_valid_name(project_name):
        self.exitcode = 1
        return

    self._copytree(self.templates_dir, abspath(project_dir))
    move(join(project_dir, 'module'), join(project_dir, project_name))

    # Only these two templates carry placeholders for this project type.
    templates_to_render = (
        ('scrapy.cfg',),
        ('${project_name}', 'settings.py.tmpl'),
    )
    for parts in templates_to_render:
        relative = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_dir, relative),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    summary = "New structure-spider project %r, using template directory %r, created in:"
    print(summary % (project_name, self.templates_dir))
    print(" %s\n" % abspath(project_dir))
    print("You can start the spider with:")
    print(" cd %s" % project_dir)
    print(" custom-redis-server -ll INFO -lf &")
    print(" scrapy crawl douban")
def run(self, args, opts):
    """Scaffold a new classifier and interactively write its settings.cfg.

    ``args`` must contain exactly one element, the classifier name, which
    may only consist of lower-case ASCII letters and underscores and must
    not be empty.

    Raises ``UsageError`` for a wrong argument count. Exits the process with
    status 1 when the name is invalid or the classifier directory already
    exists.
    """
    if len(args) != 1:
        raise UsageError()

    classifier_name = args[0]
    # "+" instead of "*": an empty name used to pass validation and would
    # have targeted the classifiers directory itself.
    if not re.search(r'^[_a-z]+$', classifier_name):
        print('Error: Classifier names must be entirely lower case')
        sys.exit(1)

    # Check the directory copytree() is about to create. The previous check
    # looked at the absolute path "/data/<name>", which is not where the
    # classifier is written, so the guard never matched.
    classifier_dir = join(CLASSIFIERS_PATH, classifier_name)
    if exists(classifier_dir):
        print("Error: directory %r already exists" % classifier_name)
        sys.exit(1)

    # If this is the first classifier, create the package and upload dirs.
    if not os.path.exists("data"):
        os.makedirs("data")
        # Opening for writing is enough to create the empty package marker;
        # the context manager closes the file.
        with open("data/__init__.py", "wb"):
            pass
    if not os.path.exists("to_upload"):
        os.makedirs("to_upload")

    # Make classifier file
    moduletpl = join(TEMPLATES_PATH, 'classifier')
    copytree(moduletpl, classifier_dir, ignore=IGNORE)
    for paths in TEMPLATES_TO_RENDER:
        path = join(*paths)
        tplfile = join(
            CLASSIFIERS_PATH,
            string.Template(path).substitute(classifier_name=classifier_name))
        render_templatefile(
            tplfile,
            classifier_name=classifier_name,
            ClassifierName=string_camelcase(classifier_name))

    # Make settings.cfg file
    config = ConfigParser.RawConfigParser()
    config.add_section("Classifier")
    classifications = raw_input(
        "Please input classifications separated by commas\n").split(",")
    config.set("Classifier", "classes",
               ",".join(sorted(c.strip() for c in classifications)))
    for class_type in config.get("Classifier", "classes").split(","):
        keep = int(
            raw_input(
                "Collect data classified as {0}?\n1. Yes\n 2. No".format(
                    class_type)))
        # Anything other than 1 is treated as "No".
        config.set("Classifier", class_type, keep == 1)

    with open("data/{0}/settings.cfg".format(classifier_name),
              "wb") as configfile:
        config.write(configfile)
def _gen_tests(self, name, classname, start_url, fixture_file, template_file):
    """Creates tests from test template file.

    Copies ``template_file`` from ``self.templates_dir`` to
    ``test_<name>.py`` in ``self.tests_dir`` and passes it to
    ``render_templatefile``. ``start_url`` is only supplied to the template
    when ``name`` does not contain "legistar".
    """
    template_dict = dict(
        name=name,
        classname=classname,
        fixture_file=fixture_file,
        date_str=datetime.now().strftime("%Y-%m-%d"),
    )
    include_start_url = "legistar" not in name
    if include_start_url:
        template_dict["start_url"] = start_url

    source_template = join(self.templates_dir, template_file)
    test_file = join(self.tests_dir, "test_{}.py".format(name))
    shutil.copyfile(source_template, test_file)
    render_templatefile(test_file, **template_dict)
    print("Created file: {}".format(test_file))
def _genspider(self, name, agency, classname, domain, start_url, template_file):
    """Create spider from custom template.

    Copies ``template_file`` from ``self.templates_dir`` to ``<name>.py`` in
    ``self.spiders_dir`` and passes it to ``render_templatefile``. The class
    name given to the template is derived from ``name``; the ``classname``
    argument is not used here.
    """
    # snake_case -> CamelCase: capitalise each "_"-separated word, drop the "_".
    camel_name = string.capwords(name, sep="_").replace("_", "")
    template_dict = dict(
        name=name,
        agency=agency,
        domain=domain,
        start_url=start_url,
        classname="{}Spider".format(camel_name),
    )

    source_template = join(self.templates_dir, template_file)
    spider_file = "{}.py".format(join(self.spiders_dir, name))
    shutil.copyfile(source_template, spider_file)
    render_templatefile(spider_file, **template_dict)
    print("Created file: {}".format(spider_file))
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    The template is copied to ``<module>.py`` in the directory of the
    package named by the ``NEWSPIDER_MODULE`` setting and then passed to
    ``render_templatefile`` with the spider variables.
    """
    tvars = dict(
        project_name=self.settings.get("BOT_NAME"),
        ProjectName=string_camelcase(self.settings.get("BOT_NAME")),
        module=module,
        name=name,
        domain=domain,
        classname="%sSpider" % "".join([part.capitalize() for part in module.split("_")]),
    )

    spiders_module = import_module(self.settings["NEWSPIDER_MODULE"])
    package_dir = dirname(spiders_module.__file__)
    spider_file = "%s.py" % join(abspath(package_dir), module)

    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    print("Created spider %r using template %r in module:" % (name, template_name))
    print(" %s.%s" % (spiders_module.__name__, module))
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    Copies ``template_file`` to ``<module>.py`` in the directory of the
    package named by the ``NEWSPIDER_MODULE`` setting, passes the copy to
    ``render_templatefile`` with the spider variables, and prints the dotted
    path of the new module.
    """
    tvars = {
        'project_name': self.settings.get('BOT_NAME'),
        'ProjectName': string_camelcase(self.settings.get('BOT_NAME')),
        'module': module,
        'name': name,
        'domain': domain,
        'classname': '%sSpider' % ''.join([s.capitalize()
                                           for s in module.split('_')])
    }

    # A non-empty fromlist makes __import__ return the named (sub)module
    # itself instead of the top-level package.
    spiders_module = __import__(self.settings['NEWSPIDER_MODULE'], {}, {}, [''])
    spiders_dir = abspath(dirname(spiders_module.__file__))
    spider_file = "%s.py" % join(spiders_dir, module)

    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    # Single-argument print(...) gives the same output as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    print("Created spider %r using template %r in module:" % (name,
                                                              template_name))
    print(" %s.%s" % (spiders_module.__name__, module))
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    Copies ``template_file`` to ``<module>.py`` in the directory of the
    package named by the ``NEWSPIDER_MODULE`` entry of the module-level
    ``settings``, passes the copy to ``render_templatefile`` with the spider
    variables, and prints the dotted path of the new module.
    """
    tvars = {
        'project_name': settings.get('BOT_NAME'),
        'ProjectName': string_camelcase(settings.get('BOT_NAME')),
        'module': module,
        'name': name,
        'domain': domain,
        'classname': '%sSpider' % ''.join([s.capitalize()
                                           for s in module.split('_')])
    }

    # A non-empty fromlist makes __import__ return the named (sub)module
    # itself instead of the top-level package.
    spiders_module = __import__(settings['NEWSPIDER_MODULE'], {}, {}, [''])
    spiders_dir = abspath(dirname(spiders_module.__file__))
    spider_file = "%s.py" % join(spiders_dir, module)

    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    # Single-argument print(...) gives the same output as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    print("Created spider %r using template %r in module:" % (name,
                                                              template_name))
    print(" %s.%s" % (spiders_module.__name__, module))
def run(self, args, opts):
    """Create a new Scrapy project in a fresh directory named ``args[0]``.

    Raises ``UsageError`` unless exactly one argument is supplied. Exits the
    process with status 1 when the name fails the pattern check or when a
    path with that name already exists.
    """
    if len(args) != 1:
        raise UsageError()

    project_name = args[0]
    # Single-argument print(...) gives the same output as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    if not re.search(r'^[_a-zA-Z]\w*$', project_name):
        print('Error: Project names must begin with a letter and contain only\n'
              'letters, numbers and underscores')
        sys.exit(1)
    elif exists(project_name):
        print("Error: directory %r already exists" % project_name)
        sys.exit(1)

    # Copy the module skeleton and the project-level config file.
    moduletpl = join(TEMPLATES_PATH, 'module')
    copytree(moduletpl, join(project_name, project_name), ignore=IGNORE)
    shutil.copy(join(TEMPLATES_PATH, 'scrapy.cfg'), project_name)

    # Fill in the placeholders of every template that needs rendering.
    for paths in TEMPLATES_TO_RENDER:
        path = join(*paths)
        tplfile = join(
            project_name,
            string.Template(path).substitute(project_name=project_name))
        render_templatefile(tplfile, project_name=project_name,
                            ProjectName=string_camelcase(project_name))
def test_simple_render(self):
    """Rendering ``templ.py.tmpl`` must yield ``templ.py`` with the
    placeholders substituted and must remove the template file."""
    template = u'from ${project_name}.spiders.${name} import ${classname}'
    rendered = u'from proj.spiders.spi import TheSpider'
    context = {'project_name': 'proj', 'name': 'spi', 'classname': 'TheSpider'}

    render_path = os.path.join(self.tmp_path, 'templ.py')
    template_path = os.path.join(self.tmp_path, 'templ.py.tmpl')

    encoded_template = template.encode('utf8')
    with open(template_path, 'wb') as handle:
        handle.write(encoded_template)
    assert os.path.isfile(template_path)  # Failure of test itself

    render_templatefile(template_path, **context)

    self.assertFalse(os.path.exists(template_path))
    with open(render_path, 'rb') as handle:
        self.assertEqual(handle.read().decode('utf8'), rendered)

    # Clean up so later tests start from an empty directory.
    os.remove(render_path)
    assert not os.path.exists(render_path)  # Failure of test itself
def test_simple_render(self):
    """Rendering ``templ.py.tmpl`` must yield ``templ.py`` with the
    placeholders substituted and must remove the template file."""
    template = "from ${project_name}.spiders.${name} import ${classname}"
    rendered = "from proj.spiders.spi import TheSpider"
    context = {"project_name": "proj", "name": "spi", "classname": "TheSpider"}

    render_path = os.path.join(self.tmp_path, "templ.py")
    template_path = os.path.join(self.tmp_path, "templ.py.tmpl")

    encoded_template = template.encode("utf8")
    with open(template_path, "wb") as handle:
        handle.write(encoded_template)
    assert os.path.isfile(template_path)  # Failure of test itself

    render_templatefile(template_path, **context)

    self.assertFalse(os.path.exists(template_path))
    with open(render_path, "rb") as handle:
        self.assertEqual(handle.read().decode("utf8"), rendered)

    # Clean up so later tests start from an empty directory.
    os.remove(render_path)
    assert not os.path.exists(render_path)  # Failure of test itself
def test_simple_render(self):
    """Rendering ``templ.py.tmpl`` must yield ``templ.py`` with the
    placeholders substituted and must remove the template file."""
    template = 'from ${project_name}.spiders.${name} import ${classname}'
    rendered = 'from proj.spiders.spi import TheSpider'
    context = {'project_name': 'proj', 'name': 'spi', 'classname': 'TheSpider'}

    render_path = os.path.join(self.tmp_path, 'templ.py')
    template_path = os.path.join(self.tmp_path, 'templ.py.tmpl')

    encoded_template = template.encode('utf8')
    with open(template_path, 'wb') as handle:
        handle.write(encoded_template)
    assert os.path.isfile(template_path)  # Failure of test itself

    render_templatefile(template_path, **context)

    self.assertFalse(os.path.exists(template_path))
    with open(render_path, 'rb') as handle:
        self.assertEqual(handle.read().decode('utf8'), rendered)

    # Clean up so later tests start from an empty directory.
    os.remove(render_path)
    assert not os.path.exists(render_path)  # Failure of test itself
def run(self, args, opts):
    """Scaffold a new classifier and interactively write its settings.cfg.

    ``args`` must contain exactly one element, the classifier name, which
    may only consist of lower-case ASCII letters and underscores and must
    not be empty.

    Raises ``UsageError`` for a wrong argument count. Exits the process with
    status 1 when the name is invalid or the classifier directory already
    exists.
    """
    if len(args) != 1:
        raise UsageError()

    classifier_name = args[0]
    # "+" instead of "*": an empty name used to pass validation and would
    # have targeted the classifiers directory itself.
    if not re.search(r'^[_a-z]+$', classifier_name):
        print('Error: Classifier names must be entirely lower case')
        sys.exit(1)

    # Check the directory copytree() is about to create. The previous check
    # looked at the absolute path "/data/<name>", which is not where the
    # classifier is written, so the guard never matched.
    classifier_dir = join(CLASSIFIERS_PATH, classifier_name)
    if exists(classifier_dir):
        print("Error: directory %r already exists" % classifier_name)
        sys.exit(1)

    # If this is the first classifier, create the package and upload dirs.
    if not os.path.exists("data"):
        os.makedirs("data")
        # Opening for writing is enough to create the empty package marker;
        # the context manager closes the file.
        with open("data/__init__.py", "wb"):
            pass
    if not os.path.exists("to_upload"):
        os.makedirs("to_upload")

    # Make classifier file
    moduletpl = join(TEMPLATES_PATH, 'classifier')
    copytree(moduletpl, classifier_dir, ignore=IGNORE)
    for paths in TEMPLATES_TO_RENDER:
        path = join(*paths)
        tplfile = join(CLASSIFIERS_PATH,
                       string.Template(path).substitute(classifier_name=classifier_name))
        render_templatefile(tplfile, classifier_name=classifier_name,
                            ClassifierName=string_camelcase(classifier_name))

    # Make settings.cfg file
    config = ConfigParser.RawConfigParser()
    config.add_section("Classifier")
    classifications = raw_input("Please input classifications separated by commas\n").split(",")
    config.set("Classifier", "classes", ",".join(sorted(c.strip() for c in classifications)))
    for class_type in config.get("Classifier", "classes").split(","):
        keep = int(raw_input("Collect data classified as {0}?\n1. Yes\n 2. No".format(class_type)))
        # Anything other than 1 is treated as "No".
        config.set("Classifier", class_type, keep == 1)

    with open("data/{0}/settings.cfg".format(classifier_name), "wb") as configfile:
        config.write(configfile)
def run(self, args, opts):
    """Create a Scrapy project from the command's template directory.

    ``args`` is ``[project_name]`` or ``[project_name, project_dir]``; any
    other length raises ``UsageError``. Sets ``self.exitcode = 1`` and
    returns early when the directory already contains a ``scrapy.cfg`` or
    ``self._is_valid_name`` rejects the name.
    """
    if len(args) not in (1, 2):
        raise UsageError()

    # Project name, and the project directory (defaults to the name).
    project_name = args[0]
    project_dir = args[1] if len(args) == 2 else args[0]

    # Refuse to overwrite a project: is there already a scrapy.cfg in the
    # project root?
    if exists(join(project_dir, 'scrapy.cfg')):
        self.exitcode = 1
        print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
        return

    # Validate the project name.
    name_ok = self._is_valid_name(project_name)
    if not name_ok:
        self.exitcode = 1
        return

    # Copy the contents of the templates directory into the project.
    self._copytree(self.templates_dir, abspath(project_dir))
    # Rename the placeholder "module" package to the project name.
    move(join(project_dir, 'module'), join(project_dir, project_name))

    # Fill in each template file to produce the final source files.
    for parts in TEMPLATES_TO_RENDER:
        relative = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_dir, relative),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    summary = "New Scrapy project %r, using template directory %r, created in:"
    print(summary % (project_name, self.templates_dir))
    print(" %s\n" % abspath(project_dir))
    print("You can start your first spider with:")
    print(" cd %s" % project_dir)
    print(" scrapy genspider example example.com")
def run(self, args, opts):
    """Create a Scrapy project directory from ``self.templates_dir``.

    Raises ``UsageError`` unless exactly one argument (the project name) is
    given. When ``self._is_valid_name`` rejects the name, sets
    ``self.exitcode = 1`` and returns without copying anything.
    """
    if len(args) != 1:
        raise UsageError()

    project_name = args[0]
    name_ok = self._is_valid_name(project_name)
    if not name_ok:
        self.exitcode = 1
        return

    # Copy the whole template tree, then rename its placeholder package.
    copytree(self.templates_dir, project_name, ignore=IGNORE)
    placeholder_pkg = join(project_name, 'module')
    move(placeholder_pkg, join(project_name, project_name))

    for parts in TEMPLATES_TO_RENDER:
        relative = string.Template(join(*parts)).substitute(
            project_name=project_name)
        render_templatefile(
            join(project_name, relative),
            project_name=project_name,
            ProjectName=string_camelcase(project_name),
        )

    summary = "New Scrapy project {0!r}, using template directory {1!r}, created in:"
    print(summary.format(project_name, self.templates_dir))
    print(" {0!s}\n".format(abspath(project_name)))
    print("You can start your first spider with:")
    print(" cd {0!s}".format(project_name))
    print(" scrapy genspider example example.com")
def run(self, args, opts):
    """Create a new Scrapy project in a directory named after ``args[0]``.

    Raises ``UsageError`` unless exactly one argument is given. When
    ``self._is_valid_name`` rejects the name, sets ``self.exitcode = 1`` and
    returns without creating anything.
    """
    if len(args) != 1:
        raise UsageError()

    project_name = args[0]
    if not self._is_valid_name(project_name):
        self.exitcode = 1
        return

    # Module skeleton goes into <name>/<name>; scrapy.cfg into <name>.
    module_template = join(TEMPLATES_PATH, 'module')
    module_target = join(project_name, project_name)
    copytree(module_template, module_target, ignore=IGNORE)
    shutil.copy(join(TEMPLATES_PATH, 'scrapy.cfg'), project_name)

    for parts in TEMPLATES_TO_RENDER:
        template_rel = string.Template(join(*parts)).substitute(
            project_name=project_name)
        target = join(project_name, template_rel)
        render_templatefile(target, project_name=project_name,
                            ProjectName=string_camelcase(project_name))

    print("New Scrapy project %r created in:" % project_name)
    print(" %s\n" % abspath(project_name))
    print("You can start your first spider with:")
    print(" cd %s" % project_name)
    print(" scrapy genspider example example.com")
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    Writes ``<module>.py`` into the directory of the package named by the
    ``NEWSPIDER_MODULE`` setting, or into the current directory when that
    setting is empty, then passes the file to ``render_templatefile``.
    """
    tvars = dict(
        project_name=self.settings.get('BOT_NAME'),
        ProjectName=string_camelcase(self.settings.get('BOT_NAME')),
        module=module,
        name=name,
        domain=domain,
        classname='%sSpider' % ''.join(
            word.capitalize() for word in module.split('_')),
    )

    if self.settings.get('NEWSPIDER_MODULE'):
        spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
        spiders_dir = abspath(dirname(spiders_module.__file__))
    else:
        spiders_module, spiders_dir = None, "."

    spider_file = "%s.py" % join(spiders_dir, module)
    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    created_msg = "Created spider %r using template %r " % (name, template_name)
    if not spiders_module:
        print(created_msg, end='\n')
        return
    # Stay on the same line so the module location completes the sentence.
    print(created_msg, end='')
    print("in module:\n %s.%s" % (spiders_module.__name__, module))
def _genspider(self, module, name, domain, template_name, template_file):
    """Generate the spider module, based on the given template.

    Writes ``<module>.py`` into the directory of the package named by the
    ``NEWSPIDER_MODULE`` setting, or into the current directory when that
    setting is empty, passes the file to ``render_templatefile`` with the
    spider variables, and prints what was created.
    """
    capitalized_module = ''.join(s.capitalize() for s in module.split('_'))
    tvars = {
        'project_name': self.settings.get('BOT_NAME'),
        'ProjectName': string_camelcase(self.settings.get('BOT_NAME')),
        'module': module,
        'name': name,
        'domain': domain,
        'classname': f'{capitalized_module}Spider'
    }

    if self.settings.get('NEWSPIDER_MODULE'):
        spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
        spiders_dir = abspath(dirname(spiders_module.__file__))
    else:
        spiders_module = None
        spiders_dir = "."

    spider_file = f"{join(spiders_dir, module)}.py"
    shutil.copyfile(template_file, spider_file)
    render_templatefile(spider_file, **tvars)

    # When a spiders module exists, suppress the newline so the module
    # location printed below continues the same sentence.
    print(f"Created spider {name!r} using template {template_name!r} ",
          end=('' if spiders_module else '\n'))
    if spiders_module:
        # The "f" prefix was missing here, so the placeholders were printed
        # literally instead of the dotted module path.
        print(f"in module:\n {spiders_module.__name__}.{module}")