def get_project_settings():
    """Build the settings object for the current Scrapy project.

    Layers, in order and all at ``'project'`` priority: the module named by
    the ``ENVVAR`` environment variable, a pickled dict taken from
    ``SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE``, and finally every ``SCRAPY_*``
    environment variable (with the prefix stripped).

    Returns:
        The populated ``Settings`` instance.
    """
    # ENVVAR is 'SCRAPY_SETTINGS_MODULE' (per the original author's note).
    if ENVVAR not in os.environ:
        # In practice this seems to always resolve to 'default'.
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        # Presumably reads scrapy.cfg, exports SCRAPY_SETTINGS_MODULE into
        # os.environ and puts the project on sys.path -- confirm in init_env.
        init_env(project_name)

    # Settings manager that the layers below are applied to.
    settings = Settings()

    # Pick up whatever the previous step left in the environment.
    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.setdict(pickle.loads(pickled_blob), priority='project')

    # XXX: deprecate and remove this functionality
    env_overrides = {}
    for var_name, var_value in os.environ.items():
        if var_name.startswith('SCRAPY_'):
            # Drop the 7-character 'SCRAPY_' prefix to get the setting name.
            env_overrides[var_name[7:]] = var_value
    if env_overrides:
        settings.setdict(env_overrides, priority='project')

    return settings
def get_project_settings():
    """Assemble the project's settings from the settings module and env vars.

    The settings module named by ``ENVVAR`` is loaded first, then the
    (deprecated) pickled overrides, then every ``SCRAPY_*`` environment
    variable; each layer is stored with ``'project'`` priority.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        # No settings module exported yet: let init_env set the project up.
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()

    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        deprecation_message = (
            "Use of environment variable "
            "'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
            "is deprecated."
        )
        warnings.warn(deprecation_message, ScrapyDeprecationWarning)
        # NOTE(review): unpickling environment data runs arbitrary code if
        # the environment is not trusted.
        settings.setdict(pickle.loads(pickled_blob), priority='project')

    # XXX: deprecate and remove this functionality
    prefix = 'SCRAPY_'
    env_overrides = {
        name[len(prefix):]: value
        for name, value in os.environ.items()
        if name.startswith(prefix)
    }
    if env_overrides:
        settings.setdict(env_overrides, priority='project')

    return settings
def get_project_settings():
    """Return the project's settings, warning about SCRAPY_* overrides.

    Loads the module named by ``ENVVAR`` (initialising the project
    environment first when that variable is missing), then copies every
    ``SCRAPY_*`` environment variable into the settings at ``'project'``
    priority. Variables outside the reserved set trigger a
    ``ScrapyDeprecationWarning``.

    Returns:
        The populated ``Settings`` instance.
    """
    # Without SCRAPY_SETTINGS_MODULE in the environment, fall back to the
    # default project.
    if ENVVAR not in os.environ:
        # os.environ.get works like dict.get, but reads OS-level variables.
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        # Presumably extends the import path with the location of scrapy.cfg
        # -- confirm in init_env.
        init_env(project_name)

    settings = Settings()

    module_path = os.environ.get(ENVVAR)
    # Only load a module when the environment actually names one.
    if module_path:
        settings.setmodule(module_path, priority='project')

    scrapy_envvars = {}
    for var_name, var_value in os.environ.items():
        if var_name.startswith('SCRAPY_'):
            scrapy_envvars[var_name[7:]] = var_value

    # Names Scrapy itself relies on; they are not settings overrides.
    reserved_names = {
        'CHECK',
        'PROJECT',
        'PYTHON_SHELL',
        'SETTINGS_MODULE',
    }
    override_names = set(scrapy_envvars) - reserved_names

    # Any other SCRAPY_-prefixed variable is a (deprecated) settings override.
    if override_names:
        override_listing = ', '.join(sorted(override_names))
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            f'currently defined: {override_listing}',
            ScrapyDeprecationWarning,
        )

    settings.setdict(scrapy_envvars, priority='project')
    return settings
def get_project_settings():
    """Create the ``Settings`` for the active project.

    The module referenced by ``ENVVAR`` is applied first; afterwards all
    ``SCRAPY_*`` environment variables (prefix removed) are applied on top.
    Both layers use ``'project'`` priority. A ``ScrapyDeprecationWarning`` is
    issued when any variable other than the reserved ones is present.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()
    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    prefix = 'SCRAPY_'
    scrapy_envvars = {
        name[len(prefix):]: value
        for name, value in os.environ.items()
        if name.startswith(prefix)
    }

    # Variables with a dedicated meaning; they do not count as overrides.
    reserved = ('CHECK', 'PROJECT', 'PYTHON_SHELL', 'SETTINGS_MODULE')
    deprecated_names = sorted(
        name for name in scrapy_envvars if name not in reserved
    )
    if deprecated_names:
        joined_names = ', '.join(deprecated_names)
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            f'currently defined: {joined_names}',
            ScrapyDeprecationWarning
        )

    settings.setdict(scrapy_envvars, priority='project')
    return settings
def get_project_dirs():
    """Return ``(outer_dir, inner_dir)`` for the current Scrapy project.

    ``outer_dir`` is the directory holding the closest ``scrapy.cfg`` or,
    failing that, two levels above the settings module's file. ``inner_dir``
    is ``SCRAPY_PROJECT`` when set, otherwise the first dotted component of
    ``SCRAPY_SETTINGS_MODULE``.

    Returns:
        A ``(outer_dir, inner_dir)`` tuple of strings.

    Raises:
        Exception: with the message "Project configuration awry" when the
            settings module is unknown or cannot be imported.
    """
    outer_dir = ""
    closest_cfg = closest_scrapy_cfg()
    if closest_cfg:
        outer_dir = os.path.dirname(closest_cfg)
    inner_dir = os.environ.get('SCRAPY_PROJECT') or ""
    if outer_dir and inner_dir:
        return (outer_dir, inner_dir)

    init_env()
    scrapy_module = os.environ.get('SCRAPY_SETTINGS_MODULE')
    # Everything below needs the module name. Without this guard a found
    # scrapy.cfg plus an unset SCRAPY_PROJECT crashed on ``None.split``.
    if not scrapy_module:
        raise Exception("Project configuration awry")

    if not inner_dir:
        # The top-level package of the settings module is the inner dir.
        inner_dir = scrapy_module.split('.')[0]
    if outer_dir and inner_dir:
        return (outer_dir, inner_dir)

    try:
        module = import_module(scrapy_module)
    except ImportError:
        raise Exception("Project configuration awry")
    # <outer>/<package>/settings.py -> <outer>
    outer_dir = os.path.dirname(os.path.dirname(module.__file__))
    return (outer_dir, inner_dir)
def get_project_settings():
    """Load the project's settings and apply environment overrides.

    Applies, at ``'project'`` priority: the settings module named by
    ``ENVVAR``, the pickled dict from ``SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE``
    and every ``SCRAPY_*`` environment variable with its prefix removed.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        # Read the project name from the environment.
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        # Initialise the project: presumably copies the `default` entry of
        # the [settings] section in the project's scrapy.cfg into the
        # SCRAPY_SETTINGS_MODULE variable -- confirm in init_env.
        init_env(project_name)

    # Fresh settings object that the layers below are applied to.
    settings = Settings()

    # Settings module path, as configured through the cfg file.
    module_path = os.environ.get(ENVVAR)
    if module_path:
        # Project settings are applied on top of the initial values.
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.setdict(pickle.loads(pickled_blob), priority='project')

    # XXX: deprecate and remove this functionality
    env_overrides = dict(
        (var_name[7:], var_value)
        for var_name, var_value in os.environ.items()
        if var_name.startswith('SCRAPY_')
    )
    if env_overrides:
        # Environment values overwrite what has been loaded so far.
        settings.setdict(env_overrides, priority='project')

    return settings
def get_project_settings():
    """Return a ``CrawlerSettings`` for the project, with env overrides.

    Designating the settings: ENVVAR is 'SCRAPY_SETTINGS_MODULE'. When using
    Scrapy you declare which settings to use through that environment
    variable. Its value must be in Python path syntax, e.g.
    ``myproject.settings``, and the module must be on the import search path.

    Returns:
        A ``CrawlerSettings`` wrapping the imported settings module (or
        ``None``), whose ``overrides`` hold the pickled overrides plus every
        ``SCRAPY_*`` environment variable with the prefix removed.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    module_path = os.environ.get(ENVVAR)
    settings_module = import_module(module_path) if module_path else None
    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.overrides = pickle.loads(pickled_blob)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    for var_name, var_value in os.environ.items():
        if not var_name.startswith('SCRAPY_'):
            continue
        settings.overrides[var_name[7:]] = var_value

    return settings
def spider_project(path_or_spider):
    """Yield the project settings while the cwd is the project's directory.

    This is a generator with a single ``yield`` and ``finally`` cleanup;
    presumably it is wrapped by ``contextlib.contextmanager`` where it is
    defined -- the decorator is not visible here.

    Args:
        path_or_spider: a directory path, or a ``ScrapyBaseSpider`` instance,
            in which case the directory of the module defining the spider's
            class is used.

    Yields:
        The result of ``get_project_settings()``.
    """
    previous_dir = os.path.abspath(os.curdir)
    if isinstance(path_or_spider, ScrapyBaseSpider):
        spider_module = sys.modules[path_or_spider.__class__.__module__]
        target_dir = os.path.dirname(spider_module.__file__)
    else:
        target_dir = path_or_spider

    os.chdir(target_dir)
    try:
        # init_env() sits inside the try so that a failure there still
        # restores the caller's working directory.
        init_env()
        yield get_project_settings()
    finally:
        os.chdir(previous_dir)
def spider_project(path_or_spider):
    """Temporarily switch into a project directory and yield its settings.

    Written as a generator with one ``yield``; it is presumably used through
    ``contextlib.contextmanager`` (the decorator is not visible here).

    Args:
        path_or_spider: either a directory to switch into, or a
            ``ScrapyBaseSpider`` instance whose defining module's directory
            is used instead.

    Yields:
        The result of ``get_project_settings()``.
    """
    original_cwd = os.path.abspath(os.curdir)

    project_dir = path_or_spider
    if isinstance(path_or_spider, ScrapyBaseSpider):
        module_name = path_or_spider.__class__.__module__
        project_dir = os.path.dirname(sys.modules[module_name].__file__)

    os.chdir(project_dir)
    try:
        # Run init_env() under the same cleanup as the yield: if it raises,
        # the working directory must not stay switched.
        init_env()
        yield get_project_settings()
    finally:
        os.chdir(original_cwd)
def get_project_settings():
    """Build the project settings and verify the process runs in the project.

    Layers, in order: ``default_settings`` and the module named by ``ENVVAR``
    (plus ``online_settings`` when ``ENV_FLAG_DDIY`` is ``'online'``), the
    deprecated pickled overrides, and every ``SCRAPY_*`` environment variable
    with the prefix removed.

    Returns:
        The populated ``Settings`` instance.

    Raises:
        EnvironmentError: when the current working directory is neither the
            project root (two levels above this file) nor inside it.
    """
    if ENVVAR not in os.environ:
        project = os.environ.get('SCRAPY_PROJECT', 'default')
        init_env(project)

    settings = Settings()
    settings_module_path = os.environ.get(ENVVAR)
    if settings_module_path:
        _update_settings(settings, default_settings)
        settings.setmodule(settings_module_path, priority='project')
        # NOTE(review): the source was whitespace-mangled; the online overlay
        # is assumed to belong to this branch -- confirm.
        if os.environ.get('ENV_FLAG_DDIY') == 'online':
            _update_settings(settings, online_settings)

    pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_settings:
        warnings.warn(
            "Use of environment variable "
            "'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
            "is deprecated.",
            ScrapyDeprecationWarning)
        # NOTE(review): unpickling environment data runs arbitrary code if
        # the environment is not trusted.
        settings.setdict(pickle.loads(pickled_settings), priority='project')

    scrapy_envvars = {
        k[7:]: v
        for k, v in os.environ.items()
        if k.startswith('SCRAPY_')
    }
    # Variables with a dedicated meaning; they do not count as overrides.
    valid_envvars = {
        'CHECK',
        'PICKLED_SETTINGS_TO_OVERRIDE',
        'PROJECT',
        'PYTHON_SHELL',
        'SETTINGS_MODULE',
    }
    setting_envvars = {k for k in scrapy_envvars if k not in valid_envvars}
    if setting_envvars:
        setting_envvar_list = ', '.join(sorted(setting_envvars))
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            f'currently defined: {setting_envvar_list}',
            ScrapyDeprecationWarning)
    settings.setdict(scrapy_envvars, priority='project')

    project_path = os.path.normpath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '../..'))
    current_path = os.getcwd()
    # Compare on a directory boundary: a bare startswith() would also accept
    # siblings such as "<project_path>2". Joining with '' appends exactly one
    # trailing separator (and leaves a filesystem root untouched).
    project_prefix = os.path.join(project_path, '')
    inside_project = (
        current_path == project_path
        or current_path.startswith(project_prefix)
    )
    if not inside_project:
        raise EnvironmentError(
            f'The program runs in a non-project path (current_path:{current_path} => project_path:{project_path})'
        )
    return settings
def get_project_settings():
    """Build an ``AioSettings`` object for the current project.

    All dependencies are imported at call time. The settings module named by
    ``SCRAPY_SETTINGS_MODULE`` is applied first, then the deprecated pickled
    overrides, then every ``SCRAPY_*`` environment variable with the prefix
    removed; each layer uses ``'project'`` priority.

    Returns:
        The populated ``AioSettings`` instance.
    """
    import os
    import pickle
    import warnings

    from scrapy.exceptions import ScrapyDeprecationWarning
    from scrapy.utils.conf import init_env

    from aioscrapy.settings import AioSettings

    ENVVAR = 'SCRAPY_SETTINGS_MODULE'

    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = AioSettings()
    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        warnings.warn(
            "Use of environment variable "
            "'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
            "is deprecated.",
            ScrapyDeprecationWarning,
        )
        # NOTE(review): unpickling environment data runs arbitrary code if
        # the environment is not trusted.
        settings.setdict(pickle.loads(pickled_blob), priority='project')

    scrapy_envvars = {}
    for var_name, var_value in os.environ.items():
        if var_name.startswith('SCRAPY_'):
            scrapy_envvars[var_name[7:]] = var_value

    # Variables with a dedicated meaning; they do not count as overrides.
    reserved_names = {
        'CHECK',
        'PICKLED_SETTINGS_TO_OVERRIDE',
        'PROJECT',
        'PYTHON_SHELL',
        'SETTINGS_MODULE',
    }
    override_names = set(scrapy_envvars) - reserved_names
    if override_names:
        override_listing = ', '.join(sorted(override_names))
        warnings.warn(
            'Use of environment variables prefixed with SCRAPY_ to override '
            'settings is deprecated. The following environment variables are '
            f'currently defined: {override_listing}',
            ScrapyDeprecationWarning,
        )

    settings.setdict(scrapy_envvars, priority='project')
    return settings
def get_project_dir():
    """Return the project's root directory as a ``Path``, or ``None``.

    The directory of the closest ``scrapy.cfg`` wins. Otherwise the module
    named by ``SCRAPY_SETTINGS_MODULE`` is imported and the directory two
    levels above its file is returned.

    Returns:
        A ``pathlib.Path``, or ``None`` when the variable is unset or the
        module cannot be imported.
    """
    cfg_path = closest_scrapy_cfg()
    if cfg_path:
        return Path(cfg_path).parent

    init_env()
    module_name = os.environ.get('SCRAPY_SETTINGS_MODULE')
    if module_name is None:
        return None

    try:
        settings_module = import_module(module_name)
    except ImportError:
        return None

    # <root>/<package>/settings.py -> <root>
    settings_file = Path(settings_module.__file__)
    return settings_file.parent.parent
def get_project_settings():
    """Return a ``CrawlerSettings`` for the project, with env overrides.

    The module named by ``ENVVAR`` is imported (when the variable is
    non-empty) and wrapped in ``CrawlerSettings``. ``settings.overrides`` is
    then seeded from ``SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE`` and extended
    with every ``SCRAPY_*`` environment variable, prefix removed.

    Returns:
        The configured ``CrawlerSettings`` instance.
    """
    if ENVVAR not in os.environ:
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        init_env(project_name)

    settings_module = None
    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings_module = import_module(module_path)
    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.overrides = pickle.loads(pickled_blob)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    prefix = 'SCRAPY_'
    for var_name, var_value in os.environ.items():
        if var_name.startswith(prefix):
            settings.overrides[var_name[len(prefix):]] = var_value

    return settings
def get_project_settings():
    """Return a ``CrawlerSettings`` built from the project's settings module.

    The module path comes from ``ENVVAR`` and falls back to
    ``"scrapy_settings"``; if it cannot be imported, ``CrawlerSettings`` is
    created with ``None``. ``settings.overrides`` is seeded from the pickled
    environment value and extended with every ``SCRAPY_*`` variable.

    Returns:
        The configured ``CrawlerSettings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get("SCRAPY_PROJECT", "default"))

    module_path = os.environ.get(ENVVAR, "scrapy_settings")
    try:
        # A non-empty fromlist makes __import__ return the leaf module
        # rather than the top-level package.
        settings_module = __import__(module_path, {}, {}, [""])
    except ImportError:
        settings_module = None
    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.overrides = pickle.loads(pickled_blob)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    for var_name, var_value in os.environ.items():
        if not var_name.startswith("SCRAPY_"):
            continue
        settings.overrides[var_name[7:]] = var_value

    return settings
def get_project_settings():
    """Create the project's ``Settings`` and apply environment overrides.

    Order of application (all at ``'project'`` priority): the module named by
    ``ENVVAR``, the pickled dict from ``SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE``,
    then every ``SCRAPY_*`` environment variable with its prefix removed.

    Returns:
        The populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        init_env(os.environ.get('SCRAPY_PROJECT', 'default'))

    settings = Settings()

    module_path = os.environ.get(ENVVAR)
    if module_path:
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        unpickled_overrides = pickle.loads(pickled_blob)
        settings.setdict(unpickled_overrides, priority='project')

    # XXX: deprecate and remove this functionality
    env_overrides = {}
    for var_name, var_value in os.environ.items():
        if var_name.startswith('SCRAPY_'):
            env_overrides[var_name[7:]] = var_value
    if env_overrides:
        settings.setdict(env_overrides, priority='project')

    return settings
def get_project_settings():
    """Build a ``CrawlerSettings`` from the project's settings module.

    Uses the module named by ``ENVVAR`` (default ``'scrapy_settings'``);
    an ``ImportError`` results in ``CrawlerSettings(None)``. Overrides come
    from the pickled environment value and from every ``SCRAPY_*``
    environment variable with the prefix removed.

    Returns:
        The configured ``CrawlerSettings`` instance.
    """
    if ENVVAR not in os.environ:
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        init_env(project_name)

    settings_module = None
    module_path = os.environ.get(ENVVAR, 'scrapy_settings')
    try:
        # A non-empty fromlist makes __import__ return the leaf module
        # rather than the top-level package.
        settings_module = __import__(module_path, {}, {}, [''])
    except ImportError:
        pass
    settings = CrawlerSettings(settings_module)

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.overrides = pickle.loads(pickled_blob)
    else:
        settings.overrides = {}

    # XXX: deprecate and remove this functionality
    prefix = 'SCRAPY_'
    for var_name, var_value in os.environ.items():
        if var_name.startswith(prefix):
            settings.overrides[var_name[len(prefix):]] = var_value

    return settings
def get_project_settings():
    """Return the settings object for the current project.

    Starts from a fresh ``Settings`` instance and layers on top of it, at
    ``'project'`` priority: the user settings module named by ``ENVVAR``,
    the pickled overrides, and every ``SCRAPY_*`` environment variable.

    Returns:
        The populated ``Settings`` instance.
    """
    ## Is SCRAPY_SETTINGS_MODULE already configured in the environment?
    if ENVVAR not in os.environ:
        ## Read SCRAPY_PROJECT from the environment; 'default' when absent.
        project_name = os.environ.get('SCRAPY_PROJECT', 'default')
        ## Initialise the project environment. Per the original notes
        ## (confirm in init_env): locates the user settings module through
        ## scrapy.cfg, stores it in SCRAPY_SETTINGS_MODULE and adds the
        ## project base path to Python's module search path.
        init_env(project_name)

    ## Create the settings instance. Presumably this loads the built-in
    ## defaults (default_settings.py) at 'default' priority -- confirm
    ## against the Settings class.
    settings = Settings()

    ## Path of the user settings module (settings.py).
    module_path = os.environ.get(ENVVAR)
    if module_path:
        ## Load the user settings on top of what is already there;
        ## they are stored with 'project' priority.
        settings.setmodule(module_path, priority='project')

    # XXX: remove this hack
    # NOTE(review): unpickling environment data runs arbitrary code if the
    # environment is not trusted.
    pickled_blob = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_blob:
        settings.setdict(pickle.loads(pickled_blob), priority='project')

    # XXX: deprecate and remove this functionality
    env_overrides = dict(
        (var_name[7:], var_value)
        for var_name, var_value in os.environ.items()
        if var_name.startswith('SCRAPY_')
    )
    if env_overrides:
        settings.setdict(env_overrides, priority='project')

    ## Hand back the settings object.
    return settings
@staticmethod def _save_redis_rule(rule_dict): key = 'Rule:' + rule_dict['name'] conn.hmset(key, rule_dict) conn.sadd('Rules', rule_dict['name']) @staticmethod def _save_csv_rule(rule_dict): raise NotImplementedError def save(self, dst='redis'): """ Save rule to destination. Author: David """ self.__check_vals() save_method = getattr(self, '_save_{}_rule'.format(dst)) save_method(self.rule_dict) if __name__ == '__main__': # rule = Rule.load('xici') init_env('default') rules = Rule.loads('csv') for r in rules: r.save() print(rules[0]) # rule.save_rule()