Пример #1
0
 def spider_opened(self, spider):
     """Initialise the Redis keys and ``NewsSet`` instances for ``spider``.

     Two separate keys are read from the configuration, both looked up by
     ``spider.name``:

     * ``self.key`` (from ``REDIS_CONFIG_KEY``) backs ``self.set``.
     * ``self.image_key`` (from ``REDIS_CONFIG_IMAGE_KEY``) backs
       ``self.image_set``.
     """
     self.key = config.read_redis_key(REDIS_CONFIG_KEY,
                                      spider_name=spider.name)
     # Assign only ``image_key`` here. A chained
     # ``self.image_key = self.key = ...`` would overwrite the key read
     # above, making ``self.set`` and ``self.image_set`` share the image key.
     self.image_key = config.read_redis_key(
         REDIS_CONFIG_IMAGE_KEY, spider_name=spider.name)
     self.set = NewsSet(self.key)
     self.image_set = NewsSet(key=self.image_key)
Пример #2
0
    def __init__(self, *args, **kwargs):
        """Set up the counter, meta keys, parser dispatch table and dedup set.

        All positional and keyword arguments are forwarded unchanged to the
        parent class initializer.
        """
        super().__init__(*args, **kwargs)

        self.index = 0
        self.meta_keys = [
            "title",
            "block_type",
            "create_time",
            "index",
        ]

        # Dispatch table mapping a domain name to the parse method for it.
        self.parser_domain_map = {
            # chd.edu.cn sites share one parser
            "www.chd.edu.cn": self.parse_chdnews,
            "news.chd.edu.cn": self.parse_chdnews,
            "www.univs.cn": self.parse_univs,
            "mp.weixin.qq.com": self.parse_wechat,
            # *.gov.cn sites
            "www.gov.cn": self.parse_gov,
            "jyt.shaanxi.gov.cn": self.parse_jyt_shanxi,
            "www.xa.gov.cn": self.parse_xa_gov,
            "www.moe.gov.cn": self.parse_moe_gov,
            "www.qinfeng.gov.cn": self.parse_qinfeng_gov,
            "www.nhc.gov.cn": self.parse_nhc_gov,
            "www.shaanxi.gov.cn": self.parse_shanxi_gov,
            "www.mem.gov.cn": self.parse_mem_gov,
            # remaining sites
            "cpc.people.com.cn": self.parse_cpc_people,
            "news.cnhubei.com": self.parse_cnhubei,
            "py.cnhubei.com": self.parse_py_cnhubei,
            "www.piyao.org.cn": self.parse_piyao,
            "www.xinhuanet.com": self.parse_xinhua,
            "www.chinacdc.cn": self.parse_china_cdc,
            "www.chinanews.com": self.parse_chinanews,
        }

        # Set used to decide whether a request is a duplicate.
        redis_key = config.read_redis_key(
            REDIS_CONFIG_KEY,
            spider_name=self.name,
        )
        self.set = NewsSet(redis_key)
Пример #3
0
    def open_spider(self, spider):
        """Prepare the image key/set, the uploader and the Qiniu URL.

        The image key is read from the configuration using ``spider.name``
        and stored on both ``self.image_key`` and ``self.key``.
        """
        # Redis: one lookup, stored under two attribute names.
        # NOTE(review): ``self.key`` receives the *image* key here -- confirm
        # that this aliasing is intentional.
        image_key = config.read_redis_key(
            REDIS_CONFIG_IMAGE_KEY,
            spider_name=spider.name,
        )
        self.image_key = image_key
        self.key = image_key
        self.image_set = NewsSet(key=self.image_key)

        self.upload = UploadImage()

        # Qiniu cloud URL: the last value returned by the config reader.
        qiniu_conf = config.read_qiniu_conf(QINIU_CONFIG_SECTION)
        *_, self.qiniu_url = qiniu_conf
Пример #4
0
 def open_spider(self, spider):
     """Initialise the Redis key, channel id, DB model and link set.

     Both the Redis key and the channel id are looked up by ``spider.name``.
     """
     spider_name = spider.name

     # Key under which this spider's article links are stored in Redis.
     self.key = config.read_redis_key(
         REDIS_CONFIG_KEY,
         spider_name=spider_name,
     )
     # Channel (column) id for this spider.
     self.channel_id = self._chnnel_id(spider_name)
     # Database model instance.
     self.model = SpiderModel()
     self.set = NewsSet(self.key)
Пример #5
0
 def _redis_key(self, name):
     """Return the Redis key of the set that stores article links.

     ``name`` is passed through to ``config.read_redis_key`` as its second
     positional argument.
     """
     redis_key = config.read_redis_key(REDIS_CONFIG_KEY, name)
     return redis_key