                         callback=self.articleCommentNum,
                         meta={"item": article_item})

    # Fetch the comment count for the article just parsed
    def articleCommentNum(self, response):
        article_item = response.meta['item']
        response_json = json.loads(response.text)
        article_item['comment_num'] = \
            response_json['result']['objcounts'][0]['replycountall']
        yield article_item

    # Build a breadcrumb string for the page's current location
    def getLocation(self, originList):
        locationList = [item.strip() for item in originList
                        if len(item.strip()) > 0]
        # '->'.join also fixes the original concatenation, which dropped the
        # separator between the first and second segments
        return '->'.join(locationList)


if __name__ == "__main__":
    import subprocess

    # scrapy.cmdline.execute() calls sys.exit() once the crawl finishes, so
    # the export below would never run; launch the crawl in a child process
    # instead and export after it returns.
    subprocess.call(['scrapy', 'crawl', 'news_2_article'])

    # Dump the crawled articles from the database into an Excel workbook
    dbUtils = DbUtils('news_2_article')
    queryItems = dbUtils.select(None)
    excelUtils = ExcelUtils()
    excelUtils.generateExcel('news', 'news_2_article', list(queryItems))
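# --- Illustration (not from the original code) ------------------------------
# A minimal sketch of how the truncated request above is typically issued:
# the article callback fills an item, then chains a request to the comment-
# count API, carrying the item in `meta` so articleCommentNum can complete
# and yield it. `parseArticle`, the endpoint URL, and the 'article_id' field
# are hypothetical, not taken from the source.
import scrapy

class ArticleSpiderSketch(scrapy.Spider):
    name = 'news_2_article_sketch'  # hypothetical name

    def parseArticle(self, response):
        article_item = {
            'article_id': response.url.rstrip('/').rsplit('/', 1)[-1],  # assumed
            'title': response.css('h1::text').get(),
        }
        comment_api = ('https://api.example.com/comment/count?objid=%s'
                       % article_item['article_id'])  # assumed endpoint
        yield scrapy.Request(comment_api,
                             callback=self.articleCommentNum,
                             meta={"item": article_item})

    def articleCommentNum(self, response):
        ...  # same shape as the real callback above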
    @classmethod
    def from_crawler(cls, crawler):
        # Load the proxy rows once, at construction time, from the ip_pool table
        dbUtils = DbUtils('ip_pool')
        queryItems = dbUtils.select(None)
        return cls(ip_pool=list(queryItems))
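# --- Illustration (not from the original code) ------------------------------
# from_crawler above hands the ip_pool rows to the component's constructor; a
# sketch assuming the component is a downloader middleware that rotates
# proxies per request. The 'ip' and 'port' column names are assumptions about
# the ip_pool table's schema.
import random

class RandomProxyMiddlewareSketch:
    # paired with a from_crawler like the one shown above
    def __init__(self, ip_pool):
        self.ip_pool = ip_pool

    def process_request(self, request, spider):
        # Route each outgoing request through a randomly chosen proxy
        proxy = random.choice(self.ip_pool)
        request.meta['proxy'] = 'http://%s:%s' % (proxy['ip'], proxy['port'])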
    def __init__(self):
        PathUtils.initDir()  # initialize the output directories
        dbUtils = DbUtils('config_5_spec')  # open the database
        self.queryItems = dbUtils.select(None)  # query the data
    def __init__(self):
        # Database access: preload the brand config table
        dbUtils = DbUtils('config_1_brand')
        self.queryItems = dbUtils.select(None)
    def __init__(self):
        # Database access: preload the series config table
        dbUtils = DbUtils('config_3_series')
        self.queryItems = dbUtils.select(None)
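# --- Illustration (not from the original code) ------------------------------
# Each __init__ above preloads one config table into self.queryItems; a sketch
# of how such rows are typically consumed in start_requests, one request per
# row. The 'series_url' field and parse_series callback are hypothetical, and
# DbUtils is the project helper used in the fragments above.
import scrapy

class SeriesSpiderSketch(scrapy.Spider):
    name = 'config_3_series_sketch'  # hypothetical name

    def __init__(self):
        dbUtils = DbUtils('config_3_series')
        self.queryItems = dbUtils.select(None)

    def start_requests(self):
        # One request per preloaded series row
        for row in self.queryItems:
            yield scrapy.Request(row['series_url'],  # assumed column
                                 callback=self.parse_series)

    def parse_series(self, response):
        ...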