Пример #1
0
 def parse(self, res):
     """Extract the ranking boards from a response and schedule them for saving.

     Every item returned by ``RankingItem.get_items`` becomes one dict
     holding its title, its "more" value, a numbered list of at most ten
     book names and the local time at which the dict was built.  The
     collected boards are wrapped in a result dict and passed to
     ``self.save`` through ``async_callback``.

     :param res: response object; ``res.html`` and ``res.url`` are read.
     """
     rankings = []
     for board in RankingItem.get_items(html=res.html):
         # Only the ten highest-ranked books of each board are kept.
         top_books = []
         for position, node in enumerate(board.book_list[:10], start=1):
             fields = NameItem.get_item(html_etree=node)
             top_books.append({
                 'num': position,
                 # Fall back to 'other_name' when 'top_name' is missing/empty.
                 'name': fields.get('top_name') or fields.get('other_name'),
             })
         rankings.append({
             'title': board.ranking_title,
             'more': board.more,
             'book_list': top_books,
             'updated_at': time.strftime("%Y-%m-%d %X", time.localtime()),
         })

     # The ranking type is looked up from whatever follows the last '='
     # in the request URL.
     type_key = res.url.split('=')[-1]
     res_dic = {
         'data': rankings,
         'target_url': res.url,
         'type': self.qidian_type.get(type_key),
         'spider': "qidian",
     }
     async_callback(self.save, res_dic=res_dic)
Пример #2
0
 def parse(self, res):
     """Enrich the scraped novel info, upsert it and schedule it for saving.

     The item returned by ``HYNovelInfoItem.get_item`` is tagged with the
     source URL, the spider name and the current local time, printed,
     upserted into ``self.all_novels_info_col`` (matched on novel name and
     spider) and finally passed to ``self.save`` through
     ``async_callback``.

     :param res: response object; ``res.html`` and ``res.url`` are read.
     """
     novel = HYNovelInfoItem.get_item(html=res.html)
     novel['target_url'] = res.url
     novel['spider'] = 'heiyan'
     novel['updated_at'] = time.strftime("%Y-%m-%d %X", time.localtime())

     novel_name = novel['novel_name']
     print('获取 {} 小说信息成功'.format(novel_name))
     print(novel)

     # One document per (novel_name, spider) pair; created when absent.
     # NOTE(review): if this is a PyMongo collection, ``update`` is the
     # legacy API -- confirm before upgrading the driver.
     selector = {'novel_name': novel_name, 'spider': 'heiyan'}
     self.all_novels_info_col.update(selector, novel, upsert=True)
     async_callback(self.save, res_dic=novel)
Пример #3
0
 def parse(self, res):
     """Convert the response payload into ranking rows and schedule a save.

     Each element of ``res.html`` is reduced to its ``bookName``,
     ``bookShortCateName`` and ``orderNo`` values (empty string when a key
     is absent) plus the current local time.  When the payload is falsy
     the result dict stays empty, but ``self.save`` is still scheduled
     with it.

     :param res: response object; ``res.html`` is iterated and its
         elements are queried with ``.get``; ``res.url`` is read.
     """
     payload = res.html
     res_dic = {}
     if payload:
         res_dic['data'] = [
             {
                 'name': entry.get('bookName', ''),
                 'type': entry.get('bookShortCateName', ''),
                 'num': entry.get('orderNo', ''),
                 'updated_at': time.strftime("%Y-%m-%d %X", time.localtime()),
             }
             for entry in payload
         ]
         res_dic['target_url'] = res.url
         res_dic['type'] = "全部类别"
         res_dic['spider'] = "zh_bd_novels"
     async_callback(self.save, res_dic=res_dic)
Пример #4
0
 def parse_item(self, res):
     """Store every novel on the page that is not yet in the collection.

     For each item returned by ``QidianNovelsItem.get_items`` a record is
     built; if ``self.all_novels_col`` has no document with the same
     novel name and author, the record is inserted, passed to
     ``self.save`` through ``async_callback`` and a success line is
     printed.  Items that are already present are skipped silently.

     :param res: response object; ``res.html`` is read.
     """
     for novel in QidianNovelsItem.get_items(html=res.html):
         record = {
             'novel_url': novel.novel_url,
             'novel_name': novel.novel_name,
             'novel_author': novel.novel_author,
             'novel_author_home_url': novel.novel_author_home_url,
             'spider': 'qidian',
             'updated_at': time.strftime("%Y-%m-%d %X", time.localtime()),
         }

         # A novel is identified by its (name, author) pair.
         identity = {
             "novel_name": novel.novel_name,
             'novel_author': novel.novel_author,
         }
         if self.all_novels_col.find_one(identity) is not None:
             continue

         self.all_novels_col.insert_one(record)
         async_callback(self.save, res_dic=record)
         print(novel.novel_name + ' - 抓取成功')