Exemplo n.º 1
0
 def add_omitted(self):
     """Merge pending keywords from ``self.mongo_word`` into the jieba dictionary.

     Does nothing while the collection holds fewer than 100 documents.
     Otherwise every document's ``aid`` value (or, when ``aid`` is absent
     or already known, its ``mid`` value) that is not yet in
     ``./biliob_analyzer/dict.txt`` is appended to that file, the
     collection is emptied, the dictionary is reloaded into jieba and
     ``refresh_all_author`` / ``refresh_all_video`` are called.

     Progress is reported through a ``ProgressTask`` stored in
     ``db['tracer']``.
     """
     dict_path = './biliob_analyzer/dict.txt'

     # Count once and reuse the value for both the threshold and the task.
     total_value = self.mongo_word.count_documents({})
     if total_value < 100:
         return

     t = ProgressTask("更新查询关键词字典",
                      total_value=total_value,
                      collection=db['tracer'])

     # Context manager guarantees the read handle is released.
     with open(dict_path, 'r', encoding='utf8') as dict_file:
         d = dict_file.read().split('\n')

     # Set mirror of ``d`` gives O(1) membership tests while ``d`` keeps
     # the original ordering for the rewrite below.
     # NOTE(review): assumes the stored values are hashable strings; the
     # write step below already requires them to be ``str``.
     known = set(d)
     for each in self.mongo_word.find():
         if 'aid' in each and each['aid'] not in known:
             d.append(each['aid'])
             known.add(each['aid'])
         elif 'mid' in each and each['mid'] not in known:
             d.append(each['mid'])
             known.add(each['mid'])
         t.current_value += 1
     t.finished = True

     # Skip empty entries: the trailing '' produced by split('\n') used to
     # be written back as an extra blank line on every run.
     with open(dict_path, 'w', encoding='utf8', newline='') as o:
         o.writelines(word + '\n' for word in d if word != '')

     self.mongo_word.delete_many({})
     jieba.load_userdict(dict_path)
     self.refresh_all_author()
     self.refresh_all_video()
Exemplo n.º 2
0
 def __judge_author(self, author_filter):
     """Run ``self.__judge`` on every matching author that has recent data.

     Args:
         author_filter: MongoDB query document applied to ``author_coll``.

     Authors are selected with ``author_filter``; their ``data`` array is
     trimmed to entries whose ``datetime`` is newer than 32 days ago, and
     authors left with an empty ``data`` array are skipped. Progress is
     reported through a ``ProgressTask`` stored in ``db['tracer']``.
     """
     # Cursor.count() is deprecated (removed in PyMongo 4); ask the
     # collection directly instead of building a throw-away cursor.
     # NOTE(review): this counts every author matching the filter, while
     # the loop below only visits those with recent data, so the task's
     # current_value may never reach total_value.
     count = author_coll.count_documents(author_filter)

     recent_cutoff = datetime.datetime.now() - datetime.timedelta(32)
     pipeline = [{
         '$match': author_filter
     }, {
         '$project': {
             "mid": 1,
             "face": 1,
             "name": 1,
             # Keep only the data points newer than the cutoff.
             "data": {
                 "$filter": {
                     "input": "$data",
                     "as": "data",
                     "cond": {
                         "$gt": ["$$data.datetime", recent_cutoff]
                     }
                 }
             }
         }
     }, {
         # Drop authors whose filtered data array is empty.
         "$match": {
             "data.0": {
                 "$exists": True
             }
         }
     }]
     a = author_coll.aggregate(pipeline)

     print("待爬取作者数量:{}".format(count))
     t = ProgressTask("粉丝数变动探测", total_value=count, collection=db['tracer'])
     for each_author in a:
         print(each_author['mid'])
         t.current_value += 1
         self.__judge(each_author)
     t.finished = True