示例#1
0
 def get_account(self):
     """Fetch one account from the dispatch service and record it in the run stats.

     Requests the dispatch endpoint, decodes the account from the response,
     then bumps ``account_count`` on the ``run_counts`` document whose
     ``save_name`` equals ``save_name()``. That document is created when it
     does not exist yet.

     Returns:
         ``[account]`` (a one-element list) on success. ``None`` when the
         response carries no ``data`` payload, or when any step (HTTP
         request, JSON decoding, MongoDB access) raises; the error is
         logged, not propagated.
     """
     collection_name = 'run_counts'
     try:
         url = 'http://dispatch.yunrunyuqing.com:38082/ScheduleDispatch/dispatch?type=8'
         # Sends a 'Connection: close' header with the request.
         resp = requests.get(url,
                             timeout=self.timeout,
                             headers={'Connection': 'close'})
         data = json.loads(resp.text).get('data')
         if not data:
             # Nothing was dispatched: implicitly return None.
             return
         # 'data' is itself a JSON-encoded string, hence the second decode.
         account = json.loads(data).get('account')

         # Evaluate save_name() once so the lookup and the write share one key.
         query = {'save_name': save_name()}
         counts = mongo_conn()[collection_name]
         # Query for the one document we need instead of scanning the
         # whole collection and comparing save_name client-side.
         existing = counts.find_one(query)
         if existing is None:
             log.info('找不到save_name,需要插入')
             counts.insert({
                 'account_count': 1,
                 'article_count': 0,
                 'start': time_strftime(),
                 'end': None,
                 'save_name': query['save_name']
             })
             log.info("插入mongo成功")
         else:
             log.info(existing)
             # $inc also works when 'account_count' is absent from the
             # document, where the former `item.get(...) + 1` raised
             # TypeError and the fetched account was dropped.
             counts.update(
                 query, {
                     '$inc': {
                         'account_count': 1
                     },
                     '$set': {
                         'end': time_strftime()
                     }
                 },
                 upsert=True)
             log.info("更新mongo成功")
     except Exception as e:
         # NOTE(review): a failure in the stats bookkeeping also discards the
         # account that was already fetched; kept as-is for callers.
         log.info('调度获取account出错:{}'.format(e))
         return None
     return [account]
示例#2
0
 def count_articles(count_article):
     """Add ``count_article`` to the article total of the current run's stats.

     The total lives in the ``article_count`` field of the ``run_counts``
     document whose ``save_name`` equals ``save_name()``. The document is
     created on demand, so the count is recorded even when the collection
     already holds documents for other ``save_name`` values (previously
     that case was silently skipped).

     Args:
         count_article: Number of articles to add. ``0`` is a no-op.

     Returns:
         None. Exceptions are logged and swallowed, never propagated.
     """
     collection_name = 'run_counts'
     log.info('文章数量:{}'.format(count_article))
     try:
         if count_article == 0:
             return
         counts = mongo_conn()[collection_name]
         # Single upsert: $inc adds to an existing total (or starts from the
         # increment when the field/document is missing); $setOnInsert
         # supplies the remaining fields only when a new document is created.
         # NOTE(review): $inc fails on an explicit null 'article_count';
         # every insert visible here writes 0 - confirm no other writer
         # stores null.
         counts.update(
             {'save_name': save_name()},
             {
                 '$inc': {
                     'article_count': count_article
                 },
                 '$setOnInsert': {
                     'account_count': 1,
                     'start': time_strftime(),
                     'end': None
                 }
             },
             upsert=True)
         log.info('更新文章数量成功')
     except Exception as e:
         log.exception(e)
示例#3
0
 def urls_article(html):
     """Extract article URLs from ``html`` and add their number to the run stats.

     Every ``"content_url":"...","copyright_stat"`` fragment in ``html``
     yields one URL. The number of URLs is then added to ``article_count``
     on the ``run_counts`` document whose ``save_name`` equals
     ``save_name()`` (created on demand).

     Args:
         html: Text to scan for ``content_url`` entries.

     Returns:
         List of extracted URL strings (possibly empty). Failures while
         updating the stats are logged and do not affect the return value.
     """
     collection_name = 'run_counts'
     items = re.findall('"content_url":".*?,"copyright_stat"', html)
     urls = []
     for item in items:
         # Drop the 15-character '"content_url":"' prefix and the trailing
         # 18 characters (the closing quote plus ',"copyright_stat"').
         url_last = item[15:-18].replace('amp;', '')
         # Some entries already hold an absolute (permanent) link.
         if '_biz' in url_last:
             permanent = re.search('http://mp.weixin.qq.*?wechat_redirect',
                                   url_last)
             # Guard against a failed match: calling .group() on None used
             # to raise AttributeError and abort the whole extraction.
             if permanent is not None:
                 urls.append(permanent.group())
                 continue
         url = 'https://mp.weixin.qq.com' + url_last
         # Some entries have the real link nested inside; match it again.
         if 'content_url' in url:
             nested = re.search('"content_url":".*?wechat_redirect', url)
             if nested is not None:
                 url = nested.group()[15:].replace('amp;', '')
         urls.append(url)
     # Record the number of articles found.
     count_article = len(urls)
     log.info('文章数量:{}'.format(count_article))
     if count_article == 0:
         return urls
     try:
         counts = mongo_conn()[collection_name]
         # Single upsert so the count is recorded even when the collection
         # holds documents for other save_name values only (previously that
         # case was silently skipped). $setOnInsert supplies the remaining
         # fields only when a new document is created.
         counts.update(
             {'save_name': save_name()},
             {
                 '$inc': {
                     'article_count': count_article
                 },
                 '$setOnInsert': {
                     'account_count': 1,
                     'start': time_strftime(),
                     'end': None
                 }
             },
             upsert=True)
         log.info('更新文章数量成功')
     except Exception as e:
         log.exception(e)
     return urls
示例#4
0
 def save_to_mongo(entity):
     """Stamp ``entity`` with the current time string and store it.

     Sets ``entity['collection']`` to ``time_strftime()`` (mutating the
     caller's object) and inserts it into ``daily_collection``.
     """
     daily = mongo_conn()['daily_collection']
     stamp = time_strftime()
     entity['collection'] = stamp
     daily.insert(entity)
示例#5
0
 def get_account():
     """Fetch one account from the dispatch service and record it in the run stats.

     Requests the dispatch endpoint (30 second timeout), decodes the
     account from the response, then bumps ``account_count`` on the
     ``run_counts`` document whose ``save_name`` equals ``save_name()``.
     That document is created when it does not exist yet.

     Returns:
         ``[account]`` (a one-element list) on success. ``None`` when the
         response carries no ``data`` payload, or when any step (HTTP
         request, JSON decoding, MongoDB access) raises; the error is
         logged, not propagated.
     """
     # Account statistics are kept in this collection.
     collection_name = 'run_counts'
     try:
         url = 'http://dispatch.yunrunyuqing.com:38082/ScheduleDispatch/dispatch?type=8'
         resp = requests.get(url, timeout=30)
         data = json.loads(resp.text).get('data')
         if not data:
             # Nothing was dispatched: implicitly return None.
             return
         # 'data' is itself a JSON-encoded string, hence the second decode.
         account = json.loads(data).get('account')

         # Evaluate save_name() once so the lookup and the write share one key.
         query = {'save_name': save_name()}
         counts = mongo_conn()[collection_name]
         # Query for the one document we need instead of scanning the
         # whole collection and comparing save_name client-side.
         existing = counts.find_one(query)
         if existing is None:
             log.info('找不到save_name,需要插入')
             counts.insert({
                 'account_count': 1,
                 'article_count': 0,
                 'start': time_strftime(),
                 'end': None,
                 'save_name': query['save_name']
             })
             log.info("插入mongo成功")
         else:
             log.info(existing)
             # $inc also works when 'account_count' is absent from the
             # document, where the former `item.get(...) + 1` raised
             # TypeError and the fetched account was dropped.
             counts.update(
                 query, {
                     '$inc': {
                         'account_count': 1
                     },
                     '$set': {
                         'end': time_strftime()
                     }
                 },
                 upsert=True)
             log.info("更新mongo成功")
     except Exception as e:
         # NOTE(review): a failure in the stats bookkeeping also discards the
         # account that was already fetched; kept as-is for callers.
         log.info('调度获取account出错:{}'.format(e))
         return None
     return [account]