def get_account(self):
    """Fetch the next account from the dispatch service and record the run.

    After a successful fetch, the ``run_counts`` record whose ``save_name``
    equals the current ``save_name()`` gets its ``account_count`` incremented
    and its ``end`` timestamp refreshed; a new record is inserted when none
    exists yet.

    Returns:
        ``[account]`` on success. ``None`` when the dispatch response carries
        no ``data`` payload, or when any step (HTTP request, JSON decoding,
        MongoDB bookkeeping) raises -- the error is logged, not propagated.
    """
    collection_name = 'run_counts'
    try:
        url = 'http://dispatch.yunrunyuqing.com:38082/ScheduleDispatch/dispatch?type=8'
        resp = requests.get(
            url, timeout=self.timeout, headers={'Connection': 'close'})
        data = json.loads(resp.text).get('data')
        if not data:
            # Nothing was dispatched; the caller receives None.
            return None
        # ``data`` is itself a JSON-encoded string, hence the second decode.
        account = json.loads(data).get('account')

        coll = mongo_conn()[collection_name]
        # Evaluate once so the lookup and the write target the same record.
        current_name = save_name()
        is_empty = coll.find({}).count() == 0
        # Query by key instead of scanning every document in Python.
        record = None if is_empty else coll.find_one(
            {'save_name': current_name})

        if record is not None:
            # A record without a usable account_count counts as zero rather
            # than raising TypeError and discarding the fetched account.
            count = (record.get('account_count') or 0) + 1
            log.info(record)
            coll.update(
                {'save_name': current_name},
                {'$set': {'account_count': count, 'end': time_strftime()}},
                upsert=True)
            log.info("更新mongo成功")
        else:
            if not is_empty:
                log.info('找不到save_name,需要插入')
            coll.insert({
                'account_count': 1,
                'article_count': 0,
                'start': time_strftime(),
                'end': None,
                'save_name': current_name,
            })
            log.info("插入mongo成功")
    except Exception as e:
        log.info('调度获取account出错:{}'.format(e))
        return None
    return [account]
def count_articles(count_article):
    """Add ``count_article`` to the article counter of the current run record.

    The record is the ``run_counts`` document whose ``save_name`` equals the
    current ``save_name()``. If no such document exists yet (empty collection
    or first call for this ``save_name``), a fresh one is inserted first so
    the count is never silently dropped.

    Args:
        count_article: Number of articles to add; ``0`` is a no-op.

    Returns:
        None. Errors are logged via ``log.exception`` and not propagated.
    """
    collection_name = 'run_counts'
    log.info('文章数量:{}'.format(count_article))
    try:
        if count_article == 0:
            return
        coll = mongo_conn()[collection_name]
        # Evaluate once so the lookup and the write target the same record.
        current_name = save_name()
        # Query by key instead of scanning every document in Python.
        record = coll.find_one({'save_name': current_name})
        if record is None:
            coll.insert({
                'save_name': current_name,
                'account_count': 1,
                'article_count': 0,
                'start': time_strftime(),
                'end': None,
            })
            log.info('插入文章数成功')
            previous = 0
        else:
            # A missing or null article_count is treated as zero.
            previous = record.get('article_count') or 0
        coll.update(
            {'save_name': current_name},
            {'$set': {'article_count': count_article + previous}},
            upsert=True)
        log.info('更新文章数量成功')
    except Exception as e:
        log.exception(e)
def urls_article(html):
    """Extract article URLs from ``html`` and add their count to the run record.

    Each ``"content_url":"...","copyright_stat"`` fragment in ``html`` yields
    at most one URL:

    * fragments containing ``_biz`` are treated as permanent links and the
      embedded absolute ``http://mp.weixin.qq...wechat_redirect`` URL is used;
    * fragments that swallowed another ``"content_url"`` key are re-matched
      to pull out the nested link;
    * anything else is treated as a path and prefixed with
      ``https://mp.weixin.qq.com``.

    A fragment whose expected pattern cannot be found is logged and skipped
    instead of aborting extraction for the whole page.

    Args:
        html: Page source to scan.

    Returns:
        List of extracted URL strings (possibly empty). Failures while
        recording statistics are logged and do not affect the return value.
    """
    collection_name = 'run_counts'
    items = re.findall('"content_url":".*?,"copyright_stat"', html)
    urls = []
    for item in items:
        # Strip the leading '"content_url":"' (15 chars) and the trailing
        # '","copyright_stat"' (18 chars); '&amp;' becomes '&'.
        url_last = item[15:-18].replace('amp;', '')
        url = 'https://mp.weixin.qq.com' + url_last
        # Some entries are permanent (absolute) links.
        if '_biz' in url_last:
            permanent = re.search(
                'http://mp.weixin.qq.*?wechat_redirect', url_last)
            if permanent is None:
                log.info('无法解析文章链接,跳过:{}'.format(url_last))
                continue
            urls.append(permanent.group())
            continue
        # Some matches contain another content_url; match again to get it.
        if 'content_url' in url:
            nested = re.search('"content_url":".*?wechat_redirect', url)
            if nested is None:
                log.info('无法解析文章链接,跳过:{}'.format(url_last))
                continue
            url = nested.group()[15:].replace('amp;', '')
        urls.append(url)

    # Record the number of articles found.
    count_article = len(urls)
    log.info('文章数量:{}'.format(count_article))
    try:
        if count_article == 0:
            return urls
        coll = mongo_conn()[collection_name]
        # Evaluate once so the lookup and the write target the same record.
        current_name = save_name()
        # Query by key instead of scanning every document in Python.
        record = coll.find_one({'save_name': current_name})
        if record is None:
            # No record for this save_name yet: create it so the count is
            # not silently dropped.
            coll.insert({
                'save_name': current_name,
                'account_count': 1,
                'article_count': 0,
                'start': time_strftime(),
                'end': None,
            })
            log.info('插入文章数成功')
            previous = 0
        else:
            # A missing or null article_count is treated as zero.
            previous = record.get('article_count') or 0
        coll.update(
            {'save_name': current_name},
            {'$set': {'article_count': count_article + previous}},
            upsert=True)
        log.info('更新文章数量成功')
    except Exception as e:
        log.exception(e)
    return urls
def save_to_mongo(entity):
    """Insert ``entity`` into the ``daily_collection`` collection.

    Before the insert, ``entity['collection']`` is overwritten in place with
    the value returned by ``time_strftime()``, so the caller's object is
    mutated.

    Args:
        entity: Mutable mapping to store.
    """
    database = mongo_conn()
    stamp = time_strftime()
    entity['collection'] = stamp
    daily = database['daily_collection']
    daily.insert(entity)
def get_account():
    """Fetch the next account from the dispatch service and record the run.

    After a successful fetch, the ``run_counts`` record whose ``save_name``
    equals the current ``save_name()`` gets its ``account_count`` incremented
    and its ``end`` timestamp refreshed; a new record is inserted when none
    exists yet.

    Returns:
        ``[account]`` on success. ``None`` when the dispatch response carries
        no ``data`` payload, or when any step (HTTP request, JSON decoding,
        MongoDB bookkeeping) raises -- the error is logged, not propagated.
    """
    # Legacy endpoints from earlier revisions, kept for reference only:
    #   http://124.239.144.181:7114/Schedule/dispatch?type=8   (old version)
    #   http://183.131.241.60:38011/nextaccount?label=5        (priority
    #       collection API; returned a list of accounts)

    # Account statistics are kept in this collection.
    collection_name = 'run_counts'
    try:
        url = 'http://dispatch.yunrunyuqing.com:38082/ScheduleDispatch/dispatch?type=8'
        resp = requests.get(url, timeout=30)
        data = json.loads(resp.text).get('data')
        if not data:
            # Nothing was dispatched; the caller receives None.
            return None
        # ``data`` is itself a JSON-encoded string, hence the second decode.
        account = json.loads(data).get('account')

        coll = mongo_conn()[collection_name]
        # Evaluate once so the lookup and the write target the same record.
        current_name = save_name()
        is_empty = coll.find({}).count() == 0
        # Query by key instead of scanning every document in Python.
        record = None if is_empty else coll.find_one(
            {'save_name': current_name})

        if record is not None:
            # A record without a usable account_count counts as zero rather
            # than raising TypeError and discarding the fetched account.
            count = (record.get('account_count') or 0) + 1
            log.info(record)
            coll.update(
                {'save_name': current_name},
                {'$set': {'account_count': count, 'end': time_strftime()}},
                upsert=True)
            log.info("更新mongo成功")
        else:
            if not is_empty:
                log.info('找不到save_name,需要插入')
            coll.insert({
                'account_count': 1,
                'article_count': 0,
                'start': time_strftime(),
                'end': None,
                'save_name': current_name,
            })
            log.info("插入mongo成功")
    except Exception as e:
        log.info('调度获取account出错:{}'.format(e))
        return None
    return [account]