def get_citys():
    '''
    Fetch the city page.

    Requests ``__city_url`` through ``rq.get_cookie``.  When the request
    raises, the exception is printed and the function returns early.
    '''
    try:
        # NOTE(review): the response is not consumed by the code visible
        # here -- confirm whether further processing is expected.
        city_page = rq.get_cookie(__city_url)
    except Exception as err:
        print(err)
        return
def getArticle(offset):
    '''
    Collect the account's article list, page by page.

    Starting at ``offset``, requests ten messages per page from the
    ``profile_ext`` endpoint and appends one ``{'title', 'url', 'time'}``
    dict to the module-level ``articleDatas`` list for every main article
    and every sub-article found.

    Paging stops when:
      * the API answers with an ``errmsg`` other than ``'ok'``;
      * ``next_offset`` equals the offset just requested (no more pages);
      * a message published before 2017 is reached.

    * 'offset' paging offset of the first page to request

    Returns None; results are delivered through ``articleDatas``.

    Pages are walked in a loop instead of by self-recursion so that a long
    message history cannot exhaust the interpreter's recursion limit.
    '''
    while True:
        print(u'页码:%s' % offset)
        url = ('http://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz='
               + biz + '&f=json&offset=' + str(offset) + '&count=10&f=json')
        data = json.loads(rq.get_cookie(url, cookie_file_name="wx"))

        if data["errmsg"] != 'ok':
            print(u'获取数据返回:%s' % data["errmsg"])
            return

        next_offset = data["next_offset"]
        if offset == next_offset:
            print(u'已经是没有翻页数据')
            return

        # "general_msg_list" is itself a JSON document carried as a string,
        # so it needs a second decode.
        article_list = json.loads(data["general_msg_list"])["list"]

        for item in article_list:
            # Publication time of the message.
            publish_time = time.localtime(item['comm_msg_info']['datetime'])

            # Messages older than the cut-off year end the whole crawl.
            if publish_time.tm_year < 2017:
                print(u'超过限定日期,不继续捕获分页数据')
                return

            # Messages without article info are skipped.
            if 'app_msg_ext_info' not in item:
                continue

            ext_info = item['app_msg_ext_info']
            publish_day = time.strftime("%Y-%m-%d", publish_time)

            # Main article.
            articleDatas.append({
                'title': ext_info['title'],
                'url': ext_info['content_url'],
                'time': publish_day
            })

            # Sub-articles; the list may be None or missing altogether.
            for multi_item in ext_info.get('multi_app_msg_item_list') or []:
                articleDatas.append({
                    'title': multi_item['title'],
                    'url': multi_item['content_url'],
                    'time': publish_day
                })

        # Delay between page requests.
        hp.sleep(1, 3)
        offset = next_offset
def getCommetDatas(url):
    '''
    Fetch the comment (message board) JSON data for an article.

    * 'url' article URL

    Returns the decoded JSON from the comment API.  Returns None when the
    page request or the API request raises, when no comment id is
    available, or when the API response contains the
    "open the link in the WeChat client" notice.
    '''
    try:
        htmlStr = rq.get_cookie(url, cookie_file_name="wx")
        # Coerce the response to a unicode string.
        htmlStr = u'%s' % htmlStr
    except Exception, ex:
        print ex
        return
    # NOTE(review): `mid` and `re_comment_id` are not assigned anywhere in
    # the code visible here; presumably they are extracted from htmlStr
    # (re_comment_id looks like a regex match object) -- confirm against
    # the full file.
    comment_id = None
    if re_comment_id:
        comment_id = re_comment_id.groups()[0]
    print u"文章ID=%s,biz=%s,留言板ID=%s" % (mid, biz, comment_id)
    if comment_id is None:
        print u'comment_id不存在'
        return
    # Request the comment API (first 100 comments, offset 0).
    api_url = 'http://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz=%s&appmsgid=%s&idx=1&comment_id=%s&offset=0&limit=100' % (
        biz, mid, comment_id)
    commonDataStr = None
    try:
        commonDataStr = rq.get_cookie(
            api_url,
            cookie_file_name="wx",
            headers={
                "User-agent":
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"
            })
    except Exception, ex:
        print ex
        return
    # Parse the comment data as JSON, unless the response is the
    # "open in WeChat client" notice instead of data.
    if commonDataStr.find(u'请在微信客户端打开链接') > -1:
        print u'请在微信客户端打开链接'
        return
    commonDataJson = json.loads(commonDataStr)
    return commonDataJson