Exemplo n.º 1
0
def get_citys():
    '''
        Fetch the city page.

        Requests ``__city_url`` through ``rq.get_cookie``. If the request
        raises, the error is printed and the function returns ``None``.

        NOTE(review): the fetched page is neither used nor returned by the
        code visible here -- confirm whether further processing is missing.
    '''
    try:
        html = rq.get_cookie(__city_url)
    except Exception as e:
        # ``except ... as`` and single-argument ``print(...)`` behave the
        # same on Python 2.6+ and Python 3; the comma/statement forms are
        # syntax errors on Python 3.
        print(e)
        return
Exemplo n.º 2
0
def getArticle(offset):
    '''
        Fetch the article list page by page, starting at ``offset``.

        Every article found (the main article of a message and each of its
        sub-articles) is appended to the module-level ``articleDatas`` list
        as a dict with the keys 'title', 'url' and 'time' (``YYYY-MM-DD``,
        formatted from the message timestamp in local time).

        Paging stops, and the function returns ``None``, as soon as:
        * the response 'errmsg' is not 'ok';
        * 'next_offset' equals the offset just requested (no more pages);
        * a message published before the year 2017 is encountered.

        * 'offset' offset of the first page to request
    '''
    # Walk the pages in a loop instead of recursing once per page, so a long
    # article history cannot exhaust the interpreter's recursion limit.
    while True:
        print(u'页码:%s' % offset)
        url = 'http://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=' + biz + '&f=json&offset=' + str(
            offset) + '&count=10&f=json'
        dataStr = rq.get_cookie(url, cookie_file_name="wx")
        dataJson = json.loads(dataStr)
        if dataJson["errmsg"] != 'ok':
            print(u'获取数据返回:%s' % dataJson["errmsg"])
            return
        next_offset = dataJson["next_offset"]
        # An unchanged offset means the server has no further page.
        if offset == next_offset:
            print(u'已经是没有翻页数据')
            return
        # 'general_msg_list' is itself a JSON document encoded as a string.
        general_msg_list_json = json.loads(dataJson["general_msg_list"])
        article_list = general_msg_list_json["list"]
        for item in article_list:
            # Publication time of the message.
            publish_time = time.localtime(item['comm_msg_info']['datetime'])
            # Stop paging altogether once a message older than 2017 shows up.
            if publish_time.tm_year < 2017:
                print(u'超过限定日期,不继续捕获分页数据')
                return
            # ``in`` replaces dict.has_key(), which no longer exists on
            # Python 3.
            if 'app_msg_ext_info' not in item:
                continue
            ext_info = item['app_msg_ext_info']
            # The same date applies to the main article and its sub-articles.
            publish_date = time.strftime("%Y-%m-%d", publish_time)
            # Main article.
            articleDatas.append({
                'title': ext_info['title'],
                'url': ext_info['content_url'],
                'time': publish_date
            })
            # Sub-articles; the list may be None, and a missing key is
            # treated the same way instead of raising KeyError.
            for multi_item in ext_info.get('multi_app_msg_item_list') or []:
                articleDatas.append({
                    'title': multi_item['title'],
                    'url': multi_item['content_url'],
                    'time': publish_date
                })
        # Pause between page requests.
        hp.sleep(1, 3)
        offset = next_offset
Exemplo n.º 3
0
def getCommetDatas(url):
    '''
        Fetch the comment JSON data.
        * 'url' article address

        The article page is requested through ``rq.get_cookie`` using the
        "wx" cookie file and coerced to a unicode string. If either step
        raises, the error is printed and the function returns ``None``.

        NOTE(review): nothing is returned after a successful fetch in the
        code visible here -- confirm whether further processing is missing.
    '''
    try:
        htmlStr = rq.get_cookie(url, cookie_file_name="wx")
        htmlStr = u'%s' % htmlStr
    except Exception as ex:
        # ``except ... as`` and single-argument ``print(...)`` behave the
        # same on Python 2.6+ and Python 3; the comma/statement forms are
        # syntax errors on Python 3.
        print(ex)
        return
Exemplo n.º 4
0
    comment_id = None
    if re_comment_id:
        comment_id = re_comment_id.groups()[0]
    print u"文章ID=%s,biz=%s,留言板ID=%s" % (mid, biz, comment_id)
    if comment_id is None:
        print u'comment_id不存在'
        return
    # 请求留言接口
    api_url = 'http://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz=%s&appmsgid=%s&idx=1&comment_id=%s&offset=0&limit=100' % (
        biz, mid, comment_id)
    commonDataStr = None
    try:
        commonDataStr = rq.get_cookie(
            api_url,
            cookie_file_name="wx",
            headers={
                "User-agent":
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"
            })
    except Exception, ex:
        print ex
        return
    # 解析留言数据为JSON格式

    if commonDataStr.find(u'请在微信客户端打开链接') > -1:
        print u'请在微信客户端打开链接'
        return
    commonDataJson = json.loads(commonDataStr)
    return commonDataJson