def get_gzh_artilce_by_hot(self, hot_index, page=1, unlock_callback=None, identify_image_callback=None):
    """Fetch the hot articles listed on the home page.

    Parameters
    ----------
    hot_index : WechatSogouConst.hot_index
        Category of home-page hot articles (a constant):
        WechatSogouConst.hot_index.xxx
    page : int
        Page number; must be a positive integer.
    unlock_callback : callable, optional
        Forwarded unchanged to the unlock-aware request helper.
    identify_image_callback : callable, optional
        Forwarded unchanged to the unlock-aware request helper.

    Returns
    -------
    list[dict]
        {
            'gzh': {
                'headimage': str,  # official account avatar
                'wechat_name': str,  # official account name
            },
            'article': {
                'url': str,  # temporary article link
                'title': str,  # article title
                'abstract': str,  # article abstract
                'time': int,  # push time, 10-digit timestamp
                'open_id': str,  # open id
                'main_img': str  # cover image
            }
        }

    Raises
    ------
    AssertionError
        If ``hot_index`` is not an attribute name of
        ``WechatSogouConst.hot_index`` or ``page`` is not a positive int.
    """
    assert hasattr(WechatSogouConst.hot_index, hot_index)
    assert isinstance(page, int)
    assert page > 0

    def redirected_to_antispider(response):
        # The request counts as blocked when the final URL mentions 'antispider'.
        return 'antispider' in response.url

    hot_url = WechatSogouRequest.gen_hot_url(hot_index, page)
    response = self.__get_by_unlock(
        hot_url,
        is_need_unlock=redirected_to_antispider,
        unlock_platform=self.__unlock_sogou,
        unlock_callback=unlock_callback,
        identify_image_callback=identify_image_callback,
    )
    # Force UTF-8 before reading the decoded text.
    response.encoding = 'utf-8'
    page_text = response.text
    return WechatSogouStructuring.get_gzh_artilce_by_hot(page_text)
def get_gzh_artilce_by_hot(self, hot_index, page=1, unlock_callback=None, identify_image_callback=None):
    """Return the structured list of home-page hot articles.

    Parameters
    ----------
    hot_index : WechatSogouConst.hot_index
        Category of home-page hot articles (a constant):
        WechatSogouConst.hot_index.xxx
    page : int
        Page number; must be a positive integer.
    unlock_callback : callable, optional
        Passed through as-is to the unlock-aware request helper.
    identify_image_callback : callable, optional
        Passed through as-is to the unlock-aware request helper.

    Returns
    -------
    list[dict]
        {
            'gzh': {
                'headimage': str,  # official account avatar
                'wechat_name': str,  # official account name
            },
            'article': {
                'url': str,  # temporary article link
                'title': str,  # article title
                'abstract': str,  # article abstract
                'time': int,  # push time, 10-digit timestamp
                'open_id': str,  # open id
                'main_img': str  # cover image
            }
        }

    Raises
    ------
    AssertionError
        If ``hot_index`` is not an attribute name of
        ``WechatSogouConst.hot_index`` or ``page`` is not a positive int.
    """
    assert hasattr(WechatSogouConst.hot_index, hot_index)
    assert isinstance(page, int) and page > 0

    target = WechatSogouRequest.gen_hot_url(hot_index, page)

    # Keyword arguments for the unlock-aware fetch; a response whose URL
    # contains 'antispider' is treated as needing an unlock.
    unlock_options = {
        'is_need_unlock': lambda fetched: 'antispider' in fetched.url,
        'unlock_platform': self.__unlock_sogou,
        'unlock_callback': unlock_callback,
        'identify_image_callback': identify_image_callback,
    }
    fetched_page = self.__get_by_unlock(target, **unlock_options)

    # Decode the body as UTF-8 before handing the text to the parser.
    fetched_page.encoding = 'utf-8'
    return WechatSogouStructuring.get_gzh_artilce_by_hot(fetched_page.text)
def test_get_gzh_artilce_by_hot(self):
    """Parse the saved hot-article page and compare it with known values.

    Reads the 'wapindex-wap-0612-wap_8-0.html' fixture, runs it through
    ``WechatSogouStructuring.get_gzh_artilce_by_hot`` and checks the shape
    of every entry plus the exact account names, avatars, titles and
    timestamps, in order.
    """
    expected_wechat_names = [
        '全球汽车精选',
        '车早茶',
        '吴佩频道',
        '驾考宝典',
        '腾讯汽车',
        '新车评',
        '非常好车',
        '汽车情报所',
        '一猫汽车资讯',
        '资深科技控',
        '郎club',
        '科技日报',
        '汽车使用宝典',
        '名车报',
        '科普中国网',
    ]
    expected_headimages = [
        'http://img03.sogoucdn.com/app/a/100520090/oIWsFt1dGMefD1f8dOg2UCwQUjKs',
        'http://img04.sogoucdn.com/app/a/100520090/oIWsFtwoQX8wX7w6loDevPqLEC_I',
        'http://img03.sogoucdn.com/app/a/100520090/oIWsFt9Hbbtr9VLnfR9i_K5Z8D48',
        'http://img04.sogoucdn.com/app/a/100520090/oIWsFt3txmWu-usvUa6gU0qlyEVo',
        'http://img01.sogoucdn.com/app/a/100520090/oIWsFt8VDujUqNSCfruXtMNfekaw',
        'http://img01.sogoucdn.com/app/a/100520090/oIWsFt9YD5HWLDe5QAkuvh0JWrgw',
        'http://img01.sogoucdn.com/app/a/100520090/oIWsFt_WUnpQ7lZajAstgL8o1lWo',
        'http://img02.sogoucdn.com/app/a/100520090/oIWsFtzUnzWUMz1PMek5zjVlS42U',
        'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2yk491dhhSP940JzLEameY',
        'http://img03.sogoucdn.com/app/a/100520090/oIWsFtzm9UtmgY-SkOTFwQFpGsU8',
        'http://img02.sogoucdn.com/app/a/100520090/oIWsFt7VwiM8GqYcv8DBNb-k5NBQ',
        'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2tjckivF8b0MP_nNTdESkE',
        'http://img01.sogoucdn.com/app/a/100520090/oIWsFtzC2r61_riTCWp5iHX04fmo',
        'http://img02.sogoucdn.com/app/a/100520090/oIWsFt8JIY_-o7DBMxorP19hcF0Q',
        'http://img04.sogoucdn.com/app/a/100520090/oIWsFtyV5sdIXU2uy4m6oVBq77nA',
    ]
    expected_titles = [
        '不做这个动作,你的轮胎3个月就要换!',
        '新车质量最差的十个品牌?国人表示难以接受……',
        '带着米其林的指引去看古德伍德|品牌',
        '方向盘打法巧记口诀,科目二提分就靠它了!',
        '宝马“鸡腿”、奥迪“游艇”,这些奇葩的挡杆你见过几个?',
        '你没看错,我们做了期途昂和途锐的对比',
        '7成特斯拉被召回,难道是质量不过关?',
        '在中国惹不起的7种车,遇到请回避!',
        '迈腾摊上大事儿了 全新一代君威17.58万起',
        '面对这份驾享,朝廷大人都忍不住亲自上阵!',
        '外卖小哥被暴晒:底层人士的悲哀,有钱人不会懂',
        '自动驾驶还处于“新手”阶段,何时成为“老司机”?院士这样说……',
        '高速上碰到石头,是躲还是撞?',
        '装什么神秘,不就是加长版的讴歌TLX吗!',
        '一个动作,车里的人集体中毒!很多人都忽略了',
    ]
    expected_times = [
        1501328135,
        1501327941,
        1501326826,
        1501326716,
        1501326675,
        1501326455,
        1501326222,
        1501325595,
        1501325529,
        1501325521,
        1501325223,
        1501324531,
        1501324443,
        1501324310,
        1501323274,
    ]

    fixture_path = os.path.join(fake_data_path, 'wapindex-wap-0612-wap_8-0.html')
    with io.open(fixture_path, encoding='utf-8') as fixture:
        page_html = fixture.read()

    parsed = WechatSogouStructuring.get_gzh_artilce_by_hot(page_html)

    # Structural checks on every entry come first, then the minimum count.
    for entry in parsed:
        assert_in('gzh', entry)
        assert_in('article', entry)
        assert_in('http://mp.weixin.qq.com/s?src=', entry['article']['url'])
    assert_greater_equal(len(parsed), 10)

    # One pass over the entries, reading the fields in the same order per
    # entry as the field-by-field comparison below expects.
    rows = [
        (
            entry['gzh']['wechat_name'],
            entry['gzh']['headimage'],
            entry['article']['title'],
            entry['article']['time'],
        )
        for entry in parsed
    ]

    assert_equal(expected_wechat_names, [row[0] for row in rows])
    assert_equal(expected_headimages, [row[1] for row in rows])
    assert_equal(expected_titles, [row[2] for row in rows])
    assert_equal(expected_times, [row[3] for row in rows])
def test_get_gzh_artilce_by_hot(self):
    """Verify hot-article parsing against the stored HTML fixture.

    The fixture 'wapindex-wap-0612-wap_8-0.html' is parsed with
    ``WechatSogouStructuring.get_gzh_artilce_by_hot``; each result must
    carry 'gzh' and 'article' keys and a mp.weixin.qq.com article URL,
    there must be at least 10 results, and the account names, avatars,
    titles and timestamps must match the recorded values exactly.
    """
    html_path = os.path.join(fake_data_path, 'wapindex-wap-0612-wap_8-0.html')
    with io.open(html_path, encoding='utf-8') as handle:
        html_text = handle.read()
    articles = WechatSogouStructuring.get_gzh_artilce_by_hot(html_text)

    url_prefix = 'http://mp.weixin.qq.com/s?src='
    for item in articles:
        assert_in('gzh', item)
        assert_in('article', item)
        assert_in(url_prefix, item['article']['url'])
    assert_greater_equal(len(articles), 10)

    # Collect (name, avatar, title, time) per article, then transpose into
    # four parallel lists. The count check above guarantees rows exist.
    per_article = [
        (
            item['gzh']['wechat_name'],
            item['gzh']['headimage'],
            item['article']['title'],
            item['article']['time'],
        )
        for item in articles
    ]
    wechat_names, headimages, titles, times = [
        list(column) for column in zip(*per_article)
    ]

    assert_equal(
        [
            '全球汽车精选', '车早茶', '吴佩频道', '驾考宝典', '腾讯汽车',
            '新车评', '非常好车', '汽车情报所', '一猫汽车资讯', '资深科技控',
            '郎club', '科技日报', '汽车使用宝典', '名车报', '科普中国网',
        ],
        wechat_names,
    )
    assert_equal(
        [
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt1dGMefD1f8dOg2UCwQUjKs',
            'http://img04.sogoucdn.com/app/a/100520090/oIWsFtwoQX8wX7w6loDevPqLEC_I',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt9Hbbtr9VLnfR9i_K5Z8D48',
            'http://img04.sogoucdn.com/app/a/100520090/oIWsFt3txmWu-usvUa6gU0qlyEVo',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFt8VDujUqNSCfruXtMNfekaw',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFt9YD5HWLDe5QAkuvh0JWrgw',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFt_WUnpQ7lZajAstgL8o1lWo',
            'http://img02.sogoucdn.com/app/a/100520090/oIWsFtzUnzWUMz1PMek5zjVlS42U',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2yk491dhhSP940JzLEameY',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFtzm9UtmgY-SkOTFwQFpGsU8',
            'http://img02.sogoucdn.com/app/a/100520090/oIWsFt7VwiM8GqYcv8DBNb-k5NBQ',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2tjckivF8b0MP_nNTdESkE',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFtzC2r61_riTCWp5iHX04fmo',
            'http://img02.sogoucdn.com/app/a/100520090/oIWsFt8JIY_-o7DBMxorP19hcF0Q',
            'http://img04.sogoucdn.com/app/a/100520090/oIWsFtyV5sdIXU2uy4m6oVBq77nA',
        ],
        headimages,
    )
    assert_equal(
        [
            '不做这个动作,你的轮胎3个月就要换!',
            '新车质量最差的十个品牌?国人表示难以接受……',
            '带着米其林的指引去看古德伍德|品牌',
            '方向盘打法巧记口诀,科目二提分就靠它了!',
            '宝马“鸡腿”、奥迪“游艇”,这些奇葩的挡杆你见过几个?',
            '你没看错,我们做了期途昂和途锐的对比',
            '7成特斯拉被召回,难道是质量不过关?',
            '在中国惹不起的7种车,遇到请回避!',
            '迈腾摊上大事儿了 全新一代君威17.58万起',
            '面对这份驾享,朝廷大人都忍不住亲自上阵!',
            '外卖小哥被暴晒:底层人士的悲哀,有钱人不会懂',
            '自动驾驶还处于“新手”阶段,何时成为“老司机”?院士这样说……',
            '高速上碰到石头,是躲还是撞?',
            '装什么神秘,不就是加长版的讴歌TLX吗!',
            '一个动作,车里的人集体中毒!很多人都忽略了',
        ],
        titles,
    )
    assert_equal(
        [
            1501328135, 1501327941, 1501326826, 1501326716, 1501326675,
            1501326455, 1501326222, 1501325595, 1501325529, 1501325521,
            1501325223, 1501324531, 1501324443, 1501324310, 1501323274,
        ],
        times,
    )