def pc_list_related_videos(self, cid, count=10): """ 根据cid返回类型影片 :param cid: 原影片 :param count: 推荐数量 :return: """ results = {'source': cid, 'data': []} rs = do_get(self.RELATE_API.format(cid=cid, count=count, time=int(time.time() * 1000)), is_json=True) videos = rs['data']['recItemList'] for video in videos: cid = video['itemId'] title = video['unionInfo']['title'] subtitle = video['unionInfo']['second_title'] cover_h = video['unionInfo']['new_pic_hz'] cover_v = video['unionInfo']['new_pic_vt'] score = video['unionInfo']['score']['score'] status = video['unionInfo']['episode_updated'] results['data'].append({ 'cid': cid, 'title': title, 'subTitle': subtitle, 'coverVT': cover_v, 'coverHZ': cover_h, 'score': score, 'status': status }) return results
def pc_list_video_detail_from_cid(self, cid): """ 根据cid查询影片详细信息 :param cid: :return: """ url = self.VIDEO_INFO_CID_API.format(cid=cid, time=int(time.time() * 1000)) rs = do_get(url, is_json=True) if 'c' not in rs: return False title = rs['c']['title'] desc = rs['c']['description'] video_ids = rs['c']['video_ids'] episodes = [] temp = {'groupDetail': []} temp_list = [] if len(video_ids) == 1: temp = self.pc_list_video_detail_from_vid(','.join(video_ids)) temp.update({'groupName': ''}) episodes.append(temp) else: count = 0 is_first = True for vid in video_ids: temp_list.append(vid) count += 1 if count % 10 == 0 or count == len(video_ids): if is_first: temp = self.pc_list_video_detail_from_vid( ','.join(temp_list)) is_first = False else: for l in temp_list: temp['groupDetail'].append({ 'vid': l, 'cid': cid, 'title': '', 'pic': '', 'desc': '', 'num': '' }) # vid_group = '{}-{}'.format((count - 1) // 10 * 10 + 1, count) temp.update({ 'groupName': '{}-{}'.format((count - 1) // 10 * 10 + 1, count) }) episodes.append(temp) temp = {'groupDetail': []} temp_list = [] cover = rs['c']['pic'] if 'pic' in rs['c'] else '' names = rs['nam'][0] if len(rs['nam']) != 0 else [] caption = rs['rec'] if 'rec' in rs else '' return VideoDetail(cid, title, desc, episodes, cover, names, caption)
def pc_list_search_word(self, word, count=10): results = {'data': []} rs = do_get(self.SEARCH_WORD_API.format(word=word, count=count, time=int(time.time() * 1000)), call_back='QZOutputJson') items = json.loads(rs)['item'] for item in items: word = item['word'] results['data'].append(word) return results
def parse_people(html): results = { 'success': True, 'type': 'people', 'avatar': '', 'name': '', 'hasNext': False, 'data': [] } dom = BeautifulSoup(html, 'html.parser') people_wrapper = dom.select('.result_people') if people_wrapper: avatar = people_wrapper[0].select( '.people_avatar img')[0]['src'] avatar = check_url(avatar) name = people_wrapper[0].select('.people_name')[0].text results['avatar'] = avatar results['name'] = name url = people_wrapper[0].select('a')[0]['href'] new_html = do_get(url, params={'tabid': 1}) dom = BeautifulSoup(new_html, 'html.parser') rows = dom.select('.mod_row_box') for row in rows: if not row.select('.mod_title'): continue tag = row.select('.mod_title')[0].contents[0].strip() items = row.select('.list_item') for item in items: cover = item.select('.figure_pic')[0]['src'] cover = check_url(cover) i_title = item.select('.figure')[0]['title'] url = item.select('.figure')[0]['href'] pattern = re.compile(r'/cover/(.+).html') if not pattern.search(url): continue cid = pattern.findall(url)[0] caption = item.select( '.figure_count')[0].text if item.select( '.figure_count') else '' results['data'].append({ 'cid': cid, 'cover': cover, 'tag': tag, 'caption': caption, 'title': i_title }) else: results['success'] = False return results
def pc_list_video_episodes(self, cids): """ 根据cid查询所有分集 :param cids: 带查询cid,多个以,分割 :return: """ results = [] rs = do_get(self.EPISODES_API.format(ids=cids), call_back='QZOutputJson') collections = json.loads(rs)['results'] for c in collections: cid = c['id'] vids = c['fields']['video_ids'] results.append(VideoEpisode(cid, len(vids), vids)) return BaseResponse(results)
def pc_list_channel_params(self, channel): """ 获取频道信息,包含可用筛选参数、页数、视频数 :param channel: 频道对应的字符串 :return: """ # 变量声明 params = [] # 所有可用筛选 results_count = 0 # 记录总数 page_count = 0 # 页数 url = self.LIST_API.format(channel=channel, offset=0) rs = do_get(url) dom = BeautifulSoup(rs, 'html.parser') # 获取总数、页数 results_count = dom.select('.filter_result')[0]['data-total'] page_count = dom.select('.filter_result')[0]['data-pagemax'] # 获取所有类型 lines = dom.select('.filter_line') for line in lines: # 变量声明 label = '' # 字段含义 key = '' # 字段 key_map = {} # 键值对 # 获取字段中文名称 label = line.select('.filter_label')[0].text tags = line.select('.filter_item') # 获取参数键值 for tag in tags: key = tag['data-key'] key_map.update({tag['data-value']: tag.text}) params.append({'field': key, 'label': label, 'key_map': key_map}) return { 'channel': channel, 'count': results_count, 'page': page_count, 'params': params } pass
def pc_list_channels(self): """ 获取所有频道 :return: 频道列表 """ channels = [] rs = do_get(self.INDEX) dom = BeautifulSoup(rs, 'html.parser') cells = dom.select('#mod_main_nav a') for cell in cells: href = cell['href'] # 去除子菜单和无关项 if href.startswith('/channel') and href.find('?') == -1: temp = Channel(href[href.rfind('/') + 1:], cell.text) # 去重 if temp not in channels: channels.append(temp) return BaseResponse(channels)
def pc_list_channel_videos(self, channel, offset=0, params={}): """ 获取所有视频 :param channel: 频道 :param offset: 偏移量 :param params: 额外筛选参数 :return: """ url = self.LIST_API.format(channel=channel, offset=offset) rs = do_get(url, params) lists = self.pc_parse_list(rs) return { 'channel': channel, 'offset': offset, 'params': params, 'count': len(lists), 'list': lists }
def parse_intent(html, word, page=0, page_size=10): dom = BeautifulSoup(html, 'html.parser') result = { 'success': True, 'type': 'intent', 'hasNext': True, 'data': [] } if dom.select('.result_intention'): rs_json = do_get(self.INTENT_SEARCH_API.format( word=word, page=page, pageSize=page_size), is_json=True) items = rs_json['data']['areaBoxList'][0]['itemList'] for item in items: cid = item['doc']['id'] title = item['videoInfo']['title'] title = check(title) _type = item['videoInfo']['typeName'] _type = check(_type) cover = item['videoInfo']['imgUrl'] cover = check_url(cover) # desc = item['videoInfo']['descrip'] result['data'].append({ 'cid': cid, 'title': title, 'caption': '', 'tag': _type, 'cover': cover }) if (page + 1) * page_size >= rs_json['data']['areaBoxList'][0][ 'totalNum']: result['hasNext'] = False else: result['success'] = False return result
def pc_list_video_detail_from_vid(self, vid, group_name=''): rs = do_get(self.VIDEO_INFO_VID_API.format(ids=vid, time=int(time.time())), call_back='QZOutputJson') print(rs) results = json.loads(rs)['results'] temp = [] for r in results: vid = r['id'] cid = r['fields']['cover_list'][0] if len( r['fields']['cover_list']) != 0 else [] pic = r['fields']['pic160x90'] desc = r['fields']['desc'] num = r['fields']['series_part_title'] title = r['fields']['title'] temp.append({ 'vid': vid, 'cid': cid, 'title': title, 'pic': pic, 'desc': desc, 'num': num }) return {'groupName': group_name, 'groupDetail': temp}
def pc_list_channel_index(self, channel): def parse(html): v_lists = [] cover_type = 'hz' for i in html.select('.list_item'): if 'data-float' not in i.select('.figure')[0].attrs: continue href = i.select('.figure')[0]['href'] # print(href) import re page_pattern = re.compile(r'x/page/(.+).html') cover_pattern1 = re.compile(r'/x/cover/(.+)/(.+).html') cover_pattern2 = re.compile(r'/x/cover/(.+).html') cid = '' vid = '' if page_pattern.search(href): vid = page_pattern.findall(href)[0] elif cover_pattern1.search(href): cid, vid = cover_pattern1.findall(href)[0] elif cover_pattern2.search(href): cid = cover_pattern2.findall(href)[0] score = i.select('.figure_score')[0].text.strip() if i.select( '.figure_score') else '' title = i.select('.figure_title')[0]['title'] if i.select( '.figure_title') else '' cover_tag = i.select('.figure_pic')[0] if 'lz_next' in cover_tag.attrs: cover = cover_tag['lz_next'] elif 'lz_src' in cover_tag.attrs: cover = cover_tag['lz_src'] else: cover = cover_tag['src'] if cover.find('vcover_vt_pic') != -1 or cover.find( 'qqlive') != -1: cover_type = 'vt' elif cover.find('vcover_hz_pic') != -1: cover_type = 'hz' caption = i.select('.figure_caption')[0].text.strip( ) if i.select('.figure_caption') else '' desc = i.select('.figure_desc')[0]['title'] if i.select( '.figure_desc') else '' if cover.startswith('//'): cover = 'https:' + cover v_lists.append({ 'cid': cid, 'vid': vid, 'title': title, 'caption': caption, 'score': score, 'desc': desc, 'cover': cover, }) return v_lists, cover_type url = self.CHANNEL.format(channel=channel) rs = do_get(url, is_json=False) dom = BeautifulSoup(rs, 'html.parser') boxs = dom.select('.mod_row_box') results = {'data': []} for box in boxs: if box.select('.mod_hd'): label = box.select('.mod_title')[0].text.strip() items = box.select('.mod_figure') tabs = [] if box.select('.mod_title_tabs'): tabs = box.select('.tab_item') if len(tabs) != 0: for tab, item in zip(tabs, items): label = tab.text.strip() lists, cover_type = parse(item) if len(lists) != 0: results['data'].append({ 'title': label, 'showTitle': True, 'contentCode': 101 if cover_type == 'hz' else 106, 'widgets': lists }) else: if len(items) == 0: continue lists, cover_type = parse(items[0]) if len(lists) != 0: results['data'].append({ 'title': label, 'showTitle': True, 'contentCode': 101 if cover_type == 'hz' else 106, 'widgets': lists }) return results
def pc_list_search(self, key, cur=1, is_intent=False): def parse_people(html): results = { 'success': True, 'type': 'people', 'avatar': '', 'name': '', 'hasNext': False, 'data': [] } dom = BeautifulSoup(html, 'html.parser') people_wrapper = dom.select('.result_people') if people_wrapper: avatar = people_wrapper[0].select( '.people_avatar img')[0]['src'] avatar = check_url(avatar) name = people_wrapper[0].select('.people_name')[0].text results['avatar'] = avatar results['name'] = name url = people_wrapper[0].select('a')[0]['href'] new_html = do_get(url, params={'tabid': 1}) dom = BeautifulSoup(new_html, 'html.parser') rows = dom.select('.mod_row_box') for row in rows: if not row.select('.mod_title'): continue tag = row.select('.mod_title')[0].contents[0].strip() items = row.select('.list_item') for item in items: cover = item.select('.figure_pic')[0]['src'] cover = check_url(cover) i_title = item.select('.figure')[0]['title'] url = item.select('.figure')[0]['href'] pattern = re.compile(r'/cover/(.+).html') if not pattern.search(url): continue cid = pattern.findall(url)[0] caption = item.select( '.figure_count')[0].text if item.select( '.figure_count') else '' results['data'].append({ 'cid': cid, 'cover': cover, 'tag': tag, 'caption': caption, 'title': i_title }) else: results['success'] = False return results def parse_normal(html): results = { 'success': True, 'type': 'no_template', 'hasNext': True, 'data': [] } dom = BeautifulSoup(html, 'html.parser') divs = dom.select('.wrapper_main div[data-index]') if len(divs) != 0: for div in divs: cid = div.select('.result_item_v')[0]['data-id'] title = div.select('.result_title')[0].text.replace( '\n', '').replace('\t', '') cover = div.select('.figure_pic')[0]['src'] cover = check_url(cover) caption = div.select('.figure_caption')[0].text.strip( ) if len(div.select('.figure_caption')) != 0 else '' _type = div.select('.type')[0].text.strip() title = title.replace(_type, '') results['data'].append({ 'cid': cid, 'title': title, 'caption': caption, 'tag': _type, 'cover': cover }) else: results['success'] = False results['hasNext'] = False return results def parse_intent(html, word, page=0, page_size=10): dom = BeautifulSoup(html, 'html.parser') result = { 'success': True, 'type': 'intent', 'hasNext': True, 'data': [] } if dom.select('.result_intention'): rs_json = do_get(self.INTENT_SEARCH_API.format( word=word, page=page, pageSize=page_size), is_json=True) items = rs_json['data']['areaBoxList'][0]['itemList'] for item in items: cid = item['doc']['id'] title = item['videoInfo']['title'] title = check(title) _type = item['videoInfo']['typeName'] _type = check(_type) cover = item['videoInfo']['imgUrl'] cover = check_url(cover) # desc = item['videoInfo']['descrip'] result['data'].append({ 'cid': cid, 'title': title, 'caption': '', 'tag': _type, 'cover': cover }) if (page + 1) * page_size >= rs_json['data']['areaBoxList'][0][ 'totalNum']: result['hasNext'] = False else: result['success'] = False return result def check(txt): m = '\u0005' n = '\u0006' return txt.replace(m, '').replace(n, '') if is_intent: rs = do_get(self.SEARCH.format(key=key, page=1)) else: rs = do_get(self.SEARCH.format(key=key, page=cur)) people = parse_people(rs) videos = parse_normal(rs) intent = parse_intent(rs, key, page=cur - 1) if people['success']: return people elif intent['success']: return intent else: return videos
def pc_list_player_url(self, vid): rs = do_get(self.PLAYER_URL_API.format(vid=vid), is_json=True) return rs