def __get_second_data(self, url):
    """Fetch a second-level category page and parse it.

    :param url: second-level category URL to fetch
    :return: currently always an error response — the success-path
        handling of the parsed page is still unfinished.
    """
    res = requests.get(url)
    if 304 >= res.status_code >= 200:
        try:
            soup = BeautifulSoup(res.text, 'lxml')
        # BUG FIX: `except IndexError and KeyError` evaluated the `and`
        # expression first, so only KeyError was ever caught; a tuple
        # catches both as intended.
        except (IndexError, KeyError):
            return BaseHandler().write_error()
    # NOTE(review): even on success this falls through to an error
    # response and `soup` is unused — parsing looks like a TODO.
    return BaseHandler().write_error()
def get_data(self):
    """Fetch ``self.url`` and wrap its JSON ``result`` list in a
    standard list response; error response on a bad status code."""
    response = requests.get(url=self.url)
    if not (200 <= response.status_code <= 304):
        return BaseHandler().write_error()
    items = json.loads(response.text)['result']
    return BaseHandler().write_list(code=const.SUCCESS_CODE,
                                    message=const.SUCCESS_MESSAGE,
                                    count=len(items),
                                    data=items)
def search():
    """View function: search videos by the ``word`` query parameter.

    Query params:
        word: keyword to search for (required).
        page: result page number (optional).

    :return: search result payload, or an error response when ``word``
        is missing or the search raises.
    """
    word = request.args.get('word')
    if not word:
        return BaseHandler().write_error(const.FAIL_CODE,
                                         const.KEYWORD_NONE_MESSAGE)
    try:
        page = request.args.get('page')
        return Search(word=word, page=page).search()
    except Exception:
        # BUG FIX: the message argument was const.FAIL_CODE (a code, not
        # a message). Pass the failure message constant instead —
        # TODO(review): confirm const.FAIL_MESSAGE is the defined name.
        return BaseHandler().write_error(const.FAIL_CODE,
                                         const.FAIL_MESSAGE)
def __filter_normal_video(self, data):
    """Parse a normal (non-bangumi) video page into a detail payload.

    :param data: raw HTML of the video page
    :return: object response with author/play/title/tag/desc/category
        info, or an error response when the page layout has changed.
    """
    result = {}
    soup = BeautifulSoup(data, 'lxml')
    try:
        # mid identifies the uploader; the author's space lives at
        # http://space.bilibili.com/<mid>
        mid = soup.select('.u-face > a')[0]['mid']
        author_info = self.__get_author_info(mid)
        play_info = self.__get_play_info()
        title = soup.select('.info > .v-title')[0].get_text()
        tag_list = []
        for tag in soup.select('.tag-list > li'):
            tag_list.append(tag.select('a')[0].get_text())
        desc = soup.select('#v_desc')[0].get_text()
        cid = 0
        re_findall = re.findall(
            r'cid=(\d+)', soup.select('.scontent > script')[0].string)
        if re_findall:
            cid = re_findall[0]
        # cid is the key parameter for resolving the real video URL
        # and the danmaku (comment) stream
        video_url_info = self.__get_video_url(cid)
        create_time = str(soup.select('time')[0]['datetime']).replace(
            'T', ' ')
        category = None
        category_url = None
        for item in soup.select('.tminfo > span'):
            category = item.select('a')[0].string
            category_url = 'http://www.bilibili.com%s' % item.select(
                'a')[0]['href']
        # related videos
        relative_list = self.__get_relative_video()
        result['author_info'] = author_info
        result['play_info'] = play_info
        result['title'] = title
        result['tag_list'] = tag_list
        result['desc'] = desc
        result['cid'] = int(cid)
        result['create_time'] = create_time
        result['video_info'] = video_url_info
        result['category'] = category
        result['category_url'] = category_url
        result['relative_list'] = relative_list
    # BUG FIX: `except IndexError or KeyError` evaluated the `or`
    # expression first, so only IndexError was ever caught; a tuple
    # catches both — the layout-changed case can raise either.
    except (IndexError, KeyError):
        # site layout changed
        return BaseHandler().write_error()
    return BaseHandler().write_object(code=const.SUCCESS_CODE,
                                      message=const.SUCCESS_MESSAGE,
                                      result=result)
def __get_recommend_bangumi(self):
    """Fetch the recommended-bangumi list for ``self.sid``.

    :return: the ``result.list`` payload on success, otherwise an
        error response.
    """
    url = ('http://bangumi.bilibili.com/web_api/season/recommend/'
           '{sid}.json'.format(sid=self.sid))
    res = requests.get(url)
    if not (200 <= res.status_code <= 304):
        return BaseHandler().write_error()
    payload = json.loads(res.text)
    return payload['result']['list']
def get_data(self):
    """Fetch the video page for ``self.aid`` and dispatch by redirect.

    A redirect (non-empty ``res.history``) means the aid points at a
    bangumi (season) page; otherwise it is a normal video page.
    """
    url = 'http://www.bilibili.com/video/av%s/' % self.aid
    res = requests.get(url, allow_redirects=True)
    # FIX: the original `if len == 0 / elif len > 0 / return error`
    # made the final error return unreachable — a plain if/else covers
    # every case with identical behavior.
    if res.history:
        # redirected: bangumi video
        return self.__filter_bangumi_video(res.text)
    return self.__filter_normal_video(res.text)
def __get_first_data(self, url):
    """Fetch a first-level category page: banner plus top list.

    Second-level categories inside a first-level one use:
    http://api.bilibili.com/typedynamic/region with params
    rid (second-level id), pn (page num), ps (day of month).

    :param url: first-level category URL
    :return: object response with the top list, or an error response.
    """
    result = {}
    res = requests.get(url)
    if 304 >= res.status_code >= 200:
        try:
            # soup is currently unused but kept so that lxml parse
            # errors stay inside this try, as in the original
            soup = BeautifulSoup(res.text, 'lxml')
            top_list = self.__get_top_list(self.first_tid)
            result['banner'] = top_list
            return BaseHandler().write_object(result=result)
        # BUG FIX: `except IndexError and KeyError` evaluated the `and`
        # expression first, so only KeyError was ever caught; a tuple
        # catches both as intended.
        except (IndexError, KeyError):
            return BaseHandler().write_error()
    return BaseHandler().write_error()
def get_data(self):
    """Fetch season info (JSONP) for ``self.sid`` and attach the
    recommended-bangumi list to the result."""
    url = 'http://bangumi.bilibili.com/jsonp/seasoninfo/{sid}.ver'.format(
        sid=self.sid)
    params = {'callback': 'seasonListCallback'}
    res = requests.get(url, params=params)
    if not (200 <= res.status_code <= 304):
        return BaseHandler().write_error()
    try:
        # strip the JSONP wrapper seasonListCallback({...}); down to JSON
        raw = res.text.replace('seasonListCallback(', '').replace('});', '}')
        result = json.loads(raw)['result']
        result['recommend_bangumi'] = self.__get_recommend_bangumi()
        return BaseHandler().write_object(code=const.SUCCESS_CODE,
                                          message=const.SUCCESS_MESSAGE,
                                          result=result)
    except KeyError:
        return BaseHandler().write_error(400, '协议错误')
def get_category(self):
    """Scrape the site nav menu into a two-level category listing.

    :return: object response whose ``data`` is a list of
        ``{first_name, first_url, child: [{second_name, second_url}]}``
        dicts, or an error response on a bad status code.
    """
    url = 'http://www.bilibili.com'
    res = requests.get(url)
    if 304 >= res.status_code >= 200:
        soup = BeautifulSoup(res.text, 'lxml')
        data = []
        for item in soup.select('.nav-menu > li'):
            child = []
            for i_item in item.select('.i_num > li'):
                title = i_item.select('a > b')[0].get_text()
                # FIX: use a distinct name instead of rebinding the
                # outer `url`; protocol-relative hrefs (//...) need an
                # explicit scheme
                second_url = i_item.select('a')[0]['href']
                if second_url.startswith('//'):
                    second_url = 'http:' + second_url
                # dict literal instead of dict([...]) with an
                # uppercase throwaway name
                child.append({'second_name': title,
                              'second_url': second_url})
            first_url = item.select('a')[0]['href']
            if first_url.startswith('//'):
                first_url = 'http:' + first_url
            first_name = item.select('em')[0].get_text()
            data.append({'first_name': first_name,
                         'first_url': first_url,
                         'child': child})
        return BaseHandler().write_object(result={'data': data})
    return BaseHandler().write_error()
def get_category_video(self):
    """Resolve ``self.first_tid`` / ``self.second_tid`` against the
    category table and fetch the matching category page.

    :return: category payload, an empty list when ``first_tid`` is
        missing, or an error response when nothing matches.
    """
    video_list = []
    # NOTE(review): assumes the category payload deserializes to a
    # mapping {first_tid: {'normal': url, second_tid: url, ...}} —
    # confirm against get_category()'s actual response shape.
    category_table = json.loads(self.get_category())
    if self.first_tid is None:
        return video_list
    for first_item in category_table:
        if self.first_tid != first_item:
            continue
        if self.second_tid is None:
            url = category_table[first_item]['normal']
            return self.__get_first_data(url=url)
        # BUG FIX: the original looped `for second_item in first_item`,
        # iterating the characters of the key string — iterate the
        # inner mapping's keys instead.
        for second_item in category_table[first_item]:
            if self.second_tid == second_item:
                url = category_table[first_item][second_item]
                return self.__get_second_data(url=url)
    # second-level tid matched nothing: empty or unknown category
    return BaseHandler().write_error()
def __filter(self, json_data):
    """Strip the JSONP wrapper from *json_data* and return its
    ``data.recommend`` list wrapped in a standard list response."""
    stripped = str(json_data).replace('callback(', '').replace('});', '}')
    recommend = json.loads(stripped)['data']['recommend']
    return BaseHandler().write_list(const.SUCCESS_CODE,
                                    const.SUCCESS_MESSAGE,
                                    len(recommend),
                                    recommend)
def get_live(self):
    """Fetch the live recommendation feed and filter it into a
    response; error response on a bad status code."""
    res = requests.get(url=self.url, params=self.params)
    if not (200 <= res.status_code <= 304):
        return BaseHandler().write_error()
    return self.__filter(res.text)
def __filter_bangumi_video(self, data):
    """Parse a bangumi (season) video page into a detail payload.

    TODO: real parsing is not implemented yet — an empty result object
    is returned for now.

    :param data: raw HTML of the bangumi page (currently unused)
    """
    return BaseHandler().write_object(code=const.SUCCESS_CODE,
                                      message=const.SUCCESS_MESSAGE,
                                      result={})