def merge_fields(self, info):
    """Copy the fields of one raw ``info`` dict onto a new ``Contents`` record.

    Args:
        info: dict of raw item fields. Every key is optional; a missing or
            empty value simply leaves the matching attribute untouched.

    Returns:
        dict: the ``__dict__`` of the populated ``Contents`` instance.
    """
    L = Contents()

    # Plain one-to-one copies.
    L.title = info.get("name")
    L.summary = info.get("description")
    L.iqiyi_tvId = info.get("tvId")
    L.iqiyi_vid = info.get("vid")
    L.iqiyi_plays_num = info.get("playCount")
    L.iqiyi_albumId = info.get("albumId")
    L.iqiyi_play_url = info.get("url")

    duration = info.get("duration")
    if duration:
        # NOTE(review): presumably seconds -> minutes; confirm the source unit.
        L.duration = duration / 60

    # Posters: collect every image URL that is present, in a fixed order.
    L.poster = []
    if info.get("albumImageUrl"):
        L.img_url = info.get("albumImageUrl")
    poster_keys = (
        "albumImageUrl",
        "imageUrl",
        "videoImageUrl",
        "posterUrl",
        "tvImageUrl",
        "qualityImageUrl",
    )
    for key in poster_keys:
        image_url = info.get(key)
        if image_url:
            L.poster.append({"url": image_url})

    # .get() so an absent "issueTime" is skipped instead of raising KeyError.
    issue_time = info.get("issueTime")
    if issue_time:
        L.release_date = mictime_to_ymd(issue_time)

    # Category: the last level-2 crumb that is not the VIP crumb wins;
    # when there is none, fall back to the last level-3 crumb.
    crumbs = info.get("crumbList") or []
    level2_titles = [
        c["title"] for c in crumbs
        if int(c["level"]) == 2 and c["title"] != u'VIP会员'
    ]
    if level2_titles:
        L.category = level2_titles[-1]
    else:
        level3_titles = [c["title"] for c in crumbs if int(c["level"]) == 3]
        if level3_titles:
            L.category = level3_titles[-1]

    # Categories are routed by the text of their "subName".
    tag_markers = (u"类型", u'风格', u'分类', u'小学', u'高中', u'短片')
    tag_names = []
    for entry in info.get("categories") or []:
        sub_name = entry.get("subName") or u""
        name = entry.get("name")
        if u"地区" in sub_name:
            L.area = area_process(name)
        elif any(marker in sub_name for marker in tag_markers):
            if name is not None:
                tag_names.append(name)
        elif u"语种" in sub_name:
            L.language = language_process(name)
        elif sub_name == u"年龄段":
            L.age = name
    L.tags = ",".join(tag_names)

    L.all_episode = info.get("videoCount")
    L.sub_title = info.get("subtitle")
    L.iqiyi_rating_num = info.get("commentCount")
    L.iqiyi_qitanId = info.get("qitanId")

    # Cast: (key inside info["cast"], attribute for the comma-joined names,
    # attribute for the detailed list of dicts).
    cast_roles = (
        ("directors", "directors", "directors_list"),
        ("speakers", "speakers", "speakers_list"),
        ("publishers", "publishers", "publishers_list"),
        ("singers", "singers", "singers_list"),
        ("mainActors", "starring", "starring_list"),
        ("writers", "screenwriters", "screenwriter_list"),  # screenwriters
        ("actors", "actors", "actors_list"),
        ("guests", "guests", "guests_list"),  # guests
        ("hosts", "hosts", "hosts_list"),
    )
    cast = info.get("cast") or {}
    for cast_key, names_attr, list_attr in cast_roles:
        people = cast.get(cast_key)
        if not people:
            continue
        details = [
            {
                "name": person.get("name"),
                "iqiyi_id": person.get("id"),
                "avatar": person.get("imageUrl"),
                "iqiyi_userId": person.get("userId"),
                "iqiyi_circleId": person.get("circleId"),
            }
            for person in people
        ]
        setattr(L, list_attr, details)
        # Entries without a name stay in the detailed list but are left out
        # of the joined string, which cannot hold None.
        setattr(
            L,
            names_attr,
            ",".join(d["name"] for d in details if d["name"] is not None),
        )

    # Derive the year from the release date when it is not set yet.
    if L.release_date and L.year is None:
        year_match = re.search(r'(\d{4})', L.release_date)
        if year_match:
            L.year = year_match.group(1)

    # Without any director, the hosts stand in as directors.
    if not L.directors_list and L.hosts:
        L.directors = L.hosts
        L.directors_list = L.hosts_list

    L.focuses = info.get("focuses")
    L.iqiyi_rating = info.get("score")
    L.created_at = time.time()
    return L.__dict__
def vdetail_parser(self, r, url=None):
    """Parse a video detail page into a dict of ``Contents`` fields.

    Args:
        r: HTML source of the detail page.
        url: URL of the page; handed to ``self.get_vid_by_url`` to obtain
            the tvId.

    Returns:
        The ``__dict__`` of the populated ``Contents`` instance, or
        ``False`` when the markup cannot be parsed.
    """
    try:
        page = etree.HTML(r)
    except Exception:
        # Best effort: unparsable markup is reported as False, not raised.
        return False
    if page is None:
        # NOTE(review): lxml appears to return None for input that holds no
        # element at all; treated like a parse failure.
        return False

    L = Contents()

    def unique_texts(nodes):
        """Return the distinct non-empty node texts in first-seen order."""
        seen = []
        for node in nodes:
            if node.text and node.text not in seen:
                seen.append(node.text)
        return seen

    # Every expression below starts with "//", i.e. it searches the whole
    # document, so all of them are evaluated on the document root.
    title = page.xpath(u'//h1[@class="main_title"]/a')
    if title:
        L.title = title[0].text

    sub_title = page.xpath(u'//span[@class="sub_title"]')
    if sub_title:
        L.sub_title = sub_title[0].text
        # The element may carry no text; search an empty string then.
        year_match = re.search(r'(\d{4})', L.sub_title or u'')
        if year_match:
            L.year = year_match.group(1)

    category = page.xpath(u'//a[@class="channelTag"]')
    if category:
        L.category = category[0].text

    area = page.xpath(u'//em[contains(text(),"地区")]/a')
    if area:
        L.area = area[0].text

    L.iqiyi_tvId = self.get_vid_by_url(url)

    language = page.xpath(u'//em[contains(text(),"语言")]/a')
    if language:
        L.language = language[0].text

    tags = page.xpath(u'//em[contains(text(),"类型")]/a')
    if tags:
        L.tags = ",".join(unique_texts(tags))

    banben = page.xpath(u'//em[contains(text(),"版本")]/a')
    if banben:
        L.banben = banben[0].text

    if page.xpath(u'//em[contains(text(),"简介")]'):
        # The summary is the first text node after the label element.
        summary_text = page.xpath(
            u'//em[contains(text(),"简介")]/following::text()[1]')
        if summary_text:
            L.summary = parse_simple(summary_text[0])

    all_episode = page.xpath(u'//em[contains(text(),"集数")]/a')
    if all_episode:
        L.all_episode = self.parse_all_episode(all_episode[0].text)

    iqiyi_play_url = page.xpath(u'//a[contains(text(),"立即播放")]')
    if iqiyi_play_url:
        L.iqiyi_play_url = iqiyi_play_url[0].get("href")

    iqiyi_plays_num = page.xpath(u'//i[@id="widget-playcount"]')
    if iqiyi_plays_num:
        L.iqiyi_plays_num = iqiyi_plays_num[0].text

    # Dubbing cast: one entry per link, each with its own name and URL.
    peiyin = page.xpath(u'//em[contains(text(),"配音")]/a')
    if peiyin:
        L.peiyin_list = [
            {
                "name": link.text,
                "iqiyi_url": link.get("href"),
                "iqiyi_id": self.get_starId_by_url(link.get("href")),
            }
            for link in peiyin
        ]
        L.peiyin = ",".join(unique_texts(peiyin))

    iqiyi_rating = page.xpath(u'//span[@class="score_font"]')
    if iqiyi_rating:
        L.iqiyi_rating = iqiyi_rating[0].get("snsscore")

    poster = page.xpath(u'//div[contains(@_stat,"result_pic")]/img')
    if poster:
        image = poster[0]
        L.poster = {
            "url": image.get("src"),
            "width": image.get("width"),
            "height": image.get("height"),
            "name": image.get("alt"),
        }
        L.img_url = L.poster['url']

    # TODO: directors and actors (//ul[@class="actor_list cf"]/li) are not
    # extracted yet.
    return L.__dict__