示例#1
0
    def merge_fields(self, info):
        """Copy fields from a raw video-info mapping onto a new ``Contents``.

        Args:
            info: dict-like payload. Keys read here: ``name``,
                ``description``, ``tvId``, ``vid``, ``playCount``,
                ``albumId``, ``url``, ``duration``, the ``*ImageUrl`` /
                ``posterUrl`` keys, ``issueTime``, ``crumbList``,
                ``categories``, ``videoCount``, ``subtitle``,
                ``commentCount``, ``qitanId``, ``cast``, ``focuses`` and
                ``score``. Every key is optional.

        Returns:
            dict: ``__dict__`` of the populated ``Contents`` instance.
        """
        content = Contents()
        content.title = info.get("name")
        content.summary = info.get("description")
        content.iqiyi_tvId = info.get("tvId")
        content.iqiyi_vid = info.get("vid")
        content.iqiyi_plays_num = info.get("playCount")
        content.iqiyi_albumId = info.get("albumId")
        content.iqiyi_play_url = info.get("url")

        duration = info.get("duration")
        if duration:
            # NOTE(review): presumably seconds -> minutes; the result type
            # depends on the interpreter's "/" semantics -- confirm.
            content.duration = duration / 60

        # Collect every available artwork URL, album image first.
        content.poster = []
        if info.get("albumImageUrl"):
            content.img_url = info.get("albumImageUrl")
        poster_keys = (
            "albumImageUrl",
            "imageUrl",
            "videoImageUrl",
            "posterUrl",
            "tvImageUrl",
            "qualityImageUrl",
        )
        for key in poster_keys:
            image_url = info.get(key)
            if image_url:
                content.poster.append({"url": image_url})

        # .get() keeps a payload without "issueTime" from raising KeyError.
        issue_time = info.get("issueTime")
        if issue_time:
            content.release_date = mictime_to_ymd(issue_time)

        # Category: last level-2 crumb that is not the VIP channel,
        # otherwise the last level-3 crumb.
        crumbs = info.get("crumbList")
        if crumbs:
            need_level3 = True
            for crumb in crumbs:
                if int(crumb["level"]) == 2 and crumb["title"] != u'VIP会员':
                    content.category = crumb["title"]
                    need_level3 = False
            if need_level3:
                for crumb in crumbs:
                    if int(crumb["level"]) == 3:
                        content.category = crumb["title"]

        # Categories are routed by the text of their "subName" label.
        tag_markers = (u"类型", u'风格', u'分类', u'小学', u'高中', u'短片')
        tag_names = []
        for entry in info.get("categories") or []:
            sub_name = entry.get("subName") or u""
            name = entry.get("name")
            if u"地区" in sub_name:
                content.area = area_process(name)
            elif any(marker in sub_name for marker in tag_markers):
                if name:
                    tag_names.append(name)
            elif u"语种" in sub_name:
                content.language = language_process(name)
            elif sub_name == u"年龄段":
                content.age = name
        content.tags = ",".join(tag_names)

        content.all_episode = info.get("videoCount")
        content.sub_title = info.get("subtitle")
        content.iqiyi_rating_num = info.get("commentCount")
        content.iqiyi_qitanId = info.get("qitanId")

        # (key under info["cast"], attribute for the comma-joined names,
        #  attribute for the detailed list). "writers" deliberately maps to
        # the singular "screenwriter_list" attribute.
        cast_fields = (
            ("directors", "directors", "directors_list"),
            ("speakers", "speakers", "speakers_list"),
            ("publishers", "publishers", "publishers_list"),
            ("singers", "singers", "singers_list"),
            ("mainActors", "starring", "starring_list"),
            ("writers", "screenwriters", "screenwriter_list"),
            ("actors", "actors", "actors_list"),
            ("guests", "guests", "guests_list"),
            ("hosts", "hosts", "hosts_list"),
        )
        cast = info.get("cast") or {}
        for cast_key, names_attr, list_attr in cast_fields:
            people = cast.get(cast_key)
            if not people:
                continue
            names = []
            details = []
            for person in people:
                names.append(person.get("name"))
                details.append({
                    "name": person.get("name"),
                    "iqiyi_id": person.get("id"),
                    "avatar": person.get("imageUrl"),
                    "iqiyi_userId": person.get("userId"),
                    "iqiyi_circleId": person.get("circleId"),
                })
            # Entries without a name are kept in the detailed list but
            # skipped here so the join cannot fail on None.
            setattr(content, names_attr, ",".join(n for n in names if n))
            setattr(content, list_attr, details)

        # Derive the year from the release date when it is still unset.
        if content.release_date and content.year is None:
            year_match = re.search(r'(\d{4})', content.release_date)
            if year_match:
                content.year = year_match.group(1)

        # Fall back to the hosts when no directors were collected
        # (directors and directors_list are always filled together above).
        if not content.directors_list and content.hosts:
            content.directors = content.hosts
            content.directors_list = content.hosts_list

        content.focuses = info.get("focuses")
        content.iqiyi_rating = info.get("score")
        content.created_at = time.time()

        return content.__dict__
示例#2
0
    def vdetail_parser(self, r, url=None):
        """Parse a video detail page into a dict of ``Contents`` fields.

        Args:
            r: HTML markup of the detail page, handed to ``etree.HTML``.
            url: page URL, forwarded to ``self.get_vid_by_url`` to obtain
                the tvId.

        Returns:
            dict: ``__dict__`` of the populated ``Contents`` instance, or
            ``False`` when the markup cannot be parsed.
        """
        try:
            page = etree.HTML(r)
        except Exception:
            return False
        if page is None:
            # Defensive: treat a parse that yields no tree like a failure.
            return False

        L = Contents()

        # xpath() returns a list of elements, so pick one element to query.
        # Prefer the topic container, then the result container, then the
        # whole page. Every expression below starts with "//" and is thus
        # evaluated against the entire document, not only this container.
        containers = (page.xpath(u'//div[@class="mod_search_topic"]')
                      or page.xpath(u'//div[@class="mod_reuslt"]'))
        result_div = containers[0] if containers else page

        title = result_div.xpath(u'//h1[@class="main_title"]/a')
        if title:
            L.title = title[0].text

        sub_title = result_div.xpath(u'//span[@class="sub_title"]')
        if sub_title:
            sub_title_text = sub_title[0].text
            L.sub_title = sub_title_text
            # The element may have no text; search an empty string then.
            year_match = re.search(r'(\d{4})', sub_title_text or u'')
            if year_match:
                L.year = year_match.group(1)

        category = page.xpath(u'//a[@class="channelTag"]')
        if category:
            L.category = category[0].text

        area = result_div.xpath(u'//em[contains(text(),"地区")]/a')
        if area:
            L.area = area[0].text

        L.iqiyi_tvId = self.get_vid_by_url(url)

        language = result_div.xpath(u'//em[contains(text(),"语言")]/a')
        if language:
            L.language = language[0].text

        tags = result_div.xpath(u'//em[contains(text(),"类型")]/a')
        if tags:
            # De-duplicate while keeping page order so output is stable.
            tag_names = []
            for node in tags:
                if node.text and node.text not in tag_names:
                    tag_names.append(node.text)
            L.tags = ",".join(tag_names)

        banben = result_div.xpath(u'//em[contains(text(),"版本")]/a')
        if banben:
            L.banben = banben[0].text

        summary = result_div.xpath(u'//em[contains(text(),"简介")]')
        if summary:
            summary_text = result_div.xpath(
                u'//em[contains(text(),"简介")]/following::text()[1]')
            if summary_text:
                L.summary = parse_simple(summary_text[0])

        all_episode = result_div.xpath(u'//em[contains(text(),"集数")]/a')
        if all_episode:
            L.all_episode = self.parse_all_episode(all_episode[0].text)

        iqiyi_play_url = result_div.xpath(u'//a[contains(text(),"立即播放")]')
        if iqiyi_play_url:
            L.iqiyi_play_url = iqiyi_play_url[0].get("href")

        iqiyi_plays_num = result_div.xpath(u'//i[@id="widget-playcount"]')
        if iqiyi_plays_num:
            L.iqiyi_plays_num = iqiyi_plays_num[0].text

        peiyin = result_div.xpath(u'//em[contains(text(),"配音")]/a')
        if peiyin:
            L.peiyin_list = []
            peiyin_names = []
            for anchor in peiyin:
                # Read each anchor's own text rather than the first one's.
                name = anchor.text
                if name and name not in peiyin_names:
                    peiyin_names.append(name)
                href = anchor.get("href")
                L.peiyin_list.append({
                    "name": name,
                    "iqiyi_url": href,
                    "iqiyi_id": self.get_starId_by_url(href),
                })
            L.peiyin = ",".join(peiyin_names)

        iqiyi_rating = page.xpath(u'//span[@class="score_font"]')
        if iqiyi_rating:
            L.iqiyi_rating = iqiyi_rating[0].get("snsscore")

        # contains() is an XPath function call, not an attribute test.
        poster = page.xpath(u'//div[contains(@_stat,"result_pic")]/img')
        if poster:
            image = poster[0]
            L.poster = {
                "url": image.get("src"),
                "width": image.get("width"),
                "height": image.get("height"),
                "name": image.get("alt"),
            }
            L.img_url = L.poster['url']

        # TODO: director / actor extraction is not implemented yet.
        return L.__dict__