Пример #1
0
    def detail_parse_by_subclass(self, url, page):
        """Build an Item from a parsed detail page and collect its IMDb link.

        Reads three entries from the structure returned by
        ``self.parser.get_parse_data(url, page)``: ``'list'`` (entries with
        an ``'li'`` text), ``'tinfo'`` (entries with a ``'quality'`` text)
        and ``'imdb'``.

        Returns:
            A tuple ``(item, urllist)``: the populated Item and a list that
            holds at most one IMDb title URL built from ``res['imdb']``.
        """
        res = self.parser.get_parse_data(url, page)

        it = Item()
        # Assigned unconditionally so the URL is present even when the page
        # yields no attribute lines at all.
        it.url = url

        # Page label -> Item attribute. The label itself is removed from the
        # stored text. Labels: region, year, director, starring.
        labelled_fields = (
            ("地区:", "location"),
            ("年份:", "date"),
            ("导演:", "director"),
            ("主演:", "actors"),
        )
        for entry in res['list']:
            text = entry['li']
            for label, attr in labelled_fields:
                if label in text:
                    setattr(it, attr, text.replace(label, ''))

        # Collect quality tags in first-seen order without duplicates, so the
        # joined string does not depend on dict iteration order.
        qualities = []
        for tinfo in res['tinfo']:
            qs = tinfo['quality']
            found = []
            if '720p' in qs:
                found.append('720p')
            if '1080p' in qs:
                found.append('1080p')
            # Both CAM and TS sources are reported under the single tag 'TS'.
            if 'CAM' in qs or 'TS' in qs:
                found.append('TS')
            for tag in found:
                if tag not in qualities:
                    qualities.append(tag)
        it.quality = '/'.join(qualities)

        urllist = []
        imdb_url = res['imdb']
        # Skip both a missing (None) and an empty IMDb id.
        if imdb_url:
            urllist.append("http://www.imdb.com/title/" + imdb_url)
        return it, urllist
Пример #2
0
    def detail_parse_by_subclass(self, url, page):
        """Build an Item from a parsed IMDb title page.

        Reads ``'name'``, ``'typelist'``, ``'date'``, ``'pic_url'``,
        ``'rate'``, ``'director'``, ``'actors'`` and ``'box'`` from the
        structure returned by ``self.parser.get_parse_data(url, page)``, and
        derives the numeric id from ``url``.

        Returns:
            ``(item, [])`` on success, or ``(None, [])`` when the page has no
            title name or no actors entry. The second element is always an
            empty list.

        Raises:
            IndexError: if ``url`` does not contain ``"title/"``.
            ValueError: if the id segment is not numeric once ``"tt"`` is
                removed.
        """
        it = Item()
        it.url = url
        con = self.parser.get_parse_data(url, page)

        ename = con['name']
        if not ename:
            return None, []
        # Keep only the text before the first "(".
        it.ename = ename.split("(")[0].strip()

        it.type = '/'.join([t['type'] for t in con['typelist']])

        # URL shape: .../title/tt0061811/... ; store the numeric part only.
        imdbid = url.split("title/")[1].split("/")[0]
        it.id = int(imdbid.replace('tt', ''))

        it.date = con['date']

        pic_url = con['pic_url']
        it.pic_url = "nopic" if pic_url is None else pic_url

        rate = con['rate']
        it.rate = 0 if rate is None else rate

        it.director = con['director']

        actors = con['actors']
        if actors is None:
            return None, []
        # Keep the first "|"-separated part and drop the "Stars:" label.
        it.actors = actors.split("|")[0].replace("Stars:", '').strip()

        info = con['box']
        # Example text: "Budget: $170,000,000 (estimated)".
        # A None value is skipped instead of failing on the "in" test.
        if info is not None and "Budget:" in info:
            budget = info.split("Budget:")[1].split("(estimated)")[0]
            match = re.search(r'([\d]+)', budget.replace(",", '').strip())
            # Floor division keeps the value an int on Python 2 and 3 alike.
            it.box = int(match.group()) // 10000 if match is not None else 0
        return it, []
Пример #3
0
    def detail_parse_by_subclass(self, url, page):
        """Build an Item from a "◎"-delimited description block.

        The ``'content'`` text returned by
        ``self.parser.get_parse_data(url, page)`` is split on "◎". When the
        split gives more than two segments, each segment is scanned for known
        labels and the text after the label is stored on the Item. Otherwise
        the raw content is stored in ``item.content``.

        Returns:
            A tuple ``(item, links)``, where ``links`` holds the "http://"
            URLs found after a "链接" (link) label.
        """
        parsed = self.parser.get_parse_data(url, page)
        content = parsed['content']
        segments = content.split("◎")
        links = []

        item = Item()
        item.url = url

        # Too few segments to be a labelled block: keep the raw text.
        if len(segments) <= 2:
            item.content = content
            return item, links

        # (attribute, candidate labels). Within one entry only the first
        # matching label is used. "date" appears twice on purpose: a release
        # date found in the same segment overrides the year.
        # Labels: translated title, original title, year, country,
        # release period / release date, director, starring / cast.
        field_labels = (
            ("cname", ("译  名",)),
            ("ename", ("片  名",)),
            ("date", ("年  代",)),
            ("location", ("国  家",)),
            ("date", ("上映时期", "上映日期")),
            ("director", ("导  演",)),
            ("actors", ("主  演", "演  员")),
        )

        for segment in segments:
            for attr, labels in field_labels:
                for label in labels:
                    if label in segment:
                        setattr(item, attr, segment.split(label)[1].strip())
                        break

            if "链接" in segment:
                tail = segment.split("链接")[1].strip()
                start = tail.find("http://")
                if start != -1:
                    # Keep everything from the URL onward, minus edge slashes.
                    links.append(tail[start:].strip('/'))

        return item, links