def detail_parse_by_subclass(self, url, page):
    """Build an Item from a parsed detail page and collect its IMDb link.

    The page is parsed with ``self.parser.get_parse_data(url, page)``; the
    result is read through its ``'list'``, ``'tinfo'`` and ``'imdb'`` keys.

    Returns a tuple ``(item, urls)``. ``urls`` holds one IMDb title URL
    built from ``res['imdb']`` when that value is non-empty, otherwise it
    is an empty list.
    """
    res = self.parser.get_parse_data(url, page)

    item = Item()
    item.url = url

    # Each labelled line fills one attribute; the label itself is removed
    # from the stored value. Attributes whose label never appears are left
    # untouched.
    labelled_fields = (
        ("地区:", "location"),
        ("年份:", "date"),
        ("导演:", "director"),
        ("主演:", "actors"),
    )
    for entry in res['list']:
        text = entry['li']
        for label, attr in labelled_fields:
            if label in text:
                setattr(item, attr, text.replace(label, ''))

    # Collect the distinct quality tags in first-seen order so the joined
    # string is deterministic on every interpreter (a plain dict gives no
    # ordering guarantee on older Pythons).
    qualities = []
    for tinfo in res['tinfo']:
        description = tinfo['quality']
        seen_here = []
        if '720p' in description:
            seen_here.append('720p')
        if '1080p' in description:
            seen_here.append('1080p')
        # Both CAM and TS releases are reported under the single tag "TS".
        if 'CAM' in description or 'TS' in description:
            seen_here.append('TS')
        for tag in seen_here:
            if tag not in qualities:
                qualities.append(tag)
    item.quality = '/'.join(qualities)

    urllist = []
    imdb_id = res['imdb']
    if imdb_id:
        urllist.append("http://www.imdb.com/title/" + imdb_id)
    return item, urllist
def detail_parse_by_subclass(self, url, page):
    """Build an Item from a parsed IMDb title page.

    The page is parsed with ``self.parser.get_parse_data(url, page)``; the
    result is read through its ``'name'``, ``'typelist'``, ``'date'``,
    ``'pic_url'``, ``'rate'``, ``'director'``, ``'actors'`` and ``'box'``
    keys. The numeric id is taken from the path segment that follows
    ``"title/"`` in ``url``.

    Returns ``(item, [])`` on success, or ``(None, [])`` when the parsed
    name is missing/empty or the actors field is ``None``.

    Raises IndexError if ``url`` has no ``"title/"`` part and ValueError if
    the id segment is not numeric after removing ``"tt"``.
    """
    con = self.parser.get_parse_data(url, page)

    ename = con['name']
    if not ename:
        return None, []

    item = Item()
    item.url = url
    # Keep only the text before the first "(".
    item.ename = ename.split("(")[0].strip()

    item.type = '/'.join([entry['type'] for entry in con['typelist']])

    # e.g. ".../title/tt0061811/..." -> 61811
    # NOTE(review): the id assignment is assumed to be unconditional (not
    # limited to ids containing "tt") -- confirm against the intended logic.
    imdb_id = url.split("title/")[1].split("/")[0]
    if "tt" in imdb_id:
        imdb_id = imdb_id.replace('tt', '')
    item.id = int(imdb_id)

    item.date = con['date']

    item.pic_url = con['pic_url']
    if item.pic_url is None:
        item.pic_url = "nopic"

    item.rate = con['rate']
    if item.rate is None:
        item.rate = 0

    item.director = con['director']

    actors = con['actors']
    if actors is None:
        return None, []
    # Keep the part before the first "|" and drop the "Stars:" label.
    item.actors = actors.split("|")[0].replace("Stars:", '').strip()

    # Example input: "Budget: $170,000,000 (estimated)"
    info = con['box']
    # Guard against a missing box field; "in" on None would raise TypeError.
    if info and "Budget:" in info:
        budget = info.split("Budget:")[1].split("(estimated)")[0]
        budget = budget.replace(",", '').strip()
        match = re.search(r'([\d]+)', budget)
        if match is not None:
            # Floor division keeps an integer result on Python 2 and 3.
            item.box = int(match.group()) // 10000
        else:
            item.box = 0
    return item, []
def detail_parse_by_subclass(self, url, page):
    """Build an Item from a "◎"-delimited description block.

    The page is parsed with ``self.parser.get_parse_data(url, page)`` and
    its ``'content'`` value is split on "◎". When that yields more than two
    segments, each segment is scanned for known labels and the text after
    the label (stripped) is stored on the matching Item attribute.
    Otherwise the raw content is stored unparsed in ``item.content``.

    Returns a tuple ``(item, urls)``. ``urls`` holds every "http://" link
    found after a "链接" label, with surrounding "/" characters stripped.
    """
    res = self.parser.get_parse_data(url, page)
    content = res['content']
    segments = content.split("◎")

    item = Item()
    item.url = url
    found_urls = []

    if len(segments) <= 2:
        # Too few labelled segments to parse; keep the raw text instead.
        item.content = content
        return item, found_urls

    # (attribute, labels) pairs, applied in order. Within one pair only the
    # first label found in the segment is used. "date" appears twice on
    # purpose: a release-date label in the same segment overrides the year.
    # NOTE(review): the whitespace inside these labels is reproduced exactly
    # as it appears in this file; confirm it matches the scraped markup.
    field_rules = (
        ("cname", ("译 名",)),
        ("ename", ("片 名",)),
        ("date", ("年 代",)),
        ("location", ("国 家",)),
        ("date", ("上映时期", "上映日期")),
        ("director", ("导 演",)),
        ("actors", ("主 演", "演 员")),
    )

    for segment in segments:
        for attr, labels in field_rules:
            for label in labels:
                if label in segment:
                    setattr(item, attr, segment.split(label)[1].strip())
                    break

        if "链接" in segment:
            link_text = segment.split("链接")[1].strip()
            start = link_text.find("http://")
            if start != -1:
                found_urls.append(link_text[start:].strip('/'))

    return item, found_urls