def parse_resource(self, response):
    """Collect the torrent links of one episode page into a MediaVideoItem.

    Reads ``m_item`` and ``vnum`` from ``response.request.meta``. A
    ``vtitle`` entry may also be present in the meta but is not used here.

    For each of the two link kinds ('Torrent magnet link' and
    'Download torrent file') only the first matching link of the ``data``
    table is considered. A link is kept when its protocol (as returned by
    ``extract_protocol``) is a key of ``self.protocol_map`` and is not
    'http'.

    Yields:
        A single MediaVideoItem whose "media" is the ``m_item`` taken from
        the request meta and whose 'video' is the list of VideoItem objects
        built here (the list may be empty).
    """
    logging.log(logging.INFO, "parse_resource:%s" % response.request.url)
    meta = response.request.meta
    m_item = meta['m_item']
    vnum = meta['vnum']

    vitems = []
    for link_title in ('Torrent magnet link', 'Download torrent file'):
        vurls = response.xpath(
            '//table[@class="data"]//tr/td[1]//a[@title="%s"]/@href'
            % link_title).extract()
        rtitles = response.xpath(
            '//table[@class="data"]//tr/td[1]//a[@title="%s"]/../../div[@class="torrentname"]/div/a/text()'
            % link_title).extract()
        # Both the link and its title are required; indexing an empty
        # title list would raise IndexError and abort the whole callback.
        if not vurls or not rtitles:
            continue

        vurl = vurls[0]
        rtitle = rtitles[0]
        # Protocol-relative links ("//host/...") are given an explicit scheme.
        if vurl.startswith('//'):
            vurl = 'https:' + vurl

        protocol = extract_protocol(vurl)
        if protocol not in self.protocol_map or protocol == 'http':
            continue

        v_item = VideoItem()
        v_item['url'] = vurl
        v_item['title'] = rtitle
        v_item['vnum'] = vnum
        v_item['cont_id'] = md5(vurl).hexdigest()
        v_item['protocol_id'] = self.protocol_map[protocol]
        if protocol == 'magnet':
            v_item['tor_ih'] = extract_info_ih(vurl)
        vitems.append(v_item)

    mvitem = MediaVideoItem()
    mvitem["media"] = m_item
    mvitem['video'] = vitems
    yield mvitem
def parse_all_episode(self, response):
    """Parse a title page and emit episode requests and/or a movie item.

    The page title and the IMDb link are read first; if either is missing
    the resulting exception is logged and nothing is yielded.

    TV part: for every ``h3`` season header that contains a number, each
    episode entry with a positive episode number, a name and a numeric id
    in its ``onclick`` attribute produces a Request to
    ``self.url_getepisode % episode_num`` handled by ``parse_resource``.
    The request meta carries ``m_item`` (a fresh MediaItem for the season),
    ``vtitle`` and ``vnum``.

    Movie part: for every tab ``div`` id, the first magnet link and the
    first torrent-file link are turned into VideoItem objects when their
    protocol is a key of ``self.protocol_map`` and is not 'http'. If any
    were collected, one MediaVideoItem is yielded for the whole page.

    Any exception raised while parsing is logged with its traceback and
    swallowed, so a malformed page does not propagate an error.
    """
    logging.log(logging.INFO,
                "parse_all_episode:%s" % response.request.url)
    try:
        mtitle = response.xpath(
            '//table[@class="doublecelltable"]//tr/td[1]/h1/text()'
        ).extract()[0]
        imdb_url = response.xpath(
            '//div[@class="torrentMediaInfo"]/div[@class="dataList"]/ul/li/strong[text()="IMDb link:"]/../a/@href'
        ).extract()[0]
        imdb = extract_imdb(imdb_url)

        # tv
        seasons = response.xpath(
            '//table[@class="doublecelltable"]//tr/td[1]/h3')
        for season in seasons:
            snums = season.xpath('./text()').re(r'\d+')
            # A header without a season number cannot be keyed; skipping it
            # also avoids reusing the number of a previous season.
            if not snums:
                continue
            snum = int(snums[0])

            resources = season.xpath(
                './following-sibling::div[1]/div/div[@class="infoList versionsFolded"]/a[@class="infoListCut"]'
            )
            for r in resources:
                vnums = r.xpath(
                    './span[@class="versionsEpNo"]/text()').re(r'\d+')
                vtitles = r.xpath(
                    './span[@class="versionsEpName"]/text()').extract()
                if not vnums or not vtitles:
                    continue
                vnum = int(vnums[0])
                if vnum <= 0:
                    continue
                vtitle = vtitles[0]

                episode_nums = r.xpath('./@onclick').re(r'\d+')
                if not episode_nums:
                    continue
                episode_num = episode_nums[0]

                # One MediaItem per request so callbacks never share state.
                m_item = MediaItem()
                m_item['title'] = mtitle + '_Season' + str(snum)
                m_item['url'] = response.request.url
                m_item['site_id'] = self.site_id
                m_item['cont_id'] = imdb + '|' + str(snum)

                yield Request(url=self.url_getepisode % episode_num,
                              callback=self.parse_resource,
                              meta={
                                  'm_item': m_item,
                                  'vtitle': vtitle,
                                  'vnum': vnum
                              })

        # movie
        vitems = []
        dids = response.xpath(
            '//div[@class="tabs tabSwitcher"]/div[@id]/@id').extract()
        for did in dids:
            for x in ('Torrent magnet link', 'Download torrent file'):
                vurls = response.xpath(
                    '//div[@id="%s"]/table//tr[@id]/td[1]/div[1]/a[@title="%s"]/@href'
                    % (did, x)).extract()
                vtitles = response.xpath(
                    '//div[@id="%s"]/table//tr[@id]/td[1]/div[1]/a[@title="%s"]/../../div[@class="torrentname"]/div/a/text()'
                    % (did, x)).extract()
                if not vurls or not vtitles:
                    continue

                vurl = vurls[0]
                vtitle = vtitles[0]
                # Protocol-relative links are given an explicit scheme.
                if vurl.startswith('//'):
                    vurl = 'https:' + vurl

                protocol = extract_protocol(vurl)
                if protocol not in self.protocol_map or protocol == 'http':
                    continue

                v_item = VideoItem()
                v_item['url'] = vurl
                v_item['title'] = vtitle
                v_item['cont_id'] = md5(vurl).hexdigest()
                v_item['protocol_id'] = self.protocol_map[protocol]
                if protocol == 'magnet':
                    v_item['tor_ih'] = extract_info_ih(vurl)
                vitems.append(v_item)

        if vitems:
            m_item = MediaItem()
            m_item['title'] = mtitle
            m_item['url'] = response.request.url
            m_item['site_id'] = self.site_id
            m_item['cont_id'] = imdb

            mvitem = MediaVideoItem()
            mvitem["media"] = m_item
            mvitem['video'] = vitems
            yield mvitem
    except Exception:
        # Best-effort parsing: record the failure and emit nothing further.
        logging.log(logging.ERROR, traceback.format_exc())