Example #1
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'torrent" rel="nofollow".+?img alt="(.+?)".+?href="(.+?)".+?class="is-hidden-touch">(.+?)</td>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
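         # each match is (quality label, magnet link, size); the magnet is percent-decoded below and its &dn= display-name suffix is dropped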
         for qual, Magnet, size in Endlinks:
             Magnet = Magnet.replace('%3A',
                                     ':').replace('%3F', '?').replace(
                                         '%3D', '=').split('&dn=')[0]
             print Magnet + '<><><><><>'
             qual = quality_tags.get_release_quality(qual, None)[0]
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Example #2
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         #scraper= cfscrape.create_scraper()
         #r=scraper.get(start_url, headers=headers)
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="imagnet icon16" href="(.+?)">.+?<font color=#004E98>(.+?)</font>.+?><b>(.+?)</b></a',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
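         # each match is (magnet link, size, quality label)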
         for Magnet, size, quality in Endlinks:
             #Magnet=Magnet.replace('https://mylink.me.uk/?url=', '')
             qual = quality_tags.get_release_quality(quality, None)[0]
             #print Magnet + '<><><><><>'
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Example #3
 def get_source(self, url, title, year, season, episode, start_time):
     sources = []
     try:
         count = 0
         if url is None:
             return sources
         data = urlparse.parse_qs(url)
         data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
         self.title = data['tvshowtitle']
         self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
         query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode']))
         query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
         url = self.tvsearch.format(urllib.quote_plus(query).replace('+', '-'))
         items = self._get_items(url)
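         # each item is presumably (name, magnet link, size), matching item[0..2] below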
         for item in items:
             try:
                 name = item[0]
                 quality, info = quality_tags.get_release_quality(name, name)
                 info.append(item[2])
                 info = ' | '.join(info)
                 url = item[1]
                 url = url.split('&tr')[0]
                 count += 1
                 qual = '{0} | {1}'.format(quality, info)
                 self.sources.append({'source': 'MAGNET', 'quality': qual, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True})
             except BaseException:
                 pass
         if dev_log == 'true':
             end_time = time.time() - float(start_time)
             send_log(self.name, end_time, count, title, year, season=season, episode=episode)
         return self.sources
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return self.sources
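For reference, a minimal sketch of what the parse_qs step above produces; the query string is illustrative and only the field names mirror the code:

 import urlparse
 url = 'tvshowtitle=Breaking+Bad&season=1&episode=2'
 data = urlparse.parse_qs(url)
 data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
 print data['tvshowtitle'], 'S%02dE%02d' % (int(data['season']), int(data['episode']))
 # -> Breaking Bad S01E02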
Example #4
    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
        try:
            start_time = time.time()
            hdlr = 'S%02dE%02d' % (int(season), int(episode))
            query = clean_search(title)
            query = urllib.quote_plus(query + ' ' + hdlr)
            urls = []
            for link in self.search_links:
                try:
                    url = urlparse.urljoin(self.base_link, link % query)
                    url = urlparse.urljoin(self.base_link, url)
                    r = client.request(url)
                    posts = client.parseDOM(r, 'tbody')
                    posts = client.parseDOM(posts, 'tr')
                    urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i]
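                    # each result row exposes its download link in the button's data-clipboard-text attribute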
                except:
                    pass
            count = 0
            for url in urls:
                name = url.split('/')[-1].lower()
                name = client.replaceHTMLCodes(name).replace('%20', '')
                if 'movies' in url:
                    continue
                if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                                          'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']):
                    continue

                t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I)
                if clean_title(t) not in clean_title(title): continue
                y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
                if not y == hdlr: continue

                res, info = quality_tags.get_release_quality(name, url)

                if any(x in url for x in ['hastidl', '1tehmovies', '62.210.103.107', '79.127', '213.32.113.82',
                                          'dl5.downloadha', '89.163.255.42', '185.56.20.142', 's1.0music',
                                          'dl3.yoozdl', 'dl4.lavinmovie.net', 'dl6.lavinmovie.net',
                                          'dl3.upload08.com', 'dl8.uploadt.com', '163.172.6.218',
                                          'samba.allunix.ru', 'server417']):
                    count += 1

                    url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link)
                    url = urllib.quote(url, '|:?/&+=_-')

                    self.sources.append(
                        {'source': 'DirectLink', 'quality': res, 'scraper': self.name, 'url': url, 'direct': True})

            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)

            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources



#filepursuit().scrape_movie('Black Panther', '2018', '')
Example #5
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="nobr center">(.+?)</span></td>.+?title="Torrent magnet link" href="(.+?)".+?class="cellMainLink">(.+?)</a>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
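         # each match is (size, magnet link, quality label); the mylink.cx redirector prefix is stripped first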
         for size, Magnet, qual in Endlinks:
             Magnet = Magnet.replace('https://mylink.cx/?url=', '')
             Magnet = Magnet.replace('%3A',
                                     ':').replace('%3F', '?').replace(
                                         '%3D', '=').split('%26dn')[0]
             print Magnet + '<><><><><>'
             qual = quality_tags.get_release_quality(qual, None)[0]
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Example #6
    def get_source(self,item_url,title,year,start_time):
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            data = client.request(frame, headers=headers)
            data = client.parseDOM(data, 'ul', attrs={'class': 'menuPlayer'})[0]
            links = client.parseDOM(data, 'a', ret='href')

            for link in links:
                #print link+'<<<<<<<<<<<<<<<<<<<<<<<<<<'
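                # each candidate is a hoster link from the player menu; SD openload links get a second quality check below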
                qual = quality_tags.check_sd_url(link)
                if qual == 'SD' and 'openload' in link:
                    data = client.request(link, headers=headers)
                    data = client.parseDOM(data, 'meta', ret='content')[0]
                    qual2, info = quality_tags.get_release_quality(data, None)
                else:
                    qual2 = qual
                count += 1
                host = link.split('//')[1].replace('www.','')
                host = host.split('/')[0].split('.')[0].title()
                self.sources.append({'source':host, 'quality':qual2, 'scraper': self.name, 'url':link, 'direct':False})
            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name,end_time,count,title,year)
        except Exception, argument:
            if dev_log=='true':
                error_log(self.name, argument)


#hdvix().scrape_movie('Black Panther', '2018', 'tt1825683', False)
Example #7
 def get_source(self, item_url, title, year, season, episode, start_time):
     count = 0
     try:
         if item_url is None:
             return self.sources
         qual = re.search('Quality\s*:(.+?)<br', item_url,
                          re.DOTALL).groups()[0]
         qual = re.sub('<.+?>', '', qual)
         qual, info = quality_tags.get_release_quality(qual, qual)
         headers = {
             'Origin': self.base_link,
             'Referer': client.parseDOM(item_url, 'link')[0],
             'X-Requested-With': 'XMLHttpRequest',
             'User-Agent': client.agent()
         }
         fn = client.parseDOM(item_url,
                              'input',
                              attrs={'name': 'FName'},
                              ret='value')[0]
         fs = client.parseDOM(item_url,
                              'input',
                              attrs={'name': 'FSize'},
                              ret='value')[0]
         fsid = client.parseDOM(item_url,
                                'input',
                                attrs={'name': 'FSID'},
                                ret='value')[0]
         #params = re.compile('<input name="FName" type="hidden" value="(.+?)" /><input name="FSize" type="hidden" value="(.+?)" /><input name="FSID" type="hidden" value="(.+?)"').findall(html)
         post_url = self.base_link + '/thanks-for-downloading/'
         form_data = {'FName': fn, 'FSize': fs, 'FSID': fsid}
         #link = requests.post(request_url, data=form_data, headers=headers).content
         link = client.request(post_url, post=form_data, headers=headers)
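         # the response redirects via a <meta http-equiv="refresh"> tag whose content attribute carries the file URL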
         stream_url = client.parseDOM(link,
                                      'meta',
                                      attrs={'http-equiv': 'refresh'},
                                      ret='content')[0]
         stream_url = client.replaceHTMLCodes(stream_url).split('url=')[-1]
         stream_url += '|User-Agent=%s' % urllib.quote(client.agent())
         count += 1
         self.sources.append({
             'source': 'DirectLink',
             'quality': qual,
             'scraper': self.name,
             'url': stream_url,
             'direct': True
         })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name,
                      end_time,
                      count,
                      title + ' | ' + stream_url,
                      year,
                      season=season,
                      episode=episode)
     except:
         pass
Example #8
    def get_source(self, item_url, title, year, season, episode, start_time):
        try:
            #print 'coolmovies pass ' + item_url
            headers = {'User-Agent': client.agent()}
            r = client.request(item_url, headers=headers)
            #xbmc.log('@#@HTML:%s' % r, xbmc.LOGNOTICE)

            data = client.parseDOM(r, 'table', attrs={'class':
                                                      'source-links'})[0]
            data = client.parseDOM(data, 'tr')
            data = [(client.parseDOM(i, 'a',
                                     ret='href')[0], client.parseDOM(i,
                                                                     'td')[1])
                    for i in data if 'version' in i.lower()]  #Watch Version
            Endlinks = [(i[0], re.sub('<.+?>', '', i[1])) for i in data if i]
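            # Endlinks pairs each stream link with its host label, with any markup stripped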

            #Endlinks = re.compile('<td align="center"><strong><a href="(.+?)"',re.DOTALL).findall(r)
            #print 'coolmoviezone - scrape_movie - EndLinks: '+str(Endlinks)
            count = 0
            for link, host in Endlinks:
                if 'filebebo' in host: continue  #host with captcha
                if 'fruitad' in host:
                    link = client.request(link)
                    link = client.parseDOM(
                        link, 'meta', attrs={'name': 'og:url'},
                        ret='content')[0]  #returns the real url
                    if not link: continue

                import resolveurl
                if resolveurl.HostedMediaFile(link):
                    from universalscrapers.modules import quality_tags
                    quality, info = quality_tags.get_release_quality(
                        link, link)
                    if quality == 'SD':
                        quality = 'DVD'
                    host = host.split('/')[0].split('.')[0].title()
                    count += 1
                    self.sources.append({
                        'source': host,
                        'quality': quality,
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season='',
                         episode='')
        except:
            pass
Example #9
 def get_source(self, url, title, year, season, episode, start_time):
     sources = []
     try:
         count = 0
         if url is None:
             return sources
         data = urlparse.parse_qs(url)
         data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
         tit = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
         hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
         query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
                 if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
         query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
         url = urlparse.urljoin(self.base_link, self.search_link%(urllib.quote(query)))
         r = client.request(url)
         r = client.parseDOM(r, 'table', attrs={'id': 'searchResult'})[0]
         posts = client.parseDOM(r, 'td')
         posts = [i for i in posts if 'detName' in i]
         for post in posts:
             post = post.replace('&nbsp;', ' ')
             name = client.parseDOM(post, 'a')[0]
             t = name.split(hdlr)[0]
             if not clean_title(re.sub('[()]', '', t)) == clean_title(tit):  # strip parentheses before comparing titles
                 continue
             try:
                 y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
             except BaseException:
                 y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
             if not y == hdlr:
                 continue
             links = client.parseDOM(post, 'a', ret='href')
             magnet = [i for i in links if 'magnet:' in i][0]
             url = magnet.split('&tr')[0]
             count += 1
             quality, info = quality_tags.get_release_quality(name, name)
             try:
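                 # normalise the reported size to GB: GiB/GB values pass through, MiB/MB values are divided by 1024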
                 size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                 div = 1 if size.endswith(('GB', 'GiB')) else 1024
                 size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                 size = '%.2f GB' % size
             except BaseException:
                 size = '0'
             info.append(size)
             info = ' | '.join(info)
             qual = '{0} | {1}'.format(quality, info)
             self.sources.append({'source': 'Torrent', 'quality': qual, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True})
         if dev_log == 'true':
             end_time = time.time() - float(start_time)
             send_log(self.name, end_time, count, title, year, season=season, episode=episode)
         return self.sources
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return self.sources
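A quick check of the size normalisation above, with an illustrative value:

 import re
 size = '715.5 MiB'
 div = 1 if size.endswith(('GB', 'GiB')) else 1024
 print '%.2f GB' % (float(re.sub('[^0-9.]', '', size)) / div)
 # -> 0.70 GB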
Example #10
    def _get_sources(self, item):
        try:
            name = item[0]
            quality, info = quality_tags.get_release_quality(item[1], name)
            info.append(item[2])
            info = ' | '.join(info)
            qual = '{0} | {1}'.format(quality, info)
            data = client.request(item[1])
            data = client.parseDOM(data, 'a', ret='href')
            url = [i for i in data if 'magnet:' in i][0]
            url = url.split('&tr')[0]

            self.sources.append(
                {'source': 'MAGNET', 'quality': qual, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True})
        except BaseException:
            pass
Example #11
    def get_source(self, item_url, title, year, start_time):
        try:
            #print 'PASSEDURL >>>>>>'+item_url
            count = 0
            headers = {'User-Agent': client.agent()}
            OPEN = client.request(item_url, headers=headers)
            frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            data = client.request(frame, headers=headers)
            data = client.parseDOM(data, 'ul', attrs={'class':
                                                      'menuPlayer'})[0]
            links = client.parseDOM(data, 'a', ret='href')

            for link in links:
                #print link+'<<<<<<<<<<<<<<<<<<<<<<<<<<'
                qual = quality_tags.check_sd_url(link)
                if qual == 'SD' and 'openload' in link:
                    data = client.request(link, headers=headers)
                    data = client.parseDOM(data, 'meta', ret='content')[0]
                    qual2, info = quality_tags.get_release_quality(data, None)
                else:
                    qual2 = qual
                count += 1
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                self.sources.append({
                    'source': host,
                    'quality': qual2,
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)


#hdvix().scrape_movie('Black Panther', '2018', 'tt1825683', False)
Example #12
    def get_source(self, url, title, year, season, episode, start_time):
        try:
            scraper = cfscrape.create_scraper()
            headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                       'X-Requested-With': 'XMLHttpRequest',
                       'User-Agent': client.agent()}
            count = 0
            data = scraper.get(url, headers=headers).content
            data = client.parseDOM(data, 'div', attrs={'class': 'thecontent'})[0]
            FN720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName720p'})[0]
            FS720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize720p'})[0]
            FSID720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID720p'})[0]
            FN1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName1080p'})[0]
            FS1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize1080p'})[0]
            FSID1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID1080p'})[0]
            post = {'FileName720p': FN720p, 'FileSize720p': FS720p, 'FSID720p': FSID720p,
                    'FileName1080p': FN1080p, 'FileSize1080p': FS1080p, 'FSID1080p': FSID1080p,
                    'x': 173, 'y': 22}
            data = scraper.post('%s/select-movie-quality.php' % self.base_link, data=post).content
            data = client.parseDOM(data, 'div', attrs={'id': 'btn_\d+p'})
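            # the reply holds one btn_720p / btn_1080p div per available quality; collect the link from each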

            u = [client.parseDOM(i, 'a', ret='href')[0] for i in data]
            for url in u:
                quality, info = quality_tags.get_release_quality(url, url)

                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                count += 1
                self.sources.append(
                    {'source': 'DirectLink', 'quality': quality, 'scraper': self.name, 'url': url, 'direct': True})

            if dev_log=='true':
                end_time = time.time() - start_time
                send_log(self.name,end_time,count,title,year, season=season,episode=episode)              
        except:
            pass

#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False) title contains 2 years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False) title contains number
Example #13
 def get_source(self, start_url, title, year, season, episode, start_time):
     try:
         #print 'URL PASSED OKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'+start_url
         count = 0
         headers = {'User-Agent': client.agent()}
         r = client.request(start_url, headers=headers)
         #print r
         Endlinks = re.compile(
             'class="resultdiv".+?<a href="(.+?)".+?class="resultdivtopname" >(.+?)</div></a>.+?class="resultdivbottonlength">(.+?)</div>',
             re.DOTALL).findall(r)
         #print 'scraperchk - scrape_movie - EndLinks: '+str(Endlinks)
         for nxtpg, info, size in Endlinks:
             nxtpg = self.base_link + nxtpg
             info = info.lstrip()
             #print nxtpg + '<><><><><>'+size+'><><><>'+info
             qual = quality_tags.get_release_quality(info, None)[0]
             #print qual
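             # the second path segment after 'torrent/' is the info hash used to build the magnet URI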
             nxtpg = nxtpg.split('torrent/')[1].split('/')[1]
             #print nxtpg
             Magnet = 'magnet:?xt=urn:btih:' + nxtpg
             count += 1
             self.sources.append({
                 'source': 'Torrent',
                 'quality': qual + ' ' + size,
                 'scraper': self.name,
                 'url': Magnet,
                 'direct': False,
                 'debridonly': True
             })
         if dev_log == 'true':
             end_time = time.time() - start_time
             send_log(self.name, end_time, count, title, year)
     except Exception, argument:
         if dev_log == 'true':
             error_log(self.name, argument)
         return []
Example #14
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            start_time = time.time() 
            search_id = clean_search(title.lower())
            start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
            headers={'User-Agent': client.agent()}
            html = client.request(start_url, headers=headers)
            results = client.parseDOM(html, 'div', attrs={'class': 'video_title'})

            items = []
            for item in results:
                try:
                    data = dom_parser.parse_dom(item, 'a', req=['href', 'title'])[0]
                    t = data.content
                    y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                    qual = data.attrs['title'].split('-')[1]
                    link = data.attrs['href']

                    if not clean_title(t) == clean_title(title): continue
                    if not y == year: continue

                    items += [(link, qual)]

                except:
                    pass
            for item in items:
                count = 0
                try:
                    url = item[0] if item[0].startswith('http') else urlparse.urljoin(self.base_link, item[0])
                    r = client.request(url)

                    qual = client.parseDOM(r, 'h1')[0]
                    res = quality_tags.get_release_quality(item[1], qual)[0]

                    url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
                    url = url if url.startswith('http') else urlparse.urljoin('https://', url)
                    if 'vidlink' in url:
                        html = client.request(url, headers=headers)
                        action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                        postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                        url = 'https://vidlink.org' + re.findall("var\s*url\s*=\s*'([^']+)", html)[0]
                        data = {'browserName': 'Firefox',
                                'platform': 'Win32',
                                'postID': postID,
                                'action': action}

                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        headers['Referer'] = url
                        html = client.request(url, post=data, headers=headers)
                        html = jsunpack.unpack(html).replace('\\', '')
                        sources = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', html, re.DOTALL)[0])
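                        # probe each candidate with a HEAD request and keep only URLs answering below HTTP 400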
                        for src in sources:
                            r = requests.head(src['url'], headers={'User-Agent': client.agent()})
                            if r.status_code < 400:
                                movie_link = src['url']
                                count += 1
                                self.sources.append({'source': 'Googlelink', 'quality': res,
                                                    'scraper': self.name, 'url': movie_link, 'direct': True})
                            else:
                               continue

                except:
                    pass
                if dev_log=='true':
                    end_time = time.time() - start_time
                    send_log(self.name,end_time, count, title,year)
            #print self.sources
            return self.sources
        except Exception, argument:
            print argument
            if dev_log == 'true':
                error_log(self.name,argument)
            return self.sources

#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
Example #15
    def scrape_episode(self,
                       title,
                       show_year,
                       year,
                       season,
                       episode,
                       imdb,
                       tvdb,
                       debrid=False):
        try:
            start_time = time.time()
            hdlr = 'S%02dE%02d' % (int(season), int(episode))
            query = clean_search(title)
            query = urllib.quote_plus(query + ' ' + hdlr).replace('+', '%20')
            urls = []
            for link in self.search_links:
                try:
                    url = urlparse.urljoin(self.base_link, link % query)
                    url = urlparse.urljoin(self.base_link, url)
                    r = client.request(url)
                    posts = client.parseDOM(r, 'tbody')
                    posts = client.parseDOM(posts, 'tr')
                    urls += [(client.parseDOM(i,
                                              'button',
                                              ret='data-clipboard-text')[0])
                             for i in posts if i]
                except BaseException:
                    return
            count = 0
            for url in urls:
                name = url.split('/')[-1].lower()
                name = client.replaceHTMLCodes(name).replace('%20',
                                                             '').replace(
                                                                 '%27', "'")
                if 'movies' in url:
                    continue
                if any(x in url for x in [
                        'italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                        'samples', 'extras', 'french', 'trailer', 'trailers',
                        'sample'
                ]):
                    continue

                t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)',
                           '',
                           name,
                           flags=re.I)
                if clean_title(t) not in clean_title(title): continue
                y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]',
                               name, re.I)[-1].upper()
                if not y == hdlr: continue

                res, info = quality_tags.get_release_quality(name, url)

                count += 1

                url += '|User-Agent=%s&Referer=%s' % (client.agent(),
                                                      self.base_link)
                url = urllib.quote(url, '|%:?/&+=_-')
                host = url.split('/')[2]
                self.sources.append({
                    'source': host,
                    'quality': res,
                    'scraper': self.name,
                    'url': url,
                    'direct': True
                })

            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)

            return self.sources
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources


#filepursuit().scrape_movie('Black Panther', '2018', '')
Example #16
    def get_source(self, m_url, title, year, season, episode, start_time):
        #import xbmc
        try:
            hdlr = 'S%02dE%02d' % (int(season),
                                   int(episode)) if not season == '' else year
            r = client.request(m_url)
            if not hdlr in m_url.upper():
                quality = client.parseDOM(r, 'h4')[0]
                regex = '<p>\s*%s\s*</p>(.+?)</ul>' % hdlr
                data = re.search(regex, r, re.DOTALL | re.I).groups()[0]
                frames = client.parseDOM(data, 'a', ret='href')

            else:
                data = client.parseDOM(r,
                                       'div',
                                       attrs={'class': 'entry-content'})[0]
                data = re.compile('<h4>(.+?)</h4>(.+?)</ul>',
                                  re.DOTALL).findall(data)
                #xbmc.log('DATAAAA:%s' % data, xbmc.LOGNOTICE)
                frames = []
                for qual, links in data:
                    quality = qual
                    frames += client.parseDOM(links, 'a', ret='href')

            for link in frames:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                if 'filebebo' in link: continue
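                # non-openload hosts are assumed not to deliver true HD, so the advertised quality is stepped down below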
                rez, info = quality_tags.get_release_quality(quality, link)
                if '1080p' in rez and not host.lower() in [
                        'openload', 'oload'
                ]:
                    rez = '720p'
                elif '720p' in quality and not host.lower() in [
                        'openload', 'oload'
                ]:
                    rez = 'SD'
                else:
                    rez, info = quality_tags.get_release_quality(link, link)

                self.count += 1
                self.sources.append({
                    'source': host,
                    'quality': rez,
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         self.count,
                         title,
                         year,
                         season=season,
                         episode=episode)

        except:
            pass
Example #17
    def _get_sources(self, link, title, year, _type, season, episode,
                     start_time):
        try:
            squery = self.query.replace('%20', '+')
            self.headers = {
                'User-Agent': self.ua,
                'Referer': self.search_referer.format(squery)
            }

            srch = cache.get(client.request, 8, self.base_link)
            srch = client.parseDOM(srch,
                                   'form',
                                   ret='action',
                                   attrs={'name': 'frm'})[0]
            srch = srch[1:] if srch.startswith('/') else srch

            link = urlparse.urljoin(self.base_link, link % (srch, self.query))

            r = client.request(link, headers=self.headers)
            posts = client.parseDOM(r, 'tbody')[0]
            posts = client.parseDOM(posts, 'tr')
            urls = [(client.parseDOM(i, 'a',
                                     ret='href')[1], client.parseDOM(i,
                                                                     'a')[1],
                     client.parseDOM(i,
                                     'a',
                                     ret='href',
                                     attrs={'id': 'refer.+?'})[0])
                    for i in posts if i]
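            # each row yields (download link, release name, referrer link); the third element later supplies the host name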

            count = 0
            for url, name, host in urls:

                name = client.replaceHTMLCodes(name).replace('%20',
                                                             ' ').replace(
                                                                 '%27', "'")
                if any(x in url.lower() for x in [
                        'italian', 'teaser', 'bonus.disc', 'subs', 'sub',
                        'samples', 'extras', 'french', 'trailer', 'trailers',
                        'sample'
                ]):
                    continue

                if _type == 'movie':
                    t = name.split(year)[0]
                    if clean_title(t) not in clean_title(title): continue
                    y = re.findall(
                        '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s|\_|\-]', name,
                        re.I)[-1].upper()
                    if not year == y: continue
                else:
                    hdlr = 'S%02dE%02d' % (int(season), int(episode))
                    t = name.split(hdlr)[0]
                    if clean_title(t) not in clean_title(title): continue
                    y = re.findall(
                        '[\.|\(|\[|\s|\_](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_]',
                        name, re.I)[-1].upper()
                    if not y == hdlr: continue

                quality, info = quality_tags.get_release_quality(name, url)
                info = ' | '.join(info)
                res = '{0} | {1}'.format(quality, info)

                count += 1
                url = urlparse.urljoin(self.base_link,
                                       url) if url.startswith('/') else url
                host = host.split('/')[2]
                self.sources.append({
                    'source': host,
                    'quality': res,
                    'scraper': self.name,
                    'url': url,
                    'direct': True
                })

            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name, end_time, count, title, year)

        except:
            pass
Example #18
    def get_sources(self, url, title, year, season, episode, start_time):
        try:
            count = 0
            if url is None:
                return self.sources

            r = client.request(url)
            frame = client.parseDOM(r, 'table', attrs={'class': 'striped'})[0]
            frame = client.parseDOM(frame, 'a', ret='href')[0]
            frame = urlparse.urljoin(self.base_link,
                                     frame) if frame.startswith('/') else frame
            r = client.request(frame)
            hash = re.findall(
                '''var\s*hash\s*=\s*['"]([^'"]+)''', r, re.MULTILINE)[
                    0]  #var hash = '9fafa6c0c1771b38a1c72a5bd893c503';
            pdata = 'hash=%s&confirm_continue=I+understand%s+I+want+to+continue' % (
                str(hash), '%2C')
            data = client.request(frame, post=pdata, referer=frame)
            frames = re.compile(
                '''vlink.+?title=['"]([^'"]+).+?href=['"]([^'"]+).+?onclick.+?>(.+?)</a''',
                re.M | re.DOTALL).findall(data.replace('\n', ''))
            #xbmc.log('@#@Frames:%s' % frames, xbmc.LOGNOTICE)

            for name, link, host in frames:
                try:
                    host = host.replace('\xc5\x8d', 'o').replace(
                        '\xc4\x93', 'e'
                    ).replace('\xc4\x81', 'a').replace(
                        '\xc4\xab', 'i'
                    )  #.replace('\u014d', 'o').replace('\u0113', 'e').replace('\u0101', 'a').replace('\u012b', 'i')
                    if not filter_host(host): continue

                    count += 1
                    quality, info = quality_tags.get_release_quality(
                        name, name)
                    if quality == '4K':
                        quality = '1080p'
                    elif quality == '1080p' and not 'openload' in host:
                        quality = '720p'

                    link = urlparse.urljoin(
                        self.base_link, link) if link.startswith('/') else link

                    self.sources.append({
                        'source': host,
                        'quality': quality,
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
                except:
                    pass

            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season=season,
                         episode=episode)

        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources
Example #19
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            start_time = time.time()
            search_id = clean_search(title.lower())
            start_url = urlparse.urljoin(
                self.base_link,
                self.search_link % urllib.quote_plus(search_id))
            headers = {'User-Agent': client.agent()}
            html = client.request(start_url, headers=headers)
            results = client.parseDOM(html,
                                      'div',
                                      attrs={'class': 'video_title'})

            items = []
            for item in results:
                try:
                    data = dom_parser.parse_dom(item,
                                                'a',
                                                req=['href', 'title'])[0]
                    t = data.content
                    y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                    qual = data.attrs['title'].split('-')[1]
                    link = data.attrs['href']

                    if not clean_title(t) == clean_title(title): continue
                    if not y == year: continue

                    items += [(link, qual)]

                except:
                    pass
            for item in items:
                count = 0
                try:
                    url = item[0] if item[0].startswith(
                        'http') else urlparse.urljoin(self.base_link, item[0])
                    r = client.request(url)

                    qual = client.parseDOM(r, 'h1')[0]
                    res = quality_tags.get_release_quality(item[1], qual)[0]

                    url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''',
                                     r, re.DOTALL)[0]
                    url = url if url.startswith('http') else urlparse.urljoin(
                        'https://', url)
                    if 'vidlink' in url:
                        html = client.request(url, headers=headers)
                        action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                        postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                        url = 'https://vidlink.org' + re.findall(
                            "var\s*url\s*=\s*'([^']+)", html)[0]
                        data = {
                            'browserName': 'Firefox',
                            'platform': 'Win32',
                            'postID': postID,
                            'action': action
                        }

                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        headers['Referer'] = url
                        html = client.request(url, post=data, headers=headers)
                        html = jsunpack.unpack(html).replace('\\', '')
                        sources = json.loads(
                            re.findall('window\.srcs\s*=\s*([^;]+)', html,
                                       re.DOTALL)[0])
                        for src in sources:
                            r = requests.head(
                                src['url'],
                                headers={'User-Agent': client.agent()})
                            if r.status_code < 400:
                                movie_link = src['url']
                                count += 1
                                self.sources.append({
                                    'source': 'Googlelink',
                                    'quality': res,
                                    'scraper': self.name,
                                    'url': movie_link,
                                    'direct': True
                                })
                            else:
                                continue

                except:
                    pass
                if dev_log == 'true':
                    end_time = time.time() - start_time
                    send_log(self.name, end_time, count, title, year)
            #print self.sources
            return self.sources
        except Exception, argument:
            print argument
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources


#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
Example #20
    def get_sources(self, url, title, year, season, episode, start_time):
        try:
            if url is None: return self.sources

            count = 0
            url, hdlr = url[0], url[1]
            main = []
            try:
                headers = {
                    'User-Agent': client.agent(),
                    'Referer': self.base_link
                }
                scraper = cfscrape.create_scraper()
                data = scraper.get(url, headers=headers).content
                main = dom.parse_dom(data, 'div', {'class': 'postContent'})
                main = [i.content for i in main]

                comments = dom.parse_dom(data, 'div',
                                         {'class': re.compile('content')})
                main += [i.content for i in comments]
            except:
                pass

            for con in main:
                try:
                    frames = client.parseDOM(con, 'a', ret='href')

                    for link in frames:

                        if 'youtube' in link: continue
                        if any(x in link
                               for x in ['.rar', '.zip', '.iso']) or any(
                                   link.endswith(x)
                                   for x in ['.rar', '.zip', '.iso']):
                            continue
                        host = re.findall(
                            '([\w]+[.][\w]+)$',
                            urlparse.urlparse(link.strip().lower()).netloc)[0]
                        host = client.replaceHTMLCodes(host)
                        host = host.encode('utf-8')

                        if not hdlr.lower() in link.lower(): continue

                        quality, info = quality_tags.get_release_quality(
                            link, link)

                        if link in str(self.sources): continue
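                        # keep only hosts a debrid service can resolve; matching links are flagged debridonly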
                        rd_domains = get_rd_domains()
                        if host in rd_domains:
                            count += 1
                            self.sources.append({
                                'source': host,
                                'quality': quality,
                                'scraper': self.name,
                                'url': link,
                                'direct': False,
                                'debridonly': True
                            })

                except:
                    pass
            if dev_log == 'true':
                end_time = time.time() - start_time
                send_log(self.name,
                         end_time,
                         count,
                         title,
                         year,
                         season=season,
                         episode=episode)
            # xbmc.log('@#@SOURCES:%s' % self._sources, xbmc.LOGNOTICE)
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, argument)
            return self.sources


#Releasebb().scrape_movie('Black Panther', '2018', '', True)