Example #1
    def _get_sources(self, item):
        try:
            name = item[0]
            url = item[1]

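            # skip releases tagged with non-English audio keywords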
            if any(x in url.lower() for x in [
                    'french', 'italian', 'spanish', 'truefrench', 'dublado',
                    'dubbed'
            ]):
                return

            quality, info = source_utils.get_release_quality(name, url)

            info.append(item[2])  # if item[2] != '0'
            info = ' | '.join(info)

            self._sources.append({
                'source': 'torrent',
                'quality': quality,
                'language': 'en',
                'url': url,
                'info': info,
                'direct': False,
                'debridonly': True
            })

        except:
            source_utils.scraper_error('KICKASS2')
            pass
Example #2
    def _get_sources(self, item):
        try:
            name = item[0]

            quality, info = source_utils.get_release_quality(name, item[1])  # name first, link second, as in the sibling scrapers

            info.append(item[2])  # if item[2] != '0'
            info = ' | '.join(info)

            data = client.request(item[1])
            data = client.parseDOM(data, 'a', ret='href')

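            # take the first magnet link on the detail page; splitting on '&tr' drops tracker params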
            url = [i for i in data if 'magnet:' in i][0]
            url = url.split('&tr')[0]

            if any(x in url.lower() for x in [
                    'french', 'italian', 'spanish', 'truefrench', 'dublado',
                    'dubbed'
            ]):
                return

            self._sources.append({
                'source': 'torrent',
                'quality': quality,
                'language': 'en',
                'url': url,
                'info': info,
                'direct': False,
                'debridonly': True
            })

        except:
            source_utils.scraper_error('1337X')
            pass
Example #3
	def _get_sources(self, item):
		try:
			name = item[0]

			quality, info = source_utils.get_release_quality(name, name)

			info.append(item[2]) # if item[2] != '0'
			info = ' | '.join(info)

			data = client.request(item[1])
			if data is None:
				return

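			# pull the magnet href out of the page; give up quietly if there is none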
			try:
				url = re.search('''href=["'](magnet:\?[^"']+)''', data).groups()[0]
			except:
				return

			if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
				return

			self._sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
												'info': info, 'direct': False, 'debridonly': True})

		except:
			source_utils.scraper_error('LIMETORRENTS')
			pass
Example #4
    def sources(self, url, hostDict, hostprDict):
        try:
            self._sources = []

            if url is None:
                return self._sources

            if debrid.status() is False:
                raise Exception()

            self.hostDict = hostDict + hostprDict

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            self.title = data[
                'tvshowtitle'] if 'tvshowtitle' in data else data['title']
            self.title = self.title.replace('&', 'and').replace(
                'Special Victims Unit', 'SVU')

            self.hdlr = 'S%02dE%02d' % (
                int(data['season']), int(data['episode'])
            ) if 'tvshowtitle' in data else data['year']
            self.year = data['year']

            query = '%s %s' % (self.title, self.hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            r = client.request(url)

            # switch to client.parseDOM() to drop the dom_parser import
            posts = dom_parser.parse_dom(r, 'div', {'class': 'eTitle'})
            posts = [
                dom_parser.parse_dom(i.content, 'a', req='href') for i in posts
                if i
            ]
            posts = [(i[0].attrs['href'], re.sub('<.+?>', '', i[0].content))
                     for i in posts if i]
            posts = [[i[0], i[1]] for i in posts]

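            # scrape each search result in its own worker thread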
            threads = []
            for i in posts:
                threads.append(workers.Thread(self._get_sources, i))
            [i.start() for i in threads]
            [i.join() for i in threads]

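            # the joins above already block, so this poll is only a safety net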
            alive = [x for x in threads if x.is_alive() is True]
            while alive:
                alive = [x for x in threads if x.is_alive() is True]
                time.sleep(0.1)
            return self._sources
        except:
            source_utils.scraper_error('ONLINESERIES')
            return self._sources
Example #5
    def _get_sources(self, name, url):
        try:
            headers = {'User-Agent': client.agent()}
            r = self.scraper.get(url, headers=headers).content

            name = client.replaceHTMLCodes(name)
            l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'})
            s = ''

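            # concatenate the markup of every matching div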
            for i in l:
                s += i.content

            # search the accumulated markup, not the loop variable left over above
            urls = re.findall(
                r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''',
                s,
                flags=re.MULTILINE | re.DOTALL)
            # the original 'or' chain matched every url; exclude junk extensions instead
            urls = [
                i for i in urls if not any(
                    x in i for x in ('.rar', '.zip', '.iso', '.idx', '.sub'))
            ]

            for url in urls:
                if url in str(self.sources):
                    continue

                valid, host = source_utils.is_host_valid(url, self.hostDict)
                if not valid:
                    continue
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                quality, info = source_utils.get_release_quality(name, url)

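                # normalize the size quoted in the release name to GB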
                try:
                    size = re.findall(
                        '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                        name)[0]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass

                info = ' | '.join(info)

                self.sources.append({
                    'source': host,
                    'quality': quality,
                    'language': 'en',
                    'url': url,
                    'info': info,
                    'direct': False,
                    'debridonly': True
                })
        except:
            source_utils.scraper_error('RAPIDMOVIEZ')
            pass
Example #6
    def sources(self, url, hostDict, hostprDict):
        try:
            self._sources = []
            self.items = []

            if url is None:
                return self._sources

            if debrid.status() is False:
                return self._sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            self.title = data[
                'tvshowtitle'] if 'tvshowtitle' in data else data['title']
            self.title = self.title.replace('&', 'and').replace(
                'Special Victims Unit', 'SVU')

            self.hdlr = 'S%02dE%02d' % (
                int(data['season']), int(data['episode'])
            ) if 'tvshowtitle' in data else data['year']
            self.year = data['year']

            query = '%s %s' % (self.title, self.hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

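            # tv shows and movies are searched through different endpoints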
            urls = []
            if 'tvshowtitle' in data:
                url = self.search2.format(urllib.quote(query))
            else:
                url = self.search.format(urllib.quote(query))
            url = urlparse.urljoin(self.base_link, url)
            urls.append(url)

            url2 = url + '/2/'
            urls.append(url2)
            # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

            threads = []
            for url in urls:
                threads.append(workers.Thread(self._get_items, url))
            [i.start() for i in threads]
            [i.join() for i in threads]

            threads2 = []
            for i in self.items:
                threads2.append(workers.Thread(self._get_sources, i))
            [i.start() for i in threads2]
            [i.join() for i in threads2]
            return self._sources

        except:
            source_utils.scraper_error('KICKASS2')
            return self._sources
Example #7
    def sources(self, url, hostDict, hostprDict):
        try:
            self.sources = []

            if url is None:
                return self.sources

            if debrid.status() is False:
                raise Exception()

            self.hostDict = hostDict + hostprDict

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            # title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

            hdlr = data['year']
            hdlr2 = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else ''
            imdb = data['imdb']

            url = self.search(title, hdlr)
            headers = {'User-Agent': client.agent()}
            r = self.scraper.get(url, headers=headers).content

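            # movie pages list releases, episode pages list episodes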
            if hdlr2 == '':
                r = dom_parser.parse_dom(r, 'ul', {'id': 'releases'})[0]
            else:
                r = dom_parser.parse_dom(r, 'ul', {'id': 'episodes'})[0]

            r = dom_parser.parse_dom(r.content, 'a', req=['href'])
            r = [(i.content, urlparse.urljoin(self.base_link, i.attrs['href']))
                 for i in r if i and i.content != 'Watch']

            if hdlr2 != '':
                r = [(i[0], i[1]) for i in r if hdlr2.lower() in i[0].lower()]

            threads = []
            for i in r:
                threads.append(workers.Thread(self._get_sources, i[0], i[1]))
            [i.start() for i in threads]
            # [i.join() for i in threads]

            alive = [x for x in threads if x.is_alive() is True]
            while alive:
                alive = [x for x in threads if x.is_alive() is True]
                time.sleep(0.1)
            return self.sources
        except:
            source_utils.scraper_error('RAPIDMOVIEZ')
            return self.sources
Example #8
	def _get_items(self, url):
		try:
			headers = {'User-Agent': client.agent()}
			r = client.request(url, headers=headers)

			posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
			posts = client.parseDOM(posts, 'tr')

			for post in posts:
				data = client.parseDOM(post, 'a', ret='href')[1]
				if '/search/' in data:
					continue

				# Remove non-ASCII characters...freakin limetorrents
				try:
					data = data.encode('ascii', 'ignore')
				except:
					pass

				# some broken links contain whitespace
				data = re.sub('\s', '', data).strip()

				link = urlparse.urljoin(self.base_link, data)

				name = client.parseDOM(post, 'a')[1]

				t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
				if cleantitle.get(t) != cleantitle.get(self.title):
					continue

				if self.hdlr not in name:
					continue

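				# parse a human-readable size like '1,234.5 MB' and normalize to GB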
				try:
					size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
					div = 1 if size.endswith('GB') else 1024
					size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
					size = '%.2f GB' % size
				except:
					size = '0'
					pass

				self.items.append((name, link, size))

			return self.items

		except:
			source_utils.scraper_error('LIMETORRENTS')
			return self.items
Example #9
    def _get_items(self, url):
        items = []
        try:
            headers = {'User-Agent': client.agent()}
            r = client.request(url, headers=headers)
            posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
            posts = [i for i in posts if 'racker:' not in i]

            for post in posts:
                ref = client.parseDOM(post, 'a', ret='href')
                url = [i for i in ref if 'magnet:' in i][0]

                if any(x in url.lower() for x in [
                        'french', 'italian', 'spanish', 'truefrench',
                        'dublado', 'dubbed'
                ]):
                    continue

                name = client.parseDOM(post, 'a', ret='title')[0]

                t = name.split(self.hdlr)[0].replace(self.year, '').replace(
                    '(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(self.title):
                    continue

                if self.hdlr not in name:
                    continue

                try:
                    size = re.findall(
                        '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                        post)[0]
                    div = 1 if size.endswith('GB') else 1024
                    size = float(
                        re.sub('[^0-9|/.|/,]', '', size.replace(',',
                                                                '.'))) / div
                    size = '%.2f GB' % size
                except:
                    size = '0'
                    pass

                items.append((name, url, size))

            return items

        except:
            source_utils.scraper_error('GLODLS')
            return items
Example #10
    def _get_items(self, url):
        try:
            headers = {'User-Agent': client.agent()}
            r = client.request(url, headers=headers)
            posts = client.parseDOM(r,
                                    'tr',
                                    attrs={'id': 'torrent_latest_torrents'})

            for post in posts:
                ref = client.parseDOM(post,
                                      'a',
                                      attrs={'title': 'Torrent magnet link'},
                                      ret='href')[0]
                link = urllib.unquote(ref).decode('utf8').replace(
                    'https://mylink.me.uk/?url=',
                    '').replace('https://mylink.cx/?url=', '')

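                # the release name is packed in the magnet's dn= parameter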
                name = urllib.unquote_plus(
                    re.search('dn=([^&]+)', link).groups()[0])

                t = name.split(self.hdlr)[0].replace(self.year, '').replace(
                    '(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(self.title):
                    continue

                if self.hdlr not in name:
                    continue

                try:
                    size = re.findall(
                        '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                        post)[0]
                    div = 1 if size.endswith('GB') else 1024
                    size = float(
                        re.sub('[^0-9|/.|/,]', '', size.replace(',',
                                                                '.'))) / div
                    size = '%.2f GB' % size
                except:
                    size = '0'
                    pass

                self.items.append((name, link, size))

            return self.items

        except:
            source_utils.scraper_error('KICKASS2')
            return self.items
Example #11
	def _get_sources(self, url):
		try:
			r = client.request(url)
			posts = client.parseDOM(r, 'tr')

			for post in posts:
				link = re.findall('a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL)

				if not link:
					continue

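				# a single row can carry several magnet links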
				for url in link:

					url = url.split('&tr')[0]

					if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
						continue

					name = url.split('&dn=')[1]
					name = urllib.unquote_plus(name)

					t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
					if cleantitle.get(t) != cleantitle.get(self.title):
						continue

					if self.hdlr not in url:
						continue

					quality, info = source_utils.get_release_quality(name, url)

					try:
						size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
						div = 1 if size.endswith(('GB', 'GiB')) else 1024
						size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
						size = '%.2f GB' % size
						info.append(size)
					except:
						pass

					info = ' | '.join(info)

					self.sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
														'info': info, 'direct': False, 'debridonly': True})

		except:
			source_utils.scraper_error('BTSCENE')
			pass
Example #12
    def _get_items(self, url):
        try:
            headers = {'User-Agent': client.agent()}
            r = client.request(url, headers=headers)
            if '<tbody' not in r:
                return self.items

            posts = client.parseDOM(r, 'tbody')[0]
            posts = client.parseDOM(posts, 'tr')

            for post in posts:
                data = client.parseDOM(post, 'a', ret='href')[1]
                link = urlparse.urljoin(self.base_link, data)

                name = client.parseDOM(post, 'a')[1]

                t = name.split(self.hdlr)[0].replace(self.year, '').replace(
                    '(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(self.title):
                    continue

                if self.hdlr not in name:
                    continue  # raising here would abort the remaining posts

                try:
                    size = re.findall(
                        '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                        post)[0]
                    div = 1 if size.endswith('GB') else 1024
                    size = float(
                        re.sub('[^0-9|/.|/,]', '', size.replace(',',
                                                                '.'))) / div
                    size = '%.2f GB' % size
                except:
                    size = '0'
                    pass

                self.items.append((name, link, size))

            return self.items

        except:
            source_utils.scraper_error('1337X')
            return self.items
Example #13
	def sources(self, url, hostDict, hostprDict):
		try:
			self._sources = []

			if url is None:
				return self._sources

			if debrid.status() is False:
				return self._sources

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

			self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
			self.year = data['year']

			query = '%s %s' % (self.title, self.hdlr)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

			if 'tvshowtitle' in data:
				url = self.search.format('8', urllib.quote(query))
			else:
				url = self.search.format('4', urllib.quote(query))
			# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

			headers = {'User-Agent': client.agent()}

			_html = client.request(url, headers=headers)

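			# hand each RSS <item> block to its own worker thread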
			threads = []
			for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
				threads.append(workers.Thread(self._get_items, i))
			[i.start() for i in threads]
			[i.join() for i in threads]
			return self._sources

		except:
			source_utils.scraper_error('TORRENTDOWNLOADS')
			return self._sources
Example #14
    def links(self, url):
        urls = []
        try:

            if url is None:
                return

            for url in url:
                r = client.request(url)
                r = client.parseDOM(r, 'div', attrs={'class': 'entry'})
                r = client.parseDOM(r, 'a', ret='href')

                r1 = [i for i in r if 'money' in i][0]
                r = client.request(r1)
                r = client.parseDOM(r, 'div', attrs={'id': 'post-\d+'})[0]

                if 'enter the password' in r:
                    plink = client.parseDOM(r, 'form', ret='action')[0]
                    post = {'post_password': '******', 'Submit': 'Submit'}
                    send_post = client.request(plink,
                                               post=post,
                                               output='cookie')
                    link = client.request(r1, cookie=send_post)

                else:
                    link = client.request(r1)

                if '<strong>Single' not in link:
                    continue

                link = re.findall('<strong>Single(.+?)</tr', link,
                                  re.DOTALL)[0]
                link = client.parseDOM(link, 'a', ret='href')
                link = [(i.split('=')[-1]) for i in link]

                for i in link:
                    urls.append(i)

            # return after the loop so every url in the input list is processed
            return urls
        except:
            source_utils.scraper_error('300MBFILMS')
            pass
Example #15
	def _get_items(self, r):
		try:
			size = re.search(r'<size>([\d]+)</size>', r).groups()[0]
			seeders = re.search(r'<seeders>([\d]+)</seeders>', r).groups()[0]

			_hash = re.search(r'<info_hash>([a-zA-Z0-9]+)</info_hash>', r).groups()[0]
			name = re.search(r'<title>(.+?)</title>', r).groups()[0]

			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (_hash.upper(), urllib.quote_plus(name))

			if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
				return  # 'continue' is a syntax error outside a loop

			t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
			if cleantitle.get(t) != cleantitle.get(self.title):
				return

			if self.hdlr not in name:
				raise Exception()

			quality, info = source_utils.get_release_quality(name, name)

			try:
				div = 1000 ** 3
				size = float(size) / div
				size = '%.2f GB' % size
				info.append(size)
			except:
				pass

			info = ' | '.join(info)

			if int(seeders) > self.min_seeders:  # the regex capture is a string
				self._sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
													'info': info, 'direct': False, 'debridonly': True})
		except:
			source_utils.scraper_error('TORRENTDOWNLOADS')
			pass
Example #16
    def resolve(self, url):
        try:
            b = urlparse.urlparse(url).netloc
            b = re.compile('([\w]+[.][\w]+)$').findall(b)[0]

            if b not in base64.b64decode(self.b_link): return url

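            # url is packed as '<link>|<post data>|<header query string>'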
            u, p, h = url.split('|')
            r = urlparse.parse_qs(h)['Referer'][0]

            c = self.request(r, output='cookie', close=False)
            result = self.request(u, post=p, referer=r, cookie=c)

            url = result.split('url=')
            url = [urllib.unquote_plus(i.strip()) for i in url]
            url = [i for i in url if i.startswith('http')]
            url = url[-1]

            return url
        except:
            source_utils.scraper_error('DIRECTDL')
            return
Example #17
	def sources(self, url, hostDict, hostprDict):
		try:
			sources = []

			if url is None:
				return sources

			if debrid.status() is False:
				return sources

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['title'].replace('&', 'and')
			hdlr = data['year']

			query = '%s %s' % (title, hdlr)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

			url = self.search_link % urllib.quote(query)
			url = urlparse.urljoin(self.base_link, url).replace('%20', '-')
			# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

			html = client.request(url)
			if html is None:
				return sources

			quality_size = client.parseDOM(html, 'p', attrs={'class': 'quality-size'})

			tit = client.parseDOM(html, 'title')[0]

			try:
				results = client.parseDOM(html, 'div', attrs={'class': 'ava1'})
			except:
				return sources

			p = 0
			for torrent in results:
				link = re.findall('a data-torrent-id=".+?" href="(magnet:.+?)" class=".+?" title="(.+?)"', torrent, re.DOTALL)

				for url, ref in link:
					url = str(client.replaceHTMLCodes(url).split('&tr')[0])

					if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
						continue

					name = url.split('&dn=')[1]

					t = name.split(hdlr)[0].replace('&', 'and')
					if cleantitle.get(t) != cleantitle.get(title):
						continue

					if hdlr not in tit:
						continue

					quality, info = source_utils.get_release_quality(ref, url)

					try:
						size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', quality_size[p])[-1]
						div = 1 if size.endswith(('GB', 'GiB')) else 1024
						size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
						size = '%.2f GB' % size
						info.append(size)
					except:
						pass

					p += 1
					info = ' | '.join(info)

					sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
												'info': info, 'direct': False, 'debridonly': True})
			return sources

		except:
			source_utils.scraper_error('YIFYDLL')
			return sources
Example #18
	def sources(self, url, hostDict, hostprDict):
		try:
			sources = []

			if url is None:
				return sources

			if debrid.status() is False:
				return sources

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')

			hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))

			query = '%s %s' % (title, hdlr)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

			url = self.search_link % (urllib.quote_plus(query).replace('+', '-'))
			url = urlparse.urljoin(self.base_link, url)
			# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

			html = client.request(url)

			try:
				results = client.parseDOM(html, 'table', attrs={'class': 'forum_header_border'})
				for result in results:
					if 'magnet:' in result:
						results = result
						break
			except:
				return sources

			rows = re.findall('<tr name="hover" class="forum_header_border">(.+?)</tr>', results, re.DOTALL)

			if not rows:  # re.findall returns a list, never None
				return sources

			for entry in rows:
				try:
					try:
						columns = re.findall('<td\s.+?>(.+?)</td>', entry, re.DOTALL)
						derka = re.findall('href="magnet:(.+?)" class="magnet" title="(.+?)"', columns[2], re.DOTALL)[0]
					except:
						continue

					url = 'magnet:%s' % (str(client.replaceHTMLCodes(derka[0]).split('&tr')[0]))
					url = urllib.unquote(url).decode('utf8')

					if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
						continue

					name = derka[1]

					t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
					if cleantitle.get(t) != cleantitle.get(title):
						continue

					if hdlr not in name:
						continue

					try:
						seeders = int(re.findall('<font color=".+?">(.+?)</font>', columns[5], re.DOTALL)[0])
					except:
						continue

					if self.min_seeders > seeders:
						continue

					quality, info = source_utils.get_release_quality(name, url)

					try:
						size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', name)[-1]
						div = 1 if size.endswith(('GB', 'GiB')) else 1024
						size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
						size = '%.2f GB' % size
						info.append(size)
					except:
						pass

					info = ' | '.join(info)

					sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
												'info': info, 'direct': False, 'debridonly': True})
				except:
					source_utils.scraper_error('EZTV')
					continue

			return sources

		except:
			source_utils.scraper_error('EZTV')
			return sources
Example #19
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            category = '+category%3ATV' if 'tvshowtitle' in data else '+category%3AMovies'

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url) + str(category)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            html = client.request(url)
            html = html.replace('&nbsp;', ' ')

            try:
                results = client.parseDOM(
                    html,
                    'table',
                    attrs={
                        'class': 'table table-condensed table-torrents vmiddle'
                    })[0]
            except:
                return sources

            rows = re.findall('<tr(.+?)</tr>', results, re.DOTALL)

            if not rows:  # re.findall returns a list, never None
                return sources

            for entry in rows:
                try:
                    try:
                        url = 'magnet:%s' % (re.findall(
                            'href="magnet:(.+?)"', entry, re.DOTALL)[0])
                        url = str(client.replaceHTMLCodes(url).split('&tr')[0])
                        if url in str(sources):
                            continue
                    except:
                        continue

                    if any(x in url.lower() for x in [
                            'french', 'italian', 'spanish', 'truefrench',
                            'dublado', 'dubbed'
                    ]):
                        continue

                    try:
                        name = re.findall('<a class=".+?>(.+?)</a>', entry,
                                          re.DOTALL)[0]
                        name = client.replaceHTMLCodes(name).replace(
                            '<hl>', '').replace('</hl>', '')
                    except:
                        continue

                    # a lot of movies have a foreign-title translation in front, so remove it
                    if ' / ' in name:
                        name = name.split(' / ')[1]

                    t = name.split(hdlr)[0].replace(data['year'], '').replace(
                        '(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue

                    if hdlr not in name:
                        continue

                    try:
                        seeders = int(
                            re.findall(
                                'class="progress prog trans90" title="Seeders: (.+?) \|',
                                entry, re.DOTALL)[0])
                    except:
                        continue

                    if self.min_seeders > seeders:
                        continue

                    quality, info = source_utils.get_release_quality(name, url)

                    try:
                        size = re.findall(
                            '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))',
                            entry)[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    sources.append({
                        'source': 'torrent',
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })
                except:
                    continue

            return sources

        except:
            source_utils.scraper_error('ZOOGLE')
            return sources
Example #20
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = urlparse.urljoin(self.base_link, self.search_link)
            # url = url % urllib.quote_plus(query)
            url = url % urllib.quote(query)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            r = client.request(url)
            r = client.parseDOM(r, 'h2')

            # z = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            z = zip(client.parseDOM(r, 'a', ret='href'),
                    client.parseDOM(r, 'a'))

            # both branches were identical, so one pairing covers tv shows and movies
            posts = [(i[1], i[0]) for i in z]

            hostDict = hostprDict + hostDict

            items = []

            for post in posts:
                try:
                    try:
                        t = post[0].encode('utf-8')
                    except:
                        t = post[0]

                    u = client.request(post[1])

                    u = re.findall('\'(http.+?)\'', u) + re.findall(
                        '\"(http.+?)\"', u)
                    u = [i for i in u if '/embed/' not in i]
                    u = [i for i in u if 'youtube' not in i]

                    try:
                        # searching the (title, link) tuple itself always raised;
                        # look for a size string in the post title instead
                        s = re.search(
                            '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))',
                            post[0])
                        s = s.groups()[0] if s else '0'
                    except:
                        s = '0'
                        pass

                    items += [(t, i, s) for i in u]

                except:
                    source_utils.scraper_error('MYVIDEOLINK')
                    pass

            for item in items:
                try:
                    url = item[1]

                    url = client.replaceHTMLCodes(url)
                    url = url.encode('utf-8')

                    void = ('.rar', '.zip', '.iso', '.part', '.png', '.jpg',
                            '.bmp', '.gif')
                    if url.endswith(void):
                        continue

                    valid, host = source_utils.is_host_valid(url, hostDict)
                    if not valid:
                        continue

                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')

                    name = item[0]
                    name = client.replaceHTMLCodes(name)

                    t = name.split(hdlr)[0].replace(data['year'], '').replace(
                        '(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue

                    if hdlr not in name:
                        continue

                    quality, info = source_utils.get_release_quality(name, url)

                    try:
                        size = re.findall(
                            '((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))',
                            item[2])[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    sources.append({
                        'source': host,
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })
                except:
                    source_utils.scraper_error('MYVIDEOLINK')
                    pass

            return sources
        except:
            source_utils.scraper_error('MYVIDEOLINK')
            return sources
Example #21
    def _get_sources(self, url):
        try:
            item = client.request(url[0])
            if item is None:
                return

            name = url[1]

            self.title = self.title.replace('!', '')

            # some shows like "Power" have year and hdlr in name
            t = name.split(self.hdlr)[0].replace(self.year, '').replace(
                '(', '').replace(')', '').replace('&', 'and')
            tc = cleantitle.get(t)
            if tc != cleantitle.get(self.title):
                try:
                    if tc != self.aliases[0]:
                        return
                except:
                    return

            if self.hdlr not in name:
                return

            links = dom_parser.parse_dom(item, 'a', req='href')
            links = [i.attrs['href'] for i in links]

            info = []
            try:
                size = re.findall(
                    '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', item)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                size = '%.2f GB' % size
                info.append(size)
            except:
                pass

            info = ' | '.join(info)

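            # skip archive links and sample/trailer junk before validating the host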
            for url in links:
                if any(x in url.lower()
                       for x in ['.rar.', '.zip.', '.iso.']) or any(
                           url.lower().endswith(x)
                           for x in ['.rar', '.zip', '.iso']):
                    continue

                if any(x in url.lower()
                       for x in ['youtube', 'sample', 'trailer']):
                    continue

                valid, host = source_utils.is_host_valid(url, self.hostDict)
                if not valid:
                    continue

                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                quality, info2 = source_utils.get_release_quality(name, url)

                if url in str(self._sources):
                    continue

                self._sources.append({
                    'source': host,
                    'quality': quality,
                    'language': 'en',
                    'url': url,
                    'info': info,
                    'direct': False,
                    'debridonly': True
                })

        except:
            source_utils.scraper_error('ONLINESERIES')
            pass
Example #22
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote(query)
            url = urlparse.urljoin(self.base_link, url)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            html = client.request(url)
            html = html.replace('&nbsp;', ' ')

            try:
                results = client.parseDOM(html,
                                          'table',
                                          attrs={'id': 'searchResult'})
            except:
                return sources

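            # fetch page 2 of the results as well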
            url2 = url.replace('/1/', '/2/')

            html2 = client.request(url2)
            html2 = html2.replace('&nbsp;', ' ')

            try:
                results += client.parseDOM(html2,
                                           'table',
                                           attrs={'id': 'searchResult'})
            except:
                pass  # keep the page-1 results even if page 2 fails to parse

            results = ''.join(results)

            rows = re.findall('<tr(.+?)</tr>', results, re.DOTALL)
            if not rows:  # re.findall returns a list, never None
                return sources

            for entry in rows:
                try:
                    try:
                        url = 'magnet:%s' % (re.findall(
                            'a href="magnet:(.+?)"', entry, re.DOTALL)[0])
                        url = str(client.replaceHTMLCodes(url).split('&tr')[0])
                    except:
                        continue

                    if any(x in url.lower() for x in [
                            'french', 'italian', 'spanish', 'truefrench',
                            'dublado', 'dubbed'
                    ]):
                        continue

                    try:
                        name = re.findall(
                            'class="detLink" title=".+?">(.+?)</a>', entry,
                            re.DOTALL)[0]
                        name = client.replaceHTMLCodes(name)

                        t = name.split(hdlr)[0].replace(
                            data['year'],
                            '').replace('(',
                                        '').replace(')',
                                                    '').replace('&', 'and')
                        if cleantitle.get(t) != cleantitle.get(title):
                            continue
                    except:
                        continue

                    if hdlr not in name:
                        continue

                    try:
                        seeders = int(
                            re.findall('<td align="right">(.+?)</td>', entry,
                                       re.DOTALL)[0])
                    except:
                        continue

                    if self.min_seeders > seeders:
                        continue

                    quality, info = source_utils.get_release_quality(name, url)

                    try:
                        size = re.findall(
                            '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))',
                            entry)[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    sources.append({
                        'source': 'torrent',
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })
                except:
                    source_utils.scraper_error('PIRATEBAY')
                    continue

            return sources

        except:
            source_utils.scraper_error('PIRATEBAY')
            return sources
Example #23
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            hostDict = hostprDict + hostDict

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['title'].replace('&', 'and')

            year = data['year']

            query = '%s %s' % (title, year)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = urlparse.urljoin(self.base_link, self.post_link)

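            # the site is searched with a POST form rather than a GET query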
            post = 'do=search&subaction=search&search_start=0&full_search=0&result_from=1&story=%s' % urllib.quote_plus(
                query)

            r = client.request(url, post=post)
            # log_utils.log('r = %s' % r, log_utils.LOGDEBUG)

            r = client.parseDOM(r, 'div', attrs={'class': 'box-out margin'})
            # log_utils.log('r = %s' % r, log_utils.LOGDEBUG)

            # switch to client.parseDOM() to drop the dom_parser import
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class':
                                                        'news-title'}))
                 for i in r if data['imdb'] in i]
            r = [(dom_parser.parse_dom(i[0], 'a', req='href')) for i in r if i]
            r = [(i[0].attrs['href'], i[0].content) for i in r if i]

            for item in r:
                try:
                    name = item[0]
                    s = re.findall(
                        '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))',
                        name)
                    s = s[0] if s else '0'

                    data = client.request(item[0])
                    data = dom_parser.parse_dom(data,
                                                'div',
                                                attrs={'id': 'r-content'})
                    data = re.findall('\s*<b><a href="(.+?)".+?</a></b>',
                                      data[0].content, re.DOTALL)

                    for url in data:
                        try:
                            try:
                                qual = client.request(url)
                                quals = re.findall(
                                    'span class="file-title" id="file-title">(.+?)</span',
                                    qual)
                                if not quals:
                                    raise Exception()

                                for q in quals:
                                    quality, info = source_utils.get_release_quality(
                                        q, url)

                            except:
                                quality = 'SD'
                                info = []

                            url = client.replaceHTMLCodes(url)
                            url = url.encode('utf-8')

                            if any(x in url for x in ['.rar', '.zip', '.iso']):
                                raise Exception()

                            if not 'turbobit' in url:
                                continue

                            if url in str(sources):
                                continue

                            sources.append({
                                'source': 'turbobit',
                                'quality': quality,
                                'language': 'en',
                                'url': url,
                                'info': info,
                                'direct': True,
                                'debridonly': True
                            })
                            # log_utils.log('sources = %s' % sources, log_utils.LOGDEBUG)

                        except:
                            source_utils.scraper_error('ULTRAHDINDIR')
                            pass
                except:
                    source_utils.scraper_error('ULTRAHDINDIR')
                    pass

            # log_utils.log('sources = %s' % sources, log_utils.LOGDEBUG)
            return sources

        except:
            source_utils.scraper_error('ULTRAHDINDIR')
            return sources
Example #24
	def sources(self, url, hostDict, hostprDict):
		try:
			sources = []

			if url is None:
				return sources

			if debrid.status() is False:
				return sources

			hostDict = hostprDict + hostDict

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

			hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

			query = '%s %s' % (title, hdlr)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

			url = self.search_link % urllib.quote_plus(query)
			url = urlparse.urljoin(self.base_link, url)
			# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

			html = client.request(url)
			posts = client.parseDOM(html, 'item')

			items = []
			for post in posts:
				try:
					t = client.parseDOM(post, 'title')[0]
					u = client.parseDOM(post, 'enclosure', ret='url')
					# the rss feed does not contain size info; another reason switching to site search would be better
					s = re.search('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', post)
					s = s.groups()[0] if s else '0'
					items += [(t, i, s) for i in u]
				except:
					pass

			for item in items:
				try:
					url = item[1]
					if any(x in url for x in ['.rar', '.zip', '.iso', '.part']):
						continue

					url = client.replaceHTMLCodes(url)
					url = url.encode('utf-8')

					valid, host = source_utils.is_host_valid(url, hostDict)
					if not valid:
						continue

					host = client.replaceHTMLCodes(host)
					host = host.encode('utf-8')

					name = item[0]
					name = client.replaceHTMLCodes(name)

					# some shows like "Power" have year and hdlr in name
					t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
					if cleantitle.get(t) != cleantitle.get(title):
						continue

					if hdlr not in name:
						continue

					quality, info = source_utils.get_release_quality(name, url)

					try:
						size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', item[2])[-1]
						div = 1 if size.endswith(('GB', 'GiB')) else 1024
						size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
						size = '%.2f GB' % size
						info.append(size)
					except:
						pass

					info = ' | '.join(info)

					sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
												'info': info, 'direct': False, 'debridonly': True})

				except:
					source_utils.scraper_error('MVRLS')
					pass

			return sources
		except:
			source_utils.scraper_error('MVRLS')
			return sources
Example #25
	def sources(self, url, hostDict, hostprDict):
		try:
			sources = []

			if url is None:
				return sources

			if debrid.status() is False:
				return sources

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

			hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

			query = '%s %s' % (title, hdlr)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

			url = urlparse.urljoin(self.base_link, self.search_link.format(query[0].lower(), cleantitle.geturl(query)))
			# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

			r = client.request(url)
			if '<tbody' not in r:
				return sources

			r = client.parseDOM(r, 'tbody')[0]

			results = client.parseDOM(r, 'tr')
			posts = [i for i in results if 'magnet:' in i]

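			# follow the 'Next Page' link once to pick up page 2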
			try:
				next_page = [i for i in results if 'Next Page' in i]

				if not next_page:
					raise Exception()

				page = client.parseDOM(next_page, 'a', ret='href', attrs={'title': 'Downloads | Page 2'})[0]

				r2 = client.request(self.base_link+page)
				results2 = client.parseDOM(r2, 'tr')
				posts += [i for i in results2 if 'magnet:' in i]
			except:
				pass

			for post in posts:
				post = post.replace('&nbsp;', ' ')

				links = client.parseDOM(post, 'a', ret='href')
				magnet = [i.replace('&amp;', '&') for i in links if 'magnet:' in i][0]
				url = magnet.split('&tr')[0]

				if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
					continue

				name = client.parseDOM(post, 'a', ret='title')[1]
				t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
				if cleantitle.get(t) != cleantitle.get(title):
					continue

				if hdlr not in name:
					continue

				quality, info = source_utils.get_release_quality(name, url)

				try:
					size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
					div = 1 if size.endswith(('GB', 'GiB')) else 1024
					size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
					size = '%.2f GB' % size
					info.append(size)
				except:
					pass

				info = ' | '.join(info)

				sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
											'info': info, 'direct': False, 'debridonly': True})

			return sources
		except:
			source_utils.scraper_error('MAGNETDL')
			return sources
Example #26
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []
            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
                    if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url).replace('-', '+')

            r = self.scraper.get(url).content

            if r is None and 'tvshowtitle' in data:
                season = re.search('S(.*?)E', hdlr)
                season = season.group(1)
                url = title
                r = self.scraper.get(url).content

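            # try the search up to twice; tv queries sometimes need a second request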
            for loopCount in range(0, 2):
                if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                    r = self.scraper.get(url).content

                posts = client.parseDOM(r, "h2")

                hostDict = hostprDict + hostDict

                items = []
                for post in posts:
                    try:
                        u = client.parseDOM(post, 'a', ret='href')
                        for i in u:
                            try:
                                name = str(i)
                                items.append(name)
                            except:
                                source_utils.scraper_error('0DAY')
                                pass
                    except:
                        source_utils.scraper_error('0DAY')
                        pass

                if len(items) > 0:
                    break

            for item in items:
                try:
                    info = []
                    i = str(item)
                    r = self.scraper.get(i).content
                    u = client.parseDOM(r,
                                        "div",
                                        attrs={"class": "entry-content"})

                    for t in u:
                        r = re.compile('a href="(.+?)">.+?<').findall(t)
                        query = query.replace(' ', '.')

                        for url in r:

                            if query not in url:
                                continue

                            if any(x in url for x in ['.rar', '.zip', '.iso']):
                                continue

                            quality, info = source_utils.get_release_quality(
                                url)

                            valid, host = source_utils.is_host_valid(
                                url, hostDict)
                            if not valid:
                                continue

                            sources.append({
                                'source': host,
                                'quality': quality,
                                'language': 'en',
                                'url': url,
                                'info': info,
                                'direct': False,
                                'debridonly': True
                            })

                except:
                    source_utils.scraper_error('0DAY')
                    pass

            return sources
        except:
            source_utils.scraper_error('0DAY')
            return sources
Example #27
0
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            hostDict = hostprDict + hostDict

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url).replace('-', '+')
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            r = client.request(url)
            # r = self.scraper.get(url).content

            if r is None and 'tvshowtitle' in data:
                season = re.search('S(.*?)E', hdlr)
                season = season.group(1)
                url = title
                # r = self.scraper.get(url).content
                r = client.request(url)

            for loopCount in range(0, 2):
                if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                    r = self.scraper.get(url).content
                    # r = client.request(url)
                posts = client.parseDOM(
                    r, "div", attrs={"class": "postpage_movie_download"})

                items = []
                for post in posts:
                    try:
                        u = client.parseDOM(post, 'a', ret='href')

                        for i in u:
                            name = str(i)
                            items.append(name)
                    except:
                        pass
                if len(items) > 0:
                    break

            for item in items:
                try:
                    i = str(item)
                    # r = self.scraper.get(i).content
                    r = client.request(i)
                    if r is None:
                        continue

                    tit = client.parseDOM(r,
                                          'meta',
                                          attrs={'property': 'og:title'},
                                          ret='content')[0]
                    t = tit.split(hdlr)[0].replace(data['year'], '').replace(
                        '(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue

                    if hdlr not in tit:
                        continue

                    u = client.parseDOM(r,
                                        "div",
                                        attrs={"class": "multilink_lnks"})

                    for t in u:

                        r = client.parseDOM(t, 'a', ret='href')

                        for url in r:
                            if 'www.share-online.biz' in url:
                                continue

                            if url in str(sources):
                                continue

                            quality, info = source_utils.get_release_quality(
                                url, url)

                            valid, host = source_utils.is_host_valid(
                                url, hostDict)

                            if valid:
                                sources.append({
                                    'source': host,
                                    'quality': quality,
                                    'language': 'en',
                                    'url': url,
                                    'info': info,
                                    'direct': False,
                                    'debridonly': True
                                })
                except:
                    source_utils.scraper_error('2DDL')
                    pass

            return sources

        except:
            source_utils.scraper_error('2DDL')
            return sources
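
The `if url in str(sources)` guard above deduplicates by substring-searching a stringified list of dicts, which is slow and prone to false positives. A set-based sketch of the same intent:

def dedupe(urls):
    # yield each URL once, preserving order, with O(1) membership checks
    seen = set()
    for url in urls:
        if url not in seen:
            seen.add(url)
            yield url

list(dedupe(['http://a/1', 'http://b/2', 'http://a/1']))  # -> ['http://a/1', 'http://b/2']
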
Example #28
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            try:
                r = self.scraper.get(url).content

                links = zip(
                    re.findall('href="(magnet:.+?)"', r, re.DOTALL),
                    re.findall(
                        '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                        r, re.DOTALL))

                for link in links:
                    url = link[0].split('&xl')[0]

                    size = link[1]

                    if any(x in url.lower() for x in [
                            'french', 'italian', 'spanish', 'truefrench',
                            'dublado', 'dubbed'
                    ]):
                        continue

                    if '&dn=' not in url:
                        continue
                    name = url.split('&dn=')[1]
                    t = name.split(hdlr)[0].replace(data['year'], '').replace(
                        '(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue

                    if hdlr not in name:
                        continue

                    quality, info = source_utils.get_release_quality(name, url)

                    try:
                        # treat GiB like GB; only MiB/MB need the 1024 divisor
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(
                            re.sub('[^0-9.]', '', size.replace(',', '.'))) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass

                    info = ' | '.join(info)

                    sources.append({
                        'source': 'torrent',
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })

                return sources

            except:
                source_utils.scraper_error('PIRATEIRO')
                return sources

        except:
            source_utils.scraper_error('PIRATEIRO')
            return sources
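
Extracting the release name from a magnet's dn parameter, as above, is worth isolating; a sketch (magnet_name is a hypothetical helper) that also URL-decodes the name and tolerates magnets without a display name:

import urllib  # Python 2, matching the scrapers above

def magnet_name(magnet):
    # pull the display-name (dn) parameter out of a magnet link; '' when absent
    if '&dn=' not in magnet:
        return ''
    name = magnet.split('&dn=')[1].split('&')[0]
    return urllib.unquote_plus(name)

magnet_name('magnet:?xt=urn:btih:abc123&dn=Some.Movie.2019.1080p&tr=udp://tracker')
# -> 'Some.Movie.2019.1080p'
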
Example #29
0
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url).replace('%3A+', '+')
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            r = client.request(url)

            if r is None and 'tvshowtitle' in data:
                season = re.search('S(.*?)E', hdlr)
                season = season.group(1)
                url = title
                r = client.request(url)

            for loopCount in range(0, 2):
                if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                    r = client.request(url)
                posts = client.parseDOM(r, "h2", attrs={"class": "postTitle"})
                hostDict = hostprDict + hostDict

                items = []
                for post in posts:
                    try:
                        u = client.parseDOM(post, 'a', ret='href')
                        for i in u:
                            name = str(i)

                            tit = name.rsplit('/', 1)[0]
                            tit = tit.rsplit('/', 1)[1].upper()
                            t = tit.split(hdlr)[0].replace(
                                data['year'], '').replace('(', '').replace(
                                    ')', '').replace('&', 'and')
                            if cleantitle.get(t) != cleantitle.get(title):
                                continue

                            # append only after the slug passes the title check
                            items.append(name)
                    except:
                        source_utils.scraper_error('MAXRLS')
                        pass
                if len(items) > 0:
                    break

            for item in items:
                try:
                    i = str(item)
                    r = client.request(i)
                    u = client.parseDOM(r,
                                        "div",
                                        attrs={"class": "postContent"})

                    for t in u:
                        r = client.parseDOM(t, 'a', ret='href')

                        for url in r:
                            quality, info = source_utils.get_release_quality(
                                url)

                            valid, host = source_utils.is_host_valid(
                                url, hostDict)
                            if not valid:
                                continue

                            sources.append({
                                'source': host,
                                'quality': quality,
                                'language': 'en',
                                'url': url,
                                'info': info,
                                'direct': False,
                                'debridonly': True
                            })
                except:
                    source_utils.scraper_error('MAXRLS')
                    pass

            return sources

        except:
            source_utils.scraper_error('MAXRLS')
            return sources
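
The title gate repeated in these scrapers (split on hdlr, strip the year and brackets, compare cleaned titles) can be summarized as one predicate. A rough standalone equivalent, with normalize() standing in for cleantitle.get:

import re

def normalize(title):
    # stand-in for cleantitle.get: lowercase, alphanumerics only
    return re.sub(r'[^a-z0-9]', '', title.lower())

def titles_match(release_name, wanted_title, hdlr, year):
    t = release_name.split(hdlr)[0].replace(year, '')
    t = t.replace('(', '').replace(')', '').replace('&', 'and')
    return normalize(t) == normalize(wanted_title)

titles_match('Some.Movie.2019.1080p.BluRay', 'Some Movie', '2019', '2019')  # -> True
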
Example #30
0
    def sources(self, url, hostDict, hostprDict):
        try:
            sources = []

            if url is None:
                return sources

            if debrid.status() is False:
                raise Exception()

            hostDict = hostprDict + hostDict

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            title = title.replace('&', 'and').replace('Special Victims Unit',
                                                      'SVU')

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s %s' % (title, hdlr)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url).replace('-', '+')
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

            r = client.request(url)

            if r is None and 'tvshowtitle' in data:
                season = re.search('S(.*?)E', hdlr)
                season = season.group(1)
                url = title

                r = client.request(url)

            for loopCount in range(0, 2):
                if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                    r = client.request(url)

                posts = client.parseDOM(r,
                                        "table",
                                        attrs={"class": "download"})

                items = []
                for post in posts:
                    try:
                        u = client.parseDOM(post, 'a', ret='href')
                        for i in u:
                            try:
                                name = str(i)
                                items.append(name)
                            except:
                                pass
                    except:
                        source_utils.scraper_error('DDLSPOT')
                        pass

                if len(items) > 0:
                    break

            for item in items:
                try:
                    info = []

                    i = str(item)
                    i = self.base_link + i
                    r = client.request(i)
                    u = client.parseDOM(r, "div", attrs={"class": "dl-links"})

                    for t in u:
                        r = re.compile(
                            'a href=".+?" rel=".+?">(.+?)<').findall(t)

                        for url in r:
                            if any(x in url for x in ['.rar', '.zip', '.iso']):
                                continue

                            quality, info = source_utils.get_release_quality(
                                url)

                            valid, host = source_utils.is_host_valid(
                                url, hostDict)
                            if not valid:
                                continue

                            sources.append({
                                'source': host,
                                'quality': quality,
                                'language': 'en',
                                'url': url,
                                'info': info,
                                'direct': False,
                                'debridonly': True
                            })

                except:
                    source_utils.scraper_error('DDLSPOT')
                    pass

            return sources
        except:
            source_utils.scraper_error('DDLSPOT')
            return sources
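
source_utils.is_host_valid, used by every hoster-based scraper above, takes a URL plus the combined host list and returns a (valid, host) pair; its internals are not shown here. A plausible sketch of the domain matching it performs, under that assumption:

import urlparse  # Python 2, matching the code above

def is_host_valid(url, host_list):
    # match the URL's domain against the resolvable hoster list (assumed logic)
    try:
        host = urlparse.urlparse(url).netloc.lower().replace('www.', '')
        for h in host_list:
            if h.lower() in host:
                return True, h
        return False, host
    except:
        return False, ''

is_host_valid('http://www.rapidgator.net/file/abc', ['rapidgator.net'])
# -> (True, 'rapidgator.net')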