Example #1
 def _link(self, url, index):
     try:
         html = BeautifulSoup(client.request(url))
         buttons = html.find_all('div', class_='download-btn')
         link = None
         links = []
         for i in buttons:
             try:
                 links.append(i.find_all('a')[0].get('href'))
             except:
                 pass
         for i in links:
             if i.startswith('magnet:'):
                 link = i
                 break
         if not link:
             for i in links:
                 if i.startswith('/download'):
                     link = urlparse.urljoin(self.base_link, i)
                     break
         self.tLock.acquire()
         self.tSources[index]['url'] = link
     except:
         tools.Logger.error()
     finally:
         try:
             self.tLock.release()
         except:
             pass
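The `_link` workers in these examples assume a driver that spawns one thread per scraped source and serializes access to the shared lists through `tLock`. The names `tLock`, `tSources` and `tThreadsLinks` come from the snippets themselves; the driver below is only a minimal sketch of how such workers are plausibly wired up, not the providers' actual code.

import threading

class LinkResolver(object):
    # Minimal stand-in for the provider classes above.
    def __init__(self):
        self.tLock = threading.Lock()
        self.tSources = []
        self.tThreadsLinks = []

    def _link(self, url, index):
        # Stub worker: the real providers resolve the final magnet/download link here.
        self.tLock.acquire()
        try:
            self.tSources[index]['url'] = url
        finally:
            self.tLock.release()

    def add(self, url):
        # Reserve a slot under the lock, then resolve it on a background thread.
        self.tLock.acquire()
        self.tSources.append({'url': None})
        index = len(self.tSources) - 1
        self.tLock.release()
        thread = threading.Thread(target=self._link, args=(url, index))
        self.tThreadsLinks.append(thread)
        thread.start()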
Example #2
	def _verifyAccountsImdb(self, checkDisabled = True):
		name = 'IMDb'
		if self.__done(name): return
		try:
			if not checkDisabled or self.__enabled('accounts.informants.imdb.enabled'):
				link = 'http://www.imdb.com/user/ur%s/watchlist' % tools.Settings.getString('accounts.informants.imdb.user').replace('ur', '')
				data = client.request(link)
				if data:
					indexStart = data.find('IMDbReactInitialState.push(') # Sometimes the page is not fully rendered yet and the JSON is still in a JS tag.
					if indexStart < 0: # Data was rendered into the HTML.
						data = BeautifulSoup(data)
						if len(data.find_all('div', class_ = 'error_code_404')) > 0:
							status = self.StatusFailure
						elif len(data.find_all('div', id = 'unavailable')) > 0:
							status = self.StatusLimited
						elif len(data.find_all('div', class_ = 'lister-widget')) > 0:
							status = self.StatusOperational
						else:
							status = self.StatusFailure
					else: # Data still in JS.
						indexStart += 27
						indexEnd = data.find(');', indexStart)
						data = json.loads(data[indexStart : indexEnd])
						if 'titles' in data and len(data['titles'].values()) > 0:
							status = self.StatusOperational
						else:
							status = self.StatusLimited
				else: # Wrong user ID, returns 404 error.
					status = self.StatusFailure
			else:
				status = self.StatusDisabled
		except:
			status = self.StatusFailure
		return self.__append(name, status)
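The interesting part of this check is slicing the `IMDbReactInitialState.push(...)` payload out of the raw response before falling back to HTML parsing. Isolated from the account plumbing, that slicing step looks roughly like this (a sketch; `data` is assumed to be the raw page source):

import json

def extract_push_payload(data, marker='IMDbReactInitialState.push('):
    # Locate the JS call, then parse everything up to the closing ');' as JSON.
    start = data.find(marker)
    if start < 0:
        return None # The page was rendered into plain HTML instead.
    start += len(marker)
    end = data.find(');', start)
    return json.loads(data[start:end])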
Example #3
 def resolve(self, url):
     try:
         html = BeautifulSoup(client.request(url))
         html = html.find_all('ul', class_='download-links-dontblock')[0]
         return html.find_all('a')[0]['href']
     except:
         return None
Example #4
 def resolve(self, url):
     html = BeautifulSoup(client.request(url))
     htmlLinks = html.find_all('a')
     for htmlLink in htmlLinks:
         link = htmlLink['href']
         if link.startswith('magnet:'):
             return link
     return None
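On BeautifulSoup builds that support CSS selectors, the magnet scan above could arguably be collapsed into a single `select` call. A hedged alternative, assuming `html` is the already-parsed page:

def first_magnet(html):
    # Attribute selector: anchors whose href starts with 'magnet:'.
    links = html.select('a[href^="magnet:"]')
    return links[0]['href'] if links else None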
Example #5
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None: raise Exception()

			data = self._decode(url)

			if 'exact' in data and data['exact']:
				query = title = data['title']
				titles = None
				year = None
			else:
				title = data['title']
				titles = data['alternatives'] if 'alternatives' in data else None
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			if not self._query(query): return sources

			url = urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(query)
			html = BeautifulSoup(client.request(url))

			htmlTable = html.find_all('div', id = 'Torrents')[0].find_all('div', class_ = 'DownloadFlags')[0]
			htmlRows = htmlTable.find_all('a', recursive = False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.
			for i in range(1, len(htmlRows)): # Skip first entry
				try:
					htmlRow = htmlRows[i]
					htmlData = htmlRow['onmouseover'].split(',')

					if not len(htmlData) == 11: continue

					# Name
					htmlName = htmlData[5].strip().strip("'")

					# Link
					htmlLink = htmlRow['href'].strip()
					htmlLink = re.search('\/.*\/(.*)\.aspx', htmlLink).group(1).replace('-', '.')
					htmlLink = urlparse.urljoin(self.base_link, self.download_link) % urllib.quote_plus(htmlLink)

					# Size
					htmlSize = htmlData[7].strip().strip("'")

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, link = htmlLink, size = htmlSize, seeds = 1)

					# Ignore
					meta.mIgnoreLength = 10
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				except:
					pass

			return sources
		except:
			return sources
Example #6
 def resolve(self, url):
     try:
         html = BeautifulSoup(client.request(url))
         html = html.find_all('table', class_='list')[0]
         html = html.find_all('tr', recursive=False)[4]
         return self.base_link + html.find_all('a')[0]['href']
     except:
         tools.Logger.error()
         return None
Example #7
 def _link(self, link):
     try:
         html = BeautifulSoup(client.request(link))
         html = html.find_all('table', class_='list')[0]
         html = html.find_all('tr', recursive=False)[4]
         resolved = self.base_link + html.find_all('a')[0]['href']
         self.tLock.acquire()
         self.tLinks[link] = resolved
         self.tLock.release()
     except:
         tools.Logger.error()
Example #8
	def _hash(self, url, index):
		try:
			htmlSingle = BeautifulSoup(client.request(url))
			htmlInfo = htmlSingle.find('table', 'informations')
			htmlHash = htmlInfo.find_all('tr')[4].find_all('td')[1].getText()
			self.tLock.acquire()
			if htmlHash: self.tSources[index]['hash'] = htmlHash
		except:
			tools.Logger.error()
		finally:
			try: self.tLock.release()
			except: pass
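The hash read from the 'informations' table is stored as-is. If it needed validating first (Example #21 uses an internal `tools.Hash.valid` for this, whose implementation is not shown), a plain check for a 40-character hexadecimal BitTorrent info-hash would be enough:

import re

def looks_like_infohash(value):
    # BitTorrent v1 info-hashes are 40 hexadecimal characters (SHA-1).
    return bool(re.match(r'^[0-9a-fA-F]{40}$', value or ''))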
Example #9
	def _link(self, url, index):
		try:
			html = BeautifulSoup(client.request(url))
			htmlCollection = html.find('input', {'name': 'collection'})['value']
			htmlUuid = html.find('input', {'name': 'uuid'})['value']
			self.tLock.acquire()
			if htmlUuid and htmlCollection: self.tSources[index]['url'] = urlparse.urljoin(self.base_link, self.download_link) % (htmlCollection, htmlUuid)
		except:
			tools.Logger.error()
		finally:
			try: self.tLock.release()
			except: pass
Example #10
 def _link(self, link):
     try:
         html = BeautifulSoup(client.request(link))
         htmlLinks = html.find_all('a')
         for i in range(len(htmlLinks)):
             resolved = htmlLinks[i]['href']
             if resolved.lower().startswith('magnet:'):
                 self.tLock.acquire()
                 self.tLinks[link] = resolved
                 self.tLock.release()
                 break
     except:
         pass
Example #11
 def _link(self, url, index):
     try:
         html = BeautifulSoup(client.request(url))
         html = html.find_all('div', class_='download-btn')[0]
         link = html.find_all('a')[0]
         self.tLock.acquire()
         self.tLinks[index] = self.base_link + link['href']
     except:
         tools.Logger.error()
     finally:
         try:
             self.tLock.release()
         except:
             pass
Example #12
	def _link(self, url, index):
		try:
			html = BeautifulSoup(client.request(url))
			html = html.find_all('div', class_ = 'details')[0]
			links = html.find_all('a')
			for link in links:
				if link['href'].startswith('magnet:'):
					self.tLock.acquire()
					self.tLinks[index] = link['href']
					break
		except:
			tools.Logger.error()
		finally:
			try: self.tLock.release()
			except: pass
Example #13
    def test_last_ditch_entity_replacement(self):
        # This is a UTF-8 document that contains bytestrings
        # completely incompatible with UTF-8 (ie. encoded with some other
        # encoding).
        #
        # Since there is no consistent encoding for the document,
        # Unicode, Dammit will eventually encode the document as UTF-8
        # and encode the incompatible characters as REPLACEMENT
        # CHARACTER.
        #
        # If chardet is installed, it will detect that the document
        # can be converted into ISO-8859-1 without errors. This happens
        # to be the wrong encoding, but it is a consistent encoding, so the
        # code we're testing here won't run.
        #
        # So we temporarily disable chardet if it's present.
        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
        chardet = resources.lib.externals.beautifulsoup.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            resources.lib.externals.beautifulsoup.dammit.chardet_dammit = noop
            dammit = UnicodeDammit(doc)
            self.assertEqual(True, dammit.contains_replacement_characters)
            self.assertTrue(u"\ufffd" in dammit.unicode_markup)

            soup = BeautifulSoup(doc, "html.parser")
            self.assertTrue(soup.contains_replacement_characters)
        finally:
            logging.disable(logging.NOTSET)
            resources.lib.externals.beautifulsoup.dammit.chardet_dammit = chardet
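Outside the bundled test harness, the same Unicode, Dammit behaviour can be exercised through the public bs4 import path. A minimal sketch; as the comments above explain, the result depends on whether chardet is installed:

from bs4 import UnicodeDammit

doc = b'<html><b>\xd8\xa8\xd8\xaa\xd8\xb1</b><i>\xc8\xd2\xd1\x90\xca\xd1\xed\xe4</i></html>'
dammit = UnicodeDammit(doc)
print(dammit.original_encoding)               # Whatever encoding was detected or fallen back to.
print(dammit.contains_replacement_characters) # True only if bytes had to be replaced.
print(u'\ufffd' in dammit.unicode_markup)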
Example #14
 def _link(self, url, index):
     try:
         html = BeautifulSoup(client.request(url))
         links = html.find_all('a')
         link = None
         for i in links:
             i = i.get('href')
             if i.startswith('magnet:'):
                 link = i
                 break
         self.tLock.acquire()
         self.tSources[index]['url'] = link
     except:
         tools.Logger.error()
     finally:
         try:
             self.tLock.release()
         except:
             pass
Example #15
 def _verifyAccountsImdb(self, checkDisabled=True, user=None):
     name = 'IMDb'
     if self.__done(name): return
     try:
         if not checkDisabled or self.__enabled(
                 'accounts.informants.imdb.enabled'):
             if user == None:
                 user = tools.Settings.getString(
                     'accounts.informants.imdb.user').replace('ur', '')
             link = 'http://www.imdb.com/user/ur%s/watchlist' % user
             data = client.request(link)
             if data:
                 indexStart = data.find(
                     'IMDbReactInitialState.push('
                 )  # Sometimes the page is not fully rendered yet and the JSON is still in a JS tag.
                 if indexStart < 0:  # Data was rendered into the HTML.
                     data = BeautifulSoup(data)
                     if len(data.find_all('div',
                                          class_='error_code_404')) > 0:
                         status = Verification.StatusFailure
                     elif len(data.find_all('div', id='unavailable')) > 0:
                         status = Verification.StatusLimited
                     elif len(data.find_all('div',
                                            class_='lister-widget')) > 0:
                         status = Verification.StatusOperational
                     else:
                         status = Verification.StatusFailure
                 else:  # Data still in JS.
                     indexStart += 27
                     indexEnd = data.find(');', indexStart)
                     data = json.loads(data[indexStart:indexEnd])
                     if 'titles' in data and len(
                             data['titles'].values()) > 0:
                         status = Verification.StatusOperational
                     else:
                         status = Verification.StatusLimited
             else:  # Wrong user ID, returns 404 error.
                 status = Verification.StatusFailure
         else:
             status = Verification.StatusDisabled
     except:
         status = Verification.StatusFailure
     return self.__append(name=name, status=status)
Example #16
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if pack: query = '%s %d' % (title, season)
			else: query = '%s S%02dE%02d' % (title, season, episode)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(query)
			html = BeautifulSoup(client.request(url))

			htmlTable = None
			tables = html.find_all('table', class_ = 'forum_header_border')
			for table in tables:
				try:
					row = table.find_all('tr')[1]
					headers = row.find_all('td', class_ = 'forum_thread_header')
					if headers[0].getText() == 'Show' and headers[5].getText() == 'Seeds':
						htmlTable = table
						break
				except:
					pass

			if htmlTable == None:
				raise Exception()

			htmlRows = htmlTable.find_all('tr', recursive = False) # Use children and no further.

			for i in range(2, len(htmlRows)): # First two rows are the headers.
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.
				htmlInfo = htmlColumns[1]

				# Name
				htmlName = htmlInfo.find_all('a', class_ = 'epinfo')[0]['title'].strip()

				# Size
				try: htmlSize = htmlColumns[3].getText() # Does not always have size.
				except: htmlSize = None

				# Link
				htmlLink = htmlColumns[2].find_all('a', class_ = 'magnet')[0]['href']

				# Seeds
				try: htmlSeeds = htmlColumns[5].find_all('font')[0].getText() # Does not always have seeds.
				except: htmlSeeds = None

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				if meta.ignore(False):
					continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})

			return sources
		except:
			return sources
Example #17
    def _search(self, url, query, show, type, title, titles, year, season,
                episode, pack, packCount, packException, ignoreContains):
        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 0
        added = False

        try:
            while True:
                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                html = BeautifulSoup(client.request(url % (type, query)))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='table-corps')
                if len(htmlTable) > 0:
                    htmlTable = htmlTable[0]
                    try:
                        htmlTable = htmlTable.find_all('tbody',
                                                       recursive=False)[0]
                    except:
                        pass
                    htmlRows = htmlTable.find_all('tr', recursive=False)
                    for i in range(len(htmlRows)):
                        htmlRow = htmlRows[i]
                        htmlColumns = htmlRow.find_all('td', recursive=False)

                        # Name
                        htmlName = htmlColumns[0].find_all(
                            'a')[0].getText().strip()

                        # Link
                        htmlLink = urlparse.urljoin(
                            self.base_link, htmlColumns[0].find_all('a')
                            [0].get('href').encode('utf-8'))

                        # Size (the site lists sizes in French octet units such as 'Mo'/'Go'; rewrite them as 'Mb'/'Gb')
                        htmlSize = re.sub(
                            '([mMkKgGtT]?)[oO]', '\\1b',
                            htmlColumns[0].find_all(
                                'div', class_='poid')[0].getText())
                        if not 'b' in htmlSize: htmlSize = htmlSize + ' mb'

                        # Seeds
                        try:
                            htmlSeeds = int(htmlColumns[0].find_all(
                                'div', class_='up')[0].getText().strip())
                        except:
                            htmlSeeds = None

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 titles=titles,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=htmlSeeds)

                        # Ignore
                        meta.ignoreAdjust(contains=ignoreContains)
                        if meta.ignore(True, season=not packException):
                            continue

                        # Add
                        self.tLock.acquire()
                        self.tSources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        self.tLock.release()
                        added = True

                        self.tLock.acquire()
                        thread = threading.Thread(
                            target=self._link,
                            args=(htmlLink, len(self.tSources) - 1))
                        self.tThreadsLinks.append(thread)
                        self.tLock.release()
                        thread.start()

                # Only shows 1 page.
                break
        except:
            tools.Logger.error()
        finally:
            try:
                self.tLock.release()
            except:
                pass
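`_search` starts a `_link` thread per result but never waits for them, so the provider presumably joins `tThreadsLinks` before handing back its sources. A hedged sketch of that join step (the timeout value is an assumption):

def wait_for_links(threads, timeout=30):
    # Wait for every outstanding link-resolution thread, but never block forever on one.
    for thread in threads:
        try:
            thread.join(timeout)
        except Exception:
            pass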
Example #18
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None: raise Exception()

            ignoreContains = None
            data = self._decode(url)

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                    if (season == 0
                            or episode == 0) and ('title' in data
                                                  and not data['title'] == None
                                                  and not data['title'] == ''):
                        title = '%s %s' % (
                            data['tvshowtitle'], data['title']
                        )  # Change the title for metadata filtering.
                        query = title
                        ignoreContains = len(data['title']) / float(
                            len(title)
                        )  # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                    else:
                        if pack: query = '%s %d' % (title, season)
                        else:
                            query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            if not self._query(query): return sources
            query = urllib.quote_plus(query)

            category = self.category_shows if 'tvshowtitle' in data else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)
            '''
			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (category, query, page)
				html = client.request(urlNew)

				# Demonoid does not have a closing tag for the rows.
				# This causes BeautifulSoup to only detect the first row.
				# Manually add a closing </tr> tag, except for the first row.
				html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '<tr align="left" bgcolor="">', 1)
				html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '</tr><tr align="left" bgcolor="#CCCCCC">')

				html = BeautifulSoup(html)

				page += 1
				added = False

				htmlTable = html.find_all('td', class_ = 'ctable_content_no_pad')[0].find_all('table', recursive = False)[1]
				htmlRows = html.find_all('tr')

				i = 0
				while i < len(htmlRows):
					try:
						htmlRow = htmlRows[i]
						i += 1 # Normal loop increment.

						if len(htmlRow.find_all('td', {'rowspan' : '2'})) == 0:
							continue

						# Name
						htmlName = htmlRow.find_all('td', {'colspan' : '9'})[0].find_all('a')[0].getText().strip()

						htmlRow = htmlRows[i]
						i += 1 # Go to next row, because items are split over to lines.

						# Size
						htmlSize = htmlColumns[3].getText().strip()

						# Link
						htmlLink = htmlColumns[2].find_all('a')[0]['href']

						# Seeds
						htmlSeeds = int(htmlColumns[6].getText().strip())

						items = htmlColumns[0].find_all('a')

						# Release
						try:
							htmlRelease = items[1].getText()
							if not 'other' in htmlRelease.lower(): htmlName += ' ' + htmlRelease
						except:
							pass

						# Language
						try:
							htmlLanguage = items[2].getText()
						except:
							htmlLanguage = None

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds, languageAudio = htmlLanguage)

						# Ignore
						meta.ignoreAdjust(contains = ignoreContains)
						if meta.ignore(True): continue

						# Add
						sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						added = True
					except:
						pass
			'''

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (category, query, page)
                html = client.request(urlNew)

                page += 1
                added = False

                htmlRows = re.findall(
                    '<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">',
                    html, re.M | re.S)
                htmlRows = ['<tr><td>' + i for i in htmlRows]
                for htmlRow in htmlRows:
                    try:
                        htmlRow = BeautifulSoup(htmlRow)
                        htmlColumns = htmlRow.find_all('td')

                        # Name
                        htmlName = htmlRow.find_all('a')[1].getText().strip()

                        # Size
                        htmlSize = htmlColumns[4].getText().strip()

                        # Link
                        htmlLink = htmlRow.find_all('a')[1]['href']
                        htmlLink = urlparse.urljoin(self.base_link, htmlLink)
                        htmlLink = re.search('genidy=(.*)', htmlLink,
                                             re.IGNORECASE)
                        if not htmlLink: continue
                        htmlLink = self.download_link % htmlLink.group(1)

                        # Seeds
                        try:
                            htmlSeeds = int(htmlColumns[7].getText().strip())
                        except:
                            htmlSeeds = 0

                        items = htmlColumns[0].find_all('a')

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 titles=titles,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=htmlSeeds)

                        # Ignore
                        meta.ignoreAdjust(contains=ignoreContains)
                        if meta.ignore(True): continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        added = True
                    except:
                        pass

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
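The working loop sidesteps Demonoid's unclosed `<tr>` tags by cutting the page into per-row fragments with a regex and parsing each fragment separately. Stripped of the provider plumbing, the row-splitting idea is roughly:

import re
from bs4 import BeautifulSoup

def demonoid_rows(html):
    # Each torrent starts at a 'tstart' comment and ends where the next grey row begins.
    fragments = re.findall(
        r'<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">',
        html, re.M | re.S)
    return [BeautifulSoup('<tr><td>' + fragment, 'html.parser') for fragment in fragments]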
Example #19
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None: raise Exception()

			ignoreContains = None
			data = self._decode(url)

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = None
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = data['alternatives'] if 'alternatives' in data else None
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
					if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
						title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
						query = title
						ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
					else:
						if pack: query = '%s %d' % (title, season)
						else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			query = urllib.quote_plus(query)
			if not self._query(query): return sources

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = (self.base_link + self.search_link) % (query, page)
				html = BeautifulSoup(client.request(urlNew))
				htmlTable = html.find_all('table', class_ = 'search-table')[0]
				htmlRows = htmlTable.find_all('tr', recursive = False)

				page += 1
				added = False

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False)

					# Name
					htmlName = htmlColumns[0].getText().strip()

					# Size
					htmlSize = htmlColumns[2].getText().strip()

					# Link
					htmlLink = htmlColumns[0].find_all('a')[0]['href'].strip()
					htmlLink = network.Container(htmlLink).torrentMagnet(title = title)

					# Seeds
					htmlSeeds = int(htmlColumns[3].getText().strip())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					meta.ignoreAdjust(contains = ignoreContains)
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #20
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), page)
				html = BeautifulSoup(client.request(urlNew))

				page += 1
				added = False

				htmlTable = html.find_all('span', id = 'ui_searchResult')[0]
				htmlRows = htmlTable.find_all('div', class_ = 'panel-body')

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i].find_all('div', class_ = 'media', recursive = False)[0].find_all('div', class_ = 'row', recursive = False)[0]
					htmlColumns = htmlRow.find_all('div', recursive = False) # Use children and no further.
					htmlInfo = htmlColumns[0]

					# Name
					htmlName = htmlInfo.find_all('a', class_ = 'text-primary')
					if len(htmlName) == 0: # 'Dangerous' items (encrypted or incomplete - see below at the ignore section) have a text-muted class and are already filtered out here.
						continue
					else:
						htmlName = htmlName[0].getText()

					# Size
					htmlSize = htmlColumns[1].getText().replace('&nbsp;', ' ')
					htmlSize = htmlSize.splitlines()[0] # Otherwise the find function does not work.
					indexEnd = htmlSize.find(' ', htmlSize.find(' ') + 1) # Second index
					htmlSize = htmlSize[: indexEnd]

					# Link
					htmlId = htmlColumns[3].find_all('div', class_ = 'author-info')[0].find_all('div')
					for id in htmlId:
						if id.has_attr('collectionid'):
							htmlId = id['collectionid']
							break
					htmlLink = self.base_link + self.download_link + htmlId

					# Age
					htmlAge = htmlColumns[2].getText()
					htmlAge = int(htmlAge[: htmlAge.find(' ')])

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, age = htmlAge)

					# Ignore
					if meta.ignore(False):
						continue

					htmlDanger = htmlInfo.find_all('small')[1].find_all('span', class_ = 'text-danger')
					ignore = False
					for danger in htmlDanger:
						danger = danger['title']
						if danger.startswith('incomplete'): # Ignore files marked as incomplete.
							ignore = True
						if danger.find('password') >= 0 or danger.find('encrypted') >= 0: # Ignore password-protected files.
							ignore = True
					if ignore:
						continue

					# Add
					# Some NZBs have the wrong size (often a few KB) indicated on the site, but are in reality bigger. Hence, do not show the size of NZBs below 20MB, but still add them.
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(sizeLimit = 20971520), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
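The size cell above is trimmed by hunting for the second space; the same thing can be said more directly as "keep the first two whitespace-separated tokens", which is how the sketch below reads (a possible simplification, not the provider's code):

def first_two_tokens(text):
    # '732.2 MB in 12 files' -> '732.2 MB'
    return ' '.join(text.split()[:2])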
Example #21
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            category = self.category_shows if 'tvshowtitle' in data else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1  # Pages start at 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (category, urllib.quote_plus(query), page)
                html = client.request(urlNew)

                # HTML is corrupt. Try to fix it manually.
                try:
                    indexStart = html.find('class="table2"')
                    indexStart = html.find('<tr bgcolor', indexStart)
                    indexEnd = html.find('search_stat', indexStart)
                    html = html[indexStart:indexEnd]
                    indexEnd = html.rfind('</td>') + 5
                    html = html[:indexEnd]
                    html = html.replace('</a></td>', '</td>')
                    html = '<table>' + html + '</tr></table>'
                except:
                    pass

                html = BeautifulSoup(html)

                page += 1
                added = False

                htmlRows = html.find_all('tr') # The manually repaired HTML only contains data rows; the header row was cut away above.
                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[0].find_all('div')[0]

                    # Name
                    htmlName = htmlInfo.find_all(
                        'a', recursive=False)[1].getText().strip()

                    # Link
                    htmlHash = htmlInfo.find_all('a',
                                                 recursive=False)[0]['href']
                    indexStart = htmlHash.find('torrent/')
                    if indexStart < 0: continue
                    indexStart += 8
                    indexEnd = htmlHash.find('.torrent', indexStart)
                    if indexEnd < 0: continue
                    htmlHash = htmlHash[indexStart:indexEnd]
                    if not tools.Hash.valid(htmlHash): continue
                    htmlLink = network.Container(htmlHash).torrentMagnet(
                        title=query)

                    # Size
                    htmlSize = htmlColumns[2].getText().strip()

                    # Seeds
                    htmlSeeds = int(htmlColumns[3].getText().replace(
                        ',', '').replace(' ', ''))

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
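`network.Container(htmlHash).torrentMagnet(title=query)` is an internal helper whose code is not shown; presumably it wraps the scraped info-hash into a standard magnet URI along these lines (the exact parameters the real helper adds are an assumption):

try:
    from urllib.parse import quote_plus # Python 3
except ImportError:
    from urllib import quote_plus # Python 2, as used by these providers

def magnet_from_hash(info_hash, name):
    # Standard magnet URI: 'xt' carries the BitTorrent info-hash, 'dn' a display name.
    return 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash.lower(), quote_plus(name))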
Example #22
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			if not (self.enabled and self.username and not self.username == '' and self.password and not self.password == ''):
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			# Login
			login = urlparse.urljoin(self.base_link, '/login')
			post = urllib.urlencode({'username': self.username, 'password': self.password, 'submit': 'Login'})
			cookie = client.request(login, post = post, output = 'cookie', close = False)
			response = client.request(login, post = post, cookie = cookie, output = 'extended')
			headers = {'User-Agent': response[3]['User-Agent'], 'Cookie': response[3]['Cookie']}

			url = urlparse.urljoin(self.base_link, self.search_link)

			type = self.type_tvshows if 'tvshowtitle' in data else self.type_movies
			offset = 0

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), type, offset)
				html = BeautifulSoup(client.request(urlNew, cookie = cookie))

				offset += self.offset

				htmlTable = html.find_all('table', id = 'browsetable')[0] # Will fail if on last page and the table is not present.
				htmlRows = htmlTable.find_all('tr', recursive = False) # Use children and no further.

				for i in range(1, len(htmlRows)): # First row is the header.
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.
					htmlInfo = htmlColumns[1]

					# Name
					htmlName = htmlInfo.find_all('a', class_ = 'title')[0].getText().strip()

					# Size
					htmlSize = htmlColumns[4].getText()
					indexEnd = htmlSize.find('<br')
					if indexEnd >= 0:
						htmlSize = htmlSize[: indexEnd].replace('"', '')

					# Link
					htmlLink = urlparse.urljoin(self.base_link, htmlColumns[7].find_all('div', class_ = 'icon_nzb')[0].find_all('a')[0]['href'])
					htmlLink += '|' + urllib.urlencode(headers)

					# Age
					htmlAge = htmlColumns[3]['title']
					htmlAge = tools.Time.datetime(htmlAge, '%Y-%m-%d %H:%M:%S')
					htmlAge = datetime.datetime.today() - htmlAge
					htmlAge = htmlAge.days

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, age = htmlAge)

					# Ignore
					if meta.ignore(False):
						continue

					# Ignore Incomplete
					try:
						htmlComplete = htmlColumns[4].find_all('span', class_ = 'label-success')[0].getText()
						if not '100' in htmlComplete:
							continue
					except:
						pass

					# Ignore Foreign
					if self.exclude_foreign:
						htmlCategory = htmlColumns[2].find_all('a')[0].getText()
						if 'foreign' in htmlCategory.lower():
							continue

					# Add
					# Some NZBs have the wrong size (often a few KB) indicated on the site, but are in reality bigger. Hence, do not show the size of NZBs below 20MB, but still add them.
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'memberonly' : True, 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})

			return sources
		except:
			return sources
Example #23
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            year = int(
                data['year']
            ) if 'year' in data and not data['year'] == None else None
            season = int(
                data['season']
            ) if 'season' in data and not data['season'] == None else None
            episode = int(
                data['episode']
            ) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            page = 0  # Pages start at 0
            added = False

            #while True:
            while page == 0:  # KickassTorrents currently has a problem to view any other page than page 1 while sorted by seeders. Only view first page.
                urlNew = url % (urllib.quote_plus(query))
                html = client.request(urlNew)

                # KickassTorrents has major mistakes in its HTML. Manually remove parts to create new HTML.
                indexStart = html.find('<',
                                       html.find('<!-- Start of Loop -->') + 1)
                indexEnd = html.rfind('<!-- End of Loop -->')
                html = html[indexStart:indexEnd]

                html = html.replace('<div class="markeredBlock',
                                    '</div><div class="markeredBlock'
                                    )  # torrentname div tag not closed.
                html = html.replace('</span></td>',
                                    '</td>')  # Dangling </span> closing tag.

                html = BeautifulSoup(html)

                page += 1
                added = False

                htmlRows = html.find_all(
                    'tr', recursive=False
                )  # Do not search further down the tree (just the direct children).
                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    if 'firstr' in htmlRow['class']:  # Header.
                        continue
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[0]

                    # Name
                    htmlName = htmlInfo.find_all(
                        'a', class_='cellMainLink')[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[1].getText().replace('&nbsp;', ' ')

                    # Link
                    htmlLink = ''
                    htmlLinks = htmlInfo.find_all('a')
                    for j in range(len(htmlLinks)):
                        link = htmlLinks[j]
                        if link.has_attr('href'):
                            link = link['href']
                            if link.startswith('magnet:'):
                                htmlLink = link
                                break

                    # Seeds
                    htmlSeeds = int(htmlColumns[3].getText())

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'info': meta.information(),
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
Example #24
	def account(self, cached = True, minimal = False):
		account = None
		try:
			if self.accountValid():
				import datetime
				from resources.lib.externals.beautifulsoup import BeautifulSoup

				if cached: accountHtml = cache.Cache().cacheShort(self._request, Core.LinkAccount)
				else: accountHtml = cache.Cache().cacheClear(self._request, Core.LinkAccount)

				if accountHtml == None or accountHtml == '': raise Exception()

				accountHtml = BeautifulSoup(accountHtml)
				accountHtml = accountHtml.find_all('form', id = 'accountForm')[0]
				accountHtml = accountHtml.find_all('table', recursive = False)[0]
				accountHtml = accountHtml.find_all('tr', recursive = False)

				accountUsername = accountHtml[0].find_all('td', recursive = False)[1].getText()
				accountType = accountHtml[1].find_all('td', recursive = False)[2].getText()
				accountStatus = accountHtml[3].find_all('td', recursive = False)[2].getText()

				accountExpiration = accountHtml[2].find_all('td', recursive = False)[2].getText()
				accountTimestamp = convert.ConverterTime(accountExpiration, format = convert.ConverterTime.FormatDate).timestamp()
				accountExpiration = datetime.datetime.fromtimestamp(accountTimestamp)

				account = {
					'user' : accountUsername,
					'type' : accountType,
					'status' : accountStatus,
					'expiration' : {
						'timestamp' : accountTimestamp,
						'date' : accountExpiration.strftime('%Y-%m-%d'),
						'remaining' : (accountExpiration - datetime.datetime.today()).days,
					}
				}

				if not minimal:
					if cached: usageHtml = cache.Cache().cacheShort(self._request, Core.LinkUsage)
					else: usageHtml = cache.Cache().cacheClear(self._request, Core.LinkUsage)

					if usageHtml == None or usageHtml == '': raise Exception()

					usageHtml = BeautifulSoup(usageHtml)
					usageHtml = usageHtml.find_all('div', class_ = 'table-responsive')[0]
					usageHtml = usageHtml.find_all('table', recursive = False)[0]
					usageHtml = usageHtml.find_all('tr', recursive = False)

					usageTotal = usageHtml[0].find_all('td', recursive = False)[1].getText()
					index = usageTotal.find('(')
					if index >= 0: usageTotal = int(usageTotal[index + 1 : usageTotal.find(' ', index)].replace(',', '').strip())
					else: usageTotal = 0

					usageConsumed = usageHtml[1].find_all('td', recursive = False)[2].getText()
					index = usageConsumed.find('(')
					if index >= 0: usageConsumed = int(usageConsumed[index + 1 : usageConsumed.find(' ', index)].replace(',', '').strip())
					else: usageConsumed = 0

					usageWeb = usageHtml[2].find_all('td', recursive = False)[2].getText()
					index = usageWeb.find('(')
					if index >= 0: usageWeb = int(usageWeb[index + 1 : usageWeb.find(' ', index)].replace(',', '').strip())
					else: usageWeb = 0

					usageNntp = usageHtml[3].find_all('td', recursive = False)[2].getText()
					index = usageNntp.find('(')
					if index >= 0: usageNntp = int(usageNntp[index + 1 : usageNntp.find(' ', index)].replace(',', '').strip())
					else: usageNntp = 0

					usageNntpUnlimited = usageHtml[4].find_all('td', recursive = False)[2].getText()
					index = usageNntpUnlimited.find('(')
					if index >= 0: usageNntpUnlimited = int(usageNntpUnlimited[index + 1 : usageNntpUnlimited.find(' ', index)].replace(',', '').strip())
					else: usageNntpUnlimited = 0

					usageRemaining = usageHtml[5].find_all('td', recursive = False)[2].getText()
					index = usageRemaining.find('(')
					if index >= 0: usageRemaining = int(usageRemaining[index + 1 : usageRemaining.find(' ', index)].replace(',', '').strip())
					else: usageRemaining = 0

					usageLoyalty = usageHtml[6].find_all('td', recursive = False)[2].getText()
					index = usageLoyalty.find('(')
					if index >= 0:
						usageLoyaltyTime = usageLoyalty[:index].strip()
						usageLoyaltyTimestamp = convert.ConverterTime(usageLoyaltyTime, format = convert.ConverterTime.FormatDate).timestamp()
						usageLoyaltyTime = datetime.datetime.fromtimestamp(usageLoyaltyTimestamp)
						usageLoyaltyPoints = float(usageLoyalty[index + 1 : usageLoyalty.find(')', index)].strip())
					else:
						usageLoyaltyTimestamp = 0
						usageLoyaltyTime = None
						usageLoyaltyPoints = 0 # Avoid a NameError below when no loyalty points are listed.

					usagePrecentageRemaining = usageRemaining / float(usageTotal)
					usagePrecentageConsumed = usageConsumed / float(usageTotal)
					usagePrecentageWeb = usageWeb / float(usageTotal)
					usagePrecentageNntp = usageNntp / float(usageTotal)
					usagePrecentageNntpUnlimited = usageNntpUnlimited / float(usageTotal)

					account.update({
						'loyalty' : {
							'time' : {
								'timestamp' : usageLoyaltyTimestamp,
								'date' : usageLoyaltyTime.strftime('%Y-%m-%d') if usageLoyaltyTime else None
							},
							'points' : usageLoyaltyPoints,
						},
						'usage' : {
							'total' : {
								'size' : {
									'bytes' : usageTotal,
									'description' : convert.ConverterSize(float(usageTotal)).stringOptimal(),
								},
							},
							'remaining' : {
								'value' : usagePrecentageRemaining,
								'percentage' : round(usagePrecentageRemaining * 100.0, 1),
								'size' : {
									'bytes' : usageRemaining,
									'description' : convert.ConverterSize(float(usageRemaining)).stringOptimal(),
								},
								'description' : '%.0f%%' % round(usagePrecentageRemaining * 100.0, 0), # Must round, otherwise 2.5% changes to 2% instead of 3%.
							},
							'consumed' : {
								'value' : usagePrecentageConsumed,
								'percentage' : round(usagePrecentageConsumed * 100.0, 1),
								'size' : {
									'bytes' : usageConsumed,
									'description' : convert.ConverterSize(usageConsumed).stringOptimal(),
								},
								'description' : '%.0f%%' % round(usagePrecentageConsumed * 100.0, 0), # Must round, otherwise 2.5% changes to 2% instead of 3%.
								'web' : {
									'value' : usagePrecentageWeb,
									'percentage' : round(usagePrecentageWeb * 100.0, 1),
									'size' : {
										'bytes' : usageWeb,
										'description' : convert.ConverterSize(usageWeb).stringOptimal(),
									},
									'description' : '%.0f%%' % round(usagePrecentageWeb * 100.0, 0), # Must round, otherwise 2.5% changes to 2% instead of 3%.
								},
								'nntp' : {
									'value' : usagePrecentageNntp,
									'percentage' : round(usagePrecentageNntp * 100.0, 1),
									'size' : {
										'bytes' : usageNntp,
										'description' : convert.ConverterSize(usageNntp).stringOptimal(),
									},
									'description' : '%.0f%%' % round(usagePrecentageNntp * 100.0, 0), # Must round, otherwise 2.5% changes to 2% instead of 3%.
								},
								'nntpunlimited' : {
									'value' : usagePrecentageNntpUnlimited,
									'percentage' : round(usagePrecentageNntpUnlimited * 100.0, 1),
									'size' : {
										'bytes' : usageNntpUnlimited,
										'description' : convert.ConverterSize(usageNntpUnlimited).stringOptimal(),
									},
									'description' : '%.0f%%' % round(usagePrecentageNntpUnlimited * 100.0, 0), # Must round, otherwise 2.5% changes to 2% instead of 3%.
								},
							}
						}
					})
		except:
			pass
		return account
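A side note on the rounding comment in the block above ("Must round, otherwise 2.5% changes to 2% instead of 3%"): the following minimal Python 2 sketch (not part of the example code) shows why the value is passed through round() before the '%.0f' formatting.

# '%.0f' rounds half to even, so 2.5 formats as '2'; round() rounds half away from zero.
print('%.0f%%' % 2.5)        # prints: 2%
print('%.0f%%' % round(2.5)) # prints: 3%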
Example #25
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if not tools.System.developers():
				raise Exception()

			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), category, page)
				data = client.request(urlNew)

				# RarBg's HTML is invalid and a total mess, probably to make it hard for scrapers.
				# First try to parse the HTML. If it fails, extract only the table from the markup and construct new HTML.
				# Sometimes both fail, seems like RarBg randomizes the corruption in its HTML.
				htmlRows = []
				try:
					html = BeautifulSoup(data)
					htmlTable = html.find_all('table', class_ = 'lista2t')[0]
					htmlRows = htmlTable.find_all('tr', class_ = 'lista2', recursive = False)
					if len(htmlRows) == 0: raise Exception()
				except:
					start = data.find('lista2t')
					if start < 0: raise Exception()
					start += 7
					start = data.find('lista2', start)
					start = data.find('>', start) + 1
					end = data.find('<tr><td align="center" colspan="2">', start)
					data = '<html><body><table class="lista2t"><tr class="lista2">' + data[start : end] + '</table></body></html>'
					html = BeautifulSoup(data)
					htmlTable = html.find_all('table', class_ = 'lista2t')[0]
					htmlRows = htmlTable.find_all('tr', class_ = 'lista2', recursive = False)

				page += 1
				added = False

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td')
					htmlInfo = htmlColumns[1]

					# Name
					htmlName = htmlInfo.find_all('a')[0].getText().strip()

					# 3D
					htmlImages = htmlInfo.find_all('img')
					for j in range(len(htmlImages)):
						try:
							if htmlImages[j]['src'].endswith('3d.png'):
								htmlName += ' 3D'
								break
						except:
							pass

					# Size
					htmlSize = htmlColumns[3].getText().strip()

					# Link
					# TODO: If the hash cannot be retrieved from the mouse-over image, fallback to the .torrent file.
					try:
						htmlLink = htmlInfo.find_all('a')[0]['onmouseover']
						start = htmlLink.find('/over/')
						if start < 0:
							raise Exception()
						start += 6
						end = htmlLink.find('.', start)
						htmlLink = htmlLink[start : end]
						if not len(htmlLink) == 40:
							raise Exception()
						htmlLink = self.magnet_link % (htmlLink, htmlName.replace(' ', ''))
					except:
						try:
							htmlLink = htmlInfo.find_all('a')[0]['href']
							start = htmlLink.find('torrent/')
							if start < 0:
								raise Exception()
							start += 8
							htmlLink = htmlLink[start:]
							if len(htmlLink) == 0:
								raise Exception()
							htmlLink = self.torrent_link % (htmlLink, htmlName.replace(' ', ''))
						except:
							continue

					# Seeds
					htmlSeeds = int(htmlColumns[4].getText().strip())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #26
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 0 # Pages start at 0
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), page)
				html = BeautifulSoup(client.request(urlNew))

				page += 1
				added = False

				htmlTable = html.find_all('table', id = 'searchResult')[0]
				htmlRows = htmlTable.find_all('tr', recursive = False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td')
					htmlInfo = htmlColumns[1]

					# Name
					htmlName = htmlInfo.find_all('div', class_ = 'detName')[0].find_all('a')[0].getText().strip()

					# Size
					htmlSize = htmlInfo.find_all('font', class_ = 'detDesc')[0].getText().replace('&nbsp;', ' ')
					indexStart = htmlSize.find(', Size')
					indexEnd = htmlSize.find(', ', indexStart + 1)
					htmlSize = htmlSize[indexStart + 7 : indexEnd]

					# Link
					htmlLink = ''
					htmlLinks = htmlInfo.find_all('a')
					for j in range(len(htmlLinks)):
						link = htmlLinks[j]['href']
						if link.startswith('magnet:'):
							htmlLink = link
							break

					# Seeds
					htmlSeeds = int(htmlColumns[2].getText())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #27
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			type = self.type_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.type_movies
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s (%s)' % (data['title'], data['year'])

			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), type, page)
				html = BeautifulSoup(client.request(urlNew))

				page += 1
				added = False

				htmlTable = html.find_all('div', id = 'div2child')[0]
				htmlRows = htmlTable.find_all('div', class_= 'resultdiv', recursive = False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlInfo = htmlRow.find_all('div', class_= 'resultdivbotton')[0]

					# Name
					htmlName = htmlRow.find_all('div', class_= 'resultdivtop')[0].find_all('div', class_= 'resultdivtopname')[0].getText().strip()

					# Size
					htmlSize = htmlInfo.find_all('div', class_= 'resultlength')[0].find_all('div', class_= 'resultdivbottonlength')[0].getText()

					# Link
					htmlHash = htmlInfo.find_all('div', class_= 'hideinfohash')[0].getText()
					htmlLink = network.Container(htmlHash).torrentMagnet(title = titleYear)

					# Seeds
					htmlSeeds = int(htmlInfo.find_all('div', class_= 'resultseed')[0].find_all('div', class_= 'resultdivbottonseed')[0].getText())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
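The network.Container helper used above to turn an info hash into a magnet link is not included in these examples. As a rough sketch under that assumption (not the actual helper implementation), a magnet URI can be built from a 40-character hex info hash roughly like this:

import urllib

def torrent_magnet(info_hash, title, trackers = None):
	# Hypothetical stand-in for network.Container(hash).torrentMagnet(title = ...).
	# A magnet URI carries the info hash (btih), a display name, and optional trackers.
	link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash.lower(), urllib.quote_plus(title))
	for tracker in (trackers or []):
		link += '&tr=' + urllib.quote_plus(tracker)
	return link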
Example #28
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			category = self.category_show if 'tvshowtitle' in data else self.category_movie

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
			querySplit = query.split()

			# Login
			if self.enabled and self.username and not self.username == '' and self.password and not self.password == '':
				login = self.base_link + self.login_link
				post = urllib.urlencode({'username': self.username, 'password': self.password, 'submit': 'submit'})
				cookie = client.request(login, post = post, output = 'cookie', close = False)
				response = client.request(login, post = post, cookie = cookie, output = 'extended')
				headers = {'User-Agent': response[3]['User-Agent'], 'Cookie': response[3]['Cookie']}
			else:
				cookie = None
				headers = None

			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 1
			added = False
			firstLink = None

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), urllib.quote_plus(category), page)
				html = BeautifulSoup(client.request(urlNew, cookie = cookie))

				page += 1
				added = False

				htmlTable = html.find_all('table', id = 'torrenttable')[0].find_all('tbody')[0]
				htmlRows = htmlTable.find_all('tr', recursive = False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]

					# Name
					htmlName = htmlRow.find_all('td', class_ = 'name', recursive = False)[0]
					htmlName = htmlName.find_all('span', class_ = 'title', recursive = False)[0]
					htmlName = htmlName.find_all('a')[0].getText().strip()

					# Link
					htmlLink = htmlRow.find_all('td', class_ = 'quickdownload', recursive = False)[0]
					htmlLink = htmlLink.find_all('a')[0]['href']

					# Continuing with page will always show the torrents of the last page.
					# Stop once the first link is the same.
					if i == 0:
						if firstLink == htmlLink:
							break
						firstLink = htmlLink

					if not headers == None:
						htmlLink += '|' + urllib.urlencode(headers)

					# Size
					htmlSize = htmlRow.find_all('td')[4].getText().strip()

					# Seeds
					htmlSeeds = htmlRow.find_all('td', class_ = 'seeders')[0].getText().strip()

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #29
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            pack = None

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Only this format works for season packs.
                    # Does not support individual episodes.
                    if pack:
                        query = '%s S%02d' % (title, season)
                    else:
                        pack = True
                        query = '%s сезон %d' % (title, season)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link,
                                   self.search_link) % urllib.quote_plus(query)
            html = BeautifulSoup(client.request(url))

            htmlTable = html.find_all(
                'table', class_='tablesorter')[0].find_all('tbody',
                                                           recursive=False)[0]
            htmlRows = htmlTable.find_all('tr', recursive=False)
            for i in range(len(htmlRows)):
                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td')

                # Name
                htmlName = htmlColumns[1].find_all('a')[0].getText().strip()

                # Link
                htmlLink = self.base_link + self.download_link + htmlColumns[
                    2].find_all('a')[0]['href']

                # Size
                htmlSize = long(
                    htmlColumns[3].find_all('u')[0].getText().strip())

                # Seeds
                try:
                    htmlSeeds = int(htmlColumns[4].getText().strip())
                except:
                    htmlSeeds = None

                # Metadata
                meta = metadata.Metadata(name=htmlName,
                                         title=title,
                                         year=year,
                                         season=season,
                                         episode=episode,
                                         pack=pack,
                                         packCount=packCount,
                                         link=htmlLink,
                                         size=htmlSize,
                                         seeds=htmlSeeds)

                # Ignore
                if meta.ignore(True):
                    continue

                # Add
                sources.append({
                    'url': htmlLink,
                    'debridonly': False,
                    'direct': False,
                    'source': 'torrent',
                    'language': self.language[0],
                    'quality': meta.videoQuality(),
                    'metadata': meta,
                    'file': htmlName,
                    'pack': pack
                })

            return sources
        except:
            return sources
Example #30
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			category = self.category_shows if 'tvshowtitle' in data else self.category_movies

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (category, urllib.quote_plus(query), page)
				html = client.request(urlNew)

				# HTML is corrupt. Try to fix it manually.
				indexStart = html.find('class="table2"')
				indexStart = html.find('<tr bgcolor', indexStart)
				indexEnd = html.find('search_stat', indexStart)
				html = html[indexStart : indexEnd]
				indexEnd = html.rfind('</td>') + 5
				html = html[:indexEnd]
				html = html.replace('</a></td>', '</td>')
				html = '<table>' + html + '</tr></table>'

				html = BeautifulSoup(html)

				page += 1
				added = False

				htmlRows = html.find_all('tr') # The table was reconstructed above, so every row is a result row.
				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td')
					htmlInfo = htmlColumns[0].find_all('div')[0]

					# Name
					htmlName = htmlInfo.find_all('a', recursive = False)[1].getText().strip()

					# Link
					htmlHash = htmlInfo.find_all('a', recursive = False)[0]['href']
					indexStart = htmlHash.find('torrent/')
					if indexStart < 0: continue
					indexStart += 8
					indexEnd = htmlHash.find('.torrent', indexStart)
					if indexEnd < 0: continue
					htmlHash = htmlHash[indexStart : indexEnd]
					if not tools.Hash.valid(htmlHash): continue
					htmlLink = network.Container(htmlHash).torrentMagnet(title = query)

					# Size
					htmlSize = htmlColumns[2].getText().strip()

					# Seeds
					htmlSeeds = int(htmlColumns[3].getText().replace(',', '').replace(' ', ''))

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
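tools.Hash.valid, used above to validate the extracted hash before building a magnet link, is also not shown here. A plausible minimal equivalent (an assumption, not the actual implementation) simply checks for a 40-character hexadecimal SHA-1 info hash:

import re

def hash_valid(value):
	# Hypothetical equivalent of tools.Hash.valid: a BitTorrent info hash is a
	# 40-character hexadecimal SHA-1 digest.
	return bool(re.match('^[0-9a-fA-F]{40}$', value or ''))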
Example #31
0
	def sources(self, url, hostDict, hostprDict):
		self.tSources = []
		try:
			if url == None:
				raise Exception()

			if not self.enabled or self.username == '' or self.password == '':
				raise Exception()

			data = urlparse.parse_qs(url)

			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			show = 'tvshowtitle' in data
			title = data['tvshowtitle'] if show else data['title']
			titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if show else '%s (%s)' % (data['title'], data['year'])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if show: subcategory = self.subcategories_show.values()[0] if len(self.subcategories_show) == 1 else self.subcategory_any
				else: subcategory = self.subcategories_movie.values()[0] if len(self.subcategories_movie) == 1 else self.subcategory_any

				if show:
					if pack: query = '%s S%02d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
				querySplit = query.split()

			url = urlparse.urljoin(self.base_link, self.search_link)
			query = urllib.quote_plus(query)

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 0
			added = False

			timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
			timerEnd = timerTimeout - 8
			timer = tools.Time(start = True)

			threads = []
			self.tLock = threading.Lock()
			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (self.category_video, subcategory, query, page)
				html = BeautifulSoup(client.request(urlNew))

				page += 25
				added = False

				htmlTables = html.find_all('table', class_ = 'table')
				if htmlTables:
					htmlTable = htmlTables[0]
					htmlTbody = htmlTable.find_all('tbody')[0]
					htmlRows = htmlTbody.find_all('tr', recursive = False)

					for i in range(len(htmlRows)):
						# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
						if timer.elapsed() > timerEnd:
							break

						htmlRow = htmlRows[i]

						# Name
						htmlInfo = htmlRows[i].find_all('a', href = True)[1]
						htmlName = htmlInfo.getText()

						# Category
						if subcategory is self.subcategory_any:
							htmlCategory = htmlRow.find_all('div', class_ = 'hidden')[0].getText()
							if show and len(self.subcategories_show) > 1:
								if htmlCategory not in self.subcategories_show.keys():
									continue
							elif len(self.subcategories_movie) > 1:
								if htmlCategory not in self.subcategories_movie.keys():
									continue

						# Size
						htmlSize = re.sub('([mMkKgGtT]?)[oO]', '\\1b', htmlRow.find_all('td')[5].getText())

						# Link
						htmlLink = self.base_link + self.download_link + str(htmlInfo.get('href').encode('utf-8')).split('/')[-1].split('-')[0]

						# Seeds
						htmlSeeds = int(htmlRow.find_all('td')[7].getText())

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)
						
						# Ignore
						if meta.ignore(True):
							continue

						# Add
						self.tLock.acquire()
						self.tSources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						self.tLock.release()
						added = True

						# Hash
						if self.inspection:
							htmlHash = urllib.quote(str(htmlInfo.get('href').encode('utf-8')), ':/+')
							thread = threading.Thread(target = self._hash, args = (htmlHash, len(self.tSources) - 1))
							threads.append(thread)
							thread.start()

				if not added: # Last page reached with a working torrent
					break

			# First filter out all non-related links before doing the hash lookup.
			if self.inspection:
				timerTimeout -= 2
				while True:
					if timer.elapsed() > timerTimeout: break
					if not any([thread.is_alive() for thread in threads]): break
					tools.Time.sleep(0.3)

			try: self.tLock.release()
			except: pass

			return self.tSources
		except:
			tools.Logger.error()
			try: self.tLock.release()
			except: pass
			return self.tSources
Example #32
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            pack = None

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s saison %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = title  # Do not include the year, otherwise there are too few results.
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            type = self.type_shows if 'tvshowtitle' in data else self.type_movies

            url = urlparse.urljoin(self.base_link, self.search_link) % (
                type, urllib.quote_plus(query))
            html = BeautifulSoup(client.request(url))

            htmlTable = html.find_all(
                'table', class_='cust-table')[0].find_all('tbody',
                                                          recursive=False)[0]
            htmlRows = htmlTable.find_all('tr', recursive=False)

            self.tLock = threading.Lock()
            self.tLinks = [None] * len(htmlRows)
            threads = []
            for i in range(len(htmlRows)):
                urlTorrent = self.base_link + htmlRows[i].find_all(
                    'td', recursive=False)[0].find_all('a')[0]['href']
                threads.append(
                    threading.Thread(target=self._link, args=(urlTorrent, i)))

            [thread.start() for thread in threads]
            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)
            while timer.elapsed() < timerEnd and any(
                [thread.is_alive() for thread in threads]):
                tools.Time.sleep(0.5)

            self.tLock.acquire(
            )  # Just lock in case the threads are still running.

            for i in range(len(htmlRows)):
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td', recursive=False)

                # Name
                htmlName = htmlColumns[0].getText().strip()
                if not 'tvshowtitle' in data:
                    htmlName = re.sub(
                        r"^(.*?)(TRUE|TRUEFRENCH|FRENCH|VOSTFR|VO)(.*)([0-9]{4})$",
                        r"\1 \4 \2\3", htmlName)

                # Link
                htmlLink = self.tLinks[i]

                # Size
                htmlSize = htmlColumns[1].getText().strip().lower().replace(
                    ' mo', 'MB').replace(' go', 'GB').replace(' o', 'b')

                # Seeds
                try:
                    htmlSeeds = int(htmlColumns[2].getText().strip())
                except:
                    htmlSeeds = None

                # Metadata
                meta = metadata.Metadata(name=htmlName,
                                         title=title,
                                         year=year,
                                         season=season,
                                         episode=episode,
                                         pack=pack,
                                         packCount=packCount,
                                         link=htmlLink,
                                         size=htmlSize,
                                         seeds=htmlSeeds)

                # Ignore
                if meta.ignore(False):
                    continue

                # Add
                sources.append({
                    'url': htmlLink,
                    'debridonly': False,
                    'direct': False,
                    'source': 'torrent',
                    'language': self.language[0],
                    'quality': meta.videoQuality(),
                    'metadata': meta,
                    'file': htmlName
                })

            self.tLock.release()

            return sources
        except:
            tools.Logger.error()
            return sources
Example #33
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
			querySplit = query.split()

			url = urlparse.urljoin(self.base_link, self.search_link)

			page = 0 # Pages start at 0
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (page, urllib.quote_plus(query))
				html = BeautifulSoup(client.request(urlNew))

				page += 1
				added = False

				htmlTable = html.find_all('div', id = 'ires')[0].find_all('ol', recursive = False)[0]
				htmlRows = htmlTable.find_all('li', recursive = False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

				for i in range(len(htmlRows)):
					row1 = htmlRows[i].find_all('h3', class_ = 'r')[0]
					row2 = htmlRows[i].find_all('div', class_ = 'sti')[0]

					# Name
					htmlName = row1.find_all('a', class_ = 'tl', recursive = False)[0].getText().strip()

					# Link
					htmlHash = row1.find_all('a', class_ = 'tl', recursive = False)[0]['href']
					if htmlHash.startswith('/'):
						htmlHash = htmlHash[1:]
					index = htmlHash.find('/')
					if index > 0:
						htmlHash = htmlHash[:index]
					if not tools.Hash.valid(htmlHash):
						continue
					htmlLink = network.Container(htmlHash).torrentMagnet(title = query)

					# Size
					htmlSize = row2.find_all('span', class_ = 'torrent-size')[0].getText().strip()

					# Seeds
					htmlSeeds = int(row2.find_all('span', class_ = 'seeders')[0].find_all('span', class_ = 'gac_b')[0].getText().strip())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Ignore Name
					# TorrentProject has a lot of season packs, foreign titles, and other torrents that should be excluded. If the name does not contain the exact search string, ignore the result.
					if not all(q in htmlName for q in querySplit):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #34
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)
			category = self.category_shows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (category, urllib.quote_plus(query), page)
				html = BeautifulSoup(client.request(urlNew))

				page += 1
				added = False

				# NB: Do not use "tbody class=results", since the table has inner div/style that breaks parsing.
				htmlRows = html.find_all('tr', class_ = 'result') # Select rows by the 'result' class; the header row does not carry it.
				for i in range(len(htmlRows)):
					try:
						htmlRow = htmlRows[i]
						htmlColumns = htmlRow.find_all('td', recursive = False)

						# Name
						htmlName = htmlColumns[0].find_all('a')[0].getText().strip()

						# Size
						htmlSize = htmlColumns[1].getText().strip()

						# Link
						htmlLink = ''
						htmlLinks = htmlColumns[0].find_all('a')
						for j in range(len(htmlLinks)):
							link = htmlLinks[j]['href']
							if link.startswith('magnet:'):
								htmlLink = link
								break

						# Seeds
						htmlSeeds = int(re.sub('[^0-9]', '', htmlColumns[4].getText().strip()))

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

						# Ignore
						if meta.ignore(True):
							continue

						# Add
						sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						added = True
					except:
						pass

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #35
0
	def sources(self, url, hostDict, hostprDict):
		self.tSources = []
		try:
			if url == None: raise Exception()

			ignoreContains = None
			data = self._decode(url)

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = None
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = data['alternatives'] if 'alternatives' in data else None
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
					if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
						title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
						query = title
						ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
					else:
						if pack: query = '%s %d' % (title, season)
						else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			query = urllib.quote_plus(query)
			if not self._query(query): return self.tSources
			
			url = urlparse.urljoin(self.base_link, self.search_link)

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0 # Page starts at 1, but incremented before first request.

			timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
			timerEnd = timerTimeout - 8
			timer = tools.Time(start = True)

			threads = []
			self.tLock = threading.Lock()

			while True:
				try:
					# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
					if timer.elapsed() > timerEnd: break

					added = False
					pageCounter += 1
					if pageLimit > 0 and pageCounter > pageLimit: break

					html = BeautifulSoup(client.request(url % (query, pageCounter)))
					htmlTable = html.find_all('table', class_ = 'results')
					htmlTable = htmlTable[len(htmlTable) - 1]
					htmlRows = htmlTable.find_all('tr')

					for i in range(1, len(htmlRows)):
						try:
							htmlRow = htmlRows[i]
							htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.

							# Name
							htmlName = htmlColumns[0].find_all('a')[0].getText()

							# Link
							htmlLink = urlparse.urljoin(self.base_link, htmlColumns[0].find_all('a')[0]['href'])

							# Size
							htmlSize = htmlColumns[1].getText()

							# Age
							htmlAge = htmlColumns[3].getText()
							htmlAge = int(convert.ConverterDuration(htmlAge).value(convert.ConverterDuration.UnitDay))

							# Metadata
							meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, age = htmlAge)

							# Ignore
							meta.ignoreAdjust(contains = ignoreContains, length = 0.3)
							if meta.ignore(False): continue

							# Add
							self.tLock.acquire()
							self.tSources.append({'url' : None, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
							self.tLock.release()
							added = True

							# Link
							thread = threading.Thread(target = self._link, args = (htmlLink, len(self.tSources) - 1))
							threads.append(thread)
							thread.start()

						except:
							pass

					if not added: break
				except:
					break

			# First filter out all non-related links before doing the hash lookup.
			timerTimeout -= 2
			while True:
				if timer.elapsed() > timerTimeout: break
				if not any([thread.is_alive() for thread in threads]): break
				tools.Time.sleep(0.5)

			try: self.tLock.release()
			except: pass
		except:
			try: self.tLock.release()
			except: pass

		return [i for i in self.tSources if i['url']]
Example #36
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		found = []
		try:
			if url == None:
				raise Exception()

			if not (self.enabled and self.username and not self.username == '' and self.password and not self.password == ''):
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			# Login
			if self.enabled and self.username and not self.username == '' and self.password and not self.password == '':
				login = urlparse.urljoin(self.base_link, '/login')
				post = urllib.urlencode({'username': self.username, 'password': self.password, 'rememberme' : 'on', 'submit': 'Login'}) # Must have rememberme, otherwise cannot login (UsenetCrawler bug).
				cookie = client.request(login, post = post, output = 'cookie', close = False)
				response = client.request(login, post = post, cookie = cookie, output = 'extended')
				headers = {'User-Agent': response[3]['User-Agent'], 'Cookie': response[3]['Cookie']}
			else:
				cookie = None
				headers = None

			url = urlparse.urljoin(self.base_link, self.search_link)

			type = self.type_tvshows if 'tvshowtitle' in data else self.type_movies
			offset = 0

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				urlNew = url % (urllib.quote_plus(query), type, offset)
				html = BeautifulSoup(client.request(urlNew, cookie = cookie))

				offset += self.offset

				htmlTable = html.find_all('table', id = 'browsetable')[0] # Will fail if on last page and the table is not present.
				htmlRows = htmlTable.find_all('tr', recursive = False) # Use children and no further.

				for i in range(1, len(htmlRows)): # First row is the header.
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.
					htmlInfo = htmlColumns[0]

					# Name
					htmlName = htmlInfo.find_all('a', class_ = 'title')[0].getText()

					# Size
					htmlSize = htmlColumns[3].getText()
					indexEnd = htmlSize.find('<br')
					if indexEnd >= 0:
						htmlSize = htmlSize[: indexEnd]

					# Link
					htmlLink = self.base_link + htmlColumns[6].find_all('a')[0]['href']
					index = htmlLink.rfind('/')
					if index > 0:
						htmlLink = htmlLink[:index] # Remove name at end that contains spaces
					if not headers == None:
						htmlLink += '|' + urllib.urlencode(headers)

					# Age
					htmlAge = htmlColumns[2]['title']
					htmlAge = tools.Time.datetime(htmlAge, '%Y-%m-%d %H:%M:%S')
					htmlAge = datetime.datetime.today() - htmlAge
					htmlAge = htmlAge.days

					# Language
					htmlLanguage = htmlColumns[1].find_all('a')[0].getText()
					if 'Foreign >' in htmlLanguage:
						htmlLanguage = tools.Language.code(htmlLanguage[htmlLanguage.rfind('>') + 1:].strip())
					else:
						htmlLanguage = self.language[0]

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, age = htmlAge)

					# Ignore
					if meta.ignore(False):
						continue

					# Ignore Duplicates
					htmlCategory = htmlColumns[1].find_all('a')[0].getText()
					htmlFiles = htmlColumns[4].find_all('a')[0].getText()
					size = meta.size()
					if isinstance(size, (float, int, long)):
						size = int(math.ceil(size / 1048576.0) * 1048576.0) # Sometimes the file size slightly varies. Round to the upper MB.
					htmlAge = int(math.ceil(htmlAge))
					foundId = htmlName.lower() + '_' + str(htmlAge) + '_' + htmlCategory + '_' + htmlFiles + '_' + str(size)
					if foundId in found:
						continue
					found.append(foundId)

					# Add
					# Some NZBs have the wrong size (often a few KB) indicated on the site, but are in reality bigger. Hence, do not show the size of NZBs below 20MB, but still add them.
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'memberonly' : True, 'language' : htmlLanguage, 'quality':  meta.videoQuality(), 'info' : meta.information(sizeLimit = 20971520), 'file' : htmlName})

			return sources
		except:
			return sources
Example #37
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            pack = None

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = ['%s %d' % (title, season)]
                    else:
                        query = [
                            '%s S%02dE%02d' % (title, season, episode),
                            '%s %02dx%02d' % (title, season, episode)
                        ]
                else:
                    query = ['%s %d' % (title, year)]
                query = [
                    re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', q)
                    for q in query
                ]

            for q in query:
                url = urlparse.urljoin(self.base_link,
                                       self.search_link) % urllib.quote_plus(q)

                # Fix HTML closing tags.
                html = client.request(url,
                                      ignoreSsl=True)  # SSL Certificate fails.
                html = re.sub('<span.*>\s*<\/span>\s*<td', '</td><td', html)

                html = BeautifulSoup(html)
                htmlRows = html.find_all('tr', class_=['odd', 'odd2'])
                for i in range(len(htmlRows)):
                    try:
                        htmlColumns = htmlRows[i].find_all('td',
                                                           recursive=False)

                        # Name
                        # Name is abbreviated, use the name in the link instead.
                        htmlName = htmlColumns[1].find_all('a')[0]['href']
                        htmlName = htmlName[htmlName.rfind('/') + 1:]
                        htmlName = htmlName.replace('_', ' ')

                        # Link
                        htmlLink = htmlColumns[3].find_all('input')[0]['value']
                        htmlLink = network.Container(htmlLink).torrentMagnet(
                            title=q, trackers=self.trackers)

                        # Size
                        htmlSize = htmlColumns[2].getText().strip()

                        # Seeds
                        try:
                            htmlSeeds = int(htmlColumns[5].getText().strip())
                        except:
                            htmlSeeds = None

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=htmlSeeds)
                        meta.mIgnoreLength = 8  # Relax this, otherwise too many links are filtered out (eg: Avatar 2009).

                        # Ignore
                        if meta.ignore(True):
                            continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName,
                            'pack': pack
                        })
                    except:
                        pass

            return sources
        except:
            return sources
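The torrent value above is passed through network.Container(htmlLink).torrentMagnet(title=q, trackers=self.trackers), which attaches a display name and tracker list to the link. A minimal standalone sketch of that idea, assuming the input is already an info hash; build_magnet and its parameters are illustrative and not the addon's API:

import urllib

def build_magnet(info_hash, name, trackers=()):
    # Assemble a magnet URI: xt = content hash, dn = display name, tr = tracker URLs.
    magnet = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote_plus(name))
    for tracker in trackers:
        magnet += '&tr=' + urllib.quote_plus(tracker)
    return magnet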
Example #38
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = int(data['year']) if 'year' in data and not data['year'] == None else None
                season = int(data['season']) if 'season' in data and not data['season'] == None else None
                episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
                query = '%s S%02dE%02d' % (title, season, episode) if 'tvshowtitle' in data else '%s %d' % (title, year)
            query = urllib.quote_plus(query)

            # The returned website is different to the normal website.
            # Probably a mobile version.
            url = urlparse.urljoin(self.base_link, self.search_link) % query
            html = BeautifulSoup(client.request(url))
            htmlRows = html.find_all('div', class_='yt-lockup-content')

            for htmlRow in htmlRows:
                htmlInfo = htmlRow.find_all('a')[0]

                # Name
                htmlName = htmlInfo.getText().strip()

                # Link
                htmlLink = urlparse.urljoin(self.base_link, htmlInfo['href'])

                # Duration
                htmlDuration = 0
                try:
                    htmlDurationItem = htmlRow.find_all('span')[0].getText().lower()
                    indexStart = htmlDurationItem.find(':')
                    if indexStart > 0:
                        indexStart += 1
                        indexEnd = htmlDurationItem.find('.', indexStart)
                        if indexEnd > 0:
                            htmlDuration = htmlDurationItem[indexStart:indexEnd].strip()
                            htmlDuration = htmlDuration.split(':')
                            if len(htmlDuration) == 3:
                                htmlDuration = (int(htmlDuration[0]) * 3600) + (int(htmlDuration[1]) * 60) + int(htmlDuration[2])
                            else:
                                htmlDuration = (int(htmlDuration[0]) * 60) + int(htmlDuration[1])
                        else:
                            htmlDuration = 0
                except:
                    pass

                # Ignore trailers, etc.
                if any(s in htmlName.lower() for s in self.excludes):
                    continue

                # Ignore less than 10 minutes.
                if htmlDuration < 600:
                    continue

                # Metadata
                meta = metadata.Metadata(name=htmlName,
                                         title=title,
                                         year=year,
                                         season=season,
                                         episode=episode,
                                         link=htmlLink)

                # Ignore
                if meta.ignore(False):
                    continue

                # Add
                sources.append({
                    'url': htmlLink,
                    'debridonly': False,
                    'direct': False,
                    'source': 'youtube',
                    'language': self.language[0],
                    'quality': meta.videoQuality(),
                    'metadata': meta,
                    'file': htmlName
                })
                added = True

            return sources
        except:
            return sources
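The duration handling above takes the text between the first colon and the trailing period and converts H:MM:SS or MM:SS into seconds before discarding clips under 10 minutes. A standalone sketch of the same conversion, assuming the span text looks roughly like 'Duration: 1:23:45.'; parse_duration is a hypothetical helper, not part of the provider:

def parse_duration(text):
    # Expects something like 'duration: 1:23:45.' and returns the length in seconds.
    text = text.lower()
    start = text.find(':')
    if start < 0:
        return 0
    end = text.find('.', start + 1)
    value = text[start + 1:end] if end > 0 else text[start + 1:]
    parts = [int(p) for p in value.strip().split(':')]
    if len(parts) == 3:
        return parts[0] * 3600 + parts[1] * 60 + parts[2]
    elif len(parts) == 2:
        return parts[0] * 60 + parts[1]
    return 0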
Example #39
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
                type = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                type = 'tv' if 'tvshowtitle' in data else 'movie'
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = int(data['year']) if 'year' in data and not data['year'] == None else None
                season = int(data['season']) if 'season' in data and not data['season'] == None else None
                episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query), page)
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='table')[0]
                htmlRows = htmlTable.find_all('td', class_='x-item')
                for i in range(0, len(htmlRows)):
                    try:
                        htmlRow = htmlRows[i]

                        # Name
                        htmlName = htmlRow.find_all('a', class_='title')[0]['title'].strip()

                        # Size
                        htmlSize = htmlRow.find_all('div', class_='tail')[0].getText()
                        htmlSize = htmlSize.replace('\n', '').replace('\r', '').replace('&nbsp;', ' ').strip()
                        htmlSize = re.search('.*[sS]ize:(.*)[dD]ownloads.*', htmlSize, re.IGNORECASE)
                        if htmlSize: htmlSize = htmlSize.group(1).strip()
                        else: htmlSize = None

                        # Link
                        htmlLink = htmlRow.find_all('div', class_='tail')[0].find_all('a', class_='title')[0]['href'].strip()

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=1)

                        # Ignore
                        if meta.ignore(True):
                            continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        added = True
                    except:
                        pass

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
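Several of these providers share the same paging pattern: keep requesting result pages until a scraper timeout (minus a safety margin), a configured page limit, or a page that yields nothing new. A generic sketch of that loop under those assumptions; fetch_page and the other names are placeholders standing in for the provider-specific request and parsing, not addon APIs:

import time

def paged_scrape(fetch_page, timeout=30, page_limit=0, margin=8):
    # Collect results page by page until the deadline, the page limit, or an empty page.
    results = []
    deadline = time.time() + max(timeout - margin, 0)
    page = 1
    while time.time() < deadline:
        if page_limit > 0 and page > page_limit:
            break
        items = fetch_page(page)
        if not items:  # Last page reached.
            break
        results.extend(items)
        page += 1
    return results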
Example #40
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			query = urllib.quote_plus(query)
			category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = (self.base_link + self.search_link) % (query, category, page)
				html = BeautifulSoup(client.request(urlNew))
				htmlTable = html.find_all('div', class_ = 'content')[0].find_all('table', class_ = 'table-sm', recursive = False)[1]
				htmlRows = htmlTable.find_all('tr', recursive = False)

				page += 1
				added = False

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False)

					# Name
					htmlName = htmlColumns[0].getText().strip()

					# Size
					htmlSize = htmlColumns[1].getText().strip()

					# Link
					htmlLink = htmlRow.find_all('td', recursive = False)[0].find_all('a')[0]['href'].strip()
					htmlLink = re.search('\/torrent\/(.*)\/', htmlLink, re.IGNORECASE).group(1)
					htmlLink = (self.base_link + self.torrent_link) % htmlLink

					# Seeds
					htmlSeeds = int(htmlColumns[3].getText().strip())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
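The url argument in these providers is an encoded query string; the first two lines of the method flatten the lists returned by urlparse.parse_qs into single values. The same idiom as a small helper, written here only for illustration (decode_params is not the addon's _decode):

import urlparse

def decode_params(url):
    # parse_qs returns {'title': ['Avatar'], ...}; keep only the first value of each key.
    data = urlparse.parse_qs(url)
    return dict((key, values[0] if values else '') for key, values in data.items())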
Example #41
0
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None: raise Exception()

            ignoreContains = None
            data = self._decode(url)

            if 'exact' in data and data['exact']:
                query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = data['alternatives'] if 'alternatives' in data else None
                year = int(data['year']) if 'year' in data and not data['year'] == None else None
                season = int(data['season']) if 'season' in data and not data['season'] == None else None
                episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                    if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
                        title = '%s %s' % (data['tvshowtitle'], data['title'])  # Change the title for metadata filtering.
                        query = title
                        ignoreContains = len(data['title']) / float(len(title))  # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                    else:
                        if pack: query = '%s %d' % (title, season)
                        else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            if not self._query(query): return sources

            category = self.category_shows if 'tvshowtitle' in data else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (page, urllib.quote_plus(query), category)

                # For some reason Zooqle returns 404 even though the response has a body.
                # This is probably a bug on Zooqle's server and the error should just be ignored.
                html = BeautifulSoup(client.request(urlNew, ignoreErrors=404))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='table-torrents')[0]
                htmlRows = htmlTable.find_all('tr', recursive=False)
                for i in range(1, len(htmlRows)):  # First row is header.
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[1]
                    htmlMeta = htmlInfo.find_all('div', recursive=False)[0]

                    # Name
                    htmlName = htmlInfo.find_all(
                        'a', recursive=False)[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[3].getText()

                    # Link
                    htmlLink = ''
                    htmlLinks = htmlColumns[2].find_all('a')
                    for j in range(len(htmlLinks)):
                        link = htmlLinks[j]['href']
                        if link.startswith('magnet:'):
                            htmlLink = link
                            break

                    # Seeds
                    htmlSeeds = htmlColumns[5].find_all('div', recursive=False)[0]['title']
                    indexStart = htmlSeeds.find(':')
                    if indexStart > 0:
                        indexStart += 1
                        indexEnd = htmlSeeds.find('|', indexStart)
                        if indexEnd > 0:
                            htmlSeeds = htmlSeeds[indexStart:indexEnd]
                        else:
                            htmlSeeds = htmlSeeds[indexStart:]
                        htmlSeeds = int(htmlSeeds.replace(',', '').replace('.', '').strip())
                    else:
                        htmlSeeds = None

                    # Quality & 3D
                    try:
                        htmlQuality = htmlMeta.find_all(
                            'span',
                            class_='hidden-xs')[0].getText().lower().strip()
                        if 'ultra' in htmlQuality: htmlQuality = '4K'
                        elif 'std' in htmlQuality: htmlQuality = 'SD'
                        elif 'med' in htmlQuality or 'low' in htmlQuality:
                            htmlQuality = 'CAM'
                        htmlName += ' ' + htmlQuality
                    except:
                        pass

                    # Audio
                    try:
                        htmlName += ' ' + htmlMeta.find_all(
                            'span', {'title': 'Audio format'})[0].getText()
                    except:
                        pass

                    # Languages
                    try:
                        htmlLanguages = htmlMeta.find_all(
                            'span', {'title': 'Detected languages'
                                     })[0].getText().split(',')
                    except:
                        htmlLanguages = None

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             titles=titles,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds,
                                             languageAudio=htmlLanguages)

                    # Ignore
                    meta.ignoreAdjust(contains=ignoreContains)
                    if meta.ignore(True): continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
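The seeder count above comes from a tooltip whose text is assumed to look roughly like 'Seeders: 1,234 | Leechers: 56'; the code takes the text between the first colon and the pipe and strips thousands separators before casting to int. A standalone sketch of that step (parse_seeds and the tooltip format are assumptions, not confirmed by the provider):

def parse_seeds(tooltip):
    # Extract the number between the first ':' and the '|' (if any), e.g. 'Seeders: 1,234 | ...'.
    start = tooltip.find(':')
    if start < 0:
        return None
    end = tooltip.find('|', start + 1)
    value = tooltip[start + 1:end] if end > 0 else tooltip[start + 1:]
    try:
        return int(value.replace(',', '').replace('.', '').strip())
    except ValueError:
        return None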
Example #42
0
	def sources(self, url, hostDict, hostprDict):
		sources = []
		found = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link) % (urllib.quote_plus(query))
			html = BeautifulSoup(client.request(url))

			htmlTable = html.find_all('tbody', id = 'spots')[0]

			# Fix some problems with the markup.
			htmlTable = str(htmlTable)
			htmlTable = htmlTable.replace('\'=""', '=""') # Dangling single quote.
			htmlTable = htmlTable.replace('<b>', '').replace('</b>', '') # There are bold tags wrapped around some td, causing BeautifulSoup to skip them.
			htmlTable = BeautifulSoup(htmlTable)

			htmlRows = htmlTable.find_all('tr') # Do not switch recursive off here, for some reason BeautifulSoup then detects nothing. Probably because of markup fixing.

			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.
				htmlInfo = htmlColumns[1]

				# Category
				htmlCategory = htmlColumns[0].find_all('a')[0].getText()
				htmlCategory = htmlCategory.replace('HD', ' HD')

				# Name
				htmlName = htmlInfo.find_all('a')[0].getText()
				htmlName += ' ' + htmlCategory

				# Size
				htmlSize = htmlColumns[6].getText()

				# Link
				htmlLink = htmlColumns[7].find_all('a')[0]['href']

				# Age
				htmlAge = htmlColumns[5]['title']
				index = htmlAge.find(',')
				if index >= 0:
					htmlAge = htmlAge[index + 1:]
				htmlAge = htmlAge.strip()
				htmlAge = tools.Time.datetime(htmlAge, '%d-%b-%Y (%H:%M)')
				htmlAge = datetime.datetime.today() - htmlAge
				htmlAge = htmlAge.days

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, age = htmlAge)

				# Ignore
				if meta.ignore(False):
					continue

				# Ignore Duplicates
				htmlPoster = htmlColumns[4].find_all('a')[0].getText()
				size = meta.size()
				if isinstance(size, (float, int, long)):
					size = int(math.ceil(size / 1048576.0) * 1048576.0) # Sometimes the file size slightly varies. Round to the upper MB.
				htmlAge = int(math.ceil(htmlAge))
				foundId = htmlName.lower() + '_' + str(htmlAge) + '_' + htmlCategory + '_' + htmlPoster + '_' + str(size)
				if foundId in found:
					continue
				found.append(foundId)

				# Add
				# Some NZBs have the wrong size (often a few KB) indicated on the site, but are in reality bigger. Hence, do not show the size of NZBs below 20MB, but still add them.
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'info' : meta.information(sizeLimit = 20971520), 'file' : htmlName})

			return sources
		except:
			return sources
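The duplicate filter above builds a key from the name, age, category, poster, and the reported size rounded up to the next MB, since re-posts of the same NZB often differ by a few bytes. A compact sketch of that key construction (duplicate_key is a hypothetical helper mirroring the logic above):

import math

def duplicate_key(name, age_days, category, poster, size_bytes):
    # Round the size up to a whole MB so near-identical re-posts collapse onto one key.
    if isinstance(size_bytes, (int, long, float)):
        size_bytes = int(math.ceil(size_bytes / 1048576.0) * 1048576.0)
    return '_'.join([name.lower(), str(int(math.ceil(age_days))), category, poster, str(size_bytes)])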