def _getetsrc(self, url, language=None):
    """Loads a URL using caching, returns an ElementTree of the source

    The payload is fetched with self._loadUrl. If it cannot be parsed it
    is fetched a second time with recache=True before giving up.

    url: address handed to self._loadUrl
    language: forwarded unchanged to self._loadUrl

    Raises tvdb_error when the second payload cannot be parsed either.
    """
    def strip_cr(raw):
        # TVDB doesn't sanitize \r (CR) from user input in some fields,
        # remove it to avoid errors. Change from SickBeard, from will14m
        # Handles both byte strings and text so the same code path is
        # used regardless of what _loadUrl returned.
        if isinstance(raw, bytes):
            return raw.replace(b"\r", b"")
        return raw.replace(u"\r", u"")

    src = strip_cr(self._loadUrl(url, language=language))
    try:
        return ElementTree.fromstring(src)
    except SyntaxError:
        # The (possibly cached) copy is unusable: force a fresh download
        # and give it the same CR clean-up as the first attempt.
        src = strip_cr(self._loadUrl(url, recache=True, language=language))
        try:
            return ElementTree.fromstring(src)
        except SyntaxError as exceptionmsg:
            errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
                exceptionmsg
            )

            # Not every cache setup records a location; only give the
            # hint when there is a location to point at.
            cache_location = self.config.get('cache_location')
            if self.config.get('cache_enabled') and cache_location:
                errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
                    cache_location
                )

            errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
            errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
            raise tvdb_error(errormsg)
def _getetsrc(self, url, language=None):
    """Loads a URL using caching, returns an ElementTree of the source

    Two attempts are made: a normal self._loadUrl call and, if its
    payload does not parse, one more call with recache=True.

    url: address handed to self._loadUrl
    language: forwarded unchanged to self._loadUrl

    Raises tvdb_error if the payload of the second attempt is not
    parsable either.
    """
    def fetch(**extra):
        raw = self._loadUrl(url, language=language, **extra)
        # TVDB doesn't sanitize \r (CR) from user input in some fields,
        # remove it to avoid errors. Change from SickBeard, from will14m
        # Both attempts go through here, so the recached copy is cleaned
        # exactly like the first one.
        if isinstance(raw, bytes):
            return raw.replace(b"\r", b"")
        return raw.replace(u"\r", u"")

    src = fetch()
    try:
        return ElementTree.fromstring(src)
    except SyntaxError:
        src = fetch(recache=True)

    try:
        return ElementTree.fromstring(src)
    except SyntaxError as exceptionmsg:
        parts = [
            "There was an error with the XML retrieved from thetvdb.com:\n%s" % (exceptionmsg)
        ]

        # The cache location is not recorded by every cache setup, so
        # read it defensively instead of risking a KeyError here.
        cache_location = self.config.get('cache_location')
        if self.config.get('cache_enabled') and cache_location:
            parts.append("\nFirst try emptying the cache folder at..\n%s" % (cache_location))

        parts.append("\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on")
        parts.append("\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n")
        raise tvdb_error("".join(parts))
def get_token(self):
    """Return the TheTVDB v2 API token kept in sickbeard.THETVDB_V2_API_TOKEN.

    A new token is requested through self.get_new_token() when no token
    is stored or the stored one is older than 23 hours.

    Raises tvdb_error when no usable token is available afterwards.
    """
    token_info = sickbeard.THETVDB_V2_API_TOKEN

    needs_refresh = token_info.get('token') is None
    if not needs_refresh:
        # A missing timestamp is treated as "issued at the earliest
        # representable date", i.e. always expired.
        issued_at = token_info.get('datetime', datetime.datetime.fromordinal(1))
        needs_refresh = datetime.datetime.now() - issued_at > datetime.timedelta(hours=23)

    if needs_refresh:
        sickbeard.THETVDB_V2_API_TOKEN = self.get_new_token()

    if not sickbeard.THETVDB_V2_API_TOKEN.get('token'):
        raise tvdb_error('Could not get Authentification Token')
    return sickbeard.THETVDB_V2_API_TOKEN.get('token')
def _loadUrl(self, url, recache=False, language=None):
    """Fetch url through self.urlopener and return the response body.

    url: address to open
    recache: when True and the response carries an 'x-local-cache'
        header, resp.recache() is called on it
    language: only used for zip responses, where '<language>.xml' is
        the archive member that gets returned

    gzip-encoded bodies are decompressed, zip bodies are unpacked, any
    other body is returned as read.

    Raises tvdb_error when the server cannot be reached, when gzip data
    cannot be handled, or when a zip body is not a valid archive.
    """
    global lastTimeout
    try:
        log().debug("Retrieving URL %s" % url)
        resp = self.urlopener.open(url)
        if 'x-local-cache' in resp.headers:
            log().debug("URL %s was cached in %s" % (
                url,
                resp.headers['x-local-cache'])
            )
            if recache:
                log().debug("Attempting to recache %s" % url)
                resp.recache()
    except (IOError, urllib.error.URLError) as errormsg:
        # Anything that is not an HTTP status error is recorded in the
        # module-level lastTimeout before the failure is reported.
        if not str(errormsg).startswith('HTTP Error'):
            lastTimeout = datetime.datetime.now()
        raise tvdb_error("Could not connect to server: %s" % (errormsg))

    # handle gzipped content,
    # http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch
    if 'gzip' in resp.headers.get("Content-Encoding", ''):
        if gzip:
            # resp.read() yields bytes, so the in-memory file must be a
            # BytesIO; io.StringIO only accepts text.
            stream = io.BytesIO(resp.read())
            gz = gzip.GzipFile(fileobj=stream)
            return gz.read()

        raise tvdb_error("Received gzip data from thetvdb.com, but could not correctly handle it")

    if 'application/zip' in resp.headers.get("Content-Type", ''):
        try:
            # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
            log().debug("We recived a zip file unpacking now ...")
            # Same as above: archive data is binary, wrap it in BytesIO.
            zipdata = io.BytesIO(resp.read())
            myzipfile = zipfile.ZipFile(zipdata)
            return myzipfile.read('%s.xml' % language)
        except zipfile.BadZipfile:
            if 'x-local-cache' in resp.headers:
                resp.delete_cache()
            raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")

    return resp.read()
def get_token(self):
    """Hand out the API token stored in sickbeard.THETVDB_V2_API_TOKEN.

    The stored entry is replaced with the result of self.get_new_token()
    if it has no token or if its 'datetime' is more than 23 hours ago.

    Raises tvdb_error if there is still no token after that.
    """
    max_age = datetime.timedelta(hours=23)
    stored = sickbeard.THETVDB_V2_API_TOKEN

    if stored.get('token') is None:
        expired = True
    else:
        # No timestamp -> fall back to ordinal day 1, which is always
        # further back than max_age.
        obtained = stored.get('datetime', datetime.datetime.fromordinal(1))
        expired = datetime.datetime.now() - obtained > max_age

    if expired:
        sickbeard.THETVDB_V2_API_TOKEN = self.get_new_token()

    if sickbeard.THETVDB_V2_API_TOKEN.get('token'):
        return sickbeard.THETVDB_V2_API_TOKEN.get('token')
    raise tvdb_error('Could not get Authentification Token')
def _getShowData(self, sid, language):
    """Takes a series ID, gets the epInfo URL and parses the TVDB
    XML file into the shows dict in layout:
    shows[series_id][season_number][episode_number]

    sid: series ID, interpolated into the configured URLs
    language: language of the show; config['language'] takes precedence
        for the series information when it is set

    Raises tvdb_error when neither config['language'] nor language is
    given.
    """
    if self.config['language'] is None:
        log().debug('Config language is none, using show language')
        if language is None:
            raise tvdb_error("config['language'] was None, this should not happen")
        getShowInLanguage = language
    else:
        log().debug(
            'Configured language %s override show language of %s' % (
                self.config['language'],
                language
            )
        )
        getShowInLanguage = self.config['language']

    # Parse show information
    log().debug('Getting all series data for %s' % (sid))
    seriesInfoEt = self._getetsrc(
        self.config['url_seriesInfo'] % (sid, getShowInLanguage)
    )
    for curInfo in seriesInfoEt.findall("Series")[0]:
        tag = curInfo.tag.lower()
        value = curInfo.text

        if value is not None:
            if tag in ['banner', 'fanart', 'poster']:
                value = self.config['url_artworkPrefix'] % (value)
            else:
                value = self._cleanData(value)

        self._setShowData(sid, tag, value)

    # Parse banners
    if self.config['banners_enabled']:
        self._parseBanners(sid)

    # Parse actors
    if self.config['actors_enabled']:
        self._parseActors(sid)

    # Parse episode data
    log().debug('Getting all episodes of %s' % (sid))

    if self.config['useZip']:
        url = self.config['url_epInfo_zip'] % (sid, language)
    else:
        url = self.config['url_epInfo'] % (sid, language)

    epsEt = self._getetsrc(url, language=language)

    for cur_ep in epsEt.findall("Episode"):
        use_dvd = False
        if self.config['dvdorder']:
            log().debug('Using DVD ordering.')
            dvd_season = cur_ep.find('DVD_season')
            dvd_epno = cur_ep.find('DVD_episodenumber')
            # find() returns None for a missing element, so check the
            # element before touching .text; fall back to aired order
            # unless both DVD numbers are actually present.
            use_dvd = (
                dvd_season is not None and dvd_season.text is not None
                and dvd_epno is not None and dvd_epno.text is not None
            )

        if use_dvd:
            elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
        else:
            elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')

        # An element that exists but has no text is just as unusable as
        # a missing one (float(None) would raise), so skip both cases.
        if (elem_seasnum is None or elem_epno is None
                or elem_seasnum.text is None or elem_epno.text is None):
            log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
                elem_seasnum, elem_epno))
            # Iterate the element directly: Element.getchildren() no
            # longer exists on Python 3.9+.
            log().debug(
                " ".join(
                    "%r is %r" % (child.tag, child.text) for child in cur_ep))
            # TODO: Should this happen?
            continue  # Skip to next episode

        # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
        seas_no = int(float(elem_seasnum.text))
        ep_no = int(float(elem_epno.text))

        for cur_item in cur_ep:
            tag = cur_item.tag.lower()
            value = cur_item.text
            if value is not None:
                if tag == 'filename':
                    value = self.config['url_artworkPrefix'] % (value)
                else:
                    value = self._cleanData(value)
            self._setItem(sid, seas_no, ep_no, tag, value)
def __init__(self,
             interactive=False,
             select_first=False,
             debug=False,
             cache=True,
             banners=False,
             actors=False,
             custom_ui=None,
             language=None,
             search_all_languages=False,
             apikey=None,
             base_url="http://www.thetvdb.com",
             forceConnect=False,
             useZip=False,
             dvdorder=False,
             use_requests=False):
    """interactive (True/False):
        When True, the built-in console UI is used to select the correct show.
        When False, the first search result is used.

    select_first (True/False):
        Automatically selects the first series search result (rather
        than showing the user a list of more than one series).
        Is overridden by interactive = False, or specifying a custom_ui

    debug (True/False) DEPRECATED:
         Replaced with proper use of logging module. To show debug messages:

             >>> import logging
             >>> logging.basicConfig(level = logging.DEBUG)

    cache (True/False/str/unicode/urllib2 opener):
        Retrieved XML are persisted to disc. If true, stores in
        tvdb_api folder under your systems TEMP_DIR, if set to
        str/unicode instance it will use this as the cache
        location. If False, disables caching.  Can also be passed
        an arbitrary Python object, which is used as a urllib2
        opener, which should be created by urllib2.build_opener

        In Python 3, True/False enable or disable default
        caching. Passing string specified directory where to store
        the "tvdb.sqlite3" cache file. Also a custom
        requests.Session instance can be passed (e.g maybe a
        customised instance of requests_cache.CachedSession)

    banners (True/False):
        Retrieves the banners for a show. These are accessed
        via the _banners key of a Show(), for example:

        >>> Tvdb(banners=True)['scrubs']['_banners'].keys()
        ['fanart', 'poster', 'series', 'season']

    actors (True/False):
        Retrieves a list of the actors for a show. These are accessed
        via the _actors key of a Show(), for example:

        >>> t = Tvdb(actors=True)
        >>> t['scrubs']['_actors'][0]['name']
        u'Zach Braff'

    custom_ui (tvdb_ui.BaseUI subclass):
        A callable subclass of tvdb_ui.BaseUI (overrides interactive option)

    language (2 character language abbreviation):
        The language of the returned data. Is also the language search
        uses. Default is "en" (English). For full list, run..

        >>> Tvdb().config['valid_languages'] #doctest: +ELLIPSIS
        ['da', 'fi', 'nl', ...]

    search_all_languages (True/False):
        By default, Tvdb will only search in the language specified using
        the language option. When this is True, it will search for the
        show in any language

    apikey (str/unicode):
        Override the default thetvdb.com API key. By default it will use
        tvdb_api's own key (fine for small scripts), but you can use your
        own key if desired - this is recommended if you are embedding
        tvdb_api in a larger application)
        See http://thetvdb.com/?tab=apiregister to get your own key

    base_url (str):
        Override the default thetvdb.com server url.

    forceConnect (bool):
        If true it will always try to connect to theTVDB.com even if we
        recently timed out. By default it will wait one minute before
        trying again, and any requests within that one minute window will
        return an exception immediately.

    useZip (bool):
        Download the zip archive where possible, instead of the xml.
        This is only used when all episodes are pulled.
        And only the main language xml is used, the actor and banner xml are lost.

    dvdorder (bool):
        Stored as config['dvdorder'].

    use_requests (bool):
        When True, the requests/requests_cache based session is set up
        even on Python 2 (on Python 3 it always is).
    """

    global lastTimeout

    # if we're given a lastTimeout that is less than 1 min just give up
    if (not forceConnect and lastTimeout is not None
            and datetime.datetime.now() - lastTimeout < datetime.timedelta(minutes=1)):
        raise tvdb_error("We recently timed out, so giving up early this time")

    self.shows = ShowContainer()  # Holds all Show classes
    self.corrections = {}  # Holds show-name to show_id mapping

    self.config = {}

    if apikey is not None:
        self.config['apikey'] = apikey
    else:
        self.config['apikey'] = "0629B785CE550C8D"  # tvdb_api's API key

    self.config['debug_enabled'] = debug  # show debugging messages

    self.config['custom_ui'] = custom_ui

    self.config['interactive'] = interactive  # prompt for correct series?

    self.config['select_first'] = select_first

    self.config['search_all_languages'] = search_all_languages

    self.config['useZip'] = useZip

    self.config['dvdorder'] = dvdorder

    self.config['use_requests'] = use_requests

    # Make sure the key always exists: code that reports cache problems
    # reads it whenever cache_enabled is true. Branches below overwrite
    # it when the location is known.
    self.config['cache_location'] = None

    if not IS_PY2 or self.config['use_requests']:
        import requests
        import requests_cache
        if cache is True:
            cache_location = self._getTempDir()
            self.session = requests_cache.CachedSession(
                expire_after=21600,  # 6 hours
                backend='sqlite',
                cache_name=cache_location,
            )
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache_location
        elif cache is False:
            self.session = requests.Session()
            self.config['cache_enabled'] = False
        elif isinstance(cache, text_type):
            # Specified cache path
            cache_location = os.path.join(cache, "tvdb_api")
            self.session = requests_cache.CachedSession(
                expire_after=21600,  # 6 hours
                backend='sqlite',
                cache_name=cache_location,
            )
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache_location
        else:
            self.session = cache
            try:
                self.session.get
            except AttributeError:
                raise ValueError("cache argument must be True/False, string as cache path or requests.Session-type object (e.g from requests_cache.CachedSession)")
            # Same convention as a caller-supplied urllib2 opener below.
            self.config['cache_enabled'] = True
    else:
        # For backwards compatibility in Python 2.x
        if cache is True:
            self.config['cache_enabled'] = True
            self.config['cache_location'] = self._getTempDir()
            self.urlopener = urllib2.build_opener(
                CacheHandler(self.config['cache_location'])
            )

        elif cache is False:
            self.config['cache_enabled'] = False
            self.urlopener = urllib2.build_opener()  # default opener with no caching

        elif isinstance(cache, basestring):
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache
            self.urlopener = urllib2.build_opener(
                CacheHandler(self.config['cache_location'])
            )

        elif isinstance(cache, urllib2.OpenerDirector):
            # If passed something from urllib2.build_opener, use that
            log().debug("Using %r as urlopener" % cache)
            self.config['cache_enabled'] = True
            self.urlopener = cache

        else:
            raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))

    self.config['banners_enabled'] = banners
    self.config['actors_enabled'] = actors

    if self.config['debug_enabled']:
        warnings.warn("The debug argument to tvdb_api.__init__ will be removed in the next version. "
                      "To enable debug messages, use the following code before importing: "
                      "import logging; logging.basicConfig(level=logging.DEBUG)")
        logging.basicConfig(level=logging.DEBUG)

    # List of language from http://thetvdb.com/api/0629B785CE550C8D/languages.xml
    # Hard-coded here as it is realtively static, and saves another HTTP request, as
    # recommended on http://thetvdb.com/wiki/index.php/API:languages.xml
    self.config['valid_languages'] = [
        "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
        "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
    ]

    # thetvdb.com should be based around numeric language codes,
    # but to link to a series like http://thetvdb.com/?tab=series&id=79349&lid=16
    # requires the language ID, thus this mapping is required (mainly
    # for usage in tvdb_ui - internally tvdb_api will use the language abbreviations)
    self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
        'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
        'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
        'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}

    if language is None:
        self.config['language'] = 'en'
    elif language not in self.config['valid_languages']:
        raise ValueError("Invalid language %s, options are: %s" % (
            language, self.config['valid_languages']
        ))
    else:
        self.config['language'] = language

    # The following url_ configs are based of the
    # http://thetvdb.com/wiki/index.php/Programmers_API
    self.config['base_url'] = base_url

    if self.config['search_all_languages']:
        self.config['url_getSeries'] = u("%(base_url)s/api/GetSeries.php?seriesname=%%s&language=all") % self.config
    else:
        self.config['url_getSeries'] = u("%(base_url)s/api/GetSeries.php?seriesname=%%s&language=%(language)s") % self.config

    self.config['url_epInfo'] = u("%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.xml") % self.config
    self.config['url_epInfo_zip'] = u("%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.zip") % self.config

    self.config['url_seriesInfo'] = u("%(base_url)s/api/%(apikey)s/series/%%s/%%s.xml") % self.config
    self.config['url_actorsInfo'] = u("%(base_url)s/api/%(apikey)s/series/%%s/actors.xml") % self.config

    self.config['url_seriesBanner'] = u("%(base_url)s/api/%(apikey)s/series/%%s/banners.xml") % self.config
    self.config['url_artworkPrefix'] = u("%(base_url)s/banners/%%s") % self.config
def _get_show_data(self, sid, language, get_ep_info=False):
    """Takes a series ID, gets the epInfo URL and parses the TVDB
    XML file into the shows dict in layout:
    shows[series_id][season_number][episode_number]

    sid: series ID, interpolated into the configured URLs
    language: forwarded to every self._getetsrc call
    get_ep_info: when True the paged episode list is loaded as well

    Returns False when the series data is missing or has no
    'seriesname', True otherwise.

    Raises tvdb_error when an episode page cannot be retrieved.
    """

    def rating_key(x):
        return x['ratingsinfo']['average'], x['ratingsinfo']['count']

    def season_rating_key(x):
        return -1 * tryInt(x['subkey']), x['ratingsinfo']['average'], x['ratingsinfo']['count']

    def load_artwork(kind, sort_key, want_url):
        """Fetch artwork of one kind, register it and return the top URL ('' if unused/none)."""
        art_data = self._getetsrc(self.config['url_seriesBanner'] % (sid, kind), language=language)
        if not (art_data and len(art_data.get('data', '') or '') > 0):
            return ''
        art_data['data'] = sorted(art_data['data'], reverse=True, key=sort_key)
        best = ''
        if want_url:
            best = self.config['url_artworkPrefix'] % art_data['data'][0]['filename']
        self._parse_banners(sid, art_data['data'])
        return best

    # Parse show information
    log().debug('Getting all series data for %s' % sid)
    url = self.config['url_seriesInfo'] % sid
    show_data = self._getetsrc(url, language=language)

    # check and make sure we have data to process and that it contains a series name
    # (the "or {}" has to bind to the 'data' lookup, otherwise a None
    # payload ends up on the right-hand side of "in")
    if not (show_data and 'seriesname' in (show_data.get('data') or {})):
        return False

    for k, v in show_data['data'].items():
        self._set_show_data(sid, k, v)

    if sid in self.shows:
        self.shows[sid].ep_loaded = get_ep_info

    p = ''
    if self.config['posters_enabled']:
        p = load_artwork('poster', rating_key, True)
    if p:
        self._set_show_data(sid, u'poster', p)

    b = ''
    if self.config['banners_enabled']:
        b = load_artwork('series', rating_key, True)
    if b:
        self._set_show_data(sid, u'banner', b)

    if self.config['seasons_enabled']:
        load_artwork('season', season_rating_key, False)

    if self.config['seasonwides_enabled']:
        load_artwork('seasonwide', season_rating_key, False)

    f = ''
    if self.config['fanart_enabled']:
        f = load_artwork('fanart', rating_key, True)
    if f:
        self._set_show_data(sid, u'fanart', f)

    if self.config['actors_enabled']:
        actor_data = self._getetsrc(self.config['url_actorsInfo'] % sid, language=language)
        if actor_data and len(actor_data.get('data', '') or '') > 0:
            self._parse_actors(sid, actor_data['data'])

    if get_ep_info:
        # Parse episode data
        log().debug('Getting all episodes of %s' % sid)

        page = 1
        episodes = []
        while page is not None:
            episode_data = self._getetsrc(self.config['url_epInfo'] % (sid, page), language=language)
            if None is episode_data:
                raise tvdb_error('Exception retrieving episodes for show')
            if not getattr(self, 'not_found', False) and None is not episode_data.get('data'):
                episodes.extend(episode_data['data'])
            # A present-but-empty 'links' entry must end the paging, not crash it.
            page = (episode_data.get('links') or {}).get('next', None)

        ep_map_keys = {
            'absolutenumber': u'absolute_number',
            'airedepisodenumber': u'episodenumber',
            'airedseason': u'seasonnumber',
            'airedseasonid': u'seasonid',
            'dvdepisodenumber': u'dvd_episodenumber',
            'dvdseason': u'dvd_season',
        }

        for cur_ep in episodes:
            if self.config['dvdorder']:
                log().debug('Using DVD ordering.')
                use_dvd = None is not cur_ep.get('dvdseason') and None is not cur_ep.get('dvdepisodenumber')
            else:
                use_dvd = False

            if use_dvd:
                elem_seasnum, elem_epno = cur_ep.get('dvdseason'), cur_ep.get('dvdepisodenumber')
            else:
                elem_seasnum, elem_epno = cur_ep.get('airedseason'), cur_ep.get('airedepisodenumber')

            if None is elem_seasnum or None is elem_epno:
                log().warning('An episode has incomplete season/episode number (season: %r, episode: %r)' % (
                    elem_seasnum, elem_epno))
                continue  # Skip to next episode

            # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
            seas_no = int(float(elem_seasnum))
            ep_no = int(float(elem_epno))

            for k, v in cur_ep.items():
                k = k.lower()

                if None is not v:
                    if 'filename' == k:
                        v = self.config['url_artworkPrefix'] % v
                    else:
                        v = clean_data(v)

                if k in ep_map_keys:
                    k = ep_map_keys[k]
                self._set_item(sid, seas_no, ep_no, k, v)

    return True
def _getShowData(self, sid, language):
    """Takes a series ID, gets the epInfo URL and parses the TVDB
    XML file into the shows dict in layout:
    shows[series_id][season_number][episode_number]

    sid: series ID, interpolated into the configured URLs
    language: language of the show; config['language'] takes precedence
        for the series information when it is set

    Raises tvdb_error when neither config['language'] nor language is
    given.
    """
    if self.config['language'] is None:
        log().debug('Config language is none, using show language')
        if language is None:
            raise tvdb_error("config['language'] was None, this should not happen")
        getShowInLanguage = language
    else:
        log().debug(
            'Configured language %s override show language of %s' % (
                self.config['language'],
                language
            )
        )
        getShowInLanguage = self.config['language']

    # Parse show information
    log().debug('Getting all series data for %s' % (sid))
    seriesInfoEt = self._getetsrc(
        self.config['url_seriesInfo'] % (sid, getShowInLanguage)
    )
    for curInfo in seriesInfoEt.findall("Series")[0]:
        tag = curInfo.tag.lower()
        value = curInfo.text

        if value is not None:
            if tag in ['banner', 'fanart', 'poster']:
                value = self.config['url_artworkPrefix'] % (value)
            else:
                value = self._cleanData(value)

        self._setShowData(sid, tag, value)

    # Parse banners
    if self.config['banners_enabled']:
        self._parseBanners(sid)

    # Parse actors
    if self.config['actors_enabled']:
        self._parseActors(sid)

    # Parse episode data
    log().debug('Getting all episodes of %s' % (sid))

    if self.config['useZip']:
        url = self.config['url_epInfo_zip'] % (sid, language)
    else:
        url = self.config['url_epInfo'] % (sid, language)

    epsEt = self._getetsrc(url, language=language)

    for cur_ep in epsEt.findall("Episode"):
        elem_seasnum = cur_ep.find('SeasonNumber')
        elem_epno = cur_ep.find('EpisodeNumber')

        if self.config['dvdorder']:
            log().debug('Using DVD ordering.')
            dvd_season = cur_ep.find('DVD_season')
            dvd_epno = cur_ep.find('DVD_episodenumber')
            # Only switch to DVD numbering when both elements exist and
            # carry text; find() returns None for a missing element.
            if (dvd_season is not None and dvd_season.text is not None
                    and dvd_epno is not None and dvd_epno.text is not None):
                elem_seasnum, elem_epno = dvd_season, dvd_epno

        # Without both numbers there is no slot to store the episode in,
        # so skip it instead of failing on .text / int().
        if (elem_seasnum is None or elem_epno is None
                or elem_seasnum.text is None or elem_epno.text is None):
            log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
                elem_seasnum, elem_epno))
            continue  # Skip to next episode

        # float() first so values such as "1.0" are accepted for both numbers
        seas_no = int(float(elem_seasnum.text))
        ep_no = int(float(elem_epno.text))

        # Iterate the element directly: Element.getchildren() no longer
        # exists on Python 3.9+.
        for cur_item in cur_ep:
            tag = cur_item.tag.lower()
            value = cur_item.text
            if value is not None:
                if tag == 'filename':
                    value = self.config['url_artworkPrefix'] % (value)
                else:
                    value = self._cleanData(value)
            self._setItem(sid, seas_no, ep_no, tag, value)
def _getShowData(self, sid, language):
    """Takes a series ID, gets the epInfo URL and parses the TVDB
    XML file into the shows dict in layout:
    shows[series_id][season_number][episode_number]

    sid: series ID, interpolated into the configured URLs
    language: language of the show; config['language'] takes precedence
        for the series information when it is set

    Raises tvdb_error when neither config['language'] nor language is
    given.
    """

    def has_text(elem):
        # "elem is not None" on purpose: an Element without children is
        # falsy, so plain truthiness would be wrong here.
        return elem is not None and elem.text is not None

    if self.config['language'] is None:
        log().debug('Config language is none, using show language')
        if language is None:
            raise tvdb_error(
                "config['language'] was None, this should not happen")
        getShowInLanguage = language
    else:
        log().debug('Configured language %s override show language of %s' %
                    (self.config['language'], language))
        getShowInLanguage = self.config['language']

    # Parse show information
    log().debug('Getting all series data for %s' % (sid))
    seriesInfoEt = self._getetsrc(self.config['url_seriesInfo'] %
                                  (sid, getShowInLanguage))
    for curInfo in seriesInfoEt.findall("Series")[0]:
        tag = curInfo.tag.lower()
        value = curInfo.text

        if value is not None:
            if tag in ['banner', 'fanart', 'poster']:
                value = self.config['url_artworkPrefix'] % (value)
            else:
                value = self._cleanData(value)

        self._setShowData(sid, tag, value)

    # Parse banners
    if self.config['banners_enabled']:
        self._parseBanners(sid)

    # Parse actors
    if self.config['actors_enabled']:
        self._parseActors(sid)

    # Parse episode data
    log().debug('Getting all episodes of %s' % (sid))

    if self.config['useZip']:
        url = self.config['url_epInfo_zip'] % (sid, language)
    else:
        url = self.config['url_epInfo'] % (sid, language)

    epsEt = self._getetsrc(url, language=language)

    for cur_ep in epsEt.findall("Episode"):
        if self.config['dvdorder']:
            log().debug('Using DVD ordering.')
            # Missing DVD elements mean "no DVD numbering" rather than an
            # AttributeError on None.
            use_dvd = (has_text(cur_ep.find('DVD_season'))
                       and has_text(cur_ep.find('DVD_episodenumber')))
        else:
            use_dvd = False

        if use_dvd:
            elem_seasnum, elem_epno = cur_ep.find(
                'DVD_season'), cur_ep.find('DVD_episodenumber')
        else:
            elem_seasnum, elem_epno = cur_ep.find(
                'SeasonNumber'), cur_ep.find('EpisodeNumber')

        # Elements without text cannot be converted below, treat them
        # like missing elements.
        if not (has_text(elem_seasnum) and has_text(elem_epno)):
            log().warning(
                "An episode has incomplete season/episode number (season: %r, episode: %r)"
                % (elem_seasnum, elem_epno))
            # Element.getchildren() is gone on Python 3.9+, iterate the
            # element itself to get its children.
            log().debug(" ".join("%r is %r" % (child.tag, child.text)
                                 for child in cur_ep))
            # TODO: Should this happen?
            continue  # Skip to next episode

        # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
        seas_no = int(float(elem_seasnum.text))
        ep_no = int(float(elem_epno.text))

        for cur_item in cur_ep:
            tag = cur_item.tag.lower()
            value = cur_item.text
            if value is not None:
                if tag == 'filename':
                    value = self.config['url_artworkPrefix'] % (value)
                else:
                    value = self._cleanData(value)
            self._setItem(sid, seas_no, ep_no, tag, value)
def _loadUrl(self, url, recache=False, language=None):
    """Fetch url and return the response body.

    On Python 3 the request goes through self.session, on Python 2
    through self.urlopener.

    url: address to fetch
    recache: ask for a fresh copy instead of a cached one
    language: only used for zip responses, where '<language>.xml' is
        the archive member that gets returned

    Raises tvdb_error when a zip body is not a valid archive and, on the
    Python 2 path, when the server cannot be reached or gzip data cannot
    be handled.
    """
    global lastTimeout

    if not IS_PY2:
        # Python 3: return content at URL as bytes
        # A plain requests.Session has no cache backend, so look it up
        # defensively before using it.
        cache_backend = getattr(self.session, 'cache', None)
        if recache and cache_backend is not None:
            # Drop the stored copy so the request below hits the server.
            log().debug("Attempting to recache %s" % url)
            cache_backend.delete_url(url)

        resp = self.session.get(url)
        if 'application/zip' in resp.headers.get("Content-Type", ''):
            try:
                # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
                log().debug("We recived a zip file unpacking now ...")
                from io import BytesIO
                myzipfile = zipfile.ZipFile(BytesIO(resp.content))
                return myzipfile.read('%s.xml' % language)
            except zipfile.BadZipfile:
                if cache_backend is not None:
                    cache_backend.delete_url(url)
                raise tvdb_error(
                    "Bad zip file received from thetvdb.com, could not read it"
                )
        return resp.content

    try:
        log().debug("Retrieving URL %s" % url)
        resp = self.urlopener.open(url)
        if 'x-local-cache' in resp.headers:
            log().debug("URL %s was cached in %s" %
                        (url, resp.headers['x-local-cache']))
            if recache:
                log().debug("Attempting to recache %s" % url)
                resp.recache()
    except (IOError, urllib2.URLError) as errormsg:
        # Anything that is not an HTTP status error is recorded in the
        # module-level lastTimeout before the failure is reported.
        if not str(errormsg).startswith('HTTP Error'):
            lastTimeout = datetime.datetime.now()
        raise tvdb_error("Could not connect to server: %s" % (errormsg))

    # handle gzipped content,
    # http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch
    if 'gzip' in resp.headers.get("Content-Encoding", ''):
        if gzip:
            from StringIO import StringIO
            stream = StringIO(resp.read())
            gz = gzip.GzipFile(fileobj=stream)
            return gz.read()

        raise tvdb_error(
            "Received gzip data from thetvdb.com, but could not correctly handle it"
        )

    if 'application/zip' in resp.headers.get("Content-Type", ''):
        try:
            # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
            log().debug("We recived a zip file unpacking now ...")
            from StringIO import StringIO
            zipdata = StringIO()
            zipdata.write(resp.read())
            myzipfile = zipfile.ZipFile(zipdata)
            return myzipfile.read('%s.xml' % language)
        except zipfile.BadZipfile:
            if 'x-local-cache' in resp.headers:
                resp.delete_cache()
            raise tvdb_error(
                "Bad zip file received from thetvdb.com, could not read it"
            )

    return resp.read()
def __init__(self,
             interactive=False,
             select_first=False,
             debug=False,
             cache=True,
             banners=False,
             actors=False,
             custom_ui=None,
             language=None,
             search_all_languages=False,
             apikey=None,
             forceConnect=False,
             useZip=False,
             dvdorder=False):
    """interactive (True/False):
        When True, the built-in console UI is used to select the correct show.
        When False, the first search result is used.

    select_first (True/False):
        Automatically selects the first series search result (rather
        than showing the user a list of more than one series).
        Is overridden by interactive = False, or specifying a custom_ui

    debug (True/False) DEPRECATED:
         Replaced with proper use of logging module. To show debug messages:

             >>> import logging
             >>> logging.basicConfig(level = logging.DEBUG)

    cache (True/False/str/unicode/urllib2 opener):
        Retrieved XML are persisted to disc. If true, stores in
        tvdb_api folder under your systems TEMP_DIR, if set to
        str/unicode instance it will use this as the cache
        location. If False, disables caching.  Can also be passed
        an arbitrary Python object, which is used as a urllib2
        opener, which should be created by urllib2.build_opener

        In Python 3, True/False enable or disable default
        caching. Passing string specified directory where to store
        the "tvdb.sqlite3" cache file. Also a custom
        requests.Session instance can be passed (e.g maybe a
        customised instance of requests_cache.CachedSession)

    banners (True/False):
        Retrieves the banners for a show. These are accessed
        via the _banners key of a Show(), for example:

        >>> Tvdb(banners=True)['scrubs']['_banners'].keys()
        ['fanart', 'poster', 'series', 'season']

    actors (True/False):
        Retrieves a list of the actors for a show. These are accessed
        via the _actors key of a Show(), for example:

        >>> t = Tvdb(actors=True)
        >>> t['scrubs']['_actors'][0]['name']
        u'Zach Braff'

    custom_ui (tvdb_ui.BaseUI subclass):
        A callable subclass of tvdb_ui.BaseUI (overrides interactive option)

    language (2 character language abbreviation):
        The language of the returned data. Is also the language search
        uses. Default is "en" (English). For full list, run..

        >>> Tvdb().config['valid_languages'] #doctest: +ELLIPSIS
        ['da', 'fi', 'nl', ...]

    search_all_languages (True/False):
        By default, Tvdb will only search in the language specified using
        the language option. When this is True, it will search for the
        show in any language

    apikey (str/unicode):
        Override the default thetvdb.com API key. By default it will use
        tvdb_api's own key (fine for small scripts), but you can use your
        own key if desired - this is recommended if you are embedding
        tvdb_api in a larger application)
        See http://thetvdb.com/?tab=apiregister to get your own key

    forceConnect (bool):
        If true it will always try to connect to theTVDB.com even if we
        recently timed out. By default it will wait one minute before
        trying again, and any requests within that one minute window will
        return an exception immediately.

    useZip (bool):
        Download the zip archive where possible, instead of the xml.
        This is only used when all episodes are pulled.
        And only the main language xml is used, the actor and banner xml are lost.

    dvdorder (bool):
        Stored as config['dvdorder'].
    """

    global lastTimeout

    # if we're given a lastTimeout that is less than 1 min just give up
    if (not forceConnect and lastTimeout is not None
            and datetime.datetime.now() - lastTimeout < datetime.timedelta(minutes=1)):
        raise tvdb_error(
            "We recently timed out, so giving up early this time")

    self.shows = ShowContainer()  # Holds all Show classes
    self.corrections = {}  # Holds show-name to show_id mapping

    self.config = {}

    if apikey is not None:
        self.config['apikey'] = apikey
    else:
        self.config['apikey'] = "0629B785CE550C8D"  # tvdb_api's API key

    self.config['debug_enabled'] = debug  # show debugging messages

    self.config['custom_ui'] = custom_ui

    self.config['interactive'] = interactive  # prompt for correct series?

    self.config['select_first'] = select_first

    self.config['search_all_languages'] = search_all_languages

    self.config['useZip'] = useZip

    self.config['dvdorder'] = dvdorder

    # Make sure the key always exists: code that reports cache problems
    # reads it whenever cache_enabled is true. Branches below overwrite
    # it when the location is known.
    self.config['cache_location'] = None

    if not IS_PY2:  # FIXME: Allow using requests in Python 2?
        # requests.Session is used below, so requests itself has to be
        # in scope as well as requests_cache.
        import requests
        import requests_cache
        if cache is True:
            cache_location = self._getTempDir()
            self.session = requests_cache.CachedSession(
                expire_after=21600,  # 6 hours
                backend='sqlite',
                cache_name=cache_location,
            )
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache_location
        elif cache is False:
            self.session = requests.Session()
            self.config['cache_enabled'] = False
        elif isinstance(cache, text_type):
            # Specified cache path
            cache_location = os.path.join(cache, "tvdb_api")
            self.session = requests_cache.CachedSession(
                expire_after=21600,  # 6 hours
                backend='sqlite',
                cache_name=cache_location,
            )
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache_location
        else:
            self.session = cache
            try:
                self.session.get
            except AttributeError:
                raise ValueError(
                    "cache argument must be True/False, string as cache path or requests.Session-type object (e.g from requests_cache.CachedSession)"
                )
            # Same convention as a caller-supplied urllib2 opener below.
            self.config['cache_enabled'] = True
    else:
        # For backwards compatibility in Python 2.x
        if cache is True:
            self.config['cache_enabled'] = True
            self.config['cache_location'] = self._getTempDir()
            self.urlopener = urllib2.build_opener(
                CacheHandler(self.config['cache_location']))

        elif cache is False:
            self.config['cache_enabled'] = False
            self.urlopener = urllib2.build_opener()  # default opener with no caching

        elif isinstance(cache, basestring):
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache
            self.urlopener = urllib2.build_opener(
                CacheHandler(self.config['cache_location']))

        elif isinstance(cache, urllib2.OpenerDirector):
            # If passed something from urllib2.build_opener, use that
            log().debug("Using %r as urlopener" % cache)
            self.config['cache_enabled'] = True
            self.urlopener = cache

        else:
            raise ValueError("Invalid value for Cache %r (type was %s)" %
                             (cache, type(cache)))

    self.config['banners_enabled'] = banners
    self.config['actors_enabled'] = actors

    if self.config['debug_enabled']:
        warnings.warn(
            "The debug argument to tvdb_api.__init__ will be removed in the next version. "
            "To enable debug messages, use the following code before importing: "
            "import logging; logging.basicConfig(level=logging.DEBUG)")
        logging.basicConfig(level=logging.DEBUG)

    # List of language from http://thetvdb.com/api/0629B785CE550C8D/languages.xml
    # Hard-coded here as it is realtively static, and saves another HTTP request, as
    # recommended on http://thetvdb.com/wiki/index.php/API:languages.xml
    self.config['valid_languages'] = [
        "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
        "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv",
        "no"
    ]

    # thetvdb.com should be based around numeric language codes,
    # but to link to a series like http://thetvdb.com/?tab=series&id=79349&lid=16
    # requires the language ID, thus this mapping is required (mainly
    # for usage in tvdb_ui - internally tvdb_api will use the language abbreviations)
    self.config['langabbv_to_id'] = {
        'el': 20,
        'en': 7,
        'zh': 27,
        'it': 15,
        'cs': 28,
        'es': 16,
        'ru': 22,
        'nl': 13,
        'pt': 26,
        'no': 9,
        'tr': 21,
        'pl': 18,
        'fr': 17,
        'hr': 31,
        'de': 14,
        'da': 10,
        'fi': 11,
        'hu': 19,
        'ja': 25,
        'he': 24,
        'ko': 32,
        'sv': 8,
        'sl': 30
    }

    if language is None:
        self.config['language'] = 'en'
    elif language not in self.config['valid_languages']:
        raise ValueError("Invalid language %s, options are: %s" %
                         (language, self.config['valid_languages']))
    else:
        self.config['language'] = language

    # The following url_ configs are based of the
    # http://thetvdb.com/wiki/index.php/Programmers_API
    self.config['base_url'] = "http://thetvdb.com"

    if self.config['search_all_languages']:
        self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php?seriesname=%%s&language=all" % self.config
    else:
        self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php?seriesname=%%s&language=%(language)s" % self.config

    self.config['url_epInfo'] = u"%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.xml" % self.config
    self.config['url_epInfo_zip'] = u"%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.zip" % self.config

    self.config['url_seriesInfo'] = u"%(base_url)s/api/%(apikey)s/series/%%s/%%s.xml" % self.config
    self.config['url_actorsInfo'] = u"%(base_url)s/api/%(apikey)s/series/%%s/actors.xml" % self.config

    self.config['url_seriesBanner'] = u"%(base_url)s/api/%(apikey)s/series/%%s/banners.xml" % self.config
    self.config['url_artworkPrefix'] = u"%(base_url)s/banners/%%s" % self.config
def _get_show_data(self, sid, language, get_ep_info=False):
    """Load series info, artwork, actors and (optionally) episodes for a show.

    Every request goes through ``self._getetsrc`` and the results are stored
    with ``_set_show_data``, ``_parse_banners``, ``_parse_actors`` and
    ``_set_item``.

    sid is the series ID substituted into the configured URL templates,
    language is handed unchanged to every ``_getetsrc`` call, and
    get_ep_info selects whether the paged episode listing is fetched too.

    Returns False when the series response is empty or has no 'seriesname'
    entry, otherwise True. Raises tvdb_error when an episode page request
    returns None.
    """
    # Parse show information
    log().debug('Getting all series data for %s' % sid)
    # NOTE(review): url_seriesInfo is applied to a single value here and
    # url_seriesBanner to a (sid, type) pair below - confirm the configured
    # templates carry the matching number of placeholders.
    url = self.config['url_seriesInfo'] % sid
    show_data = self._getetsrc(url, language=language)

    # Make sure there is data to process and that it contains a series name.
    # The "or {}" must apply to the 'data' value itself, otherwise a response
    # whose 'data' is None raises TypeError on the membership test.
    series_fields = (show_data.get('data') or {}) if show_data else {}
    if 'seriesname' not in series_fields:
        return False

    for key, value in series_fields.items():
        self._set_show_data(sid, key, value)

    if sid in self.shows:
        self.shows[sid].ep_loaded = get_ep_info

    def by_rating(image):
        return image['ratingsinfo']['average'], image['ratingsinfo']['count']

    def by_season_then_rating(image):
        return (-1 * tryInt(image['subkey']),) + by_rating(image)

    # (config switch, artwork type in the URL, show key for the best image or
    # None when only the parsed banner list is wanted, sort key)
    artwork_kinds = (
        ('posters_enabled', 'poster', u'poster', by_rating),
        ('banners_enabled', 'series', u'banner', by_rating),
        ('seasons_enabled', 'season', None, by_season_then_rating),
        ('seasonwides_enabled', 'seasonwide', None, by_season_then_rating),
        ('fanart_enabled', 'fanart', u'fanart', by_rating),
    )
    for config_key, art_type, show_key, sort_key in artwork_kinds:
        if not self.config[config_key]:
            continue
        art_data = self._getetsrc(self.config['url_seriesBanner'] % (sid, art_type), language=language)
        if not (art_data and art_data.get('data')):
            continue
        # Highest ranked image first; the sorted list is written back onto the response
        art_data['data'] = sorted(art_data['data'], reverse=True, key=sort_key)
        images = art_data['data']
        self._parse_banners(sid, images)
        if show_key is not None:
            best_url = self.config['url_artworkPrefix'] % images[0]['filename']
            if best_url:
                self._set_show_data(sid, show_key, best_url)

    if self.config['actors_enabled']:
        actor_data = self._getetsrc(self.config['url_actorsInfo'] % sid, language=language)
        if actor_data and actor_data.get('data'):
            self._parse_actors(sid, actor_data['data'])

    if not get_ep_info:
        return True

    # Parse episode data
    log().debug('Getting all episodes of %s' % sid)

    page = 1
    episodes = []
    while page is not None:
        episode_data = self._getetsrc(self.config['url_epInfo'] % (sid, page), language=language)
        if episode_data is None:
            raise tvdb_error('Exception retrieving episodes for show')
        if not getattr(self, 'not_found', False) and episode_data.get('data') is not None:
            episodes.extend(episode_data['data'])
        # 'links' may be present but None; treat that like a missing key
        page = (episode_data.get('links') or {}).get('next')

    ep_map_keys = {'absolutenumber': u'absolute_number', 'airedepisodenumber': u'episodenumber',
                   'airedseason': u'seasonnumber', 'airedseasonid': u'seasonid',
                   'dvdepisodenumber': u'dvd_episodenumber', 'dvdseason': u'dvd_season'}

    # The ordering preference does not change per episode, so read and log it once
    dvd_order = self.config['dvdorder']
    if dvd_order:
        log().debug('Using DVD ordering.')

    for cur_ep in episodes:
        dvd_season, dvd_episode = cur_ep.get('dvdseason'), cur_ep.get('dvdepisodenumber')
        # Fall back to aired numbering when either DVD number is missing
        if dvd_order and dvd_season is not None and dvd_episode is not None:
            elem_seasnum, elem_epno = dvd_season, dvd_episode
        else:
            elem_seasnum, elem_epno = cur_ep.get('airedseason'), cur_ep.get('airedepisodenumber')

        if elem_seasnum is None or elem_epno is None:
            log().warning('An episode has incomplete season/episode number (season: %r, episode: %r)' % (
                elem_seasnum, elem_epno))
            continue  # Skip to next episode

        # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
        seas_no = int(float(elem_seasnum))
        ep_no = int(float(elem_epno))

        for key, value in cur_ep.items():
            key = key.lower()

            if value is not None:
                if key == 'filename':
                    value = self.config['url_artworkPrefix'] % value
                else:
                    value = clean_data(value)

            # Stored under the mapped name where one exists, else the lowercased key
            self._set_item(sid, seas_no, ep_no, ep_map_keys.get(key, key), value)

    return True