示例#1
0
 def open_unknown_proxy(self, proxy, fullurl, data=None):
     """Always fail with an IMDbDataAccessError describing the request.

     The exception carries a dictionary with the stringified proxy, the
     full URL, the stringified data and the 'open_unknown_proxy' error
     type.
     """
     details = {}
     details['proxy'] = str(proxy)
     details['fullurl'] = fullurl
     details['error type'] = 'open_unknown_proxy'
     details['data'] = str(data)
     raise IMDbDataAccessError(details)
示例#2
0
 def open_unknown(self, fullurl, data=None):
     """Always fail with an IMDbDataAccessError describing the request.

     The exception carries a dictionary with the full URL, the
     stringified data, the 'open_unknown' error type and the proxy
     returned by self.get_proxy().
     """
     details = {}
     details['fullurl'] = fullurl
     details['data'] = str(data)
     details['error type'] = 'open_unknown'
     details['proxy'] = self.get_proxy()
     raise IMDbDataAccessError(details)
示例#3
0
 def retrieve_unicode(self, url, size=-1):
     """Retrieve the given URL and return its content as a unicode string.

     The encoding is guessed from the charset announced in the response
     headers or, failing that, from a 'text/html; charset=...'
     declaration found in the body.  utf8 is used when neither gives a
     name accepted by lookup().

     url is the address to open.  When size is not -1, a
     'Range: bytes=0-<size>' header is set before the request and
     removed afterwards.

     Raises IMDbDataAccessError when an IOError occurs during the
     retrieval.
     """
     encode = None
     try:
         if size != -1:
             self.set_header('Range', 'bytes=0-%d' % size)
         uopener = self.open(url)
         content = uopener.read()
         self._last_url = uopener.url
         # Maybe the server is so nice to tell us the charset...
         if PY2:
             server_encode = uopener.headers.getparam('charset')
         else:
             server_encode = (uopener.info().get_charsets() or [None])[0]
         # Otherwise, look at the content-type HTML meta tag.
         if server_encode is None and content:
             marker = b'text/html; charset='
             begin_h = content.find(marker)
             if begin_h != -1:
                 start = begin_h + len(marker)
                 # The body is searched as bytes, so the closing quote
                 # must be a bytes literal too: a str needle raises
                 # TypeError on Python 3.
                 end_h = content.find(b'"', start)
                 if end_h != -1:
                     # Decode the charset name so that lookup() gets
                     # text and not a bytes object.
                     server_encode = content[start:end_h].decode(
                         'ascii', 'ignore')
         if server_encode:
             try:
                 if lookup(server_encode):
                     encode = server_encode
             except (LookupError, ValueError, TypeError):
                 # Unknown or malformed charset name: use the default.
                 pass
         uopener.close()
         if size != -1:
             self.del_header('Range')
         self.close()
     except IOError as e:
         if size != -1:
             # Ensure that the Range header is removed.
             self.del_header('Range')
         raise IMDbDataAccessError({
             'errcode': e.errno,
             'errmsg': str(e.strerror),
             'url': url,
             'proxy': self.get_proxy(),
             'exception type': 'IOError',
             'original exception': e
         })
     if encode is None:
         encode = 'utf8'
         # The detection of the encoding is error prone...
         # The placeholder names the page, so log the URL (the fallback
         # encoding is already spelled out in the message).
         self._logger.warn(
             'Unable to detect the encoding of the retrieved page [%s];'
             ' falling back to default utf8.', url)
     if isinstance(content, str):
         # Already text: nothing to decode.
         return content
     return str(content, encode, 'replace')
示例#4
0
 def http_error_default(self, url, fp, errcode, errmsg, headers):
     if errcode == 404:
         self._logger.warn('404 code returned for %s: %s (headers: %s)',
                             url, errmsg, headers)
         return _FakeURLOpener(url, headers)
     raise IMDbDataAccessError({'url': 'http:%s' % url,
                                 'errcode': errcode,
                                 'errmsg': errmsg,
                                 'headers': headers,
                                 'error type': 'http_error_default',
                                 'proxy': self.get_proxy()})
示例#5
0
 def retrieve_unicode(self, url, size=-1):
     """Retrieves the given URL, and returns a unicode string,
     trying to guess the encoding of the data (assuming latin_1
     by default)"""
     # NOTE(review): this variant uses Python 2 only syntax
     # ("except IOError, e") and will not compile on Python 3.
     encode = None
     try:
         # A partial download is requested through an HTTP Range header.
         if size != -1:
             self.set_header('Range', 'bytes=0-%d' % size)
         uopener = self.open(url)
         kwds = {}
         # The size keyword is passed to read() only on Python newer than
         # 2.3 and when IN_GAE is false (presumably Google App Engine;
         # confirm against the module that defines it).
         if PY_VERSION > (2, 3) and not IN_GAE:
             kwds['size'] = size
         content = uopener.read(**kwds)
         self._last_url = uopener.url
         # Maybe the server is so nice to tell us the charset...
         server_encode = uopener.info().getparam('charset')
         # Otherwise, look at the content-type HTML meta tag.
         if server_encode is None and content:
             # Only the first 512 characters of the body are inspected.
             first_bytes = content[:512]
             begin_h = first_bytes.find('text/html; charset=')
             if begin_h != -1:
                 # 19 is the length of the 'text/html; charset=' marker;
                 # the charset name runs up to the next double quote.
                 end_h = first_bytes[19 + begin_h:].find('"')
                 if end_h != -1:
                     server_encode = first_bytes[19 + begin_h:19 + begin_h +
                                                 end_h]
         if server_encode:
             # Accept the charset only if lookup() recognizes it.
             try:
                 if lookup(server_encode):
                     encode = server_encode
             except (LookupError, ValueError, TypeError):
                 pass
         uopener.close()
         if size != -1:
             self.del_header('Range')
         self.close()
     except IOError, e:
         if size != -1:
             # Ensure that the Range header is removed.
             self.del_header('Range')
         # Wrap the I/O failure, keeping the original exception and the
         # details of the request.
         raise IMDbDataAccessError({
             'errcode': e.errno,
             'errmsg': str(e.strerror),
             'url': url,
             'proxy': self.get_proxy(),
             'exception type': 'IOError',
             'original exception': e
         })
     # NOTE(review): the visible body ends here; "content" and "encode"
     # are never used to build a return value and no latin_1 default is
     # applied, so the method returns None despite its docstring.  The
     # tail of the function looks truncated - confirm against the
     # complete source.
示例#6
0
    def update(self, mop, info=None, override=0):
        """Given a Movie, Person, Character or Company object with only
        partial information, retrieve the required set of information.

        info is the list of sets of information to retrieve; a single
        value is also accepted.  None selects mop.default_info, while
        the string 'all' selects what the matching get_*_infoset()
        method returns.

        If override is set, the information are retrieved and updated
        even if they're already in the object.

        Raises IMDbError if mop is not an instance of one of the four
        supported classes, and IMDbDataAccessError if its ID is None
        (a Character without characterID is silently ignored instead).
        An exception raised while retrieving an info set is logged and
        swallowed, unless self._reraise_exceptions is true."""
        # XXX: should this be a method of the Movie/Person/Character/Company
        #      classes?  NO!  What for instances created by external functions?
        if isinstance(mop, Movie.Movie):
            mopID = mop.movieID
            prefix = 'movie'
        elif isinstance(mop, Person.Person):
            mopID = mop.personID
            prefix = 'person'
        elif isinstance(mop, Character.Character):
            mopID = mop.characterID
            prefix = 'character'
        elif isinstance(mop, Company.Company):
            mopID = mop.companyID
            prefix = 'company'
        else:
            raise IMDbError('object ' + repr(mop) +
                    ' is not a Movie, Person, Character or Company instance')
        if mopID is None:
            # XXX: enough?  It's obvious that there are Characters
            #      objects without characterID, so I think they should
            #      just do nothing, when an i.update(character) is tried.
            if prefix == 'character':
                return
            raise IMDbDataAccessError(
                'the supplied object has null movieID, personID or companyID')
        # Use the access system the object was created with.
        if mop.accessSystem == self.accessSystem:
            aSystem = self
        else:
            aSystem = IMDb(mop.accessSystem)
        if info is None:
            info = mop.default_info
        elif info == 'all':
            if isinstance(mop, Movie.Movie):
                info = self.get_movie_infoset()
            elif isinstance(mop, Person.Person):
                info = self.get_person_infoset()
            elif isinstance(mop, Character.Character):
                info = self.get_character_infoset()
            else:
                info = self.get_company_infoset()
        if not isinstance(info, (tuple, list)):
            info = (info, )
        res = {}
        for i in info:
            if i in mop.current_info and not override:
                continue
            if not i:
                continue
            self._imdb_logger.debug('retrieving "%s" info set', i)
            # The retrieval method is named after the kind of object and
            # the info set, e.g. get_<prefix>_<info set>.
            try:
                method = getattr(aSystem,
                                 'get_%s_%s' % (prefix, i.replace(' ', '_')))
            except AttributeError:
                self._imdb_logger.error('unknown information set "%s"', i)
                # Keeps going.
                method = lambda *x: {}
            try:
                ret = method(mopID)
            except Exception:
                self._imdb_logger.critical('caught an exception retrieving '
                                    'or parsing "%s" info set for mopID '
                                    '"%s" (accessSystem: %s)',
                                    i, mopID, mop.accessSystem, exc_info=True)
                ret = {}
                # If requested by the user, reraise the exception.
                if self._reraise_exceptions:
                    raise
            keys = None
            if 'data' in ret:
                res.update(ret['data'])
                if isinstance(ret['data'], dict):
                    # Take a snapshot: on Python 3 keys() is a live view
                    # of the dictionary, not an independent list.
                    keys = list(ret['data'].keys())
            if 'info sets' in ret:
                for ri in ret['info sets']:
                    mop.add_to_current_info(ri, keys, mainInfoset=i)
            else:
                mop.add_to_current_info(i, keys)
            if 'titlesRefs' in ret:
                mop.update_titlesRefs(ret['titlesRefs'])
            if 'namesRefs' in ret:
                mop.update_namesRefs(ret['namesRefs'])
            if 'charactersRefs' in ret:
                mop.update_charactersRefs(ret['charactersRefs'])
        # All the collected data is stored in a single call.
        mop.set_data(res, override=0)
示例#7
0
    def update_series_seasons(self, mop, season_nums, override=0):
        """Retrieve the 'episodes' info set of a Movie for some seasons.

        mop must be a Movie instance with a movieID; season_nums is the
        list of the specific seasons to retrieve.

        If override is set, the information are retrieved and updated
        even if they're already in the object.

        Raises IMDbError if mop is not a Movie and IMDbDataAccessError
        if its movieID is None."""
        if not isinstance(mop, Movie.Movie):
            raise IMDbError('object ' + repr(mop) + ' is not a Movie instance')
        movie_id = mop.movieID
        if movie_id is None:
            raise IMDbDataAccessError(
                'supplied object has null movieID, personID or companyID')
        # Use the access system the object was created with.
        backend = (self if mop.accessSystem == self.accessSystem
                   else IMDb(mop.accessSystem))

        infoset = 'episodes'
        if infoset in mop.current_info and not override:
            return

        _imdb_logger.debug('retrieving "%s" info set', infoset)
        try:
            fetch = backend.get_movie_episodes
        except AttributeError:
            _imdb_logger.error('unknown information set "%s"', infoset)
            # Keeps going.
            fetch = lambda *x: {}

        try:
            ret = fetch(movie_id, season_nums)
        except Exception:
            _imdb_logger.critical(
                'caught an exception retrieving or parsing "%s" info set'
                ' for mopID "%s" (accessSystem: %s)',
                infoset, movie_id, mop.accessSystem, exc_info=True)
            ret = {}
            # If requested by the user, reraise the exception.
            if self._reraise_exceptions:
                raise

        collected = {}
        data_keys = None
        if 'data' in ret:
            payload = ret['data']
            collected.update(payload)
            if isinstance(payload, dict):
                data_keys = list(payload.keys())

        # Record which info sets are now available in the object.
        if 'info sets' in ret:
            for retrieved in ret['info sets']:
                mop.add_to_current_info(retrieved, data_keys,
                                        mainInfoset=infoset)
        else:
            mop.add_to_current_info(infoset, data_keys)

        # Forward the references to the matching update_<kind>() method.
        for ref_kind in ('titlesRefs', 'namesRefs', 'charactersRefs'):
            if ref_kind in ret:
                getattr(mop, 'update_' + ref_kind)(ret[ref_kind])

        mop.set_data(collected, override=0)
示例#8
0
 def retrieve_unicode(self, url, size=-1):
     """Retrieve the given URL and return its content as a unicode string.

     The request is made with an opener built from self.https_handler
     and, when an 'http' proxy is configured, a ProxyHandler that uses
     it for both http and https.

     The encoding is guessed from the charset announced in the response
     headers or, failing that, from a 'text/html; charset=...'
     declaration found in the body.  utf8 is used when neither gives a
     name accepted by lookup().

     url is the address to open.  When size is not -1, a
     'Range: bytes=0-<size>' header is set before the request and
     removed afterwards.

     Raises IMDbDataAccessError when an IOError occurs during the
     retrieval.
     """
     encode = None
     try:
         if size != -1:
             self.set_header('Range', 'bytes=0-%d' % size)
         handlers = []
         if 'http' in self.proxies:
             # The same proxy is used for both schemes.
             proxy_handler = ProxyHandler({
                 'http': self.proxies['http'],
                 'https': self.proxies['http']
             })
             handlers.append(proxy_handler)
         handlers.append(self.https_handler)
         uopener = build_opener(*handlers)
         uopener.addheaders = list(self.addheaders)
         response = uopener.open(url)
         content = response.read()
         self._last_url = response.url
         # Maybe the server is so nice to tell us the charset...
         if PY2:
             server_encode = response.headers.getparam('charset') or None
         else:
             server_encode = response.headers.get_content_charset(None)
         # Otherwise, look at the content-type HTML meta tag.
         if server_encode is None and content:
             marker = b'text/html; charset='
             begin_h = content.find(marker)
             if begin_h != -1:
                 start = begin_h + len(marker)
                 # The body is searched as bytes, so the closing quote
                 # must be a bytes literal too: a str needle raises
                 # TypeError on Python 3.
                 end_h = content.find(b'"', start)
                 if end_h != -1:
                     # Decode the charset name so that lookup() gets
                     # text and not a bytes object.
                     server_encode = content[start:end_h].decode(
                         'ascii', 'ignore')
         if server_encode:
             try:
                 if lookup(server_encode):
                     encode = server_encode
             except (LookupError, ValueError, TypeError):
                 # Unknown or malformed charset name: use the default.
                 pass
         if size != -1:
             self.del_header('Range')
         response.close()
     except IOError as e:
         if size != -1:
             # Ensure that the Range header is removed.
             self.del_header('Range')
         raise IMDbDataAccessError({
             'errcode': e.errno,
             'errmsg': str(e.strerror),
             'url': url,
             'proxy': self.get_proxy(),
             'exception type': 'IOError',
             'original exception': e
         })
     if encode is None:
         encode = 'utf8'
         # The detection of the encoding is error prone...
         # The placeholder names the page, so log the URL (the fallback
         # encoding is already spelled out in the message).
         self._logger.warn(
             'Unable to detect the encoding of the retrieved page [%s];'
             ' falling back to default utf8.', url)
     if isinstance(content, str):
         # Already text: nothing to decode.
         return content
     return str(content, encode, 'replace')
示例#9
0
 def get_person_main(self, personID, _parseChr=False):
     """Retrieve and parse the main page of a person.

     With _parseChr set, personID is used as a character ID and the
     'character_main' URL is retrieved instead of 'person_main'.

     Returns a dictionary with the parsed information under 'data' and
     the tuple ('main', 'filmography') under 'info sets'.

     Raises IMDbDataAccessError if the page has no <title>.

     NOTE(review): this code relies on Python 2 names and semantics
     (unicode(), filter() returning a list).
     """
     if not _parseChr:
         url = self.urls['person_main'] % personID + 'maindetails'
     else:
         url = self.urls['character_main'] % personID
     s = self._mretrieve(url)
     # This initial value is replaced by the result of analyze_name()
     # below.
     r = {}
     name = _findBetween(s, '<title>', '</title>', maxRes=1)
     if not name:
         if _parseChr: w = 'characterID'
         else: w = 'personID'
         raise IMDbDataAccessError('unable to get %s "%s"' % (w, personID))
     # Strip the decorations added by the site to the page title.
     name = _unHtml(name[0].replace(' - IMDb', ''))
     if _parseChr:
         name = name.replace('(Character)', '').strip()
         name = name.replace('- Filmography by type', '').strip()
     else:
         name = name.replace('- Filmography by', '').strip()
     r = analyze_name(name, canonical=not _parseChr)
     # Birth and death information: the text before ' in ' is stored as
     # the date, the text after it as the notes.
     for dKind in ('Born', 'Died'):
         date = _findBetween(s, '%s:</h4>' % dKind.capitalize(),
                             ('<div class', '</div>', '<br/><br/>'), maxRes=1)
         if date:
             date = _unHtml(date[0])
             if date:
                 #date, notes = date_and_notes(date)
                 # TODO: fix to handle real names.
                 date_notes = date.split(' in ', 1)
                 notes = u''
                 date = date_notes[0]
                 if len(date_notes) == 2:
                     notes = date_notes[1]
                 dtitle = 'birth'
                 if dKind == 'Died':
                     dtitle = 'death'
                 if date:
                     r['%s date' % dtitle] = date
                 if notes:
                     r['%s notes' % dtitle] = notes
     # Alternate names, separated by ' | ' or by ' / '.
     akas = _findBetween(s, 'Alternate Names:</h4>', ('</div>',
                         '<br/><br/>'), maxRes=1)
     if akas:
         akas = akas[0]
         if akas:
             akas = _unHtml(akas)
         if akas.find(' | ') != -1:
             akas = akas.split(' | ')
         else:
             akas = akas.split(' / ')
         # NOTE(review): filter() gives a list on Python 2 only; on
         # Python 3 it would store a lazy iterator.
         if akas: r['akas'] = filter(None, [x.strip() for x in akas])
     # Headshot: try the image_src link (with single or double quotes)
     # first, then the "headshot" anchor.
     hs = _findBetween(s, "rel='image_src'", '>', maxRes=1)
     if not hs:
         hs = _findBetween(s, 'rel="image_src"', '>', maxRes=1)
     if not hs:
         hs = _findBetween(s, '<a name="headshot"', '</a>', maxRes=1)
     if hs:
         hsl = _findBetween(hs[0], "href='", "'", maxRes=1)
         if not hsl:
             hsl = _findBetween(hs[0], 'href="', '"', maxRes=1)
         # Links containing 'imdb-share-logo' are not stored as headshot.
         if hsl and 'imdb-share-logo' not in hsl[0]:
             r['headshot'] = hsl[0]
     # Build a list of tuples such [('hrefLink', 'section name')]
     workkind = _findBetween(s, 'id="jumpto_', '</a>')
     ws = []
     for work in workkind:
         # The anchor id and the section title are separated by '">' or
         # by '" >'.
         sep = '" >'
         if '">' in work:
             sep = '">'
         wsplit = work.split(sep, 1)
         if len(wsplit) == 2:
             sect = wsplit[0]
             if '"' in sect:
                 sect = sect[:sect.find('"')]
             ws.append((sect, wsplit[1].lower()))
     # XXX: I think "guest appearances" are gone.
     if s.find('<a href="#guest-appearances"') != -1:
         ws.append(('guest-appearances', 'notable tv guest appearances'))
     #if _parseChr:
     #    ws.append(('filmography', 'filmography'))
     for sect, sectName in ws:
         raws = u''
         if sectName == 'self':
             sect = 'Self'
         # Everything between the current section link and the end
         # of the <ol> tag.
         if _parseChr and sect == 'filmography':
             inisect = s.find('<div class="filmo">')
         else:
             inisect = s.find('<a name="%s' % sect)
         if inisect != -1:
             # The section ends at the next filmography header or, if
             # there is none, at the next "article" div.
             endsect = s[inisect:].find('<div id="filmo-head-')
             if endsect == -1:
                 endsect = s[inisect:].find('<div class="article"')
             if endsect != -1: raws = s[inisect:inisect+endsect]
         #if not raws: continue
         mlist = _findBetween(raws, '<div class="filmo-row',
                 ('<div class="clear"/>',))
         for m in mlist:
             # Drop the rest of the opening tag of the row.
             fCB = m.find('>')
             if fCB != -1:
                 m = m[fCB+1:].lstrip()
             m = re_filmo_episodes.sub('', m)
             # For every movie in the current section.
             movieID = re_imdbID.findall(m)
             if not movieID:
                 self._mobile_logger.debug('no movieID in %s', m)
                 continue
             # The first line break is turned into the ' .... ' marker
             # searched below.
             m = m.replace('<br/>', ' .... ', 1)
             if not _parseChr:
                 chrIndx = m.find(' .... ')
             else:
                 chrIndx = m.find(' Played by ')
             chids = []
             if chrIndx != -1:
                 # 6 is the length of ' .... '; ' Played by ' is 5
                 # characters longer, hence the further slice.
                 chrtxt = m[chrIndx+6:]
                 if _parseChr:
                     chrtxt = chrtxt[5:]
                 # One entry per role: the last ID found in the chunk, or
                 # None if it contains no ID.
                 for ch in chrtxt.split(' / '):
                     chid = re_imdbID.findall(ch)
                     if not chid:
                         chids.append(None)
                     else:
                         chids.append(chid[-1])
             # No role: None; a single role: the bare value instead of a
             # list.
             if not chids:
                 chids = None
             elif len(chids) == 1:
                 chids = chids[0]
             movieID = str(movieID[0])
             # Search the status.
             stidx = m.find('<i>')
             status = u''
             if stidx != -1:
                 stendidx = m.rfind('</i>')
                 if stendidx != -1:
                     status = _unHtml(m[stidx+3:stendidx])
                     # Remove the status text from the row (every
                     # occurrence of that text is removed).
                     m = m.replace(m[stidx+3:stendidx], '')
             year = _findBetween(m, 'year_column">', '</span>', maxRes=1)
             if year:
                 year = year[0]
                 m = m.replace('<span class="year_column">%s</span>' % year,
                         '')
             else:
                 year = None
             # What is left of the row, without markup, is the title.
             m = _unHtml(m)
             if not m:
                 self._mobile_logger.warn('no title for movieID %s', movieID)
                 continue
             movie = build_movie(m, movieID=movieID, status=status,
                                 roleID=chids, modFunct=self._defModFunct,
                                 accessSystem=self.accessSystem,
                                 _parsingCharacter=_parseChr, year=year)
             # Only the part of the section name before ':' is used as
             # key.
             sectName = sectName.split(':')[0]
             r.setdefault(sectName, []).append(movie)
     # If available, take the always correct name from a form.
     itag = _getTagsWith(s, 'NAME="primary"', maxRes=1)
     if not itag:
         itag = _getTagsWith(s, 'name="primary"', maxRes=1)
     if itag:
         vtag = _findBetween(itag[0], 'VALUE="', ('"', '>'), maxRes=1)
         if not vtag:
             vtag = _findBetween(itag[0], 'value="', ('"', '>'), maxRes=1)
         if vtag:
             # NOTE(review): unicode() exists only on Python 2, where
             # str() of a non-ASCII unicode value is presumably what can
             # raise the UnicodeEncodeError ignored here - confirm.
             try:
                 vtag = unquote(str(vtag[0]))
                 vtag = unicode(vtag, 'latin_1')
                 r.update(analyze_name(vtag))
             except UnicodeEncodeError:
                 pass
     return {'data': r, 'info sets': ('main', 'filmography')}
示例#10
0
 def get_movie_main(self, movieID):
     """Retrieve and parse the main details page of a movie.

     Returns a dictionary whose 'data' key holds the parsed information:
     the result of analyze_title() on the page title, enriched with the
     keys found in the page ('director', 'writer', 'cast', 'genres',
     'rating', 'votes', 'akas', 'runtimes', 'countries', 'languages'
     and so on; for episodes also 'episode of', 'season' and 'episode').

     Raises IMDbDataAccessError if the page has no <title>.
     """
     cont = self._mretrieve(self.urls['movie_main'] % movieID + 'maindetails')
     title = _findBetween(cont, '<title>', '</title>', maxRes=1)
     if not title:
         raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
     title = _unHtml(title[0])
     if title.endswith(' - IMDb'):
         title = title[:-7]
     if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
         title += ' (mini)'
     d = analyze_title(title)
     kind = d.get('kind')
     # A link to a TV series means that this title is an episode of it.
     tv_series = _findBetween(cont, 'TV Series:</h5>', '</a>', maxRes=1)
     if tv_series:
         mid = re_imdbID.findall(tv_series[0])
     else:
         mid = None
     if tv_series and mid:
         s_title = _unHtml(tv_series[0])
         s_data = analyze_title(s_title)
         m = Movie(movieID=str(mid[0]), data=s_data,
                     accessSystem=self.accessSystem,
                     modFunct=self._defModFunct)
         d['kind'] = kind = u'episode'
         d['episode of'] = m
     if kind in ('tv series', 'tv mini series'):
         years = _findBetween(cont, '<h1>', '</h1>', maxRes=1)
         if years:
             years[:] = _findBetween(years[0], 'TV series', '</span>',
                                     maxRes=1)
             if years:
                 d['series years'] = years[0].strip()
     air_date = _findBetween(cont, 'Original Air Date:</h5>', '</div>',
                             maxRes=1)
     if air_date:
         air_date = air_date[0]
         # The date comes before the parenthesis, which in turn holds
         # the season and episode numbers.
         vi = air_date.find('(')
         if vi != -1:
             date = _unHtml(air_date[:vi]).strip()
             if date != '????':
                 d['original air date'] = date
             air_date = air_date[vi:]
             season = _findBetween(air_date, 'Season', ',', maxRes=1)
             if season:
                 season = season[0].strip()
                 # A non numeric value is kept as a string.
                 try:
                     season = int(season)
                 except ValueError:
                     pass
                 # The type check keeps a season number of 0.
                 if season or type(season) is _inttype:
                     d['season'] = season
             episode = _findBetween(air_date, 'Episode', ')', maxRes=1)
             if episode:
                 episode = episode[0].strip()
                 try:
                     episode = int(episode)
                 except ValueError:
                     pass
                 # Check the type of the episode itself (and not of the
                 # season), so that an episode number of 0 is kept
                 # whatever the season is.
                 if episode or type(episode) is _inttype:
                     d['episode'] = episode
     direct = _findBetween(cont, '<h5>Director', ('</div>', '<br/> <br/>'),
                             maxRes=1)
     if direct:
         direct = direct[0]
         # Skip what is left of the header.
         h5idx = direct.find('/h5>')
         if h5idx != -1:
             direct = direct[h5idx+4:]
         direct = self._getPersons(direct)
         if direct:
             d['director'] = direct
     if kind in ('tv series', 'tv mini series', 'episode'):
         if kind != 'episode':
             seasons = _findBetween(cont, 'Seasons:</h5>', '</div>',
                                     maxRes=1)
             if seasons:
                 # The seasons are separated by '|'.
                 d['number of seasons'] = seasons[0].count('|') + 1
         creator = _findBetween(cont, 'Created by</h5>',
                                 ('class="tn15more"', '</div>',
                                  '<br/> <br/>'), maxRes=1)
         if not creator:
             # They change 'Created by' to 'Creator' and viceversa
             # from time to time...
             # XXX: is 'Creators' also used?
             creator = _findBetween(cont, 'Creator:</h5>',
                                     ('class="tn15more"', '</div>',
                                      '<br/> <br/>'), maxRes=1)
         if creator:
             creator = creator[0]
             # NOTE(review): find() returns -1 (a true value) when the
             # text is missing, so this test is false only if the
             # string starts with 'tn15more'; "!= -1" may have been
             # intended.  Left unchanged, since the parsing done by
             # _getPersons() may rely on the added '>' - confirm.
             if creator.find('tn15more'):
                 creator = '%s>' % creator
             creator = self._getPersons(creator)
             if creator:
                 d['creator'] = creator
     writers = _findBetween(cont, '<h5>Writer', ('</div>', '<br/> <br/>'),
                             maxRes=1)
     if writers:
         writers = writers[0]
         h5idx = writers.find('/h5>')
         if h5idx != -1:
             writers = writers[h5idx+4:]
         writers = self._getPersons(writers)
         if writers:
             d['writer'] = writers
     cvurl = _getTagsWith(cont, 'name="poster"', toClosure=True, maxRes=1)
     if cvurl:
         cvurl = _findBetween(cvurl[0], 'src="', '"', maxRes=1)
         if cvurl:
             d['cover url'] = cvurl[0]
     genres = _findBetween(cont, 'href="/genre/', '"')
     if genres:
         # Remove the duplicates.
         d['genres'] = list(set(genres))
     ur = _findBetween(cont, 'id="star-bar-user-rate">', '</div>',
                         maxRes=1)
     if ur:
         rat = _findBetween(ur[0], '<b>', '</b>', maxRes=1)
         if rat:
             d['rating'] = rat[0].strip()
         vi = ur[0].rfind('href="ratings"')
         # Nothing to parse if the text after the link contains 'await'.
         if vi != -1 and ur[0][vi+10:].find('await') == -1:
             try:
                 votes = _findBetween(ur[0][vi:], "title='",
                                     " IMDb", maxRes=1)
                 votes = int(votes[0].replace(',', ''))
                 d['votes'] = votes
             except (ValueError, IndexError):
                 self._mobile_logger.warn('wrong votes: %s', ur)
     top250 = _findBetween(cont, 'href="/chart/top?', '</a>', maxRes=1)
     if top250:
         # The rank is the number after the last '#'.
         fn = top250[0].rfind('#')
         if fn != -1:
             try:
                 td = int(top250[0][fn+1:])
                 d['top 250 rank'] = td
             except ValueError:
                 self._mobile_logger.warn('wrong top250: %s', top250)
     # The cast table can be introduced by different headers.
     castdata = _findBetween(cont, 'Cast overview', '</table>', maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Credited cast', '</table>', maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Complete credited cast', '</table>',
                                 maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Series Cast Summary', '</table>',
                                 maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Episode Credited cast', '</table>',
                                 maxRes=1)
     if castdata:
         castdata = castdata[0]
         # Reintegrate the first tag.
         fl = castdata.find('href=')
         if fl != -1:
             castdata = '<a ' + castdata[fl:]
         # Exclude the 'rest of cast listed alphabetically' row.
         smib = castdata.find('<tr><td align="center" colspan="4"><small>')
         if smib != -1:
             smie = castdata.rfind('</small></td></tr>')
             if smie != -1:
                 # 18 is the length of '</small></td></tr>'.
                 castdata = castdata[:smib].strip() + \
                             castdata[smie+18:].strip()
         castdata = castdata.replace('/tr> <tr', '/tr><tr')
         cast = self._getPersons(castdata, sep='</tr><tr')
         if cast:
             d['cast'] = cast
     akas = _findBetween(cont, 'Also Known As:</h5>', '</div>', maxRes=1)
     if akas:
         # For some reason, here <br> is still used in place of <br/>.
         akas[:] = [x for x in akas[0].split('<br>') if x.strip()]
         akas = [_unHtml(x).replace('" - ','::', 1).lstrip('"').strip()
                 for x in akas]
         if 'See more' in akas:
             akas.remove('See more')
         akas[:] = [x for x in akas if x]
         if akas:
             d['akas'] = akas
     mpaa = _findBetween(cont, 'MPAA</a>:', '</div>', maxRes=1)
     if mpaa:
         d['mpaa'] = _unHtml(mpaa[0])
     runtimes = _findBetween(cont, 'Runtime:</h5>', '</div>', maxRes=1)
     if runtimes:
         runtimes = runtimes[0]
         # A note in parentheses is separated from the value by '::'.
         runtimes = [x.strip().replace(' min', '').replace(' (', '::(', 1)
                 for x in runtimes.split('|')]
         d['runtimes'] = [_unHtml(x).strip() for x in runtimes]
     if kind == 'episode':
         # number of episodes.
         epsn = _findBetween(cont, 'title="Full Episode List">', '</a>',
                             maxRes=1)
         if epsn:
             epsn = epsn[0].replace(' Episodes', '').strip()
             if epsn:
                 # The value is stored even if it is not a number.
                 try:
                     epsn = int(epsn)
                 except ValueError:
                     self._mobile_logger.warn('wrong episodes #: %s', epsn)
                 d['number of episodes'] = epsn
     # The following sections are lists of links separated by ' | ';
     # notes in italic are separated from the value by '::'.
     country = _findBetween(cont, 'Country:</h5>', '</div>', maxRes=1)
     if country:
         country[:] = country[0].split(' | ')
         country[:] = ['<a %s' % x for x in country if x]
         country[:] = [_unHtml(x.replace(' <i>', '::')) for x in country]
         if country:
             d['countries'] = country
     lang = _findBetween(cont, 'Language:</h5>', '</div>', maxRes=1)
     if lang:
         lang[:] = lang[0].split(' | ')
         lang[:] = ['<a %s' % x for x in lang if x]
         lang[:] = [_unHtml(x.replace(' <i>', '::')) for x in lang]
         if lang:
             d['languages'] = lang
     col = _findBetween(cont, '"/search/title?colors=', '</div>')
     if col:
         col[:] = col[0].split(' | ')
         col[:] = ['<a %s' % x for x in col if x]
         col[:] = [_unHtml(x.replace(' <i>', '::')) for x in col]
         if col:
             d['color info'] = col
     sm = _findBetween(cont, '/search/title?sound_mixes=', '</div>',
                         maxRes=1)
     if sm:
         sm[:] = sm[0].split(' | ')
         sm[:] = ['<a %s' % x for x in sm if x]
         sm[:] = [_unHtml(x.replace(' <i>', '::')) for x in sm]
         if sm:
             d['sound mix'] = sm
     cert = _findBetween(cont, 'Certification:</h5>', '</div>', maxRes=1)
     if cert:
         cert[:] = cert[0].split(' | ')
         cert[:] = [_unHtml(x.replace(' <i>', '::')) for x in cert]
         if cert:
             d['certificates'] = cert
     plotoutline = _findBetween(cont, 'Plot:</h5>', ['<a ', '</div>'],
                                 maxRes=1)
     if plotoutline:
         plotoutline = plotoutline[0].strip()
         plotoutline = plotoutline.rstrip('|').rstrip()
         if plotoutline:
             d['plot outline'] = _unHtml(plotoutline)
     aratio = _findBetween(cont, 'Aspect Ratio:</h5>', ['<a ', '</div>'],
                         maxRes=1)
     if aratio:
         aratio = aratio[0].strip().replace(' (', '::(', 1)
         if aratio:
             d['aspect ratio'] = _unHtml(aratio)
     return {'data': d}