Example #1
 def _search_movie(self, title, results):
     ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title})
     ##params = 'q=%s&tt=on&mx=%s' % (urllib.quote_plus(title), str(results))
     ##cont = self._mretrieve(imdbURL_search % params)
     cont = subXMLRefs(self._get_search_content('tt', title, results))
     title = _findBetween(cont, '<title>', '</title>', maxRes=1)
     res = []
     if not title:
         self._mobile_logger.error('no title tag searching for movie %s',
                                 title)
         return res
     tl = title[0].lower()
     if not tl.startswith('imdb title'):
         # a direct hit!
         title = _unHtml(title[0])
         mid = None
         midtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1)
         if midtag:
             mid = _findBetween(midtag[0], '/title/tt', '/', maxRes=1)
         if not (mid and title):
             self._mobile_logger.error('no direct hit title/movieID for' \
                                         ' title %s', title)
             return res
         if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
             title += ' (mini)'
         res[:] = [(str(mid[0]), analyze_title(title))]
     else:
         # XXX: this results*3 prevents some recursion errors, but...
         #      it's not exactly understandable (i.e.: why 'results' is
         #      not enough to get all the results?)
         lis = _findBetween(cont, 'td valign="top">', '</td>',
                             maxRes=results*3)
         for li in lis:
             akaIdx = li.find('aka <em>')
             akas = []
             if akaIdx != -1:
                 akas = [_unHtml(x) for x in li[akaIdx:].split('<br>')]
                 li = li[:akaIdx]
             if akas:
                 for idx, aka in enumerate(akas):
                     aka = aka.replace('" - ', '::')
                     if aka.startswith('aka "'):
                         aka = aka[5:]
                     if aka[-1] == '"':
                         aka = aka[:-1]
                     akas[idx] = aka
             imdbid = re_imdbID.findall(li)
             mtitle = _unHtml(li)
             if not (imdbid and mtitle):
                 self._mobile_logger.debug('no title/movieID parsing' \
                                         ' %s searching for title %s', li,
                                         title)
                 continue
             mtitle = mtitle.replace('(TV mini-series)', '(mini)')
             resd = analyze_title(mtitle)
             if akas:
                 resd['akas'] = akas
             res.append((str(imdbid[0]), resd))
     return res
Example #2
 def _search_movie(self, title, results):
     ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title})
     ##params = 'q=%s&tt=on&mx=%s' % (urllib.quote_plus(title), str(results))
     ##cont = self._mretrieve(imdbURL_search % params)
     cont = subXMLRefs(self._get_search_content('tt', title, results))
     title = _findBetween(cont, '<title>', '</title>', maxRes=1)
     res = []
     if not title:
         self._mobile_logger.error('no title tag searching for movie %s',
                                 title)
         return res
     tl = title[0].lower()
     if not tl.startswith('find - imdb'):
         # a direct hit!
         title = _unHtml(title[0])
         mid = None
         midtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1)
         if midtag:
             mid = _findBetween(midtag[0], '/title/tt', '/', maxRes=1)
         if not (mid and title):
             self._mobile_logger.error('no direct hit title/movieID for' \
                                         ' title %s', title)
             return res
         if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
             title += ' (mini)'
         res[:] = [(str(mid[0]), analyze_title(title))]
     else:
         # XXX: this results*3 prevents some recursion errors, but...
         #      it's not exactly understandable (i.e.: why 'results' is
         #      not enough to get all the results?)
         lis = _findBetween(cont, 'td class="result_text">', '</td>',
                             maxRes=results*3)
         for li in lis:
             akas = re_makas.findall(li)
             for idx, aka in enumerate(akas):
                 aka = aka.replace('" - ', '::', 1)
                 aka = _unHtml(aka)
                 if aka.startswith('aka "'):
                     aka = aka[5:].strip()
                 if aka[-1] == '"':
                     aka = aka[:-1]
                 akas[idx] = aka
             imdbid = re_imdbID.findall(li)
             li = re_makas.sub('', li)
             mtitle = _unHtml(li)
             if not (imdbid and mtitle):
                 self._mobile_logger.debug('no title/movieID parsing' \
                                         ' %s searching for title %s', li,
                                         title)
                 continue
             mtitle = mtitle.replace('(TV mini-series)', '(mini)')
             resd = analyze_title(mtitle)
             if akas:
                 resd['akas'] = akas
             res.append((str(imdbid[0]), resd))
     return res
Example #3
 def _search_movie(self, title, results):
     ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title})
     ##params = 'q=%s&tt=on&mx=%s' % (urllib.quote_plus(title), str(results))
     ##cont = self._mretrieve(imdbURL_search % params)
     cont = subXMLRefs(self._get_search_content("tt", title, results))
     title = _findBetween(cont, "<title>", "</title>", maxRes=1)
     res = []
     if not title:
         self._mobile_logger.error("no title tag searching for movie %s", title)
         return res
     tl = title[0].lower()
     if not tl.startswith("find - imdb"):
         # a direct hit!
         title = _unHtml(title[0])
         mid = None
         midtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1)
         if midtag:
             mid = _findBetween(midtag[0], "/title/tt", "/", maxRes=1)
         if not (mid and title):
             self._mobile_logger.error("no direct hit title/movieID for" " title %s", title)
             return res
         if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
             title += " (mini)"
         res[:] = [(str(mid[0]), analyze_title(title))]
     else:
         # XXX: this results*3 prevents some recursion errors, but...
         #      it's not exactly understandable (i.e.: why 'results' is
         #      not enough to get all the results?)
         lis = _findBetween(cont, 'td class="result_text">', "</td>", maxRes=results * 3)
         for li in lis:
             akas = re_makas.findall(li)
             for idx, aka in enumerate(akas):
                 aka = aka.replace('" - ', "::", 1)
                 aka = _unHtml(aka)
                 if aka.startswith('aka "'):
                     aka = aka[5:].strip()
                 if aka[-1] == '"':
                     aka = aka[:-1]
                 akas[idx] = aka
             imdbid = re_imdbID.findall(li)
             li = re_makas.sub("", li)
             mtitle = _unHtml(li)
             if not (imdbid and mtitle):
                 self._mobile_logger.debug("no title/movieID parsing" " %s searching for title %s", li, title)
                 continue
             mtitle = mtitle.replace("(TV mini-series)", "(mini)")
             resd = analyze_title(mtitle)
             if akas:
                 resd["akas"] = akas
             res.append((str(imdbid[0]), resd))
     return res
Example #4
 def _search_episode(self, title, results):
     title = title.strip()
     if not title: return
     _episodes = True
     if analyze_title(title)['kind'] == 'episode':
         _episodes = False
     return self._search_movie(title, results, _episodes=_episodes)
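The 'kind' check above relies on analyze_title() recognising the plain text data files notation for episodes. A minimal sketch, assuming the analyze_title helper from imdb.utils (exact keys and value types vary between IMDbPY releases):

from imdb.utils import analyze_title

# Plain text data files notation for an episode of a series.
info = analyze_title('"The X-Files" (1993) {Pilot (#1.1)}')
print(info.get('kind'))        # expected: 'episode'
print(info.get('episode of'))  # nested dictionary describing the series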
Example #5
def titleVariations(title, fromPtdf=0):
    """Build title variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format."""
    if fromPtdf: title1 = u''
    else: title1 = title
    title2 = title3 = u''
    if fromPtdf or re_year_index.search(title):
        # If it appears to have a (year[/imdbIndex]) indication,
        # assume that a long imdb canonical name was provided.
        titldict = analyze_title(title, canonical=1)
        # title1: the canonical name.
        title1 = titldict['title']
        if titldict['kind'] != 'episode':
            # title3: the long imdb canonical name.
            if fromPtdf: title3 = title
            else: title3 = build_title(titldict, canonical=1, ptdf=1)
        else:
            title1 = normalizeTitle(title1)
            title3 = build_title(titldict, canonical=1, ptdf=1)
    else:
        # Just a title.
        # title1: the canonical title.
        title1 = canonicalTitle(title)
        title3 = u''
    # title2 is title1 without the article, or title1 unchanged.
    if title1:
        title2 = title1
        t2s = title2.split(u', ')
        if t2s[-1].lower() in _unicodeArticles:
            title2 = u', '.join(t2s[:-1])
    return title1, title2, title3
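For context, a small sketch of the imdb.utils helpers this function builds on; the exact return values (for instance whether 'year' comes back as an int or a string) differ between IMDbPY releases, so the comments are only indicative:

from imdb.utils import analyze_title, build_title, canonicalTitle

info = analyze_title('The Matrix (1999)')       # keys: 'title', 'year', 'kind'
print(canonicalTitle('The Matrix'))             # article moved to the end: 'Matrix, The'
print(build_title(info, canonical=1, ptdf=1))   # long imdb canonical form, as used for title3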
Example #6
 def do_br(self, attrs):
     if self._in_series_title:
         self._in_series_title = 0
         st = self._series_title.strip()
         if st and self.__seriesID:
             d_title = analyze_title(st, canonical=1)
             m = Movie(movieID=str(self.__seriesID), data=d_title,
                         accessSystem=self._as, modFunct=self._modFunct)
             self._result['kind'] = u'episode'
             self._result['episode of'] = m
         self._series_title = u''
     elif self._in_series_info:
         self._in_series_info = 0
         si = ' '.join([x for x in self._series_info.split() if x])
         if si:
             aid = self.re_airdate.findall(si)
             if aid and len(aid[0]) == 3:
                 date, season, episode = aid[0]
                 date = date.strip()
                 try: season = int(season)
                 except: pass
                 try: episode = int(episode)
                 except: pass
                 if date and date != '????':
                     self._result['original air date'] = date
                 # Handle also "episode 0".
                 if season or type(season) is type(0):
                     self._result['season'] = season
                 if episode or type(episode) is type(0):
                     self._result['episode'] = episode
         self._series_info = u''
Example #7
 def _search_episode(self, title, results):
     t_dict = analyze_title(title)
     if t_dict['kind'] == 'episode':
         title = t_dict['title']
     cont = self._get_search_content('ep', title, results)
     return self.smProxy.search_movie_parser.parse(cont,
                                                   results=results)['data']
Example #8
 def _search_episode(self, title, results):
     title = title.strip()
     if not title: return
     _episodes = True
     if analyze_title(title)['kind'] == 'episode':
         _episodes = False
     return self._search_movie(title, results, _episodes=_episodes)
Example #9
def titleVariations(title, fromPtdf=0):
    """Build title variations useful for searches; if fromPtdf is true,
    the input is assumed to be in the plain text data files format."""
    if fromPtdf: title1 = u''
    else: title1 = title
    title2 = title3 = u''
    if fromPtdf or re_year_index.search(title):
        # If it appears to have a (year[/imdbIndex]) indication,
        # assume that a long imdb canonical name was provided.
        titldict = analyze_title(title, canonical=1)
        # title1: the canonical name.
        title1 = titldict['title']
        if titldict['kind'] != 'episode':
            # title3: the long imdb canonical name.
            if fromPtdf: title3 = title
            else: title3 = build_title(titldict, canonical=1, ptdf=1)
        else:
            title1 = normalizeTitle(title1)
            title3 = build_title(titldict, canonical=1, ptdf=1)
    else:
        # Just a title.
        # title1: the canonical title.
        title1 = canonicalTitle(title)
        title3 = u''
    # title2 is title1 without the article, or title1 unchanged.
    if title1:
        title2 = title1
        t2s = title2.split(u', ')
        if t2s[-1].lower() in _articles:
            title2 = u', '.join(t2s[:-1])
    return title1, title2, title3
Example #10
 def postprocess_data(self, data):
     if not data or self.label not in data:
         return []
     mlist = []
     data = data[self.label]
     # Avoid duplicates.  A real fix, using XPath, would be desirable.
     # XXX: probably this is no more needed.
     seenIDs = []
     for d in data:
         if 'movieID' not in d: continue
         if self.ranktext not in d: continue
         if 'title' not in d: continue
         theID = analyze_imdbid(d['movieID'])
         if theID is None:
             continue
         theID = str(theID)
         if theID in seenIDs:
             continue
         seenIDs.append(theID)
         minfo = analyze_title(d['title']+" "+d['year'])
         try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
         except: pass
         if 'votes' in d:
             try:
                 votes = d['votes'].replace(' votes','')
                 votes = votes.split(' based on ')[1]
                 minfo['votes'] = int(votes.replace(',', ''))
             except:
                 pass
         if 'rating' in d:
             try: minfo['rating'] = float(d['rating'])
             except: pass
         mlist.append((theID, minfo))
     return mlist
Example #11
 def _buildEpisodes(self, eps_list, parentID):
     episodes = {}
     parentTitle = getLabel(parentID, '%stitles.index' % self.__db,
                            '%stitles.key' % self.__db)
     parentSeries = Movie(title=parentTitle,
                          movieID=parentID,
                          accessSystem='local')
     for episodeID, episodeTitle in eps_list:
         episodeTitle = unicode(episodeTitle, 'latin_1', 'replace')
         data = analyze_title(episodeTitle, canonical=1)
         m = Movie(data=data, movieID=episodeID, accessSystem='local')
         m['episode of'] = parentSeries
         if data.get('year') is None:
             year = getFullIndex('%smovies.data' % self.__db,
                                 key=episodeID,
                                 kind='moviedata',
                                 rindex=1)
             if year: m['year'] = year
         season = data.get('season', 'UNKNOWN')
         if not episodes.has_key(season): episodes[season] = {}
         ep_number = data.get('episode')
         if ep_number is None:
             ep_number = max((episodes[season].keys() or [0])) + 1
         episodes[season][ep_number] = m
     return episodes
Example #12
 def _search_movie(self, title, results, _episodes=False):
     title = title.strip()
     if not title: return []
     # Search for these title variations.
     if not _episodes:
         title1, title2, title3 = titleVariations(title, fromPtdf=1)
     else:
         title1 = normalizeTitle(title)
         title2 = ''
         title3 = ''
     # XXX: only a guess: results are shrunk, to exclude Adult
     #      titles and to remove duplicate entries.
     resultsST = results * 3
     res = _scan_titles('%stitles.key' % self.__db,
                         title1, title2, title3, resultsST, _episodes)
     res[:] = [x[1] for x in res]
     # Check for adult movies.
     if not self.doAdult:
         newlist = []
         for entry in res:
             genres = getMovieMisc(movieID=entry[0],
                             dataF='%s%s.data' % (self.__db, 'genres'),
                             indexF='%s%s.index' % (self.__db, 'genres'),
                             attrIF='%sattributes.index' % self.__db,
                             attrKF='%sattributes.key' % self.__db)
             if 'Adult' not in genres: newlist.append(entry)
         res[:] = newlist
     # Get the real name, if this is an AKA.
     # XXX: duplicated code!
     new_res = []
     seen_MID = []
     for idx, (movieID, r) in enumerate(res):
         # Remove duplicates.
         # XXX: find a way to prefer titles with an AKA?  Or prefer
         #      the original title?
         if movieID in seen_MID:
             continue
         else:
             seen_MID.append(movieID)
         realMID = self._get_real_movieID(movieID)
         if movieID == realMID:
             new_res.append((movieID, r))
             continue
         if realMID in seen_MID:
             continue
         else:
             seen_MID.append(realMID)
         aka_title = build_title(r, canonical=0)
         real_title = getLabel(realMID, '%stitles.index' % self.__db,
                             '%stitles.key' % self.__db)
         if aka_title == real_title:
             new_res.append((realMID, r))
             continue
         new_r = analyze_title(real_title, canonical=1)
         new_r['akas'] = [aka_title]
         new_res.append((realMID, new_r))
     if results > 0: new_res[:] = new_res[:results]
     return new_res
Example #13
def custom_analyze_title(title):
    """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
    # XXX: very crappy. :-(
    nt = title.split(' aka ')[0]
    if nt:
        title = nt
    if not title:
        return {}
    return analyze_title(title)
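A quick illustration of the stripping performed above, using a hypothetical scraped value; everything after the first ' aka ' is dropped before the string reaches analyze_title():

# Hypothetical scraped title with trailing aka notes.
raw = 'The Professional (1994) aka "Leon" - France'
print(custom_analyze_title(raw))
# Only 'The Professional (1994)' is analyzed, so the result should hold
# just the 'title', 'year' and 'kind' keys.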
Example #14
def custom_analyze_title(title):
    """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
    # XXX: very crappy. :-(
    nt = title.split(' aka ')[0]
    if nt:
        title = nt
    if not title:
        return {}
    return analyze_title(title)
Example #15
class DOMHTMLSearchMovieParser(DOMParserBase):
    """A parser for the title search page."""

    rules = [
        Rule(
            key='data',
            extractor=Rules(
                foreach='//td[@class="result_text"]',
                rules=[
                    Rule(key='link',
                         extractor=Path('./a/@href', reduce=reducers.first)),
                    Rule(key='info', extractor=Path('.//text()')),
                    Rule(key='akas',
                         extractor=Path(foreach='./i', path='./text()')),
                    Rule(key='cover url',
                         extractor=Path(
                             '../td[@class="primary_photo"]/a/img/@src'))
                ],
                transform=lambda x:
                (analyze_imdbid(x.get('link')), analyze_title(x.get(
                    'info', '')), x.get('akas'), x.get('cover url'))))
    ]

    def _init(self):
        self.url = ''
        self.img_type = 'cover url'

    def _reset(self):
        self.url = ''

    def postprocess_data(self, data):
        if 'data' not in data:
            data['data'] = []
        results = getattr(self, 'results', None)
        if results is not None:
            data['data'][:] = data['data'][:results]
            # Horrible hack to support AKAs.
            data['data'] = [x for x in data['data'] if x[0] and x[1]]
        if data and data['data'] and len(data['data'][0]) == 4 and isinstance(
                data['data'][0], tuple):
            for idx, datum in enumerate(data['data']):
                if not isinstance(datum, tuple):
                    continue
                if not datum[0] and datum[1]:
                    continue
                if datum[2] is not None:
                    akas = [aka[1:-1] for aka in datum[2]]  # remove the quotes
                    datum[1]['akas'] = akas
                if datum[3] is not None:
                    datum[1][self.img_type] = datum[3]
                data['data'][idx] = (datum[0], datum[1])
        return data

    def add_refs(self, data):
        return data
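This parser is normally not driven by hand; a minimal sketch of the public entry point that ends up exercising it (requires network access, and the live results change over time):

from imdb import IMDb

ia = IMDb()  # the default 'http' access system uses the search parser above
for movie in ia.search_movie('the matrix')[:5]:
    # each result was assembled from analyze_imdbid() / analyze_title()
    print(movie.movieID, movie.get('title'), movie.get('year'))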
Example #16
 def _search_movie(self, title, results):
     ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title})
     ##params = 'q=%s&tt=on&mx=%s' % (urllib.quote_plus(title), str(results))
     ##cont = self._mretrieve(imdbURL_search % params)
     cont = subXMLRefs(self._get_search_content('tt', title, results))
     title = _findBetween(cont, '<title>', '</title>', maxRes=1)
     res = []
     if not title: return res
     tl = title[0].lower()
     if not tl.startswith('imdb title'):
         # XXX: a direct hit!
         title = _unHtml(title[0])
         midtag = _getTagsWith(cont, 'name="arg"', maxRes=1)
         if not midtag: midtag = _getTagsWith(cont, 'name="auto"', maxRes=1)
         mid = None
         if midtag:
             mid = _findBetween(midtag[0], 'value="', '"', maxRes=1)
             if mid and not mid[0].isdigit():
                 mid = re_imdbID.findall(mid[0])
         if not (mid and title): return res
         if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
             title += ' (mini)'
         res[:] = [(str(mid[0]), analyze_title(title, canonical=1))]
     else:
         cont = _reAKAS.sub('</td>', cont)
         lis = _findBetween(cont, 'td valign="top">', ['</td>', '</small>'])
         for li in lis:
             imdbid = re_imdbID.findall(li)
             mtitle = _unHtml(li)
             # Grab the thumbnail first, so it is defined for valid results too.
             img = _findBetween(li, '<img src="', ['" '])
             if img and len(img) > 0:
                 img = img[0]
             else:
                 img = ''
             if not (imdbid and mtitle):
                 continue
             mtitle = mtitle.replace('(TV mini-series)', '(mini)')
             movie = (str(imdbid[0]), analyze_title(mtitle, canonical=1))
             movie[1]['image'] = img
             res.append(movie)
     return res
Example #17
 def _scan_titles(keyFile, title1, title2, title3, results=0):
     """Scan the given file, using the cutils.search_title
     C function, for title variations."""
     title1, title2, title3 = [x.encode('latin_1', 'replace')
                                 for x in title1, title2, title3]
     st = search_title(keyFile, title1, title2, title3, results)
     res = []
     for x in st:
         tmpd = analyze_title(latin2utf(x[2]))
         res.append((x[0], (x[1], tmpd)))
     return res
Example #18
def custom_analyze_title4kwd(title, yearNote, outline):
    """Return a dictionary with the needed info."""
    title = title.strip()
    if not title:
        return {}
    if yearNote:
        yearNote = '%s)' % yearNote.split(' ')[0]
        title = title + ' ' + yearNote
    retDict = analyze_title(title)
    if outline:
        retDict['plot outline'] = outline
    return retDict
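The exact shape of the scraped year note is not shown here, so the values below are purely hypothetical, chosen to match the string handling above (cut at the first space, then close the parenthesis):

# Hypothetical fragments from a keyword search result page.
print(custom_analyze_title4kwd('The Fugitive', '(1993', 'A doctor on the run ...'))
# The title passed to analyze_title() becomes 'The Fugitive (1993)' and the
# outline is attached under the 'plot outline' key.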
Example #19
 def end_a(self):
     if self._in_episode_title:
         self._in_episode_title = 0
         self._in_misc_info = 1
     elif self._in_series_title:
         self._in_series_title = 0
         st = self._cur_series_title.strip()
         if st and self._series_id is not None:
             series_data = analyze_title(st, canonical=1)
             s = Movie(movieID=str(self._series_id), data=series_data,
                         accessSystem=self._as, modFunct=self._modFunct)
             self._cur_series = s
Example #20
def custom_analyze_title4kwd(title, yearNote, outline):
    """Return a dictionary with the needed info."""
    title = title.strip()
    if not title:
        return {}
    if yearNote:
        yearNote = '%s)' % yearNote.split(' ')[0]
        title = title + ' ' + yearNote
    retDict = analyze_title(title)
    if outline:
        retDict['plot outline'] = outline
    return retDict
Example #21
 def _scan_titles(keyFile, title1, title2, title3, results=0,
                 _only_episodes=0):
     """Scan the given file, using the cutils.search_title
     C function, for title variations."""
     title1, title2, title3 = [x.encode('latin_1', 'replace')
                                 for x in title1, title2, title3]
     st = search_title(keyFile, title1, title2, title3, results,
                         _only_episodes)
     res = []
     for x in st:
         tmpd = analyze_title(latin2utf(x[2]))
         res.append((x[0], (x[1], tmpd)))
     return res
Example #22
 def end_a(self):
     if self._in_episode_title:
         self._in_episode_title = 0
         self._in_misc_info = 1
     elif self._in_series_title:
         self._in_series_title = 0
         st = self._cur_series_title.strip()
         if st and self._series_id is not None:
             series_data = analyze_title(st, canonical=1)
             s = Movie(movieID=str(self._series_id),
                       data=series_data,
                       accessSystem=self._as,
                       modFunct=self._modFunct)
             self._cur_series = s
Example #23
 def _readTitlesKeyFile(keyFile, searchingEpisode=0):
     """Iterate over the given file, returning tuples suited for
     the common.locsql.scan_titles function."""
     try: kf = open(keyFile, 'r')
     except IOError, e: raise IMDbDataAccessError, str(e)
     for line in kf:
         ls = line.split('|')
         t = ls[0]
         if not t: continue
         if searchingEpisode:
             if t[-1] != '}': continue
         elif t[-1] == '}': continue
         titled = analyze_title(latin2utf(t))
         yield (long(ls[1], 16), titled)
     kf.close()
Example #24
 def _readTitlesKeyFile(keyFile, searchingEpisode=0):
     """Iterate over the given file, returning tuples suited for
     the common.locsql.scan_titles function."""
     try: kf = open(keyFile, 'r')
     except IOError, e: raise IMDbDataAccessError, str(e)
     for line in kf:
         ls = line.split('|')
         t = ls[0]
         if not t: continue
         if searchingEpisode:
             if t[-1] != '}': continue
         elif t[-1] == '}': continue
         titled = analyze_title(latin2utf(t))
         yield (long(ls[1], 16), titled)
     kf.close()
Example #25
class DOMBasicMovieParser(DOMParserBase):
    """Simply get the title of a movie and the imdbID.

    It's used by the DOMHTMLSearchMovieParser class to return a result
    for a direct match (when a search on IMDb results in a single
    movie, the web server sends the movie page directly)."""
    # Stay generic enough to be used also for other DOMBasic*Parser classes.
    _titleAttrPath = ".//text()"
    _linkPath = "//link[@rel='canonical']"
    _titleFunct = lambda self, x: analyze_title(x or '')

    def _init(self):
        self.preprocessors += [('<span class="tv-extra">TV mini-series</span>',
                                '<span class="tv-extra">(mini)</span>')]

        self.extractors = [
            Extractor(label='title',
                      path="//h1",
                      attrs=Attribute(key='title',
                                      path=self._titleAttrPath,
                                      postprocess=self._titleFunct)),
            Extractor(label='link',
                      path=self._linkPath,
                      attrs=Attribute(
                          key='link',
                          path="./@href",
                          postprocess=lambda x: analyze_imdbid(
                              (x or '').replace('http://pro.imdb.com', ''))))
        ]

    # Remove 'More at IMDb Pro' links.
    preprocessors = [
        (re.compile(r'<span class="pro-link".*?</span>'), ''),
        (re.compile(r'<a href="http://ad.doubleclick.net.*?;id=(co[0-9]{7});'),
         r'<a href="http://pro.imdb.com/company/\1"></a>< a href="')
    ]

    def postprocess_data(self, data):
        if 'link' not in data:
            data = []
        else:
            link = data.pop('link')
            if link and data:
                data = [(link, data)]
            else:
                data = []
        return data
Example #26
 def end_li(self):
     self._in_li = 0
     if self._in_episodes:
         et = self._cur_episode_title.strip()
         minfo = self._misc_info.strip()
         if et and self._episode_id:
             eps_data = analyze_title(et, canonical=1)
             eps_data['kind'] = u'episode'
             e = Movie(movieID=str(self._episode_id),
                       data=eps_data,
                       accessSystem=self._as,
                       modFunct=self._modFunct)
             e['episode of'] = self._cur_series
             if minfo.startswith('('):
                 pe = minfo.find(')')
                 if pe != -1:
                     date = minfo[1:pe]
                     if date != '????':
                         e['original air date'] = date
                         if eps_data.get('year', '????') == '????':
                             syear = date.split()[-1]
                             if syear.isdigit():
                                 e['year'] = syear
             rolei = minfo.find(' - ')
             if rolei != -1:
                 if not self._got_i_info:
                     role = u''
                     role = minfo[rolei + 3:].strip()
                     notei = role.rfind('(')
                     note = u''
                     if notei != -1 and role and role[-1] == ')':
                         note = role[notei:]
                         role = role[:notei].strip()
                     e.notes = note
                     e.currentRole = role
                 else:
                     randn = minfo[rolei + 3:].strip().split()
                     note = '[%s]' % randn[0]
                     note += ' '.join(randn[1:])
                     e.notes = note
             self._episodes.setdefault(self._cur_series, []).append(e)
         self._cur_episode_title = u''
         self._episode_id = None
     self._in_misc_info = 0
     self._misc_info = u''
Example #27
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings."""
     if isinstance(o, (unicode, str)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=0)
             rtitle = build_title(a_title, ptdf=1)
             if trefs.has_key(rtitle): continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle,
                       movieID=movieID,
                       accessSystem=self.accessSystem)
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if nrefs.has_key(rname): continue
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None: continue
             p = Person(name=rname,
                        personID=personID,
                        accessSystem=self.accessSystem)
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (list, tuple)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, dict):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
Example #28
 def postprocess_data(self, data):
     if len(data) == 0:
         return {}
     nd = {}
     for key in data.keys():
         dom = self.get_dom(key)
         link = self.xpath(dom, "//a/@href")[0]
         title = self.xpath(dom, "//a/text()")[0][1:-1]
         series = Movie(movieID=analyze_imdbid(link),
                        data=analyze_title(title),
                        accessSystem=self._as, modFunct=self._modFunct)
         nd[series] = []
         for episode in data[key]:
             # XXX: should we create a copy of 'series', to avoid
             #      circular references?
             episode['episode of'] = series
             nd[series].append(episode)
     return {'episodes': nd}
Example #29
 def postprocess_data(self, data):
     if len(data) == 0:
         return {}
     nd = {}
     for key in data.keys():
         dom = self.get_dom(key)
         link = self.xpath(dom, "//a/@href")[0]
         title = self.xpath(dom, "//a/text()")[0][1:-1]
         series = Movie(movieID=analyze_imdbid(link),
                        data=analyze_title(title),
                        accessSystem=self._as, modFunct=self._modFunct)
         nd[series] = []
         for episode in data[key]:
             # XXX: should we create a copy of 'series', to avoid
             #      circular references?
             episode['episode of'] = series
             nd[series].append(episode)
     return {'episodes': nd}
Example #30
def _build_episode(link, title, minfo, role, roleA, roleAID):
    """Build an Movie object for a given episode of a series."""
    episode_id = analyze_imdbid(link)
    notes = ''
    minidx = minfo.find(' -')
    # Sometimes, for some unknown reason, the role is left in minfo.
    if minidx != -1:
        slfRole = minfo[minidx + 3:].lstrip()
        minfo = minfo[:minidx].rstrip()
        if slfRole.endswith(')'):
            commidx = slfRole.rfind('(')
            if commidx != -1:
                notes = slfRole[commidx:]
                slfRole = slfRole[:commidx]
        if slfRole and role is None and roleA is None:
            role = slfRole
    eps_data = analyze_title(title)
    eps_data['kind'] = 'episode'
    # FIXME: it's wrong for multiple characters (very rare on tv series?).
    if role is None:
        role = roleA  # At worst, it's None.
    if role is None:
        roleAID = None
    if roleAID is not None:
        roleAID = analyze_imdbid(roleAID)
    e = Movie(movieID=episode_id,
              data=eps_data,
              currentRole=role,
              roleID=roleAID,
              notes=notes)
    # XXX: are we missing some notes?
    # XXX: does it parse things as "Episode dated 12 May 2005 (12 May 2005)"?
    if minfo.startswith('('):
        pe = minfo.find(')')
        if pe != -1:
            date = minfo[1:pe]
            if date != '????':
                e['original air date'] = date
                if eps_data.get('year', '????') == '????':
                    syear = date.split()[-1]
                    if syear.isdigit():
                        e['year'] = int(syear)
    return e
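To make the minfo handling above concrete, a stand-alone sketch that mimics it on a hypothetical value (plain Python, no IMDbPY imports needed):

# Hypothetical misc-info string found next to an episode link.
minfo = '(12 May 2005) - Herself (voice)'

minidx = minfo.find(' -')
slfRole = minfo[minidx + 3:].lstrip()   # 'Herself (voice)'
minfo = minfo[:minidx].rstrip()         # '(12 May 2005)'

# A trailing '(...)' on the leftover role becomes the notes.
notes = ''
if slfRole.endswith(')'):
    commidx = slfRole.rfind('(')
    if commidx != -1:
        notes = slfRole[commidx:]       # '(voice)'
        slfRole = slfRole[:commidx]     # 'Herself '

print(minfo, '|', slfRole.strip(), '|', notes)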
Example #31
 def end_li(self):
     self._in_li = 0
     if self._in_episodes:
         et = self._cur_episode_title.strip()
         minfo = self._misc_info.strip()
         if et and self._episode_id:
             eps_data = analyze_title(et, canonical=1)
             eps_data['kind'] = u'episode'
             e = Movie(movieID=str(self._episode_id), data=eps_data,
                         accessSystem=self._as, modFunct=self._modFunct)
             e['episode of'] = self._cur_series
             if minfo.startswith('('):
                 pe = minfo.find(')')
                 if pe != -1:
                     date = minfo[1:pe]
                     if date != '????':
                         e['original air date'] = date
                         if eps_data.get('year', '????') == '????':
                             syear = date.split()[-1]
                             if syear.isdigit():
                                 e['year'] = syear
             rolei = minfo.find(' - ')
             if rolei != -1:
                 if not self._got_i_info:
                     role = u''
                     role = minfo[rolei+3:].strip()
                     notei = role.rfind('(')
                     note = u''
                     if notei != -1 and role and role[-1] == ')':
                         note = role[notei:]
                         role = role[:notei].strip()
                     e.notes = note
                     e.currentRole = role
                 else:
                     randn = minfo[rolei+3:].strip().split()
                     note = '[%s]' % randn[0]
                     note += ' '.join(randn[1:])
                     e.notes = note
             self._episodes.setdefault(self._cur_series, []).append(e)
         self._cur_episode_title = u''
         self._episode_id = None
     self._in_misc_info = 0
     self._misc_info = u''
Example #32
 def _findRefs(self, o, trefs, nrefs):
     """Find titles or names references in strings."""
     if isinstance(o, (UnicodeType, StringType)):
         for title in re_titleRef.findall(o):
             a_title = analyze_title(title, canonical=1)
             rtitle = build_title(a_title, canonical=1, ptdf=1)
             if trefs.has_key(rtitle): continue
             movieID = self._getTitleID(rtitle)
             if movieID is None:
                 movieID = self._getTitleID(title)
             if movieID is None:
                 continue
             m = Movie(title=rtitle, movieID=movieID,
                         accessSystem=self.accessSystem)
             trefs[rtitle] = m
             rtitle2 = canonicalTitle(a_title.get('title', u''))
             if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                 trefs[rtitle2] = m
             if title != rtitle:
                 trefs[title] = m
         for name in re_nameRef.findall(o):
             a_name = analyze_name(name, canonical=1)
             rname = build_name(a_name, canonical=1)
             if nrefs.has_key(rname): continue
             personID = self._getNameID(rname)
             if personID is None:
                 personID = self._getNameID(name)
             if personID is None: continue
             p = Person(name=rname, personID=personID,
                         accessSystem=self.accessSystem)
             nrefs[rname] = p
             rname2 = normalizeName(a_name.get('name', u''))
             if rname2 and rname2 != rname:
                 nrefs[rname2] = p
             if name != rname and name != rname2:
                 nrefs[name] = p
     elif isinstance(o, (ListType, TupleType)):
         for item in o:
             self._findRefs(item, trefs, nrefs)
     elif isinstance(o, DictType):
         for value in o.values():
             self._findRefs(value, trefs, nrefs)
     return (trefs, nrefs)
Example #33
 def _getTitleID(self, title):
     """Given a long imdb canonical title, returns a movieID or
     None if not found."""
     td = analyze_title(title)
     condition = None
     if td['kind'] == 'episode':
         epof = td['episode of']
         seriesID = [
             s.id for s in Title.select(
                 AND(
                     Title.q.title == self.toUTF8(epof['title']),
                     self._buildNULLCondition(Title.q.imdbIndex,
                                              epof.get('imdbIndex')),
                     Title.q.kindID == self._kindRev[epof['kind']],
                     self._buildNULLCondition(Title.q.productionYear,
                                              epof.get('year'))))
         ]
         if seriesID:
             condition = AND(
                 IN(Title.q.episodeOfID,
                    seriesID), Title.q.title == self.toUTF8(td['title']),
                 self._buildNULLCondition(Title.q.imdbIndex,
                                          td.get('imdbIndex')),
                 Title.q.kindID == self._kindRev[td['kind']],
                 self._buildNULLCondition(Title.q.productionYear,
                                          td.get('year')))
     if condition is None:
         condition = AND(
             Title.q.title == self.toUTF8(td['title']),
             self._buildNULLCondition(Title.q.imdbIndex,
                                      td.get('imdbIndex')),
             Title.q.kindID == self._kindRev[td['kind']],
             self._buildNULLCondition(Title.q.productionYear,
                                      td.get('year')))
     res = Title.select(condition)
     try:
         if res.count() != 1:
             return None
     except (UnicodeDecodeError, TypeError):
         return None
     return res[0].id
Example #34
 def postprocess_data(self, data):
     if not data or self.label not in data:
         return []
     mlist = []
     data = data[self.label]
     # Avoid duplicates.  A real fix, using XPath, would be desirable.
     # XXX: probably this is no more needed.
     seenIDs = []
     for d in data:
         if 'movieID' not in d:
             continue
         if self.ranktext not in d:
             continue
         if 'title' not in d:
             continue
         theID = analyze_imdbid(d['movieID'])
         if theID is None:
             continue
         theID = str(theID)
         if theID in seenIDs:
             continue
         seenIDs.append(theID)
         minfo = analyze_title(d['title'] + ' ' + d['year'])
         try:
             minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
         except ValueError:
             pass
         if 'votes' in d:
             try:
                 votes = d['votes'].replace(' user ratings', '')
                 votes = votes.split(' based on ')[1]    # is IndexError possible?
                 minfo['votes'] = int(votes.replace(',', ''))
             except (IndexError, ValueError):
                 pass
         if 'rating' in d:
             try:
                 minfo['rating'] = float(d['rating'])
             except ValueError:
                 pass
         mlist.append((theID, minfo))
     return mlist
Example #35
 def postprocess_data(self, data):
     if not data or self.label not in data:
         return []
     mlist = []
     data = data[self.label]
     # Avoid duplicates.  A real fix, using XPath, would be desirable.
     # XXX: probably this is no more needed.
     seenIDs = []
     for d in data:
         if "movieID" not in d:
             continue
         if self.ranktext not in d:
             continue
         if "title" not in d:
             continue
         theID = analyze_imdbid(d["movieID"])
         if theID is None:
             continue
         theID = str(theID)
         if theID in seenIDs:
             continue
         seenIDs.append(theID)
         minfo = analyze_title(d["title"] + " " + d["year"])
         try:
             minfo[self.ranktext] = int(d[self.ranktext].replace(".", ""))
         except:
             pass
         if "votes" in d:
             try:
                 votes = d["votes"].replace(" votes", "")
                 votes = votes.split(" based on ")[1]
                 minfo["votes"] = int(votes.replace(",", ""))
             except:
                 pass
         if "rating" in d:
             try:
                 minfo["rating"] = float(d["rating"])
             except:
                 pass
         mlist.append((theID, minfo))
     return mlist
Example #36
def _build_episode(link, title, minfo, role, roleA, roleAID):
    """Build an Movie object for a given episode of a series."""
    episode_id = analyze_imdbid(link)
    notes = u''
    minidx = minfo.find(' -')
    # Sometimes, for some unknown reason, the role is left in minfo.
    if minidx != -1:
        slfRole = minfo[minidx+3:].lstrip()
        minfo = minfo[:minidx].rstrip()
        if slfRole.endswith(')'):
            commidx = slfRole.rfind('(')
            if commidx != -1:
                notes = slfRole[commidx:]
                slfRole = slfRole[:commidx]
        if slfRole and role is None and roleA is None:
            role = slfRole
    eps_data = analyze_title(title)
    eps_data['kind'] = u'episode'
    # FIXME: it's wrong for multiple characters (very rare on tv series?).
    if role is None:
        role = roleA # At worst, it's None.
    if role is None:
        roleAID = None
    if roleAID is not None:
        roleAID = analyze_imdbid(roleAID)
    e = Movie(movieID=episode_id, data=eps_data, currentRole=role,
            roleID=roleAID, notes=notes)
    # XXX: are we missing some notes?
    # XXX: does it parse things as "Episode dated 12 May 2005 (12 May 2005)"?
    if minfo.startswith('('):
        pe = minfo.find(')')
        if pe != -1:
            date = minfo[1:pe]
            if date != '????':
                e['original air date'] = date
                if eps_data.get('year', '????') == '????':
                    syear = date.split()[-1]
                    if syear.isdigit():
                        e['year'] = int(syear)
    return e
Example #37
 def _get_top_bottom_movies(self, kind):
     if kind == 'top':
         kind = 'top 250 rank'
     elif kind == 'bottom':
         kind = 'bottom 10 rank'
     else:
         return []
     info = getTopBottomList(kind, '%stopbottom.db' % self.__db)
     if not info:
         return []
     res = []
     for d in info:
         if not 'movieID' in d:
             continue
         movieID = d['movieID']
         del d['movieID']
         minfo = analyze_title(getLabel(movieID,
                                 '%stitles.index' % self.__db,
                                 '%stitles.key' % self.__db))
         minfo.update(d)
         res.append((movieID, minfo))
     return res
Example #38
    def postprocess_data(self, data):
        if (not data) or ('chart' not in data):
            return []

        movies = []
        for entry in data['chart']:
            if ('movieID' not in entry) or ('rank' not in entry) or ('title' not in entry):
                continue

            movie_id = analyze_imdbid(entry['movieID'])
            if movie_id is None:
                continue
            del entry['movieID']

            entry[self.ranktext] = entry['rank']
            del entry['rank']

            title = analyze_title(entry['title'] + ' ' + entry.get('year', ''))
            entry.update(title)

            movies.append((movie_id, entry))
        return movies
Example #39
 def _buildEpisodes(self, eps_list, parentID):
     episodes = {}
     parentTitle = getLabel(parentID, '%stitles.index' % self.__db,
                         '%stitles.key' % self.__db)
     parentSeries = Movie(title=parentTitle,
                         movieID=parentID, accessSystem='local')
     for episodeID, episodeTitle in eps_list:
         episodeTitle = unicode(episodeTitle, 'latin_1', 'replace')
         data = analyze_title(episodeTitle, canonical=1)
         m = Movie(data=data, movieID=episodeID, accessSystem='local')
         m['episode of'] = parentSeries
         if data.get('year') is None:
             year = getFullIndex('%smovies.data' % self.__db,
                                 key=episodeID, kind='moviedata', rindex=1)
             if year: m['year'] = year
         season = data.get('season', 'UNKNOWN')
         if not episodes.has_key(season): episodes[season] = {}
         ep_number = data.get('episode')
         if ep_number is None:
             ep_number = max((episodes[season].keys() or [0])) + 1
         episodes[season][ep_number] = m
     return episodes
Example #40
    def postprocess_data(self, data):
        if (not data) or ('chart' not in data):
            return []

        movies = []
        for entry in data['chart']:

            if ('movieID' not in entry) or ('rank'
                                            not in entry) or ('title'
                                                              not in entry):
                continue

            movie_id = analyze_imdbid(
                entry['movieID'])  # analyze_imdbid() parses the link to extract the numeric ID
            if movie_id is None:
                continue
            del entry['movieID']

            title = analyze_title(entry['title'])
            entry.update(title)

            movies.append((movie_id, entry))
        return movies
Example #41
 def do_br(self, attrs):
     if self._in_series_title:
         self._in_series_title = 0
         st = self._series_title.strip()
         if st and self.__seriesID:
             d_title = analyze_title(st, canonical=1)
             m = Movie(movieID=str(self.__seriesID),
                       data=d_title,
                       accessSystem=self._as,
                       modFunct=self._modFunct)
             self._result['kind'] = u'episode'
             self._result['episode of'] = m
         self._series_title = u''
     elif self._in_series_info:
         self._in_series_info = 0
         si = ' '.join([x for x in self._series_info.split() if x])
         if si:
             aid = self.re_airdate.findall(si)
             if aid and len(aid[0]) == 3:
                 date, season, episode = aid[0]
                 date = date.strip()
                 try:
                     season = int(season)
                 except:
                     pass
                 try:
                     episode = int(episode)
                 except:
                     pass
                 if date and date != '????':
                     self._result['original air date'] = date
                 # Handle also "episode 0".
                 if season or type(season) is type(0):
                     self._result['season'] = season
                 if episode or type(episode) is type(0):
                     self._result['episode'] = episode
         self._series_info = u''
Example #42
 def _get_keyword(self, keyword, results):
     return [(movieID, analyze_title(getLabel(movieID,
             '%stitles.index' % self.__db, '%stitles.key' % self.__db)))
             for movieID in getKeywordMovies(keyword,
                 '%skeywords.data' % self.__db)][:results]
Example #43
 def set_title(self, title):
     """Set the title of the movie."""
     # XXX: convert title to unicode, if it's a plain string?
     d_title = analyze_title(title)
     self.data.update(d_title)
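A minimal usage sketch, assuming the Movie class from IMDbPY; set_title() simply runs the string through analyze_title() and merges the resulting keys into the object's data (whether 'year' is stored as an int or a string depends on the release):

from imdb.Movie import Movie

m = Movie()
m.set_title('The Untouchables (1987)')
print(m.get('title'), m.get('year'), m.get('kind'))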
Example #44
    def _readTitlesKeyFile(keyFile, searchingEpisode=0):
        """Iterate over the given file, returning tuples suited for
        the common.locsql.scan_titles function."""
        try:
            kf = open(keyFile, 'r')
        except IOError, e:
            raise IMDbDataAccessError, str(e)
        for line in kf:
            ls = line.split('|')
            t = ls[0]
            if not t: continue
            if searchingEpisode:
                if t[-1] != '}': continue
            elif t[-1] == '}': continue
            titled = analyze_title(latin2utf(t))
            yield (long(ls[1], 16), titled)
        kf.close()

    def _scan_titles(keyFile,
                     title1,
                     title2,
                     title3,
                     results=0,
                     _only_episodes=0):
        """Scan the given file, using the common.locsql.scan_titles
        pure-Python function, for title variations."""
        se = 0
        if _only_episodes:
            se = 1
        else:
Example #45
 def get_movie_main(self, movieID):
     # Information sets provided by this method.
     infosets = ('main', 'vote details')
     tl = getLabel(movieID, '%stitles.index' % self.__db,
                     '%stitles.key' % self.__db)
     # No title, no party.
     if tl is None:
         raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID
     res = analyze_title(tl)
     # Build the cast list.
     actl = []
     for castG in ('actors', 'actresses'):
         midx = getFullIndex('%s%s.titles' % (self.__db, castG),
                         movieID, multi=1)
         if midx is not None:
             params = {'movieID': movieID,
                         'dataF': '%s%s.data' % (self.__db, castG),
                         'indexF': '%snames.index' % self.__db,
                         'keyF': '%snames.key' % self.__db,
                         'attrIF': '%sattributes.index' % self.__db,
                         'attrKF': '%sattributes.key' % self.__db,
                         'charNF': '%scharacter2id.index' % self.__db,
                         'offsList': midx, 'doCast': 1}
             actl += getMovieCast(**params)
     if actl:
         actl.sort()
         res['cast'] = actl
     # List of other workers.
     works = ('writer', 'cinematographer', 'composer',
             'costume-designer', 'director', 'editor', 'miscellaneou',
             'producer', 'production-designer', 'cinematographer')
     for i in works:
         index = getFullIndex('%s%ss.titles' % (self.__db, i),
                                 movieID, multi=1)
         if index is not None:
             params = {'movieID': movieID,
                         'dataF': '%s%s.data' % (self.__db, i),
                         'indexF': '%snames.index' % self.__db,
                         'keyF': '%snames.key' % self.__db,
                         'attrIF': '%sattributes.index' % self.__db,
                         'attrKF': '%sattributes.key' % self.__db,
                         'offsList': index}
             name = key = i
             if '-' in name:
                 name = name.replace('-', ' ')
             elif name == 'miscellaneou':
                 name = 'miscellaneous crew'
                 key = 'miscellaneou'
             elif name == 'writer':
                 params['doWriters'] = 1
             params['dataF'] = '%s%ss.data' % (self.__db, key)
             data = getMovieCast(**params)
             if name == 'writer': data.sort()
             res[name] = data
     # Rating.
     rt = self.get_movie_vote_details(movieID)['data']
     if rt: res.update(rt)
     # Various information.
     miscInfo = (('runtimes', 'running-times'), ('color info', 'color-info'),
                 ('genres', 'genres'), ('distributors', 'distributors'),
                 ('languages', 'language'), ('certificates', 'certificates'),
                 ('special effects companies', 'special-effects-companies'),
                 ('sound mix', 'sound-mix'), ('tech info', 'technical'),
                 ('production companies', 'production-companies'),
                 ('countries', 'countries'))
     for name, fname in miscInfo:
         params = {'movieID': movieID,
             'dataF': '%s%s.data' % (self.__db, fname),
             'indexF': '%s%s.index' % (self.__db, fname),
             'attrIF': '%sattributes.index' % self.__db,
             'attrKF': '%sattributes.key' % self.__db}
         data = getMovieMisc(**params)
         if name in ('distributors', 'special effects companies',
                     'production companies'):
             for nitem in xrange(len(data)):
                 n, notes = split_company_name_notes(data[nitem])
                 company = Company(name=n, companyID=getCompanyID(n,
                                     '%scompany2id.index' % self.__db),
                                     notes=notes,
                                     accessSystem=self.accessSystem)
                 data[nitem] = company
         if data: res[name] = data
     if res.has_key('runtimes') and len(res['runtimes']) > 0:
         rt = res['runtimes'][0]
         episodes = re_episodes.findall(rt)
         if episodes:
             res['runtimes'][0] = re_episodes.sub('', rt)
             res['number of episodes'] = episodes[0]
     # AKA titles.
     akas = getAkaTitles(movieID,
                 '%saka-titles.data' % self.__db,
                 '%stitles.index' % self.__db,
                 '%stitles.key' % self.__db,
                 '%sattributes.index' % self.__db,
                 '%sattributes.key' % self.__db)
     if akas:
         # normalize encoding.
         for i in xrange(len(akas)):
             ts = akas[i].split('::')
             if len(ts) != 2: continue
             t = ts[0]
             n = ts[1]
             nt = self._changeAKAencoding(n, t)
             if nt is not None: akas[i] = '%s::%s' % (nt, n)
         res['akas'] = akas
     if res.get('kind') == 'episode':
         # Things to do if this is a tv series episode.
         episodeOf = res.get('episode of')
         if episodeOf is not None:
             parentSeries = Movie(data=res['episode of'],
                                         accessSystem='local')
             seriesID = self._getTitleID(parentSeries.get(
                                         'long imdb canonical title'))
             parentSeries.movieID = seriesID
             res['episode of'] = parentSeries
         if not res.get('year'):
             year = getFullIndex('%smovies.data' % self.__db,
                                 movieID, kind='moviedata', rindex=1)
             if year: res['year'] = year
     # MPAA info.
     mpaa = getMPAA(movieID, '%smpaa-ratings-reasons.index' % self.__db,
                     '%smpaa-ratings-reasons.data' % self.__db)
     if mpaa: res.update(mpaa)
     return {'data': res, 'info sets': infosets}
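The runtime handling above strips a trailing '(NN episodes)' note from the first 'runtimes' entry and stores the count separately. A minimal, self-contained sketch of that split, assuming a re_episodes-style pattern and a hypothetical runtime string (the real regular expression is defined elsewhere in IMDbPY and may differ):

import re

# Assumed pattern, for illustration only; the real re_episodes lives in IMDbPY.
re_episodes_sketch = re.compile(r'\s*\((\d+) episodes?\)')

rt = '46::(26 episodes)'                        # hypothetical runtime entry
episodes = re_episodes_sketch.findall(rt)       # ['26']
if episodes:
    runtime_only = re_episodes_sketch.sub('', rt)   # '46::'
    number_of_episodes = episodes[0]                # '26'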
Пример #46
0
    def _search_movie(self, title, results, _episodes=False):
        title = title.strip()
        if not title: return []
        title_dict = analyze_title(title, canonical=1)
        s_title = title_dict['title']
        if not s_title: return []
        episodeOf = title_dict.get('episode of')

        if not episodeOf:
            if not _episodes:
                s_title_split = s_title.split(', ')
                if len(s_title_split) > 1 and \
                        s_title_split[-1].lower() in _articles:
                    s_title_rebuilt = ', '.join(s_title_split[:-1])
                    if s_title_rebuilt:
                        s_title = s_title_rebuilt
        else:
            _episodes = False
            s_title = normalizeTitle(s_title)
        if isinstance(s_title, UnicodeType):
            s_title = s_title.encode('ascii', 'ignore')

        soundexCode = soundex(s_title)

        # XXX: improve the search restricting the kindID if the
        #      "kind" of the input differs from "movie"?
        condition = conditionAka = None
        if _episodes:
            condition = AND(Title.q.phoneticCode == soundexCode,
                            Title.q.kindID == self._kindRev['episode'])
            conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                               AkaTitle.q.kindID == self._kindRev['episode'])
        elif title_dict['kind'] == 'episode' and episodeOf is not None:
            series_title = build_title(episodeOf, canonical=1)
            # XXX: is it safe to get "results" results?
            #      Too many?  Too few?
            serRes = results
            if serRes < 3 or serRes > 10:
                serRes = 10
            searchSeries = self._search_movie(series_title, serRes)
            seriesIDs = [result[0] for result in searchSeries]
            if seriesIDs:
                condition = AND(Title.q.phoneticCode == soundexCode,
                                IN(Title.q.episodeOfID, seriesIDs),
                                Title.q.kindID == self._kindRev['episode'])
                conditionAka = AND(
                    AkaTitle.q.phoneticCode == soundexCode,
                    IN(AkaTitle.q.episodeOfID, seriesIDs),
                    AkaTitle.q.kindID == self._kindRev['episode'])
            else:
                # XXX: bad situation: we have found no matching series;
                #      try searching everything (both episodes and
                #      non-episodes) for the title.
                condition = AND(Title.q.phoneticCode == soundexCode,
                                IN(Title.q.episodeOfID, seriesIDs))
                conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                                   IN(AkaTitle.q.episodeOfID, seriesIDs))
        if condition is None:
            # XXX: excludes episodes?
            condition = AND(Title.q.kindID != self._kindRev['episode'],
                            Title.q.phoneticCode == soundexCode)
            conditionAka = AND(AkaTitle.q.kindID != self._kindRev['episode'],
                               AkaTitle.q.phoneticCode == soundexCode)

        # Up to 3 variations of the title are searched, plus the
        # long imdb canonical title, if provided.
        if not _episodes:
            title1, title2, title3 = titleVariations(title)
        else:
            title1 = title
            title2 = ''
            title3 = ''
        try:
            qr = [(q.id, get_movie_data(q.id, self._kind))
                  for q in Title.select(condition)]
            q2 = [(q.movieID, get_movie_data(q.id, self._kind, fromAka=1))
                  for q in AkaTitle.select(conditionAka)]
            qr += q2
        except NotFoundError, e:
            raise IMDbDataAccessError, \
                    'unable to search the database: "%s"' % str(e)
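The example above rebuilds the parent-series title with build_title() before recursing into _search_movie(). As a hedged reminder of how analyze_title() and build_title() relate (output details vary between IMDbPY versions, so treat the values as illustrative):

from imdb.utils import analyze_title, build_title

d = analyze_title('The Matrix (1999)')   # e.g. {'title': 'The Matrix', 'year': ..., 'kind': 'movie'}
s = build_title(d)                       # typically reproduces something like 'The Matrix (1999)'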
Пример #47
0
 def end_title(self):
     self._reading_page_title = 0
     t = self._page_title.strip()
     if t.find('IMDb Title') != -1 and t.find('Search') != -1: return
     self._result = analyze_title(t, canonical=1)
Пример #48
0
 def set_title(self, title):
     """Set the title of the movie."""
     # XXX: convert title to unicode, if it's a plain string?
     d_title = analyze_title(title)
     self.data.update(d_title)
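set_title() simply merges whatever analyze_title() extracts from the string into the movie's data dictionary. A hedged sketch of the typical output shape (exact keys and value types, e.g. whether 'year' is a string or an int, depend on the IMDbPY version):

from imdb.utils import analyze_title

d_title = analyze_title('The Matrix (1999)')
print(d_title)   # expected shape: {'title': 'The Matrix', 'year': ..., 'kind': 'movie'}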
Пример #49
0
 def set_title(self, title):
     """Set the title of the movie."""
     d_title = analyze_title(title)
     self.data.update(d_title)
Пример #50
0
 def set_title(self, title):
     """Set the title of the movie."""
     d_title = analyze_title(title)
     self.data.update(d_title)
Пример #51
0
 def _search_episode(self, title, results):
     t_dict = analyze_title(title)
     if t_dict['kind'] == 'episode':
         title = t_dict['title']
     cont = self._get_search_content('ep', title, results)
     return self.smProxy.search_movie_parser.parse(cont, results=results)['data']
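The kind check above routes episode-style titles so that only the bare episode title reaches the 'ep' search. A hedged illustration, assuming the braces-style long imdb episode notation that analyze_title() usually recognises:

from imdb.utils import analyze_title

# Hypothetical episode title; the parsed dict normally reports kind == 'episode'
# and keeps the series information under 'episode of'.
t_dict = analyze_title('"The Series" (2004) {Some Episode (#1.5)}')
if t_dict.get('kind') == 'episode':
    title = t_dict['title']   # bare episode title, e.g. 'Some Episode'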
Пример #52
0
    def run( self ):
        try:
            if self.strategy == 'film_db': #TODO
                self.chomik.connect()

                movies = []
                file = open( 'db/movies.list', 'r' )
                for line in file.readlines():
                    title = line.split('\t')[0].decode( 'latin1' )
                    analized = analyze_title( title )
                    title = analized['title']

                    if analized['kind'] in ( 'movie', 'tv series', 'tv movie', 'tv mini series', 'episode' ):
                        movies.append( analized )

                file.close()

                random.shuffle( movies )
                for movie in movies:

                    title = movie['title'].replace( '/', '\\')
                    year = movie['year'] if movie.has_key( 'year' ) else ''

                    FIRST_LETTER = title[0].upper()
                    if movie['kind'] == 'episode':
                        pattern1 = [
                            'Seriale',
                            'Alfabetycznie',
                            movie['episode of']['title'][0].upper(),
                            movie['episode of']['title'] + ( ' (%s)' % movie['episode of']['year'] if movie['episode of'].has_key( 'year' ) else '' ),
                        ]
                        if movie.has_key( 'season' ):
                            pattern1.append( 'Sezon %s' % movie['season'] )
                        if movie.has_key( 'episode' ):
                            pattern1.append( 'Odcinek %s, %s' % ( movie['episode'], movie['title'] ) )
                        else:
                            pattern1.append( title )
                        pattern1 = '/'.join( pattern1 )
                            
                        patterns = [ pattern1 ]
                        if movie['episode of'].has_key( 'year' ):
                            pattern2 = [
                                'Seriale',
                                'Chronologicznie',
                                str( movie['episode of']['year'] ), 
                                movie['episode of']['title'] + ' (%s)' % movie['episode of']['year'],   # year is guaranteed by the enclosing has_key check
                            ]
                            if movie.has_key( 'season' ):
                                pattern2.append( 'Sezon %s' % movie['season'] )
                            if movie.has_key( 'episode' ):
                                pattern2.append( 'Odcinek %s, %s' % ( movie['episode'], movie['title'] ) )
                            else:
                                pattern2.append( title )
                            pattern2 = '/'.join( pattern2 )
                            patterns.append( pattern2 )

                    else:
                        title = '%s (%s)' % ( title, year ) if year else title
                        if movie['kind'] in ( 'tv series', 'tv mini series' ):
                            folder = 'Seriale'
                        else:
                            folder = 'Filmy'

                        full_title = ( "%s (%s)" % ( movie['title'], year ) ).decode( 'latin1' )
                        patterns = ( '%s/Alfabetycznie/%s/%s' % ( folder, FIRST_LETTER, title ), 
                                     '%s/Chronologicznie/%s/%s' % ( folder, year, title ) 
                                   )

                    if not Db.fetchone( "SELECT * FROM folders WHERE user_id=? AND name=?", ( self.id, title ) ):

                        good = []
                        if movie['kind'] in ( 'movie', 'tv movie' ): # TODO search series
                            sizes = []
                            self.chomik.logger.debug( 'searching: %s' % full_title ) 
                            items = self.chomik.search( full_title )
                            self.chomik.logger.debug( 'find: %d' % len( items ) ) 
                            for item in items:
                                self.chomik.logger.debug( '%s, %s' % ( item['title'], item['size'] ) )
                                if item['title'].lower() == full_title.lower() or item['title'].lower().startswith( full_title.lower() ) or item['title'].lower() == movie['title'].lower() or item['title'].lower() == ( '%s %s' % ( movie['title'], year ) ).lower():
                                    if not item['size'] in sizes:
                                        good.append( item )
                                        sizes.append( item['size'] )

                        for pattern in patterns:
                            id, url = self.chomik.create_directory( pattern )
                            if id and url:
                                Db.execute( "INSERT INTO folders VALUES (?, ?, ?, ?)", ( id, self.id, title, url ), commit=True )

                                for item in good:
                                    self.chomik.clone( item['id'], id )

            elif self.strategy == 'smieciarz':

                if self.chomik.connect():
                    self.generate_other_users()

                    users = Db.fetch( "SELECT login from other_users" )
                    random.shuffle( users )
                    for user in users:
                        url = '/%s' % user
                        full_url = 'http://chomikuj.pl/%s%s' % ( self.login, url )
                        if not self.chomik.check_directory( url )[0]:
                            self.chomik.copy_directory_tree( url, timeout=self.server.timeout )
                            self.generate_other_users( 5 )

        except Exception, e:
            print e
            self.chomik.logger.exception( e )
            self.chomik.logger.info( "going to sleep for 60 seconds" )
            time.sleep( 60 )

            self.run()
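To make the non-episode branch above concrete, these are the two chomikuj directory patterns it builds for a hypothetical movie dict, following the same string formatting as the code (values are illustrative only):

movie = {'title': 'Matrix', 'year': 1999, 'kind': 'movie'}
title = '%s (%s)' % (movie['title'], movie['year'])    # 'Matrix (1999)'
first_letter = title[0].upper()                        # 'M'
patterns = ('Filmy/Alfabetycznie/%s/%s' % (first_letter, title),
            'Filmy/Chronologicznie/%s/%s' % (movie['year'], title))
# -> ('Filmy/Alfabetycznie/M/Matrix (1999)',
#     'Filmy/Chronologicznie/1999/Matrix (1999)')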
Пример #53
0
 def get_movie_main(self, movieID):
     cont = self._mretrieve(self.urls["movie_main"] % movieID + "maindetails")
     title = _findBetween(cont, "<title>", "</title>", maxRes=1)
     if not title:
         raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
     title = _unHtml(title[0])
     if title.endswith(" - IMDb"):
         title = title[:-7]
     if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
         title += " (mini)"
     d = analyze_title(title)
     kind = d.get("kind")
     tv_series = _findBetween(cont, "TV Series:</h5>", "</a>", maxRes=1)
     if tv_series:
         mid = re_imdbID.findall(tv_series[0])
     else:
         mid = None
     if tv_series and mid:
         s_title = _unHtml(tv_series[0])
         s_data = analyze_title(s_title)
         m = Movie(movieID=str(mid[0]), data=s_data, accessSystem=self.accessSystem, modFunct=self._defModFunct)
         d["kind"] = kind = u"episode"
         d["episode of"] = m
     if kind in ("tv series", "tv mini series"):
         years = _findBetween(cont, "<h1>", "</h1>", maxRes=1)
         if years:
             years[:] = _findBetween(years[0], "TV series", "</span>", maxRes=1)
             if years:
                 d["series years"] = years[0].strip()
     air_date = _findBetween(cont, "Original Air Date:</h5>", "</div>", maxRes=1)
     if air_date:
         air_date = air_date[0]
         vi = air_date.find("(")
         if vi != -1:
             date = _unHtml(air_date[:vi]).strip()
             if date != "????":
                 d["original air date"] = date
             air_date = air_date[vi:]
             season = _findBetween(air_date, "Season", ",", maxRes=1)
             if season:
                 season = season[0].strip()
                 try:
                     season = int(season)
                 except:
                     pass
                 if season or type(season) is _inttype:
                     d["season"] = season
             episode = _findBetween(air_date, "Episode", ")", maxRes=1)
             if episode:
                 episode = episode[0].strip()
                 try:
                     episode = int(episode)
                 except:
                     pass
                  if episode or type(episode) is _inttype:
                     d["episode"] = episode
     direct = _findBetween(cont, "<h5>Director", ("</div>", "<br/> <br/>"), maxRes=1)
     if direct:
         direct = direct[0]
         h5idx = direct.find("/h5>")
         if h5idx != -1:
             direct = direct[h5idx + 4 :]
         direct = self._getPersons(direct)
         if direct:
             d["director"] = direct
     if kind in ("tv series", "tv mini series", "episode"):
         if kind != "episode":
             seasons = _findBetween(cont, "Seasons:</h5>", "</div>", maxRes=1)
             if seasons:
                 d["number of seasons"] = seasons[0].count("|") + 1
         creator = _findBetween(cont, "Created by</h5>", ('class="tn15more"', "</div>", "<br/> <br/>"), maxRes=1)
         if not creator:
              # They change 'Created by' to 'Creator' and vice versa
             # from time to time...
             # XXX: is 'Creators' also used?
             creator = _findBetween(cont, "Creator:</h5>", ('class="tn15more"', "</div>", "<br/> <br/>"), maxRes=1)
         if creator:
             creator = creator[0]
             if creator.find("tn15more"):
                 creator = "%s>" % creator
             creator = self._getPersons(creator)
             if creator:
                 d["creator"] = creator
     writers = _findBetween(cont, "<h5>Writer", ("</div>", "<br/> <br/>"), maxRes=1)
     if writers:
         writers = writers[0]
         h5idx = writers.find("/h5>")
         if h5idx != -1:
             writers = writers[h5idx + 4 :]
         writers = self._getPersons(writers)
         if writers:
             d["writer"] = writers
     cvurl = _getTagsWith(cont, 'name="poster"', toClosure=True, maxRes=1)
     if cvurl:
         cvurl = _findBetween(cvurl[0], 'src="', '"', maxRes=1)
         if cvurl:
             d["cover url"] = cvurl[0]
     genres = _findBetween(cont, 'href="/genre/', '"')
     if genres:
         d["genres"] = list(set(genres))
     ur = _findBetween(cont, 'id="star-bar-user-rate">', "</div>", maxRes=1)
     if ur:
         rat = _findBetween(ur[0], "<b>", "</b>", maxRes=1)
          if rat:
              d["rating"] = rat[0].strip()
          else:
              self._mobile_logger.warn("wrong rating: %s", rat)
         vi = ur[0].rfind('href="ratings"')
         if vi != -1 and ur[0][vi + 10 :].find("await") == -1:
             try:
                 votes = _findBetween(ur[0][vi:], "title='", " IMDb", maxRes=1)
                 votes = int(votes[0].replace(",", ""))
                 d["votes"] = votes
             except (ValueError, IndexError):
                 self._mobile_logger.warn("wrong votes: %s", ur)
     top250 = _findBetween(cont, 'href="/chart/top?', "</a>", maxRes=1)
     if top250:
         fn = top250[0].rfind("#")
         if fn != -1:
             try:
                 td = int(top250[0][fn + 1 :])
                 d["top 250 rank"] = td
             except ValueError:
                 self._mobile_logger.warn("wrong top250: %s", top250)
     castdata = _findBetween(cont, "Cast overview", "</table>", maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, "Credited cast", "</table>", maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, "Complete credited cast", "</table>", maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, "Series Cast Summary", "</table>", maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, "Episode Credited cast", "</table>", maxRes=1)
     if castdata:
         castdata = castdata[0]
          # Reintegrate the first tag.
         fl = castdata.find("href=")
         if fl != -1:
             castdata = "<a " + castdata[fl:]
         # Exclude the 'rest of cast listed alphabetically' row.
         smib = castdata.find('<tr><td align="center" colspan="4"><small>')
         if smib != -1:
             smie = castdata.rfind("</small></td></tr>")
             if smie != -1:
                 castdata = castdata[:smib].strip() + castdata[smie + 18 :].strip()
         castdata = castdata.replace("/tr> <tr", "/tr><tr")
         cast = self._getPersons(castdata, sep="</tr><tr")
         if cast:
             d["cast"] = cast
     akas = _findBetween(cont, "Also Known As:</h5>", "</div>", maxRes=1)
     if akas:
         # For some reason, here <br> is still used in place of <br/>.
         akas[:] = [x for x in akas[0].split("<br>") if x.strip()]
         akas = [_unHtml(x).replace('" - ', "::", 1).lstrip('"').strip() for x in akas]
         if "See more" in akas:
             akas.remove("See more")
         akas[:] = [x for x in akas if x]
         if akas:
             d["akas"] = akas
     mpaa = _findBetween(cont, "MPAA</a>:", "</div>", maxRes=1)
     if mpaa:
         d["mpaa"] = _unHtml(mpaa[0])
     runtimes = _findBetween(cont, "Runtime:</h5>", "</div>", maxRes=1)
     if runtimes:
         runtimes = runtimes[0]
         runtimes = [x.strip().replace(" min", "").replace(" (", "::(", 1) for x in runtimes.split("|")]
         d["runtimes"] = [_unHtml(x).strip() for x in runtimes]
     if kind == "episode":
         # number of episodes.
         epsn = _findBetween(cont, 'title="Full Episode List">', "</a>", maxRes=1)
         if epsn:
             epsn = epsn[0].replace(" Episodes", "").strip()
             if epsn:
                 try:
                     epsn = int(epsn)
                 except:
                     self._mobile_logger.warn("wrong episodes #: %s", epsn)
                 d["number of episodes"] = epsn
     country = _findBetween(cont, "Country:</h5>", "</div>", maxRes=1)
     if country:
         country[:] = country[0].split(" | ")
         country[:] = ["<a %s" % x for x in country if x]
         country[:] = [_unHtml(x.replace(" <i>", "::")) for x in country]
         if country:
             d["countries"] = country
     lang = _findBetween(cont, "Language:</h5>", "</div>", maxRes=1)
     if lang:
         lang[:] = lang[0].split(" | ")
         lang[:] = ["<a %s" % x for x in lang if x]
         lang[:] = [_unHtml(x.replace(" <i>", "::")) for x in lang]
         if lang:
             d["languages"] = lang
     col = _findBetween(cont, '"/search/title?colors=', "</div>")
     if col:
         col[:] = col[0].split(" | ")
         col[:] = ["<a %s" % x for x in col if x]
         col[:] = [_unHtml(x.replace(" <i>", "::")) for x in col]
         if col:
             d["color info"] = col
     sm = _findBetween(cont, "/search/title?sound_mixes=", "</div>", maxRes=1)
     if sm:
         sm[:] = sm[0].split(" | ")
         sm[:] = ["<a %s" % x for x in sm if x]
         sm[:] = [_unHtml(x.replace(" <i>", "::")) for x in sm]
         if sm:
             d["sound mix"] = sm
     cert = _findBetween(cont, "Certification:</h5>", "</div>", maxRes=1)
     if cert:
         cert[:] = cert[0].split(" | ")
         cert[:] = [_unHtml(x.replace(" <i>", "::")) for x in cert]
         if cert:
             d["certificates"] = cert
     plotoutline = _findBetween(cont, "Plot:</h5>", ["<a ", "</div>"], maxRes=1)
     if plotoutline:
         plotoutline = plotoutline[0].strip()
         plotoutline = plotoutline.rstrip("|").rstrip()
         if plotoutline:
             d["plot outline"] = _unHtml(plotoutline)
     aratio = _findBetween(cont, "Aspect Ratio:</h5>", ["<a ", "</div>"], maxRes=1)
     if aratio:
         aratio = aratio[0].strip().replace(" (", "::(", 1)
         if aratio:
             d["aspect ratio"] = _unHtml(aratio)
     return {"data": d}
Пример #54
0
 def _search_episode(self, title, results):
     t_dict = analyze_title(title)
     if t_dict["kind"] == "episode":
         title = t_dict["title"]
     cont = self._get_search_content("ep", title, results)
     return self.smProxy.search_movie_parser.parse(cont, results=results)["data"]
Пример #55
0
 def get_movie_main(self, movieID):
     cont = self._mretrieve(self.urls['movie_main'] % movieID + 'maindetails')
     title = _findBetween(cont, '<title>', '</title>', maxRes=1)
     if not title:
         raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
     title = _unHtml(title[0])
     if title.endswith(' - IMDb'):
         title = title[:-7]
     if cont.find('<span class="tv-extra">TV mini-series</span>') != -1:
         title += ' (mini)'
     d = analyze_title(title)
     kind = d.get('kind')
     tv_series = _findBetween(cont, 'TV Series:</h5>', '</a>', maxRes=1)
     if tv_series: mid = re_imdbID.findall(tv_series[0])
     else: mid = None
     if tv_series and mid:
         s_title = _unHtml(tv_series[0])
         s_data = analyze_title(s_title)
         m = Movie(movieID=str(mid[0]), data=s_data,
                     accessSystem=self.accessSystem,
                     modFunct=self._defModFunct)
         d['kind'] = kind = u'episode'
         d['episode of'] = m
     if kind in ('tv series', 'tv mini series'):
         years = _findBetween(cont, '<h1>', '</h1>', maxRes=1)
         if years:
             years[:] = _findBetween(years[0], 'TV series', '</span>',
                                     maxRes=1)
             if years:
                 d['series years'] = years[0].strip()
     air_date = _findBetween(cont, 'Original Air Date:</h5>', '</div>',
                             maxRes=1)
     if air_date:
         air_date = air_date[0]
         vi = air_date.find('(')
         if vi != -1:
             date = _unHtml(air_date[:vi]).strip()
             if date != '????':
                 d['original air date'] = date
             air_date = air_date[vi:]
             season = _findBetween(air_date, 'Season', ',', maxRes=1)
             if season:
                 season = season[0].strip()
                 try: season = int(season)
                 except: pass
                 if season or type(season) is _inttype:
                     d['season'] = season
             episode = _findBetween(air_date, 'Episode', ')', maxRes=1)
             if episode:
                 episode = episode[0].strip()
                 try: episode = int(episode)
                 except: pass
                  if episode or type(episode) is _inttype:
                     d['episode'] = episode
     direct = _findBetween(cont, '<h5>Director', ('</div>', '<br/> <br/>'),
                             maxRes=1)
     if direct:
         direct = direct[0]
         h5idx = direct.find('/h5>')
         if h5idx != -1:
             direct = direct[h5idx+4:]
         direct = self._getPersons(direct)
         if direct: d['director'] = direct
     if kind in ('tv series', 'tv mini series', 'episode'):
         if kind != 'episode':
             seasons = _findBetween(cont, 'Seasons:</h5>', '</div>',
                                     maxRes=1)
             if seasons:
                 d['number of seasons'] = seasons[0].count('|') + 1
         creator = _findBetween(cont, 'Created by</h5>', ('class="tn15more"',
                                                         '</div>',
                                                         '<br/> <br/>'),
                                                         maxRes=1)
         if not creator:
              # They change 'Created by' to 'Creator' and vice versa
             # from time to time...
             # XXX: is 'Creators' also used?
             creator = _findBetween(cont, 'Creator:</h5>',
                                     ('class="tn15more"', '</div>',
                                     '<br/> <br/>'), maxRes=1)
         if creator:
             creator = creator[0]
             if creator.find('tn15more'): creator = '%s>' % creator
             creator = self._getPersons(creator)
             if creator: d['creator'] = creator
     writers = _findBetween(cont, '<h5>Writer', ('</div>', '<br/> <br/>'),
                             maxRes=1)
     if writers:
         writers = writers[0]
         h5idx = writers.find('/h5>')
         if h5idx != -1:
             writers = writers[h5idx+4:]
         writers = self._getPersons(writers)
         if writers: d['writer'] = writers
     cvurl = _getTagsWith(cont, 'name="poster"', toClosure=True, maxRes=1)
     if cvurl:
         cvurl = _findBetween(cvurl[0], 'src="', '"', maxRes=1)
         if cvurl: d['cover url'] = cvurl[0]
     genres = _findBetween(cont, 'href="/genre/', '"')
     if genres:
         d['genres'] = list(set(genres))
     ur = _findBetween(cont, 'id="star-bar-user-rate">', '</div>',
                         maxRes=1)
     if ur:
         rat = _findBetween(ur[0], '<b>', '</b>', maxRes=1)
          if rat:
              d['rating'] = rat[0].strip()
          else:
              self._mobile_logger.warn('wrong rating: %s', rat)
         vi = ur[0].rfind('href="ratings"')
         if vi != -1 and ur[0][vi+10:].find('await') == -1:
             try:
                 votes = _findBetween(ur[0][vi:], "title='",
                                     " IMDb", maxRes=1)
                 votes = int(votes[0].replace(',', ''))
                 d['votes'] = votes
             except (ValueError, IndexError):
                 self._mobile_logger.warn('wrong votes: %s', ur)
     top250 = _findBetween(cont, 'href="/chart/top?', '</a>', maxRes=1)
     if top250:
         fn = top250[0].rfind('#')
         if fn != -1:
             try:
                 td = int(top250[0][fn+1:])
                 d['top 250 rank'] = td
             except ValueError:
                 self._mobile_logger.warn('wrong top250: %s', top250)
     castdata = _findBetween(cont, 'Cast overview', '</table>', maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Credited cast', '</table>', maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Complete credited cast', '</table>',
                                 maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Series Cast Summary', '</table>',
                                 maxRes=1)
     if not castdata:
         castdata = _findBetween(cont, 'Episode Credited cast', '</table>',
                                 maxRes=1)
     if castdata:
         castdata = castdata[0]
          # Reintegrate the first tag.
         fl = castdata.find('href=')
         if fl != -1: castdata = '<a ' + castdata[fl:]
         # Exclude the 'rest of cast listed alphabetically' row.
         smib = castdata.find('<tr><td align="center" colspan="4"><small>')
         if smib != -1:
             smie = castdata.rfind('</small></td></tr>')
             if smie != -1:
                 castdata = castdata[:smib].strip() + \
                             castdata[smie+18:].strip()
         castdata = castdata.replace('/tr> <tr', '/tr><tr')
         cast = self._getPersons(castdata, sep='</tr><tr')
         if cast: d['cast'] = cast
     akas = _findBetween(cont, 'Also Known As:</h5>', '</div>', maxRes=1)
     if akas:
         # For some reason, here <br> is still used in place of <br/>.
         akas[:] = [x for x in akas[0].split('<br>') if x.strip()]
         akas = [_unHtml(x).replace('" - ','::', 1).lstrip('"').strip()
                 for x in akas]
         if 'See more' in akas: akas.remove('See more')
         akas[:] = [x for x in akas if x]
         if akas:
             d['akas'] = akas
     mpaa = _findBetween(cont, 'MPAA</a>:', '</div>', maxRes=1)
     if mpaa: d['mpaa'] = _unHtml(mpaa[0])
     runtimes = _findBetween(cont, 'Runtime:</h5>', '</div>', maxRes=1)
     if runtimes:
         runtimes = runtimes[0]
         runtimes = [x.strip().replace(' min', '').replace(' (', '::(', 1)
                 for x in runtimes.split('|')]
         d['runtimes'] = [_unHtml(x).strip() for x in runtimes]
     if kind == 'episode':
         # number of episodes.
         epsn = _findBetween(cont, 'title="Full Episode List">', '</a>',
                             maxRes=1)
         if epsn:
             epsn = epsn[0].replace(' Episodes', '').strip()
             if epsn:
                 try:
                     epsn = int(epsn)
                 except:
                     self._mobile_logger.warn('wrong episodes #: %s', epsn)
                 d['number of episodes'] = epsn
     country = _findBetween(cont, 'Country:</h5>', '</div>', maxRes=1)
     if country:
         country[:] = country[0].split(' | ')
         country[:] = ['<a %s' % x for x in country if x]
         country[:] = [_unHtml(x.replace(' <i>', '::')) for x in country]
         if country: d['countries'] = country
     lang = _findBetween(cont, 'Language:</h5>', '</div>', maxRes=1)
     if lang:
         lang[:] = lang[0].split(' | ')
         lang[:] = ['<a %s' % x for x in lang if x]
         lang[:] = [_unHtml(x.replace(' <i>', '::')) for x in lang]
         if lang: d['languages'] = lang
     col = _findBetween(cont, '"/search/title?colors=', '</div>')
     if col:
         col[:] = col[0].split(' | ')
         col[:] = ['<a %s' % x for x in col if x]
         col[:] = [_unHtml(x.replace(' <i>', '::')) for x in col]
         if col: d['color info'] = col
     sm = _findBetween(cont, '/search/title?sound_mixes=', '</div>',
                         maxRes=1)
     if sm:
         sm[:] = sm[0].split(' | ')
         sm[:] = ['<a %s' % x for x in sm if x]
         sm[:] = [_unHtml(x.replace(' <i>', '::')) for x in sm]
         if sm: d['sound mix'] = sm
     cert = _findBetween(cont, 'Certification:</h5>', '</div>', maxRes=1)
     if cert:
         cert[:] = cert[0].split(' | ')
         cert[:] = [_unHtml(x.replace(' <i>', '::')) for x in cert]
         if cert: d['certificates'] = cert
     plotoutline = _findBetween(cont, 'Plot:</h5>', ['<a ', '</div>'],
                                 maxRes=1)
     if plotoutline:
         plotoutline = plotoutline[0].strip()
         plotoutline = plotoutline.rstrip('|').rstrip()
         if plotoutline: d['plot outline'] = _unHtml(plotoutline)
     aratio = _findBetween(cont, 'Aspect Ratio:</h5>', ['<a ', '</div>'],
                         maxRes=1)
     if aratio:
         aratio = aratio[0].strip().replace(' (', '::(', 1)
         if aratio:
             d['aspect ratio'] = _unHtml(aratio)
     return {'data': d}