Exemplo n.º 1
0
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local cache is consulted first (by rottentomatoes_id, then imdb_id, then title/year, then the saved
    search results). A cached entry that has expired is refreshed online unless `only_cached` is set. On a
    cache miss the online lookup tries the imdb alias, then the rottentomatoes_id, then a title search.

    :param title: title of desired movie
    :param year: release year of desired movie
    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the parsed
        values replace any `title` and `year` that were passed in
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # smart_match takes over title and year: both are replaced by whatever the parser extracts
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        # An empty parsed title is only fatal when there is no id to fall back on
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache, most specific criteria first
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        # Alternate ids are compared without the leading 't' characters of the imdb id
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # Refresh through the imdb alias when the cached movie has one, otherwise by its own id.
                # next() with a default avoids indexing an empty match list (and works on lazy iterators).
                imdb_alt_id = next((alt_id.id for alt_id in (movie.alternate_ids or [])
                                    if alt_id.name in ('imdb', 'flexget_imdb')), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                # Best effort: keep serving the stale cached entry when the refresh fails
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Collect every reason the alias result disagrees with the requested title/year
                    mismatch = []
                    if title:
                        # Only compare titles when one was requested; an id-only lookup has title=None.
                        # A trailing parenthesised suffix on the RT title is ignored for the comparison.
                        rt_title = re.sub(r'\s+\(.*\)$', '', result['title'].lower())
                        ratio = difflib.SequenceMatcher(lambda x: x == ' ', rt_title, title.lower()).ratio()
                        if ratio < MIN_MATCH:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and put the best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results` in the loop)
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # A matching theater (or, failing that, dvd) release year rescues the hit
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                # %s (not %d) for the year: it is only coerced to int above when a year was
                                # requested, so it may be missing or non-numeric here
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%s) - %s` <-?-> `%s (%s) - %s`)' %
                                          (results[0]['title'], results[0].get('year'), results[0]['id'],
                                           results[1]['title'], results[1].get('year'), results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the best hit, falling back to the search entry itself
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            # NOTE(review): the other movies_alias calls pass 'imdb' as a second argument;
                            # confirm the helper defaults to it
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        # Remember the caller's imdb_id when Rotten Tomatoes does not list it for this movie.
                        # any() is used instead of `not filter(...)`, which is always False on lazy iterators.
                        if imdb_id and not any(
                                alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                for alt_id in movie.alternate_ids):
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                            movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                 imdb_id.lstrip('t')))
                        session.add(movie)
                        session.commit()

                        # Cache the search string only when it differs from the movie's actual title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
Exemplo n.º 2
0
    def lookup(title=None,
               year=None,
               tmdb_id=None,
               imdb_id=None,
               smart_match=None,
               only_cached=False,
               session=None):
        """Do a lookup from tmdb for the movie matching the passed arguments.

        Any combination of criteria can be passed, the most specific criteria specified will be used.
        The local cache is consulted first; a cached entry is refreshed online once it is older than
        its refresh window, and a cache miss triggers an online lookup, unless `only_cached` is set.

        Args:
            title: title of desired movie
            year: release year of desired movie
            tmdb_id: tmdb_id of desired movie
            imdb_id: imdb_id of desired movie
            smart_match: attempt to clean and parse title and year from a string; only used when
                none of tmdb_id, imdb_id or title is given
            only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
            session: optionally specify a session to use, if specified, returned Movie will be live in that session.
                NOTE(review): the session is used unconditionally below, so it is presumably injected
                by a decorator outside this block when omitted - confirm.

        Returns:
            The Movie object populated with data from tmdb

        Raises:
            LookupError if a match cannot be found or there are other problems with the lookup
        """

        if not (tmdb_id or imdb_id or title) and smart_match:
            # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
            title_parser = MovieParser()
            title_parser.parse(smart_match)
            title = title_parser.name
            year = title_parser.year

        if title:
            search_string = title.lower()
            if year:
                search_string = '%s %s' % (search_string, year)
        elif not (tmdb_id or imdb_id):
            raise LookupError('No criteria specified for tmdb lookup')
        log.debug('Looking up tmdb information for %r' % {
            'title': title,
            'tmdb_id': tmdb_id,
            'imdb_id': imdb_id
        })

        movie = None

        def id_str():
            return '<title=%s,tmdb_id=%s,imdb_id=%s>' % (title, tmdb_id,
                                                         imdb_id)

        # Try the cache first, most specific criteria first
        if tmdb_id:
            movie = session.query(TMDBMovie).filter(
                TMDBMovie.id == tmdb_id).first()
        if not movie and imdb_id:
            movie = session.query(TMDBMovie).filter(
                TMDBMovie.imdb_id == imdb_id).first()
        if not movie and title:
            movie_filter = session.query(TMDBMovie).filter(
                func.lower(TMDBMovie.name) == title.lower())
            if year:
                movie_filter = movie_filter.filter(TMDBMovie.year == year)
            movie = movie_filter.first()
            if not movie:
                # Fall back to a previously saved search string -> movie mapping
                found = session.query(TMDBSearchResult). \
                    filter(func.lower(TMDBSearchResult.search) == search_string).first()
                if found and found.movie:
                    movie = found.movie
        if movie:
            # Movie found in cache, check if cache has expired.
            now = datetime.now()
            refresh_time = timedelta(days=2)
            if movie.released:
                if movie.released > now - timedelta(days=7):
                    # Movie is less than a week old, expire after 1 day
                    refresh_time = timedelta(days=1)
                else:
                    # Older movies are refreshed less often: 5 extra days per year of age
                    age_in_years = (now - movie.released).days / 365
                    refresh_time += timedelta(days=age_in_years * 5)
            if movie.updated < now - refresh_time and not only_cached:
                log.debug(
                    'Cache has expired for %s, attempting to refresh from TMDb.'
                    % id_str())
                try:
                    ApiTmdb.get_movie_details(movie, session)
                except URLError:
                    # Best effort: keep serving the stale cached entry when the refresh fails
                    log.error(
                        'Error refreshing movie details from TMDb, cached info being used.'
                    )
            else:
                log.debug('Movie %s information restored from cache.' %
                          id_str())
        else:
            if only_cached:
                raise LookupError('Movie %s not found from cache' % id_str())
            # There was no movie found in the cache, do a lookup from tmdb
            log.debug('Movie %s not found in cache, looking up from tmdb.' %
                      id_str())
            try:
                if imdb_id and not tmdb_id:
                    result = get_first_result('imdbLookup', imdb_id)
                    if result:
                        movie = session.query(TMDBMovie).filter(
                            TMDBMovie.id == result['id']).first()
                        if movie:
                            # Movie was in database, but did not have the imdb_id stored, force an update
                            ApiTmdb.get_movie_details(movie, session)
                        else:
                            tmdb_id = result['id']
                if tmdb_id:
                    movie = TMDBMovie()
                    movie.id = tmdb_id
                    ApiTmdb.get_movie_details(movie, session)
                    # A movie without a name after the details fetch is treated as not found
                    if movie.name:
                        session.merge(movie)
                    else:
                        movie = None
                elif title and not movie:
                    # Only fall back to a title search when the imdb lookup above did not already
                    # resolve the movie; otherwise a fuzzy search hit could replace the id-based match.
                    result = get_first_result('search', search_string)
                    if result:
                        movie = session.query(TMDBMovie).filter(
                            TMDBMovie.id == result['id']).first()
                        if not movie:
                            movie = TMDBMovie(result)
                            ApiTmdb.get_movie_details(movie, session)
                            session.merge(movie)
                        # Cache the search string only when it differs from the movie's actual name
                        if title.lower() != movie.name.lower():
                            session.merge(
                                TMDBSearchResult(search=search_string,
                                                 movie=movie))
            except URLError:
                raise LookupError('Error looking up movie from TMDb')

        if not movie:
            raise LookupError('No results found from tmdb for %s' % id_str())
        else:
            # Access attributes to force the relationships to eager load before we detach from session
            movie.genres
            movie.posters
            return movie
Exemplo n.º 3
0
 def parse(self, data):
     """Run a fresh ``MovieParser`` over ``data`` and return the parser.

     The input is assigned to the parser's ``data`` attribute before its
     ``parse()`` method is called; the parser object itself is returned.
     """
     parser = MovieParser()
     parser.data = data
     parser.parse()
     return parser