示例#1
0
    def search(self, q, limit=12):
        """Find movies on TMDB by name, using the provider cache when possible.

        Args:
            q: Title to search for; it is passed to ``tmdb.search`` unchanged.
            limit: Parsing stops once this many results have been collected.

        Returns:
            ``False`` when the provider is disabled or the response raises
            ``SyntaxError`` while being parsed. Otherwise a list of the
            values produced by ``self.parseMovie``: the cached list on a
            cache hit, or a freshly built (possibly empty) list.
        """
        if self.isDisabled():
            return False

        search_string = q
        cache_key = 'tmdb.cache.%s.%s' % (search_string, limit)
        results = self.getCache(cache_key)
        if results:
            # Cache hit: hand the stored list back instead of falling off the
            # end of the method and returning None.
            return results

        log.debug('Searching for movie: %s', q)

        raw = None
        try:
            raw = tmdb.search(search_string)
        except Exception:
            # Best effort: a failed lookup is logged and treated as "no
            # results" rather than propagated to the caller.
            log.error('Failed searching TMDB for "%s": %s',
                      (search_string, traceback.format_exc()))

        results = []
        if not raw:
            return results

        try:
            for nr, movie in enumerate(raw, 1):
                results.append(self.parseMovie(movie))
                if nr == limit:
                    break

            for result in results:
                if 'year' in result:
                    log.info('Found: %s', [
                        result['titles'][0] + ' (' +
                        str(result['year']) + ')'
                    ])
                else:
                    log.info('Found: %s', [result['titles'][0]])

            self.setCache(cache_key.encode('utf-8'), results)
            return results
        except SyntaxError as e:
            log.error('Failed to parse XML response: %s', e)
            return False
示例#2
0
    def search(self, q, limit=12):
        """Look up movies on TMDB by name, consulting the cache first.

        Args:
            q: Title to search for; forwarded to ``tmdb.search`` as-is.
            limit: Stop parsing once this many results have been gathered.

        Returns:
            ``False`` if the provider is disabled or a ``SyntaxError`` is
            raised while the response is parsed. Otherwise a list of
            ``self.parseMovie`` results: the cached list when one exists,
            else a newly built list (empty when the lookup failed or
            returned nothing).
        """
        if self.isDisabled():
            return False

        search_string = q
        cache_key = "tmdb.cache.%s.%s" % (search_string, limit)
        cached = self.getCache(cache_key)
        if cached:
            # Previously the cached value was fetched but never returned.
            return cached

        log.debug("Searching for movie: %s", q)

        raw = None
        try:
            raw = tmdb.search(search_string)
        except Exception:
            # Lookup errors are logged and handled as an empty result set.
            log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))

        results = []
        if not raw:
            return results

        try:
            for position, movie in enumerate(raw, 1):
                results.append(self.parseMovie(movie))
                if position == limit:
                    break

            for result in results:
                if "year" in result:
                    log.info("Found: %s", [result["titles"][0] + " (" + str(result["year"]) + ")"])
                else:
                    log.info("Found: %s", [result["titles"][0]])

            self.setCache(cache_key.encode("utf-8"), results)
            return results
        except SyntaxError as e:
            log.error("Failed to parse XML response: %s", e)
            return False
示例#3
0
    def search(self, q, limit = 12):
        """Find movies on TMDB by name, returning cached results when present.

        The query is normalised with ``simplifyString`` before it is used for
        both the cache key and the TMDB request.

        Args:
            q: Title to search for.
            limit: Parsing stops once this many results have been collected.

        Returns:
            ``False`` when the provider is disabled or a ``SyntaxError`` is
            raised while the response is parsed. Otherwise a list of
            ``self.parseMovie`` results: the cached list on a cache hit, or a
            freshly built (possibly empty) list.
        """
        if self.isDisabled():
            return False

        search_string = simplifyString(q)
        cache_key = 'tmdb.cache.%s.%s' % (search_string, limit)
        results = self.getCache(cache_key)
        if results:
            # Return the cached list; without this the method ended without a
            # return statement and callers received None on every cache hit.
            return results

        log.debug('Searching for movie: %s', q)

        raw = None
        try:
            raw = tmdb.search(search_string)
        except Exception:
            # A failed lookup is logged and treated as "nothing found".
            log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))

        results = []
        if not raw:
            return results

        try:
            for nr, movie in enumerate(raw, 1):
                results.append(self.parseMovie(movie))
                if nr == limit:
                    break

            log.info('Found: %s', [result['titles'][0] + ' (' + str(result.get('year', 0)) + ')' for result in results])

            self.setCache(cache_key, results)
            return results
        except SyntaxError as e:
            log.error('Failed to parse XML response: %s', e)
            return False
示例#4
0
def _guess_title_and_year(filename):
    """Guess a movie title and release year from a file name.

    The extension is dropped and dots become spaces. The text before the
    first ``(`` is cleaned up into the title; the text inside the parentheses
    is searched for a four-digit year.

    Args:
        filename: File name, normally including its extension.

    Returns:
        A ``(title, year)`` tuple. ``year`` is the four-digit string found
        inside the parentheses, or ``""`` when there is none.
    """
    print("  slicing off extension...")
    dot = filename.rfind('.')
    # rfind() yields -1 when the name has no dot; slicing with that would
    # silently drop the last character, so keep the whole name instead.
    stem = filename if dot == -1 else filename[:dot]

    # Some release names use '.' where a space is meant.
    spaced = u" ".join(stem.split('.'))
    # Capturing split: [text before '(', text inside the parens, remainder].
    info = re.split(r"\((.*)\)", spaced)

    # Underscores and dashes are treated as spaces as well.
    title = info[0].rstrip().replace('_', ' ').replace('-', ' ')
    # Ignore anything wrapped in curly braces.
    title = re.sub(r'{.*}', '', title)
    title = title.replace('  ', ' ')

    print("    Stripping out retarded information...")
    # Order matters: ' dvdrip' must be removed before the shorter ' dvd'.
    release_tags = (
        '[',
        ']',
        ' dvdrip',
        ' dvdscr',
        ' hddvd',
        ' dvd',
        ' hdtv',
        ' tv',
        ' bluray',
        ' ts',
    )
    for tag in release_tags:
        title = title.replace(tag, '')
    # Resolution markers such as " 720p" or " 1080p".
    title = re.sub(r" \d{3,4}p", '', title)

    print("  Stripping metadata out of title, if it's there.")
    year = ""
    if len(info) > 1:
        meta = re.search(r'((?P<DIRECTOR>(.+)) - )?(?P<YEAR>\d{4})', info[1])
        if meta:
            year = meta.group('YEAR')
            print("  Found year data.")
        else:
            print("  No year data.")
    return title, year


def _import_new_movie(candidate, pset, movieresult):
    """Create a ``Movie`` row for a search hit and link ``candidate`` to it.

    Full details are fetched with ``getMovieInfo``. If the first save fails,
    a ``SavingProblem`` is recorded against the file and the import of this
    movie is abandoned.

    Args:
        candidate: The file object being matched.
        pset: Object returned by ``clean_slate(candidate)``; save failures
            are added to its ``savingproblem_set``.
        movieresult: The search result to import; only ``'id'`` and
            ``'images'`` are read from it.
    """
    movie = getMovieInfo(movieresult['id'])

    print("    Movie not in database: %s\n" % movie['name'].encode('utf-8'))
    latestEntry = Movie(
        id=int(movieresult['id']),  # for API compatibility
        rating=movie['rating'],
        votes=int(movie['votes']),
        name=movie['name'].encode('utf-8'),
        dateadded=datetime.datetime.now(),
        url=movie['url'],
        overview=movie['overview']
        if movie['overview'] else 'No overview available',
        # int(float(...)) also accepts decimal strings such as "12.5".
        popularity=int(float(movie['popularity'])),
        imdb_id=movie['imdb_id'] if movie['imdb_id'] else None,
        released=movie['released'] if movie['released'] else None,
        adult=movie['adult'] == 'true',
        director=movie['cast']['director'][0]['name']
        if 'director' in movie['cast'] else 'Unknown',
        runtime=str(datetime.timedelta(minutes=int(movie['runtime'])))
        if movie['runtime'] else None,
    )

    images = movie['images']
    # NOTE(review): the check looks at the detailed record but the value is
    # read from the search result -- confirm this mix is intended.
    try:
        if len(images) > 1 and 'poster' in images[1]:
            latestEntry.backdrop = movieresult['images'][1]['poster']
        else:
            latestEntry.backdrop = '/media/images/no_backdrop.jpg'
    except IndexError:
        latestEntry.backdrop = '/media/images/no_backdrop.jpg'
    try:
        if len(images) > 0 and 'cover' in images[0]:
            latestEntry.poster = images[0]['cover']
        else:
            # NOTE(review): every other field is read with movie[...]; if
            # `movie.id` is not a valid attribute this falls through to the
            # static placeholder below -- confirm which is wanted.
            latestEntry.poster = '/imaging/no_poster/{}'.format(movie.id)
    except Exception:
        latestEntry.poster = '/media/images/no_poster.jpg'
    try:
        if len(images) > 0 and 'thumb' in images[0]:
            latestEntry.thumb = images[0]['thumb']
        else:
            latestEntry.thumb = '/media/images/no_thumb.jpg'
    except Exception:
        latestEntry.thumb = '/media/images/no_thumb.jpg'

    print("    adding %s to movie's file set..." % candidate)
    latestEntry.files.add(candidate)
    # The entry has to be saved before the genre links below are created.
    try:
        latestEntry.save()
    except Exception:
        print("    Something went wrong; moving on.")
        # Clear older records first so the fresh one is the only one left
        # (presumably remove_saving_problem() deletes them -- verify).
        candidate.remove_saving_problem()
        prob = SavingProblem()
        prob.file = candidate
        prob.save()
        pset.savingproblem_set.add(prob)
        pset.save()
        # Do not keep working with an entry that could not be saved.
        return

    candidate.remove_saving_problem()

    print("    setting %s to movie's certification..." % movie['certification'])
    cert_name = "None" if movie['certification'] is None else movie['certification']
    cert, created = MovieCert.objects.get_or_create(cert=cert_name)
    if created:
        print("      Found a new cert, adding to database...")
    latestEntry.cert = cert

    print("    adding genres to movie's genres...")
    if 'genre' in movie['categories']:
        for genre in movie['categories']['genre']:
            newGenre, created = MovieGenre.objects.get_or_create(name=genre)
            if created:
                print("      Found a new genre, adding it to database...")
            # Link both directions, then save the genre before moving on.
            latestEntry.genres.add(newGenre)
            newGenre.movies.add(latestEntry)
            newGenre.save()
    else:
        latestEntry.genres.add(MovieGenre.objects.get(name="None"))
    latestEntry.save()


def crawlForMovies(count=0):
    """Import files from the File table that look like movies.

    Candidate files are selected by extension and path, a title and year are
    guessed from each file name, TMDB is queried, and the first hit is either
    linked to an existing ``Movie`` or imported as a new one. Files with no
    hit get a ``DNEProblem`` record.

    Args:
        count: Number of leading candidates to skip; it is also used as the
            running counter in the progress output.

    Returns:
        None. The crawl stops early if the TMDB query raises
        ``TmdHttpError``.
    """
    # Video files under a path containing "Movies", excluding names that
    # start with '.' or '_'.
    print("Filtering out non-({})".format(File.videoEndings))
    candidates = File.objects.filter(
        filenameend__regex=r'({})'.format(File.videoEndings))

    dirExcludes = "pornography"
    print("Filtering out things in ({}) directories, things not in movies".format(
        dirExcludes))
    candidates = candidates.exclude(path__fullname__regex='({})'.format(dirExcludes))\
                           .filter(path__fullname__icontains='Movies')\
                           .exclude(filename__istartswith='.')\
                           .exclude(filename__istartswith='_')

    # Intended shape: Filename[ ([Director [- ]]Year)].extension
    print("Narrowing down filenames a little further to deal with \"(director - year)\" construction")
    # filter() returns a new queryset; the result must be kept or the call
    # has no effect.
    candidates = candidates.filter(
        filename__regex=
        r'(.)+( \(([a-zA-Z]* (- )?)?[12][0-9][0-9][0-9]\)\))?.(.)*')

    total = len(candidates)
    print("{:d} files to check. Here we go...".format(total))
    for candidate in candidates[count:]:
        if candidate.goodfile == 0:
            print("Marked as bad file; skipping...")
            continue
        pset = clean_slate(candidate)
        count += 1
        print(candidate.id)
        # Skip files that are already linked to a movie.
        try:
            if candidate.MIDs is not None:
                print("  Candidate file %s is already recognized; moving on!" % candidate.id)
                continue
        except ObjectDoesNotExist:
            # The movie this file pointed at no longer exists.
            print("  Previous movie no longer extant, resetting link...")
            candidate.MIDs = None
            candidate.save()

        print("#%d out of %d" % (count, total))
        print("  Candidate (ID %d): %s " % (candidate.id, candidate))
        probablyTitle, year = _guess_title_and_year(candidate.filename)

        string = "  Querying TMDB... (%s) " % probablyTitle
        print(string.encode('utf-8'))
        try:
            movies = search("%s %s" % (probablyTitle, year))
        except TmdHttpError as e:
            print("  TMDB not available: \n\t%s" % e)
            return

        if len(movies) > 0:
            print("  Found something!")
            candidate.remove_dne_problem()
        else:
            # Record the miss so it can be reviewed later.
            candidate.remove_dne_problem()
            prob = DNEProblem()
            prob.file = candidate
            prob.save()
            pset.dneproblem_set.add(prob)
            pset.save()

            print("  No love. Moving on!")
            continue

        # Only the first result is used; it is taken to be the best match.
        for movieresult in movies[:1]:
            try:
                checker = Movie.objects.get(pk=int(movieresult['id']))
            except ObjectDoesNotExist:
                # Not in the database yet, so this is a new movie.
                _import_new_movie(candidate, pset, movieresult)
                continue

            print("    Movie already in database; no new entry made.")
            print("    Checking to see if this is a new file...")
            if candidate in checker.files.all():
                print("    Not a new file, moving on.")
                continue
            print("    New file! adding to list of sources...")
            checker.files.add(candidate)
            checker.save()
示例#5
0
def _guess_title_and_year(filename):
    """Guess a movie title and release year from a file name.

    The extension is dropped and dots become spaces. The text before the
    first ``(`` is cleaned up into the title; the text inside the parentheses
    is searched for a four-digit year.

    Args:
        filename: File name, normally including its extension.

    Returns:
        A ``(title, year)`` tuple. ``year`` is the four-digit string found
        inside the parentheses, or ``""`` when there is none.
    """
    print("  slicing off extension...")
    dot = filename.rfind('.')
    # rfind() yields -1 when the name has no dot; slicing with that would
    # silently drop the last character, so keep the whole name instead.
    stem = filename if dot == -1 else filename[:dot]

    # Some release names use '.' where a space is meant.
    spaced = u" ".join(stem.split('.'))
    # Capturing split: [text before '(', text inside the parens, remainder].
    info = re.split(r"\((.*)\)", spaced)

    # Underscores and dashes are treated as spaces as well.
    title = info[0].rstrip().replace('_', ' ').replace('-', ' ')
    # Ignore anything wrapped in curly braces.
    title = re.sub(r'{.*}', '', title)
    title = title.replace('  ', ' ')

    print("    Stripping out retarded information...")
    # Order matters: ' dvdrip' must be removed before the shorter ' dvd'.
    release_tags = ('[', ']', ' dvdrip', ' dvdscr', ' hddvd', ' dvd', ' hdtv', ' tv', ' bluray', ' ts')
    for tag in release_tags:
        title = title.replace(tag, '')
    # Resolution markers such as " 720p" or " 1080p".
    title = re.sub(r" \d{3,4}p", '', title)

    print("  Stripping metadata out of title, if it's there.")
    year = ""
    if len(info) > 1:
        meta = re.search(r'((?P<DIRECTOR>(.+)) - )?(?P<YEAR>\d{4})', info[1])
        if meta:
            year = meta.group('YEAR')
            print("  Found year data.")
        else:
            print("  No year data.")
    return title, year


def _import_new_movie(candidate, pset, movieresult):
    """Create a ``Movie`` row for a search hit and link ``candidate`` to it.

    Full details are fetched with ``getMovieInfo``. If the first save fails,
    a ``SavingProblem`` is recorded against the file and the import of this
    movie is abandoned.

    Args:
        candidate: The file object being matched.
        pset: Object returned by ``clean_slate(candidate)``; save failures
            are added to its ``savingproblem_set``.
        movieresult: The search result to import; only ``'id'`` and
            ``'images'`` are read from it.
    """
    movie = getMovieInfo(movieresult['id'])

    print("    Movie not in database: %s\n" % movie['name'].encode('utf-8'))
    latestEntry = Movie(
        id=int(movieresult['id']),  # for API compatibility
        rating=movie['rating'],
        votes=int(movie['votes']),
        name=movie['name'].encode('utf-8'),
        dateadded=datetime.datetime.now(),
        url=movie['url'],
        overview=movie['overview'] if movie['overview'] else 'No overview available',
        popularity=int(float(movie['popularity'])),
        imdb_id=movie['imdb_id'] if movie['imdb_id'] else None,
        released=movie['released'] if movie['released'] else None,
        adult=movie['adult'] == 'true',
        director=movie['cast']['director'][0]['name'] if 'director' in movie['cast'] else 'Unknown',
        runtime=str(datetime.timedelta(minutes=int(movie['runtime']))) if movie['runtime'] else None,
    )

    images = movie['images']
    # NOTE(review): the check looks at the detailed record but the value is
    # read from the search result -- confirm this mix is intended.
    try:
        if len(images) > 1 and 'poster' in images[1]:
            latestEntry.backdrop = movieresult['images'][1]['poster']
        else:
            latestEntry.backdrop = '/media/images/no_backdrop.jpg'
    except IndexError:
        latestEntry.backdrop = '/media/images/no_backdrop.jpg'
    try:
        if len(images) > 0 and 'cover' in images[0]:
            latestEntry.poster = images[0]['cover']
        else:
            # NOTE(review): every other field is read with movie[...]; if
            # `movie.id` is not a valid attribute this falls through to the
            # static placeholder below -- confirm which is wanted.
            latestEntry.poster = '/imaging/no_poster/{}'.format(movie.id)
    except Exception:
        latestEntry.poster = '/media/images/no_poster.jpg'
    try:
        if len(images) > 0 and 'thumb' in images[0]:
            latestEntry.thumb = images[0]['thumb']
        else:
            latestEntry.thumb = '/media/images/no_thumb.jpg'
    except Exception:
        latestEntry.thumb = '/media/images/no_thumb.jpg'

    print("    adding %s to movie's file set..." % candidate)
    latestEntry.files.add(candidate)
    # The entry has to be saved before the genre links below are created.
    try:
        latestEntry.save()
    except Exception:
        print("    Something went wrong; moving on.")
        # Clear older records first so the fresh one is the only one left
        # (presumably remove_saving_problem() deletes them -- verify).
        candidate.remove_saving_problem()
        prob = SavingProblem()
        prob.file = candidate
        prob.save()
        pset.savingproblem_set.add(prob)
        pset.save()
        # Do not keep working with an entry that could not be saved.
        return

    candidate.remove_saving_problem()

    print("    setting %s to movie's certification..." % movie['certification'])
    cert_name = "None" if movie['certification'] is None else movie['certification']
    cert, created = MovieCert.objects.get_or_create(cert=cert_name)
    if created:
        print("      Found a new cert, adding to database...")
    latestEntry.cert = cert

    print("    adding genres to movie's genres...")
    if 'genre' in movie['categories']:
        for genre in movie['categories']['genre']:
            newGenre, created = MovieGenre.objects.get_or_create(name=genre)
            if created:
                print("      Found a new genre, adding it to database...")
            # Link both directions, then save the genre before moving on.
            latestEntry.genres.add(newGenre)
            newGenre.movies.add(latestEntry)
            newGenre.save()
    else:
        latestEntry.genres.add(MovieGenre.objects.get(name="None"))
    latestEntry.save()


def crawlForMovies(count=0):
    """Import files from the File table that look like movies.

    Candidate files are selected by extension and path, a title and year are
    guessed from each file name, TMDB is queried, and the first hit is either
    linked to an existing ``Movie`` or imported as a new one. Files with no
    hit get a ``DNEProblem`` record.

    Args:
        count: Number of leading candidates to skip; it is also used as the
            running counter in the progress output.

    Returns:
        None. The crawl stops early if the TMDB query raises
        ``TmdHttpError``.
    """
    # Video files under a path containing "Movies", excluding names that
    # start with '.' or '_'.
    print("Filtering out non-({})".format(File.videoEndings))
    candidates = File.objects.filter(filenameend__regex=r'({})'.format(File.videoEndings))

    dirExcludes = "^[pP]orn"
    print("Filtering out things in ({}) directories, things not in movies".format(dirExcludes))
    candidates = candidates.exclude(path__fullname__regex='({})'.format(dirExcludes))\
                           .filter(path__fullname__icontains='Movies')\
                           .exclude(filename__istartswith='.')\
                           .exclude(filename__istartswith='_')

    # Intended shape: Filename[ ([Director [- ]]Year)].extension
    print("Narrowing down filenames a little further to deal with \"(director - year)\" construction")
    # filter() returns a new queryset; the result must be kept or the call
    # has no effect.
    candidates = candidates.filter(filename__regex=r'(.)+( \(([a-zA-Z]* (- )?)?[12][0-9][0-9][0-9]\)\))?.(.)*')

    total = len(candidates)
    print("{:d} files to check. Here we go...".format(total))
    for candidate in candidates[count:]:
        if candidate.goodfile == 0:
            print("Marked as bad file; skipping...")
            continue
        pset = clean_slate(candidate)
        if pset is None:
            continue
        count += 1
        print(candidate.id)
        # Skip files that are already linked to a movie.
        try:
            if candidate.MIDs is not None:
                print("  Candidate file %s is already recognized; moving on!" % candidate.id)
                continue
        except ObjectDoesNotExist:
            # The movie this file pointed at no longer exists.
            print("  Previous movie no longer extant, resetting link...")
            candidate.MIDs = None
            candidate.save()

        print("#%d out of %d" % (count, total))
        print("  Candidate (ID %d): %s " % (candidate.id, candidate))
        probablyTitle, year = _guess_title_and_year(candidate.filename)

        string = "  Querying TMDB... (%s) " % probablyTitle
        print(string.encode('utf-8'))
        try:
            movies = search("%s %s" % (probablyTitle, year))
        except TmdHttpError as e:
            print("  TMDB not available: \n\t%s" % e)
            return

        if len(movies) > 0:
            print("  Found something!")
            candidate.remove_dne_problem()
        else:
            # Record the miss so it can be reviewed later.
            candidate.remove_dne_problem()
            prob = DNEProblem()
            prob.file = candidate
            prob.save()
            pset.dneproblem_set.add(prob)
            pset.save()

            print("  No love. Moving on!")
            continue

        # Only the first result is used; it is taken to be the best match.
        for movieresult in movies[:1]:
            try:
                checker = Movie.objects.get(pk=int(movieresult['id']))
            except ObjectDoesNotExist:
                # Not in the database yet, so this is a new movie.
                _import_new_movie(candidate, pset, movieresult)
                continue

            print("    Movie already in database; no new entry made.")
            print("    Checking to see if this is a new file...")
            if candidate in checker.files.all():
                print("    Not a new file, moving on.")
                continue
            print("    New file! adding to list of sources...")
            checker.files.add(candidate)
            checker.save()
示例#6
0
def process_movie(path, conf, facts):
    '''\
    Retrieve and write metadata for this movie.
    '''
    # check if metadata has already been written for this movie
    if is_movie_metadata_complete(path, conf):
        return
    
    # no metadata yet, so fetch it
    try:
        print '\tRetrieving movie metadata...'
        
        movie_title = facts['movie_title']
        # the .decode call is necessary because the series title may have non-
        # ASCII characters in it. In Linux, path names are UTF-8 encoded, so
        # we need to tell Python that so it can use that information for
        # encoding later.
        results = tmdb.search(movie_title.decode('utf-8'))
        if results:
            # using .info() returns the full record, not just a common subset
            result = results[0].info()
        else:
            print '\t\t[ERROR] No matches found for the title \'%s\'' % movie_title
            return
        
        # data has been fetched; write it out
        xml_path = get_movie_metadata_path(path)
        
        # the .get method is used for non-essential attributes
        xml_root = ET.Element('Title')
        x = ET.SubElement(xml_root, 'LocalTitle')
        x.text = result['name']
        x = ET.SubElement(xml_root, 'OriginalTitle')
        x.text = result['original_name']
        x = ET.SubElement(xml_root, 'Description')
        x.text = result.get('overview')
        x = ET.SubElement(xml_root, 'Tagline')
        x.text = result.get('tagline')
        x = ET.SubElement(xml_root, 'IMDBId')
        x.text = result.get('imdb_id')
        
        # parse the production year manually
        date_released = result.get('released')
        if date_released:
            try:
                date_released = datetime.strptime('%Y-%m-%d', date_released)
                x = ET.SubElement(xml_root, 'ProductionYear')
                x.text = date_released.year
            except ValueError, e:
                # could not parse the date; whatever.
                pass
        
        x = ET.SubElement(xml_root, 'IMDBrating')
        x.text = result.get('rating')
        x = ET.SubElement(xml_root, 'MPAARating')
        x.text = result.get('certification')
        
        persons = ET.SubElement(xml_root, 'Persons')
        cast = result.get('cast', { })
        for actor in cast.get('actor', [ ]):
            try:
                actor_id = actor['id']
                actor_name = actor['name']
                actor_role = actor['character']
                person = ET.SubElement(persons, 'Person')
                # the ID field isn't used by Media Browser, but seems useful for
                # other uses.
                x = ET.SubElement(person, 'Id')
                x.text = actor_id
                x = ET.SubElement(person, 'Type')
                x.text = 'Actor'
                x = ET.SubElement(person, 'Name')
                x.text = actor_name
                x = ET.SubElement(person, 'Role')
                x.text = actor_role
            except KeyError, e:
                # incomplete metadata, meh.
                pass