def search(self, q, limit=12):
    """Find movies on TMDB by name.

    The lookup is cached under a key built from the query and the limit.

    :param q: title to search for.
    :param limit: stop after this many parsed results.
    :returns: list of movies as produced by ``self.parseMovie`` (empty when
        TMDB returned nothing or the request failed), or ``False`` when the
        provider is disabled or the response could not be parsed.
    """
    if self.isDisabled():
        return False

    search_string = q
    cache_key = 'tmdb.cache.%s.%s' % (search_string, limit)
    results = self.getCache(cache_key)
    if results:
        # Cache hit: return the stored results instead of falling off the
        # end of the function with None.
        return results

    log.debug('Searching for movie: %s', q)

    raw = None
    try:
        raw = tmdb.search(search_string)
    except Exception:
        # Best effort: a failed request is logged and treated as "no results".
        log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))

    results = []
    if not raw:
        return results

    try:
        for nr, movie in enumerate(raw, 1):
            results.append(self.parseMovie(movie))
            if nr == limit:
                break

        for result in results:
            if 'year' in result:
                log.info('Found: %s', [result['titles'][0] + ' (' + str(result['year']) + ')'])
            else:
                log.info('Found: %s', [result['titles'][0]])

        self.setCache(cache_key.encode('utf-8'), results)
    except SyntaxError as e:
        log.error('Failed to parse XML response: %s', e)
        return False

    return results
def search(self, q, limit=12):
    """Find movies on TMDB by name.

    The lookup is cached under a key built from the query and the limit.

    :param q: title to search for.
    :param limit: stop after this many parsed results.
    :returns: list of movies as produced by ``self.parseMovie`` (empty when
        TMDB returned nothing or the request failed), or ``False`` when the
        provider is disabled or the response could not be parsed.
    """
    if self.isDisabled():
        return False

    search_string = q
    cache_key = "tmdb.cache.%s.%s" % (search_string, limit)
    results = self.getCache(cache_key)
    if results:
        # Cache hit: return the stored results instead of falling off the
        # end of the function with None.
        return results

    log.debug("Searching for movie: %s", q)

    raw = None
    try:
        raw = tmdb.search(search_string)
    except Exception:
        # Best effort: a failed request is logged and treated as "no results".
        log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))

    results = []
    if not raw:
        return results

    try:
        for nr, movie in enumerate(raw, 1):
            results.append(self.parseMovie(movie))
            if nr == limit:
                break

        for result in results:
            if "year" in result:
                log.info("Found: %s", [result["titles"][0] + " (" + str(result["year"]) + ")"])
            else:
                log.info("Found: %s", [result["titles"][0]])

        self.setCache(cache_key.encode("utf-8"), results)
    except SyntaxError as e:
        log.error("Failed to parse XML response: %s", e)
        return False

    return results
def search(self, q, limit = 12):
    """Find movies on TMDB by name.

    The query is passed through ``simplifyString`` before it is used for
    both the cache key and the TMDB request.

    :param q: title to search for.
    :param limit: stop after this many parsed results.
    :returns: list of movies as produced by ``self.parseMovie`` (empty when
        TMDB returned nothing or the request failed), or ``False`` when the
        provider is disabled or the response could not be parsed.
    """
    if self.isDisabled():
        return False

    search_string = simplifyString(q)
    cache_key = 'tmdb.cache.%s.%s' % (search_string, limit)
    results = self.getCache(cache_key)
    if results:
        # Cache hit: return the stored results instead of falling off the
        # end of the function with None.
        return results

    log.debug('Searching for movie: %s', q)

    raw = None
    try:
        raw = tmdb.search(search_string)
    except Exception:
        # Best effort: a failed request is logged and treated as "no results".
        log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))

    results = []
    if not raw:
        return results

    try:
        for nr, movie in enumerate(raw, 1):
            results.append(self.parseMovie(movie))
            if nr == limit:
                break

        log.info('Found: %s', [result['titles'][0] + ' (' + str(result.get('year', 0)) + ')' for result in results])

        self.setCache(cache_key, results)
    except SyntaxError as e:
        log.error('Failed to parse XML response: %s', e)
        return False

    return results
def crawlForMovies(count=0):
    """Import files that look like movies from the File table.

    For every candidate file the name is cleaned into a probable title (plus
    a year when one is found in parentheses), looked up on TMDB, and the
    first hit is linked to the file. A Movie row, its certification and its
    genres are created when the movie is not in the database yet. Files with
    no TMDB hit get a DNEProblem recorded on their problem set.

    :param count: number of leading candidates to skip (used as the slice
        start) and the starting value of the progress counter.
    :returns: None. Returns early when TMDB raises ``TmdHttpError``.
    """
    # grab all video files from things with Movie in the path name,
    # excluding things whose filename begin with '.' or '_'
    print("Filtering out non-({})".format(File.videoEndings))
    candidates = File.objects.filter(
        filenameend__regex=r'({})'.format(File.videoEndings))

    dirExcludes = "pornography"
    print("Filtering out things in ({}) directories, things not in movies".format(dirExcludes))
    candidates = (
        candidates
        .exclude(path__fullname__regex='({})'.format(dirExcludes))
        .filter(path__fullname__icontains='Movies')
        .exclude(filename__istartswith='.')
        .exclude(filename__istartswith='_')
    )

    # We should now have all likely video files. Narrow to names shaped like
    #   Filename[ ([Director [- ]]Year)].filenameend
    # so that they can be used with tmdb/imdb.
    print("Narrowing down filenames a little further to deal with \"(director - year)\" construction")
    # filter() returns a new QuerySet, so the result has to be re-bound or
    # the narrowing is silently dropped.
    candidates = candidates.filter(
        filename__regex=r'(.)+( \(([a-zA-Z]* (- )?)?[12][0-9][0-9][0-9]\)\))?.(.)*')

    total = len(candidates)
    print("{:d} files to check. Here we go...".format(total))

    for candidate in candidates[count:]:
        if candidate.goodfile == 0:
            print("Marked as bad file; skipping...")
            continue

        pset = clean_slate(candidate)
        if pset is None:
            # The problem-recording code below needs a problem set.
            continue
        count += 1

        # skip all of this if the file already has a movie
        print(candidate.id)
        try:
            if candidate.MIDs is not None:
                print(" Candidate file %s is already recognized; moving on!" % candidate.id)
                continue
        except ObjectDoesNotExist:
            # an old movie file was deleted
            print(" Previous movie no longer extant, resetting link...")
            candidate.MIDs = None
            candidate.save()

        # get rid of the file extension
        print("#%d out of %d" % (count, total))
        print(" Candidate (ID %d): %s " % (candidate.id, candidate))
        print(" slicing off extension...")
        stem, dot, _extension = candidate.filename.rpartition('.')
        # Without a dot there is no extension to drop; keep the whole name.
        info = stem if dot else candidate.filename

        # some file names use '.' instead of spaces
        info = u" ".join(info.split('.'))
        # [title, text inside the parentheses, remainder] when "(...)" exists
        info = re.split(r"\((.*)\)", info)
        # '_' and '-' are treated as spaces too
        probablyTitle = info[0].rstrip().replace('_', ' ').replace('-', ' ')
        # ignore anything between {}
        probablyTitle = re.sub(r'{.*}', '', probablyTitle)
        # collapse the runs of spaces left behind by the replacements above
        probablyTitle = re.sub(r' {2,}', ' ', probablyTitle)

        # strip release/source tags that are not part of the title
        print(" Stripping out retarded information...")
        releaseTags = ['[', ']', ' dvdrip', ' dvdscr', ' hddvd', ' dvd',
                       ' hdtv', ' tv', ' bluray', ' ts']
        for tag in releaseTags:
            probablyTitle = probablyTitle.replace(tag, '')
        # drop resolution markers such as " 720p" or " 1080p"
        probablyTitle = re.sub(r" \d{3,4}p", '', probablyTitle)

        # get some meta-data
        print(" Stripping metadata out of title, if it's there.")
        year = ""
        if len(info) > 1:
            meta = re.search(r'((?P<DIRECTOR>(.+)) - )?(?P<YEAR>\d{4})', info[1])
            if meta is not None:
                year = meta.group('YEAR')
                print(" Found year data.")
            else:
                print(" No year data.")

        # find movies that match the title
        banner = " Querying TMDB... (%s) " % probablyTitle
        print(banner.encode('utf-8'))
        try:
            movies = search("%s %s" % (probablyTitle, year))
        except TmdHttpError as e:
            print(" TMDB not available: \n\t%s" % e)
            return

        if len(movies) > 0:
            print(" Found something!")
            candidate.remove_dne_problem()
        else:
            # add problem for later perusal
            candidate.remove_dne_problem()
            prob = DNEProblem()
            prob.file = candidate
            prob.save()
            pset.dneproblem_set.add(prob)
            pset.save()
            print(" No love. Moving on!")
            continue

        # only take the first result, which is the most likely
        movieresult = movies[0]

        # now, get the info and put in the DB - if it's not already there.
        try:
            checker = Movie.objects.get(pk=int(movieresult['id']))
        except ObjectDoesNotExist:
            # this exception means it's a new movie
            checker = None

        if checker is not None:
            print(" Movie already in database; no new entry made.")
            print(" Checking to see if this is a new file...")
            if candidate in checker.files.all():
                print(" Not a new file, moving on.")
            else:
                print(" New file! adding to list of sources...")
                checker.files.add(candidate)
                checker.save()
            continue

        movie = getMovieInfo(movieresult['id'])
        print(" Movie not in database: %s\n" % movie['name'].encode('utf-8'))
        certName = "None" if movie['certification'] is None else movie['certification']
        latestEntry = Movie(
            id=int(movieresult['id']),  # for API compatibility
            rating=movie['rating'],
            votes=int(movie['votes']),
            name=movie['name'].encode('utf-8'),
            dateadded=datetime.datetime.now(),
            url=movie['url'],
            overview=movie['overview'] if movie['overview'] else 'No overview available',
            # popularity may arrive as a fractional value or numeric string
            popularity=int(float(movie['popularity'])),
            # in case we ever want to use imdb data
            imdb_id=movie['imdb_id'] if movie['imdb_id'] else None,
            released=movie['released'] if movie['released'] else None,
            adult=True if movie['adult'] == 'true' else False,
            director=movie['cast']['director'][0]['name']
            if 'director' in movie['cast'] else 'Unknown',
            runtime=str(datetime.timedelta(minutes=int(movie['runtime'])))
            if movie['runtime'] else None,
        )

        # setting images for the movie, falling back to static placeholders
        images = movie['images']
        try:
            # read from the same record the guard inspects
            if len(images) > 1 and 'poster' in images[1]:
                latestEntry.backdrop = images[1]['poster']
            else:
                latestEntry.backdrop = '/media/images/no_backdrop.jpg'
        except IndexError:
            latestEntry.backdrop = '/media/images/no_backdrop.jpg'
        try:
            if len(images) > 0 and 'cover' in images[0]:
                latestEntry.poster = images[0]['cover']
            else:
                # NOTE(review): movie is indexed like a dict everywhere else;
                # if it has no `.id` attribute this raises and the handler
                # below supplies the static placeholder - confirm intent.
                latestEntry.poster = '/imaging/no_poster/{}'.format(movie.id)
        except Exception:
            latestEntry.poster = '/media/images/no_poster.jpg'
        try:
            if len(images) > 0 and 'thumb' in images[0]:
                latestEntry.thumb = images[0]['thumb']
            else:
                latestEntry.thumb = '/media/images/no_thumb.jpg'
        except Exception:
            latestEntry.thumb = '/media/images/no_thumb.jpg'

        print(" adding %s to movie's file set..." % candidate)
        latestEntry.files.add(candidate)

        # we have to save here, or the genre loop below will fail due to no
        # entry in the movies table
        try:
            latestEntry.save()
        except Exception:
            print(" Something went wrong; moving on.")
            prob = SavingProblem()
            prob.file = candidate
            prob.save()
            pset.savingproblem_set.add(prob)
            pset.save()
        # NOTE(review): this also runs right after a failed save, and the
        # code below keeps using the unsaved entry - confirm whether the
        # failure branch was meant to skip to the next candidate.
        candidate.remove_saving_problem()

        print(" setting %s to movie's certification..." % movie['certification'])
        cert, certCreated = MovieCert.objects.get_or_create(cert=certName)
        if certCreated:
            print(" Found a new cert, adding to database...")
        latestEntry.cert = cert

        print(" adding genres to movie's genres...")
        if 'genre' in movie['categories']:
            for genre in movie['categories']['genre']:
                newGenre, genreCreated = MovieGenre.objects.get_or_create(name=genre)
                if genreCreated:
                    print(" Found a new genre, adding it to database...")
                # add movie to genre and vice versa, then save the genre
                latestEntry.genres.add(newGenre)
                newGenre.movies.add(latestEntry)
                newGenre.save()
        else:
            latestEntry.genres.add(MovieGenre.objects.get(name="None"))
        latestEntry.save()
def crawlForMovies(count=0):
    """Import files that look like movies from the File table.

    For every candidate file the name is cleaned into a probable title (plus
    a year when one is found in parentheses), looked up on TMDB, and the
    first hit is linked to the file. A Movie row, its certification and its
    genres are created when the movie is not in the database yet. Files with
    no TMDB hit get a DNEProblem recorded on their problem set.

    :param count: number of leading candidates to skip (used as the slice
        start) and the starting value of the progress counter.
    :returns: None. Returns early when TMDB raises ``TmdHttpError``.
    """
    # grab all video files from things with Movie in the path name,
    # excluding things whose filename begin with '.' or '_'
    print("Filtering out non-({})".format(File.videoEndings))
    candidates = File.objects.filter(
        filenameend__regex=r'({})'.format(File.videoEndings))

    dirExcludes = "^[pP]orn"
    print("Filtering out things in ({}) directories, things not in movies".format(dirExcludes))
    candidates = (
        candidates
        .exclude(path__fullname__regex='({})'.format(dirExcludes))
        .filter(path__fullname__icontains='Movies')
        .exclude(filename__istartswith='.')
        .exclude(filename__istartswith='_')
    )

    # We should now have all likely video files. Narrow to names shaped like
    #   Filename[ ([Director [- ]]Year)].filenameend
    # so that they can be used with tmdb/imdb.
    print("Narrowing down filenames a little further to deal with \"(director - year)\" construction")
    # filter() returns a new QuerySet, so the result has to be re-bound or
    # the narrowing is silently dropped.
    candidates = candidates.filter(
        filename__regex=r'(.)+( \(([a-zA-Z]* (- )?)?[12][0-9][0-9][0-9]\)\))?.(.)*')

    total = len(candidates)
    print("{:d} files to check. Here we go...".format(total))

    for candidate in candidates[count:]:
        if candidate.goodfile == 0:
            print("Marked as bad file; skipping...")
            continue

        pset = clean_slate(candidate)
        if pset is None:
            # The problem-recording code below needs a problem set.
            continue
        count += 1

        # skip all of this if the file already has a movie
        print(candidate.id)
        try:
            if candidate.MIDs is not None:
                print(" Candidate file %s is already recognized; moving on!" % candidate.id)
                continue
        except ObjectDoesNotExist:
            # an old movie file was deleted
            print(" Previous movie no longer extant, resetting link...")
            candidate.MIDs = None
            candidate.save()

        # get rid of the file extension
        print("#%d out of %d" % (count, total))
        print(" Candidate (ID %d): %s " % (candidate.id, candidate))
        print(" slicing off extension...")
        stem, dot, _extension = candidate.filename.rpartition('.')
        # Without a dot there is no extension to drop; keep the whole name.
        info = stem if dot else candidate.filename

        # some file names use '.' instead of spaces
        info = u" ".join(info.split('.'))
        # [title, text inside the parentheses, remainder] when "(...)" exists
        info = re.split(r"\((.*)\)", info)
        # '_' and '-' are treated as spaces too
        probablyTitle = info[0].rstrip().replace('_', ' ').replace('-', ' ')
        # ignore anything between {}
        probablyTitle = re.sub(r'{.*}', '', probablyTitle)
        # collapse the runs of spaces left behind by the replacements above
        probablyTitle = re.sub(r' {2,}', ' ', probablyTitle)

        # strip release/source tags that are not part of the title
        print(" Stripping out retarded information...")
        releaseTags = ['[', ']', ' dvdrip', ' dvdscr', ' hddvd', ' dvd',
                       ' hdtv', ' tv', ' bluray', ' ts']
        for tag in releaseTags:
            probablyTitle = probablyTitle.replace(tag, '')
        # drop resolution markers such as " 720p" or " 1080p"
        probablyTitle = re.sub(r" \d{3,4}p", '', probablyTitle)

        # get some meta-data
        print(" Stripping metadata out of title, if it's there.")
        year = ""
        if len(info) > 1:
            meta = re.search(r'((?P<DIRECTOR>(.+)) - )?(?P<YEAR>\d{4})', info[1])
            if meta is not None:
                year = meta.group('YEAR')
                print(" Found year data.")
            else:
                print(" No year data.")

        # find movies that match the title
        banner = " Querying TMDB... (%s) " % probablyTitle
        print(banner.encode('utf-8'))
        try:
            movies = search("%s %s" % (probablyTitle, year))
        except TmdHttpError as e:
            print(" TMDB not available: \n\t%s" % e)
            return

        if len(movies) > 0:
            print(" Found something!")
            candidate.remove_dne_problem()
        else:
            # add problem for later perusal
            candidate.remove_dne_problem()
            prob = DNEProblem()
            prob.file = candidate
            prob.save()
            pset.dneproblem_set.add(prob)
            pset.save()
            print(" No love. Moving on!")
            continue

        # only take the first result, which is the most likely
        movieresult = movies[0]

        # now, get the info and put in the DB - if it's not already there.
        try:
            checker = Movie.objects.get(pk=int(movieresult['id']))
        except ObjectDoesNotExist:
            # this exception means it's a new movie
            checker = None

        if checker is not None:
            print(" Movie already in database; no new entry made.")
            print(" Checking to see if this is a new file...")
            if candidate in checker.files.all():
                print(" Not a new file, moving on.")
            else:
                print(" New file! adding to list of sources...")
                checker.files.add(candidate)
                checker.save()
            continue

        movie = getMovieInfo(movieresult['id'])
        print(" Movie not in database: %s\n" % movie['name'].encode('utf-8'))
        certName = "None" if movie['certification'] is None else movie['certification']
        latestEntry = Movie(
            id=int(movieresult['id']),  # for API compatibility
            rating=movie['rating'],
            votes=int(movie['votes']),
            name=movie['name'].encode('utf-8'),
            dateadded=datetime.datetime.now(),
            url=movie['url'],
            overview=movie['overview'] if movie['overview'] else 'No overview available',
            popularity=int(float(movie['popularity'])),
            # in case we ever want to use imdb data
            imdb_id=movie['imdb_id'] if movie['imdb_id'] else None,
            released=movie['released'] if movie['released'] else None,
            adult=True if movie['adult'] == 'true' else False,
            director=movie['cast']['director'][0]['name']
            if 'director' in movie['cast'] else 'Unknown',
            runtime=str(datetime.timedelta(minutes=int(movie['runtime'])))
            if movie['runtime'] else None,
        )

        # setting images for the movie, falling back to static placeholders
        images = movie['images']
        try:
            # read from the same record the guard inspects
            if len(images) > 1 and 'poster' in images[1]:
                latestEntry.backdrop = images[1]['poster']
            else:
                latestEntry.backdrop = '/media/images/no_backdrop.jpg'
        except IndexError:
            latestEntry.backdrop = '/media/images/no_backdrop.jpg'
        try:
            if len(images) > 0 and 'cover' in images[0]:
                latestEntry.poster = images[0]['cover']
            else:
                # NOTE(review): movie is indexed like a dict everywhere else;
                # if it has no `.id` attribute this raises and the handler
                # below supplies the static placeholder - confirm intent.
                latestEntry.poster = '/imaging/no_poster/{}'.format(movie.id)
        except Exception:
            latestEntry.poster = '/media/images/no_poster.jpg'
        try:
            if len(images) > 0 and 'thumb' in images[0]:
                latestEntry.thumb = images[0]['thumb']
            else:
                latestEntry.thumb = '/media/images/no_thumb.jpg'
        except Exception:
            latestEntry.thumb = '/media/images/no_thumb.jpg'

        print(" adding %s to movie's file set..." % candidate)
        latestEntry.files.add(candidate)

        # we have to save here, or the genre loop below will fail due to no
        # entry in the movies table
        try:
            latestEntry.save()
        except Exception:
            print(" Something went wrong; moving on.")
            prob = SavingProblem()
            prob.file = candidate
            prob.save()
            pset.savingproblem_set.add(prob)
            pset.save()
        # NOTE(review): this also runs right after a failed save, and the
        # code below keeps using the unsaved entry - confirm whether the
        # failure branch was meant to skip to the next candidate.
        candidate.remove_saving_problem()

        print(" setting %s to movie's certification..." % movie['certification'])
        cert, certCreated = MovieCert.objects.get_or_create(cert=certName)
        if certCreated:
            print(" Found a new cert, adding to database...")
        latestEntry.cert = cert

        print(" adding genres to movie's genres...")
        if 'genre' in movie['categories']:
            for genre in movie['categories']['genre']:
                newGenre, genreCreated = MovieGenre.objects.get_or_create(name=genre)
                if genreCreated:
                    print(" Found a new genre, adding it to database...")
                # add movie to genre and vice versa, then save the genre
                latestEntry.genres.add(newGenre)
                newGenre.movies.add(latestEntry)
                newGenre.save()
        else:
            latestEntry.genres.add(MovieGenre.objects.get(name="None"))
        latestEntry.save()
def process_movie(path, conf, facts): '''\ Retrieve and write metadata for this movie. ''' # check if metadata has already been written for this movie if is_movie_metadata_complete(path, conf): return # no metadata yet, so fetch it try: print '\tRetrieving movie metadata...' movie_title = facts['movie_title'] # the .decode call is necessary because the series title may have non- # ASCII characters in it. In Linux, path names are UTF-8 encoded, so # we need to tell Python that so it can use that information for # encoding later. results = tmdb.search(movie_title.decode('utf-8')) if results: # using .info() returns the full record, not just a common subset result = results[0].info() else: print '\t\t[ERROR] No matches found for the title \'%s\'' % movie_title return # data has been fetched; write it out xml_path = get_movie_metadata_path(path) # the .get method is used for non-essential attributes xml_root = ET.Element('Title') x = ET.SubElement(xml_root, 'LocalTitle') x.text = result['name'] x = ET.SubElement(xml_root, 'OriginalTitle') x.text = result['original_name'] x = ET.SubElement(xml_root, 'Description') x.text = result.get('overview') x = ET.SubElement(xml_root, 'Tagline') x.text = result.get('tagline') x = ET.SubElement(xml_root, 'IMDBId') x.text = result.get('imdb_id') # parse the production year manually date_released = result.get('released') if date_released: try: date_released = datetime.strptime('%Y-%m-%d', date_released) x = ET.SubElement(xml_root, 'ProductionYear') x.text = date_released.year except ValueError, e: # could not parse the date; whatever. 
pass x = ET.SubElement(xml_root, 'IMDBrating') x.text = result.get('rating') x = ET.SubElement(xml_root, 'MPAARating') x.text = result.get('certification') persons = ET.SubElement(xml_root, 'Persons') cast = result.get('cast', { }) for actor in cast.get('actor', [ ]): try: actor_id = actor['id'] actor_name = actor['name'] actor_role = actor['character'] person = ET.SubElement(persons, 'Person') # the ID field isn't used by Media Browser, but seems useful for # other uses. x = ET.SubElement(person, 'Id') x.text = actor_id x = ET.SubElement(person, 'Type') x.text = 'Actor' x = ET.SubElement(person, 'Name') x.text = actor_name x = ET.SubElement(person, 'Role') x.text = actor_role except KeyError, e: # incomplete metadata, meh. pass