def _normalize_imdb_id(imdb_id):
    """Return imdb_id in the 'tt' + zero-padded 7 digit form used in the URL.

    Strings may be 'tt' followed by digits, or digits only; integers are
    formatted directly. Values of any other type are returned unchanged.

    Raises:
        ValueError: the string is not an optional 'tt' followed by digits
    """
    if isinstance(imdb_id, basestring):
        match = re.match(r'^(?:tt)?([0-9]+)$', imdb_id)
        if match is None:
            raise ValueError('Invalid IMDb ID ' + imdb_id)
        # Re-formatting an already canonical 'tt1234567' yields the same
        # string, so one pattern covers all accepted spellings.
        return 'tt%07d' % int(match.group(1))
    # bool is a subclass of int but is not a meaningful ID; leave it alone.
    if isinstance(imdb_id, (int, long)) and not isinstance(imdb_id, bool):
        return 'tt%07d' % imdb_id
    return imdb_id


def tmdb3_search_by_imdb_id(imdb_id):
    """Retrieve movie metadata by IMDb ID through TMDb

    Normalizes the IMDb ID, queries TMDb's /3/find endpoint with
    external_source=imdb_id, parses the first entry of 'movie_results'
    with tmdb_parse_result and adds the IMDb ID and IMDb rating to it.

    Args:
        imdb_id(int|str|unicode): IMDb ID of the movie to be searched,
            either as a number or as a string with or without the
            'tt' prefix

    Returns:
        movie(dict): the dictionary built by tmdb_parse_result, with
            'imdb_id' set to the normalized ID string and 'imdb_rating'
            set to the value of imdb_rating_by_id(imdb_id);
            None if the lookup failed for any reason.
            The remaining keys come from tmdb_parse_result; the original
            documentation lists title, release, cast, crew and
            tomato_rating - verify against that function.

    Raises:
        None: every exception is caught here, reported on stdout and in
            the log, and None is returned instead
    """
    try:
        imdb_id = _normalize_imdb_id(imdb_id)
        print('TMDb: %s' % imdb_id)

        # NOTE(review): the API key is part of the URL and therefore ends
        # up in the debug log below.
        url = 'https://api.themoviedb.org/3/find/%s' \
            '?external_source=imdb_id&api_key=%s' % \
            (imdb_id, TMDbKey.get_solo().key)
        log.debug('%s tmdb by imdb id %s' % (imdb_id, url))

        response = urllib2.urlopen(url, timeout=5)
        try:
            res = json.load(response)
        finally:
            # release the connection even if the body is not valid JSON
            response.close()

        if res is not None:
            movie = tmdb_parse_result(res['movie_results'][0])
            movie['imdb_id'] = imdb_id
            movie['imdb_rating'] = imdb_rating_by_id(imdb_id)
            return movie
    except (KeyError, IndexError) as e:
        # An empty 'movie_results' list raises IndexError; a response
        # without that key raises KeyError. Both mean "no such movie".
        print('%s  TMDb: IMDb ID does not match any known movie.'
              % (imdb_id,))
        print(e)
    except Exception:
        print("TMDb(I): %s Error retrieving movie metadata!" % imdb_id)
        log.error("TMDb(%s): %s" % (imdb_id, traceback.format_exc()))
    return None
def setup(request):
    """Setup for first-use

    Without POST data the setup page is rendered with the values that are
    currently stored. With POST data every submitted setting is validated;
    each valid setting is saved on its own, even when other settings fail.

    Args:
        request: the incoming request; request.POST is read for the keys
            'ID', 'HDDName', 'HDDRoot', 'MovieFolder', 'TMDB_KEY',
            'OpenSubID' and 'OpenSubKey'

    Returns:
        the rendered 'hdd_indexer/help.html' (welcome page) when all
        submitted settings are valid, otherwise the rendered
        'hdd_indexer/setup.html' with the submitted values and, after a
        failed POST, an error message naming the invalid settings
    """
    if not request.POST:
        log.info('served setup page')
        return render(
            request,
            'hdd_indexer/setup.html',
            {
                'RegistrationKey': RegistrationKey.get_solo().key,
                'hdd_name': HDDName.get_solo().name,
                'hdd_root': HDDRoot.get_solo().path,
                'movie_folder': MovieFolder.get_solo().relpath,
                'opensub_id': OpenSubKey.get_solo().uid,
                'opensub_key': OpenSubKey.get_solo().key,
                'tmdb_key': TMDbKey.get_solo().key,
                'error': False,
                'err_msg': '',
            }
        )

    error = False
    err_msg = 'Validation errors have been found: '
    log.info('POST: preferences and settings in setup')

    # NOTE(review): the registration, TMDb and OpenSubtitles keys are
    # written to the log in clear text below.

    # registration key (optional, stored without further validation)
    registration_key = request.POST.get('ID', '')
    if registration_key:
        registration_key_db = RegistrationKey.get_solo()
        registration_key_db.key = registration_key
        registration_key_db.save()
        log.info('registration key = %s saved to db' % registration_key)

    # hdd name: letters, digits, underscore and hyphen only.
    # The range must be A-Z; A-z would also admit [ \ ] ^ and backtick.
    hdd_name = request.POST.get('HDDName', '')
    if re.match(r'^[0-9a-zA-Z_-]+$', hdd_name):
        hdd_name_db = HDDName.get_solo()
        hdd_name_db.name = hdd_name
        hdd_name_db.save()
        log.info('hdd_name: %s saved to db' % hdd_name)
    else:
        error = True
        err_msg = ' '.join((err_msg, 'HDD Name,'))
        log.error('%s is not a valid hdd_name' % hdd_name)

    # hdd root: must exist on disk
    hdd_root = request.POST.get('HDDRoot', '')
    if path.exists(hdd_root):
        hdd_root_db = HDDRoot.get_solo()
        hdd_root_db.path = hdd_root
        hdd_root_db.save()
        log.info('hdd_root = %s saved to db' % hdd_root)
    else:
        error = True
        err_msg = ' '.join((err_msg, 'HDD Root,'))
        log.error('%s is not a valid path' % hdd_root)

    # movie folder: must exist on disk
    # NOTE(review): the value is stored as 'relpath' but checked as given,
    # i.e. not joined with hdd_root - confirm this is intended.
    movie_folder = request.POST.get('MovieFolder', '')
    log.info('POST: movie_folder = %s' % movie_folder)
    if path.exists(movie_folder):
        movie_folder_db = MovieFolder.get_solo()
        movie_folder_db.relpath = movie_folder
        movie_folder_db.save()
        log.info('movie_folder = %s saved to db' % movie_folder)
    else:
        error = True
        err_msg = ' '.join((err_msg, 'Movie Folder,'))
        log.error('%s is not a valid path' % movie_folder)

    # tmdb key: only a minimum length is enforced
    # TODO: check tmdb key is valid
    tmdb_key = request.POST.get('TMDB_KEY', '')
    log.info('POST: tmdb_key = %s' % tmdb_key)
    if len(tmdb_key) >= 5:
        tmdb_db = TMDbKey.get_solo()
        tmdb_db.key = tmdb_key
        tmdb_db.save()
        log.info('tmdb_key = %s saved to db' % tmdb_key)
    else:
        error = True
        err_msg = ' '.join((err_msg, 'TMDb Key,'))
        log.error('%s is not a valid tmdb_key' % tmdb_key)

    # opensub: optional, validated only when both id and key are given
    # TODO: check opensub key is valid
    opensub_id = request.POST.get('OpenSubID', '')
    opensub_key = request.POST.get('OpenSubKey', '')
    log.info('opensub id:%s key:%s' % (opensub_id, opensub_key))
    if opensub_id and opensub_key:
        if len(opensub_id) >= 5 and len(opensub_key) >= 5:
            opensub_db = OpenSubKey.get_solo()
            opensub_db.uid = opensub_id
            opensub_db.key = opensub_key
            opensub_db.save()
            log.info('opensub id:%s key:%s saved to db' % (
                opensub_id, opensub_key
            ))
        else:
            error = True
            err_msg = ' '.join((err_msg, 'OpenSubtitles ID and Key,'))
            # logged as an error like every other validation failure
            log.error('opensub id:%s key:%s are not valid' % (
                opensub_id, opensub_key
            ))

    if error is False:
        log.info('setup complete, redirected to welcome page')
        return render(
            request,
            'hdd_indexer/help.html',
            {
                'welcome': True,
            }
        )

    log.error('setup input has errors, redirect to setup page')
    return render(
        request,
        'hdd_indexer/setup.html',
        {
            # pass the stored key string, not the model class itself
            'RegistrationKey': RegistrationKey.get_solo().key,
            'hdd_name': hdd_name,
            'hdd_root': hdd_root,
            'movie_folder': movie_folder,
            'opensub_id': opensub_id,
            'opensub_key': opensub_key,
            'tmdb_key': tmdb_key,
            'error': error,
            'err_msg': err_msg,
        }
    )
def _run():
    """Run the loader

    Downloads metadata from online sources for movies in database.
    Movies are processed in batches: each batch is put on a job queue,
    worker threads running _load are started, and once the job queue has
    been fully processed the results are saved to the database from this
    thread only. The loop stops early when loader_status('STATUS')
    returns False.

    The 'STATUS' flag is always reset to False on exit, also when an
    exception aborts the run; the exception itself still propagates.

    Args:
        None

    Returns:
        None

    Raises:
        None
    """
    try:
        tmdb.API_KEY = TMDbKey.get_solo().key
        t_size = 5  # number of threads, and movies per batch
        q = Queue.LifoQueue()  # download job queue
        m = Queue.Queue()  # movies to be saved to database
        movies = list(Movie.objects.all())
        log.info('started load run with %s movies' % len(movies))

        loader_status('MOVIES_EVALUATED', 0)
        loader_status('METADATA_DOWNLOADED', 0)
        loader_status('MOVIES_SKIPPED', 0)

        for m_start in range(0, len(movies), t_size):
            if loader_status('STATUS') is False:
                break
            m_list = movies[m_start:m_start + t_size]

            # initialize the job queue
            for movie in m_list:
                log.debug('%s queued' % movie.title)
                q.put(movie)

            # NOTE(review): a fresh set of threads is started for every
            # batch; whether the previous ones have exited depends on
            # _load, which is not visible here.
            log.debug('starting %s threads' % t_size)
            for _ in range(t_size):
                thread = Thread(target=_load, args=(q, m))
                thread.daemon = True
                thread.start()

            # wait for threads to finish
            q.join()

            # save movies to database
            # When movies are saved from within threads, SQLite throws
            # concurrency errors since it cannot handle so many locks.
            # An alternate solution is to download the data using threads,
            # and then to save movies in a single thread.
            while not m.empty():
                movie, data = m.get()
                # NOTE(review): the old row is deleted before the new data
                # is saved, so a failed save leaves the movie out of the
                # database (it is only reported as skipped).
                movie.delete()
                try:
                    movie_save(data)
                    log.info('%s saved to database' % data['title'])
                except Exception:
                    log.error('error saving %s to database.' % data['title'])
                    log.error(traceback.format_exc())
                    loader_status('SKIPPED', movie.title)
                    log.debug('%s put in skipped list' % movie.title)
                m.task_done()

            log.info('%s items processed of %s' % (
                len(m_list) + m_start, len(movies)
            ))
    finally:
        # never leave the loader marked as running
        loader_status('STATUS', False)