Exemplo n.º 1
0
def imdb_search_by_name(request, format, name):
    '''
    Search IMDb for people matching ``name`` and send back their names
    and IMDb ids.

    request -- incoming HTTP request; only XHR calls (``is_ajax()``) are served.
    format  -- 'json' or 'xml' (xml serialization is not implemented yet).
    name    -- the person name handed to ``IMDb.search_person``.

    Returns an HttpResponse. The JSON payload is a dict with a ``list`` of
    ``{'id': ..., 'name': ...}`` entries and ``error: True`` when the IMDb
    lookup raised. Non-XHR calls and unsupported formats get status 400.
    '''
    if not request.is_ajax():
        # If you want to prevent non XHR calls
        return HttpResponse(status=400)

    if format not in ('xml', 'json'):
        # An unknown format used to fall through both format branches and
        # crash on the unbound `data`/`mimetype` locals; reject it instead.
        return HttpResponse(status=400)

    info = {'list': []}
    try:
        ia = IMDb('httpThin')
        for actor in ia.search_person(name):
            info['list'].append({
                'id': actor.personID,
                'name': actor['long imdb canonical name'],
            })
    except Exception:
        # Best effort: entries collected before the failure are kept and the
        # client is told the list may be incomplete.
        info['error'] = True

    if format == 'xml':
        mimetype = 'application/xml'
        # TODO xml serialize
        data = 'Not implemented'
    else:
        mimetype = 'application/javascript'
        data = json.dumps(info)

    return HttpResponse(data, mimetype)
Exemplo n.º 2
0
def getMissing(idMovieObj):
    """Look up on IMDb every movie that has no cached JSON file yet.

    For each ``id -> movie`` pair of ``idMovieObj`` whose
    ``./DB/MovieDB/<id>.json`` file does not exist, the stripped movie title
    is searched on IMDb. The URL of the first hit is stored on
    ``movie.imdb_url``, printed, and appended to ``./missing.txt`` as
    ``"<id> <url>"``. Titles without any hit are counted and reported on
    stdout.

    idMovieObj -- mapping whose keys convert with ``int()`` and whose values
                  expose a ``title`` attribute.
    """
    ia = IMDb()
    path = "./missing.txt"
    not_found = 0

    # The context manager closes the log even when an IMDb call raises,
    # which the previous open()/close() pair did not guarantee.
    with open(path, 'a') as missing_log:
        for raw_id, movie in idMovieObj.iteritems():
            movie_id = int(raw_id)

            cached_json = "./DB/MovieDB/%s.json" % str(movie_id)
            if os.path.exists(cached_json):
                continue

            results = ia.search_movie(movie.title.strip())
            try:
                first_hit = results[0]
            except IndexError:
                not_found += 1
                print("Missing %s" % (not_found))
                continue

            url = ia.get_imdbURL(first_hit)
            movie.imdb_url = url

            missing_log.write(str(movie_id) + " " + url + "\n")
            # Flush per entry so progress survives an interrupted run.
            missing_log.flush()
            print(url)

    print("Missing Done!")
Exemplo n.º 3
0
def get_byURL(url, info=None, args=None, kwds=None):
    """Build the object (Movie, Person, Character or Company) that the
    given IMDb URL points to.

    info selects the info set to retrieve; args and kwds are the positional
    and keyword arguments used to initialize the data access system.
    None is returned when the URL cannot be parsed or its id prefix is not
    one of tt/nm/ch/co; retrieving the data may raise exceptions."""
    init_args = [] if args is None else args
    init_kwds = {} if kwds is None else kwds
    access = IMDb(*init_args, **init_kwds)

    found = _re_imdbIDurl.search(url)
    if not found:
        return None

    prefix, imdb_id = found.group(1), found.group(2)
    # Map each id prefix to the accessor that loads that kind of object.
    getter_names = {
        'tt': 'get_movie',
        'nm': 'get_person',
        'ch': 'get_character',
        'co': 'get_company',
    }
    getter_name = getter_names.get(prefix)
    if getter_name is None:
        return None
    return getattr(access, getter_name)(imdb_id, info=info)
Exemplo n.º 4
0
def getTitles():
    """Fill in the missing IMDb and Netflix titles of every stored Movie.

    Each movie is printed, completed where its ``imdb_title`` or
    ``netflix_title`` is an empty string, and saved. A failing Netflix
    lookup is skipped after a 10 second pause; the movie is still saved.
    """
    # NOTE(review): API credentials are hard-coded in source control; they
    # should be moved to configuration and rotated.
    APP_NAME = 'rankyourfavs'
    API_KEY = 'smucdewbn2j8rp3rvegmp8y6'
    API_SECRET = 'DvstJpTa7f'

    netflix = NetflixAPIV2(APP_NAME, API_KEY, API_SECRET)
    ia = IMDb()

    for movie in Movie.objects.all():
        print(movie.imdb_title)
        if movie.imdb_title == "":
            imdb_info = ia.get_movie(movie.imdb_id)
            movie.imdb_title = imdb_info['title']
        if movie.netflix_title == "":
            title_url = "http://api-public.netflix.com/catalog/titles/movies/" + str(movie.netflix_id)
            try:
                netflix_info = netflix.get_title(title_url)
                movie.netflix_title = netflix_info['catalog_title']['title']['title_short']
            except Exception:
                # `except Exception` (not a bare except) keeps Ctrl-C working.
                # The pause is presumably a back-off for API rate limiting.
                time.sleep(10)
        movie.save()
Exemplo n.º 5
0
    def __init__(self, imdbid):
        """Fetch the IMDb record for ``imdbid`` and cache the fields used later.

        Up to three attempts are made, 20 seconds apart. On success the
        instance carries ``movie``, ``title``, ``HD`` (None), ``genre``,
        ``shortdescription``, ``description``, ``date`` and ``xml``.
        If every attempt fails the error type is printed and the instance is
        left without (some of) these attributes.

        NOTE(review): any exception triggers a retry, including a KeyError
        for a field IMDb simply does not have (e.g. 'plot outline').
        """
        print("Fetching info for imdb id " + str(imdbid))
        max_attempts = 3
        for attempt in range(max_attempts):
            try:
                imdb = IMDb()
                self.movie = imdb.get_movie(imdbid)

                self.title = self.movie['title']
                self.HD = None

                self.genre = self.movie['genre']

                self.shortdescription = self.movie['plot outline']
                if 'plot' in self.movie.keys():
                    # Keep only the text before the '::' separator.
                    self.description = self.movie['plot'][0].split('::')[0]
                else:
                    self.description = self.shortdescription

                self.date = str(self.movie['year'])

                # Generate XML tags for Actors/Writers/Directors/Producers
                self.xml = self.xmlTags()
                break
            except Exception:
                print(sys.exc_info()[0])
                # Waiting only makes sense when another attempt follows.
                if attempt < max_attempts - 1:
                    time.sleep(20)
Exemplo n.º 6
0
 def get_rating(self, speech, language,  regex):
     """Speak the IMDb rating of the movie named in the last regex group.

     Only handled for the languages "en-GB" and "en-US"; for any other
     language nothing is said and the request is not completed here.
     The first IMDb search hit is used. Every handled path ends with
     ``self.complete_request()``.
     """
     if language != "en-GB" and language != "en-US":
         return

     MovieTitle = regex.group(regex.lastindex).strip()
     ia = IMDb()
     search_result = ia.search_movie(MovieTitle)
     if not search_result:
         self.say("Sorry, I could not find any information for " + MovieTitle)
         self.complete_request()
         return

     movie_info = search_result[0]
     ia.update(movie_info)

     # A missing rating is checked explicitly instead of wrapping the whole
     # reply in a bare except, which also hid unrelated errors.
     MovieRating = movie_info.get('rating')
     if MovieRating is None:
         self.say("Sorry. This movie has not yet been rated.")
     elif MovieRating < 6:
         self.say("Rating: " + str(MovieRating) + " out of 10. You probably should not see this movie.")
     elif MovieRating < 8:
         self.say("Rating: " + str(MovieRating) + " out of 10. I recommend you see this movie.")
     else:
         self.say("Rating: " + str(MovieRating) + " out of 10. This movie is a must-see!")
     self.complete_request()
Exemplo n.º 7
0
def check_film(film, actors_ids, out):
    """Check whether all of ``actors_ids`` appear in the cast of ``film``.

    The full movie record is fetched from IMDb, its rating is stored on
    ``film.rate`` and the pair ``(all_actors_present, film)`` is put on
    ``out``.
    """
    details = IMDb().get_movie(film.getID())
    cast_ids = {member.getID() for member in details['cast']}
    everyone_present = cast_ids.issuperset(actors_ids)
    film.rate = details.get('rating')
    out.put((everyone_present, film))
Exemplo n.º 8
0
def handle_url(bot, user, channel, url, msg):
    """Handle IMDB urls.

    When ``url`` is an IMDb title URL, the movie is fetched and a one-line
    summary (title, rating, votes, genres, top/bottom rank) is said on
    ``channel``. Returns the result of ``bot.say``, or None when IMDb
    support is unavailable or the URL is not an IMDb title URL.
    """
    if not has_imdb:
        return
    # Raw string avoids invalid-escape warnings; https links are accepted too.
    m = re.match(r"https?://.*?\.imdb\.com/title/tt([0-9]+)/?", url)
    if not m:
        return

    i = IMDb()
    movie = i.get_movie(m.group(1))
    title = movie['long imdb title']
    rating = movie.get('rating', 0.0)
    votes = movie.get('votes', 'no')

    rank = ""
    toprank = movie.get('top 250 rank')
    if toprank:
        rank = "Top 250: #%d" % toprank
    bottomrank = movie.get('bottom 100 rank')
    if bottomrank:
        # A bottom-100 rank takes precedence when both are present.
        rank = "Bottom 100: #%d" % bottomrank

    # Titles without genre data used to crash on join(None).
    genre = "(" + "/".join(movie.get('genres') or []) + ")"

    msg = "[IMDB] %s - Rating: %.1f (%s votes) %s %s" % (title, rating, votes, genre, rank)
    msg = msg.encode("UTF-8")

    return bot.say(channel, msg)
Exemplo n.º 9
0
 def get_director(self, speech, language,  regex):
     """Speak the IMDb rating of the movie named in the last regex group.

     NOTE(review): despite its name this method reports the rating, not the
     director; the name is kept because callers reference it.

     Replies are in Chinese when ``language`` is "zh-CN" and in English
     otherwise. The first IMDb search hit is used and every path ends with
     ``self.complete_request()``.
     """
     chinese = language == "zh-CN"
     MovieTitle = regex.group(regex.lastindex).strip()
     ia = IMDb()
     search_result = ia.search_movie(MovieTitle)
     if not search_result:
         if chinese:
             # The title is concatenated as-is: encoding it to UTF-8 bytes
             # first made the unicode concatenation fail for non-ASCII titles.
             self.say(u"抱歉,我没找到“" + MovieTitle + u"”的信息。")
         else:
             self.say("Sorry, I could not find any information for " + MovieTitle)
         self.complete_request()
         return

     movie_info = search_result[0]
     ia.update(movie_info)

     # Unrated movies have no 'rating' key; answer instead of raising and
     # leaving the request uncompleted.
     MovieRating = movie_info.get('rating')
     if MovieRating is None:
         if chinese:
             self.say(u"抱歉,这部电影还没有评分。")
         else:
             self.say("Sorry. This movie has not yet been rated.")
     elif MovieRating < 6:
         if chinese:
             self.say(u"评级:" + str(MovieRating) + u"。不值一看。")
         else:
             self.say("Rating: " + str(MovieRating) + " out of 10. You probably should not see this movie.")
     elif MovieRating < 8:
         if chinese:
             self.say(u"评级:" + str(MovieRating) + u"。还可以,推荐一看。")
         else:
             self.say("Rating: " + str(MovieRating) + " out of 10. I recommend you see this movie.")
     else:
         if chinese:
             self.say(u"评级:" + str(MovieRating) + u"。必看好片!")
         else:
             self.say("Rating: " + str(MovieRating) + " out of 10. This movie is a must-see!")
     self.complete_request()
Exemplo n.º 10
0
 def movie_find_via_imdblib(self, stitle, syear):
     """Find the first IMDb search hit for ``stitle`` whose year is within
     one year of the searched year.

     stitle -- title to search for.
     syear  -- searched year, handed to ``ImdbFoundMovie`` as ``search_year``.

     Returns an ``ImdbFoundMovie`` for the first acceptable hit, or None
     when the search fails or nothing matches. Only hits of kind movie,
     tv series, episode or tv mini series are considered.
     """
     from imdb import IMDb
     ia = IMDb()
     try:
         movies = ia.search_movie(stitle)
     except Exception:
         # A failed search is treated as "nothing found".
         return None

     # Other kinds are skipped rather than asserted on: an assert either
     # aborts the whole search or vanishes under `python -O`.
     accepted_kinds = (u'movie', u'tv series', u'episode', u'tv mini series')
     for movie in movies:
         if movie['kind'] not in accepted_kinds:
             continue
         found_movie = self.ImdbFoundMovie(code=movie.movieID, title=movie['title'],
                                           year=movie['year'], search_title=stitle,
                                           result=self.ImdbFoundMovie.Result.MATCH,
                                           search_year=syear)
         if abs(found_movie.get_year_diff()) <= 1:
             return found_movie
     return None
Exemplo n.º 11
0
def search():
    """Search stored series and IMDb for ``query`` and return them as JSON.

    Reads ``query`` and ``limit`` (default 10) from the request arguments.
    Stored ``Series`` whose title contains the query come first, followed by
    IMDb hits of kind MOVIE or TV_SERIES that are not already present. At
    most ``limit`` items are returned under the ``movies`` key.

    Raises ValueError when ``limit`` is not an integer.
    """
    query = request.args.get("query")
    limit = int(request.args.get("limit", 10))

    movies = OrderedDict()
    if query:
        movies = OrderedDict(
            (movie.imdb_id, movie)
            for movie in Series.objects(title__icontains=query))

        imdb = IMDb()
        # Twice the limit is requested because some hits are filtered out.
        for movie in imdb.search_movie(query, results=limit * 2):
            if movie.movieID in movies:
                continue
            kind = movie['kind']
            if kind not in [MOVIE, TV_SERIES]:
                continue

            cls = Movie if kind == MOVIE else Series

            item = cls(imdb_id=movie.movieID)
            item.year = int(movie.get('year', 0)) or None
            item.title = movie['title']

            movies[item.imdb_id] = item

    # list() keeps the slice valid where dict.values() returns a view.
    selected = list(movies.values())[:limit]
    return jsonify(movies=[movie.json for movie in selected])
Exemplo n.º 12
0
def watch():
    """Record that a movie or a series episode was watched, then redirect.

    Form fields read: ``when`` (dd.mm.YYYY, default today), ``where``,
    ``language`` (default 'ru'), ``movie`` (IMDb id) and optionally
    ``episode`` in the form ``S<season>E<episode>``.

    Recording is best effort: any failure is logged and the user is still
    redirected to ``videos.index``.
    """
    import logging

    try:
        when = request.form.get('when', datetime.today().strftime('%d.%m.%Y'))
        when = datetime.strptime(when, '%d.%m.%Y')

        where = request.form.get('where') or None
        language = request.form.get('language') or 'ru'

        imdb_id = request.form.get('movie')
        imdb = IMDb()
        movie = imdb.get_movie(imdb_id)

        episode = request.form.get('episode')
        if episode:
            imdb.update(movie, "episodes")
            matches = match('^S([0-9]+)E([0-9]+)$', episode)
            if matches:
                # int() accepts leading zeros; stripping them first turned
                # "00" into '' and made the conversion fail silently.
                season = int(matches.group(1))
                episode = int(matches.group(2))

                View.watch_episode(movie, season, episode,
                                   when=when, where=where, language=language)
        else:
            View.watch(movie, when=when, where=where, language=language)

    except Exception:
        # Still best effort, but no longer invisible.
        logging.getLogger(__name__).exception("Could not record the view")

    return redirect(url_for('videos.index'))
Exemplo n.º 13
0
def handle(text, mic, profile):
    """Ask for a movie name and read out the IMDb details of the match the
    user confirms.

    The first five search hits are offered one at a time. For the confirmed
    hit the available details (rating, runtime, genres, plot, directors,
    producers, cast) are spoken and the "title (year)" headline is logged to
    Movie.CSV. If no hit is confirmed an apology is spoken.
    """
    mic.say('What movie?')
    movie_name = mic.activeListen()
    mic.say('Searching top five results for.  %s' %movie_name)
    ia = IMDb()
    candidates = ia.search_movie(movie_name)[:5]

    # One entry per optional detail: the IMDb key and how to phrase its value.
    sections = (
        ('rating', lambda value: 'Rating.  %s out of 10.  ' %value),
        ('runtimes', lambda value: 'Runtime.  %s minutes.  ' %value[0]),
        ('genres', lambda value: 'Genres.  %s.  ' %'.  '.join(value)),
        ('plot outline', lambda value: 'Plot.  %s  ' %value),
        ('director', lambda value: 'Directors.  %s.  ' %format_names(value)),
        ('producer', lambda value: 'Producers.  %s.  ' %format_names(value)),
        ('cast', lambda value: 'Cast.  %s.  ' %format_names(value)),
    )

    for movie in candidates:
        mic.say('Did you mean %s (%s)?' %(movie.get('title'), movie.get('year')))
        if not yes(mic.activeListen()):
            continue

        filename = "Movie.CSV"
        ia.update(movie)
        headline = '%s (%s).  ' %(movie.get('title'), movie.get('year'))
        spoken = headline
        for key, phrase in sections:
            value = movie.get(key)
            if value:
                spoken += phrase(value)
        mic.say(spoken)
        # Only the headline is logged, not the full spoken text.
        logdata(filename, headline)
        return

    mic.say('Unable to find information on the requested movie')
Exemplo n.º 14
0
def get_list_of_shows(a_directory, a_show_name, a_season_number, a_file_count):
    """Return the episode titles of one season of a TV show from IMDb.

    a_directory, a_file_count -- not used by this function.
    a_show_name     -- name searched on IMDb.
    a_season_number -- key of the season in the series' 'episodes' data.

    Returns ``('SUCCESS', [titles...])`` ordered by episode number, or
    ``('ERROR', rename_result.SHOW_NOT_FOUND_ERROR)`` when the search has no
    hit of kind 'tv series'.

    Raises KeyError when the season is not present in the IMDb data.
    """
    # query IMDb for the show
    db = IMDb()
    results = db.search_movie(a_show_name)

    # get the first tv show. there could be movies in the list
    series = next((item for item in results if u'tv series' == item['kind']), None)
    if series is None:
        # Covers both an empty result list and a list holding only
        # non-series hits (which used to crash in db.update).
        return ('ERROR', rename_result.SHOW_NOT_FOUND_ERROR, )

    # get the list of episodes
    db.update(series, 'episodes')
    season = series['episodes'][a_season_number]

    # Walk the episode numbers that actually exist instead of assuming they
    # run 1..n without gaps.
    episode_list = [season[number]['title'] for number in sorted(season)]

    return ('SUCCESS', episode_list)
Exemplo n.º 15
0
    def handle(self, *args, **options):
        """Import episode links from the JSON files given as arguments.

        ``settings.IMDB_ACCESS_SYSTEM`` selects the IMDb backend ("http" or
        "sql"); any other value raises CommandError. Each file holds a list
        of link dicts. A link is passed to ``self.process_link`` only when
        it names a known serie, has integer-convertible ``temp`` and ``epi``
        values and a ``lang``; otherwise a warning is issued. The series in
        ``self.not_found`` are printed at the end.
        """
        self.bot = options.get("bot")
        imdb_am = settings.IMDB_ACCESS_SYSTEM
        if imdb_am == "http":
            self.imdb = IMDb()
        elif imdb_am == "sql":
            self.imdb = IMDb("sql", settings.IMDB_ACCESS_DB_URI)
        else:
            raise CommandError("Incorrect configuration of IMDB_ACCESS_SYSTEM property")

        for f in args:
            print("Importing... %s" % f)
            # Close each input file as soon as it is parsed.
            with open(f) as source:
                data = json.load(source)
            for link in data:
                print(link)
                #Data checks
                if "serie" not in link or link["serie"] in self.not_found:
                    warn("Problems looking up serie %s" % link)
                elif "temp" not in link or "epi" not in link:
                    warn("Season and episode is not setted in %s" % link)
                elif "lang" not in link:
                    warn(u"Serie %s with link %s has not lang" % (link["serie"], link["links"]))
                else:
                    try:
                        #Check if temp and epi are ints
                        link["temp"] = int(link["temp"])
                        link["epi"] = int(link["epi"])
                    except (TypeError, ValueError):
                        warn("Erroneous temp or epi in %s" % link)
                    else:
                        self.process_link(**link)
        print("List of series not found:")
        for serie in self.not_found:
            print("\t%s" % serie)
Exemplo n.º 16
0
    def stats(self):
        """Collect collection statistics for display.

        Returns a dict with the ten most frequent directors and cast members
        (``top_directors``, ``top_actors``), how many stored movies are in
        IMDb's top 250 and bottom 100 lists, the total movie count and
        ``total_runtime`` (currently the constant 1).
        """
        def ranking_statement(where):
            # Ten persons with the most roles matching `where`.
            return select(['persons.id', func.count('roles.person_id').label('count')],
                          from_obj=['persons', 'roles'],
                          whereclause=where,
                          group_by=['persons.id'], order_by='count desc', limit=10)

        def ranked_people(statement):
            return DBSession.query(Person, 'count').from_statement(statement).all()

        def stored_count(movie_ids):
            return DBSession.query(Movie).filter(Movie.id.in_(movie_ids)).count()

        directors_statement = ranking_statement(
            "roles.person_id = persons.id AND roles.role_type = 'director'")
        actors_statement = ranking_statement(
            "roles.person_id = persons.id AND roles.role_type = 'cast'")

        result = {}
        result['top_directors'] = ranked_people(directors_statement)
        result['top_actors'] = ranked_people(actors_statement)

        ia = IMDb()
        top250_ids = [entry.movieID for entry in ia.get_top250_movies()]
        bottom100_ids = [entry.movieID for entry in ia.get_bottom100_movies()]

        result['top250_count'] = stored_count(top250_ids)
        result['bottom100_count'] = stored_count(bottom100_ids)
        result['total_count'] = DBSession.query(Movie).count()
        result['total_runtime'] = 1
        return result
Exemplo n.º 17
0
def airdates():
    """Return, as JSON, the air date of the next episode for each requested show.

    The request body is a JSON object mapping an id to a dict with
    ``imdb_id``, ``title`` and ``after`` (a ``[season, episode]`` pair).
    For every id the result holds either None or a dict with ``air_en``
    (earliest release date, YYYY-MM-DD), ``season`` and ``episode`` of the
    episode following ``after``.

    Returns ``'{}'`` when no config is loaded. The movie-id cache is saved
    even when processing fails.
    """
    if config is None:
        print('Config not found!!')
        return '{}'

    mmap = MovieMap(config['cache_file'])
    imdb = IMDb('sql', uri = config['imdb_db_url'])
    show_list = json.loads(request.get_data())
    res = {}
    # Passed to parser.parse as `default`.
    # NOTE(review): presumably fills in date fields missing from partial
    # release dates - confirm, and confirm the fixed 2016-07-15 is intended.
    default = datetime(2016, 7, 15)
    try:
        for show_id in show_list:
            res[show_id] = None
            rdata = show_list[show_id]
            if rdata['imdb_id'] not in mmap.data:
                movie_id = get_movie_id(imdb, rdata['title'], rdata['imdb_id'])
                if movie_id:
                    mmap.data[rdata['imdb_id']] = movie_id

            if rdata['imdb_id'] not in mmap.data:
                # Still unknown after the lookup: report and skip this show.
                print(rdata)
                continue

            movie = imdb.get_movie(mmap.data[rdata['imdb_id']])

            curr_season = rdata['after'][0]
            curr_episode = rdata['after'][1]
            next_start_season = curr_season
            next_start_episode = curr_episode

            if curr_episode+1 in movie['episodes'][curr_season]:
                next_start_episode = curr_episode + 1
            elif curr_season+1 in movie['episodes']:
                # Season finished: continue with the first episode of the next one.
                next_start_season = curr_season + 1
                next_start_episode = min(movie['episodes'][next_start_season].keys())
            else:
                continue

            try:
                next_episode = imdb.get_movie(movie['episodes'][next_start_season][next_start_episode].movieID)

                dates = []

                for release_date in next_episode['release dates']:
                    dates.append(parser.parse(release_date.split(':')[1], default = default))

                res[show_id] = dict(
                    air_en = min(dates).strftime('%Y-%m-%d'),
                    season = next_start_season,
                    episode = next_start_episode,
                )

            except (KeyError, IndexError, ValueError):
                # IndexError: release date without a ':' part; ValueError:
                # unparsable date or no dates at all (min of an empty list).
                # One bad show must not abort the whole response.
                logger.exception("Error during movie info fetch")

    finally:
        mmap.save()

    return json.dumps(res) + '\n'
Exemplo n.º 18
0
def get_bottom_movies(args):
    """Show IMDb's bottom 100 movies.

    With ``args.first`` set, the first entry is updated and its summary is
    printed; otherwise the ranking is listed via ``list_ranking`` with
    ``n=args.n``.
    """
    ia = IMDb()
    ranking = ia.get_bottom100_movies()
    if not args.first:
        list_ranking(ranking, n=args.n)
        return
    ia.update(ranking[0])
    print(ranking[0].summary())
Exemplo n.º 19
0
 def search_do(self, **kw):
     """Search IMDb for ``kw['movie_title']``.

     Sets ``tmpl_context.search_form`` and returns a dict with the search
     term and the IMDb search results.
     """
     tmpl_context.search_form = search_movie_form

     finder = IMDb()
     wanted = kw['movie_title']
     matches = finder.search_movie(wanted)
     return dict(search_term=wanted, results=matches)
Exemplo n.º 20
0
    def __init__(self, config, http = False):
        """Store ``config`` and create the IMDb handle.

        The handle is created with 'http' when ``http`` is truthy and with
        'mobile' otherwise.
        """
        log.info('Using IMDB provider.')

        self.config = config
        access_system = 'http' if http else 'mobile'
        self.p = IMDb(access_system)
Exemplo n.º 21
0
class imdbWrapper(movieBase):
    """Movie provider backed by IMDb (handle created with IMDb('mobile'))."""

    def __init__(self, config):
        """Store ``config`` and create the IMDb handle."""
        log.info('Using IMDB provider.')

        self.config = config

        self.p = IMDb('mobile')

    def conf(self, option):
        """Return ``option`` from the 'IMDB' section of the config."""
        return self.config.get('IMDB', option)

    def find(self, q, limit = 8, alternative = True):
        ''' Find movie by name; returns at most ``limit`` feed items.
        ``alternative`` is accepted but not used here. '''

        log.info('IMDB - Searching for movie: %s' % q)

        r = self.p.search_movie(q)

        return self.toResults(r, limit)

    def toResults(self, r, limit = 8, one = False):
        ''' Convert IMDb results into feed items.

        With ``one`` set, ``r`` is a single movie and a single feed item is
        returned. Otherwise ``r`` is iterated and a list holding the first
        ``limit`` converted movies is returned. '''
        if one:
            new = self.feedItem()
            new.imdb = 'tt' + r.movieID
            new.name = self.toSaveString(r['title'])
            # A result without a year yields year=None instead of raising
            # KeyError and aborting the whole conversion.
            new.year = r.get('year')

            return new

        results = []
        for nr, movie in enumerate(r, 1):
            results.append(self.toResults(movie, one = True))
            if nr == limit:
                break

        return results

    def findById(self, id):
        ''' Find movie by TheMovieDB ID; this provider always returns
        an empty list. '''

        return []


    def findByImdbId(self, id):
        ''' Find movie by IMDB ID (with or without the 'tt' prefix) '''

        log.info('IMDB - Searching for movie: %s' % str(id))

        r = self.p.get_movie(id.replace('tt', ''))
        return self.toResults(r, one = True)

    def findReleaseDate(self, movie):
        ''' Not implemented for this provider; returns None. '''
        pass
Exemplo n.º 22
0
def imdb_get_info(request, format, person_id):
    '''
    Look up a person on IMDb by id and send back the basic details.

    request   -- incoming HTTP request; only XHR calls (``is_ajax()``) are served.
    format    -- 'json' or 'xml' (xml serialization is not implemented yet).
    person_id -- IMDb person id as a string, without the 'nm' prefix.

    Returns an HttpResponse. The JSON payload may contain ``name``, ``bio``,
    ``headshot``, ``imdb_url`` and ``birth_date`` (each only when available)
    or ``error: True`` when the lookup failed. Non-XHR calls and unsupported
    formats get status 400.
    '''
    import socket

    if not request.is_ajax():
        # If you want to prevent non XHR calls
        return HttpResponse(status=400)

    if format not in ('xml', 'json'):
        # An unknown format used to crash on the unbound `data`/`mimetype`.
        return HttpResponse(status=400)

    # max retrys on this request
    MAX_RETRY = 3

    info = {}
    try:
        person = None
        # The socket timeout is process-wide: remember the previous value and
        # restore it even if the lookup raises, rather than forcing None.
        previous_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(5)
        try:
            for _attempt in range(MAX_RETRY):
                try:
                    ia = IMDb('http')
                    person = ia.get_person(person_id)
                    break
                except socket.timeout:
                    logger.warn("timeout")
                except IOError:
                    logger.warn("timeout")
                except IMDbDataAccessError:
                    logger.warn("imdb error")
        finally:
            socket.setdefaulttimeout(previous_timeout)

        if person is None:
            # Every attempt failed.
            info['error'] = True
        else:
            try:
                info['name'] = person['name'].replace(' - IMDb', '')
            except KeyError:
                pass

            try:
                info['bio'] = person['mini biography']
            except KeyError:
                pass

            try:
                info['headshot'] = person['headshot']
            except KeyError:
                pass

            info['imdb_url'] = "http://www.imdb.com/name/nm"+person_id+"/"

            try:
                info['birth_date'] = person.data['birth date']
            except KeyError:
                pass

    except Exception:
        info['error'] = True

    if format == 'xml':
        mimetype = 'application/xml'
        #TODO xml serialize
        data = 'Not implemented'
    else:
        mimetype = 'application/javascript'
        data = json.dumps(info)

    return HttpResponse(data, mimetype)
Exemplo n.º 23
0
	def search_film(self, kw):
		"""Print the first plot text of the first IMDb hit for ``kw``.

		Only the part of the plot before the '::' separator is printed.
		Nothing is printed when the search has no results.
		"""
		#make connection
		ia = IMDb()
		movie_list = ia.search_movie(kw)
		if not movie_list:
			# Indexing an empty result list used to raise IndexError.
			return
		movie = ia.get_movie(movie_list[0].movieID)
		# `or ['']` also covers a present but empty plot list.
		plot = (movie.get("plot") or [''])[0]
		print(plot.split('::')[0])
Exemplo n.º 24
0
def movie_finder(movie):
    """Search IMDb for ``movie``, print every hit and then the whole list,
    and return the hits as a list."""
    ia = IMDb()
    movie_list = list(ia.search_movie(movie))
    for hit in movie_list:
        print(hit)

    print(movie_list)
    return movie_list
Exemplo n.º 25
0
def fill_info():
    """Request the business info from IMDb for every title that has an imdb_id.

    The ids come from the ``title`` table through the custom ``IMDB``
    connection. The returned business data is not used here. A failing id
    is reported on stdout and the loop continues.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()

    ids = cust_conn.fetch_vec('SELECT imdb_id FROM title WHERE NOT imdb_id IS NULL;')
    for imdb_id in ids:
        try:
            imdb_conn.get_movie_business(imdb_id)
        except Exception:
            # `except Exception` keeps Ctrl-C able to stop this long loop.
            print("err %s \n" % imdb_id)
Exemplo n.º 26
0
def search_item(args):
    """Run the search described by the parsed command-line ``args``.

    args.type  -- 'keyword', 'movie', 'person', 'character' or 'company'.
    args.key   -- the search string.
    args.first -- show only the first hit (for keywords: list the movies of
                  the first keyword).
    args.n     -- number of results to list.

    Raises ValueError for an unsupported ``args.type``.
    """
    connection = IMDb()
    n = args.n if args.n is not None else DEFAULT_RESULT_SIZE

    if args.type == 'keyword':
        items = connection.search_keyword(args.key)
        if args.first:
            items = connection.get_keyword(items[0])
            list_results(items, type_='movie', n=n)
        else:
            print('  # keyword')
            print('=== =======')
            for i, keyword in enumerate(items[:n]):
                print('%(index)3d %(kw)s' % {'index': i + 1, 'kw': keyword})
        return

    # Accessor names are resolved lazily so only the requested one is looked up.
    searchers = {
        'movie': 'search_movie',
        'person': 'search_person',
        'character': 'search_character',
        'company': 'search_company',
    }
    if args.type not in searchers:
        # Previously this fell through and failed later on the unbound
        # `items` local with an unhelpful message.
        raise ValueError('unsupported search type: %r' % (args.type,))
    items = getattr(connection, searchers[args.type])(args.key)

    if args.first:
        connection.update(items[0])
        print(items[0].summary())
    else:
        # NOTE(review): unlike the keyword branch this passes args.n, not the
        # defaulted `n`; kept as-is because list_results is not visible here.
        list_results(items, type_=args.type, n=args.n)
Exemplo n.º 27
0
 def searchPeople(self, name, language):
     """Return the first IMDb person matching ``name``, fully updated.

     When nothing is found, the "dont_find_people" message for ``language``
     is spoken, the request is completed and None is returned.
     """
     finder = IMDb()
     matches = finder.search_person(name.encode("utf-8"))
     if matches:
         best_match = matches[0]
         finder.update(best_match)
         return best_match

     template = self.res["dont_find_people"][language]
     self.say(template.format(name))
     self.complete_request()
     return None
Exemplo n.º 28
0
def search_imdb_name(token):
    """Search IMDb for movies named ``token`` and log the hits at debug level.

    Each hit is logged with its long canonical title and movie id. An
    IMDbError is logged and swallowed. Nothing is returned.
    """
    ia = IMDb()
    logging.debug("Querying IMDB for movie name: %s", token)
    try:
        s_result = ia.search_movie(token)
        logging.debug(s_result)
        for item in s_result:
            # The message needs placeholders: passing the id as a bare extra
            # argument made logging fail to format the record.
            logging.debug("%s %s", item["long imdb canonical title"], item.movieID)
    except IMDbError as err:
        logging.debug(err)
Exemplo n.º 29
0
 def searchMovie(self, title, language):
     """Return the first IMDb movie matching ``title``, fully updated.

     When nothing is found, the "dont_find_movie" message for ``language``
     is spoken, the request is completed and None is returned.
     """
     finder = IMDb()
     matches = finder.search_movie(title.encode("utf-8"))
     if matches:
         best_match = matches[0]
         finder.update(best_match)
         return best_match

     template = self.res["dont_find_movie"][language]
     self.say(template.format(title))
     self.complete_request()
     return None
Exemplo n.º 30
0
def print_details(mid):
    """Pretty-print id, URL, title, year and rating of the IMDb movie ``mid``.

    ``mid`` is the numeric IMDb id as a string (it is concatenated into the
    URL). Raises KeyError when title, year or rating is missing.
    """
    movie = IMDb().get_movie(mid)
    details = OrderedDict([
        ('imdb_id', movie.getID()),
        ('imdb_url', 'http://www.imdb.com/title/tt' + mid + '/'),
        ('title', movie['title']),
        ('year', str(movie['year'])),
        ('imdb_rating', str(movie['rating'])),
    ])

    pprint(details)
# Script fragment: scrape the movie titles listed on the NOS NorteShopping
# cinema page and look each one up on IMDb.
# NOTE(review): `movies_list` is used below but never defined in the visible
# part of the file - presumably the start of this script is missing.
movies_ratings_list = []
# Format/version markers removed from titles before searching IMDb.
stop_words_movies = ['2D', '3D', '4DX', 'ATMOS', 'IMAX', 'VP', 'VO', 'VIP']

cartaz_norteshop_page = requests.get(
    "http://cinemas.nos.pt/cinemas/Pages/norteshopping.aspx")
cartaz_norteshop_soup = BeautifulSoup(cartaz_norteshop_page.text,
                                      'html.parser')

# Every <a class="list-item"> whose href starts with /Filmes/ is kept as a
# movie title.
cartaz_norteshop_a_links = cartaz_norteshop_soup.find_all('a',
                                                          class_='list-item')
for a_link in cartaz_norteshop_a_links:
    a_href = a_link['href']
    if re.match(r'/Filmes/', a_href):
        movies_list.append(a_link.text)

imdb_obj = IMDb()

for movie in movies_list:
    # Set to 1 when this title is already in movies_ratings_list.
    movie_flag = 0
    for stop_word in stop_words_movies:
        if stop_word in movie:
            movie = movie.replace(stop_word, '').strip(
            )  # Remove words that might throw off search on IMDB website
            # The duplicate check only runs for titles that contained a stop
            # word, because it is nested inside the `if` above.
            for movie_rating_tuple in movies_ratings_list:
                if movie in movie_rating_tuple[0]:
                    movie_flag = 1
    if movie_flag == 1:  # If the rating of the movie has already been obtained, move on to the next iteration
        continue
    movie_search_results = imdb_obj.search_movie(movie)
    if movie_search_results:
        # The first search hit is taken as the match; the fragment ends here.
        movie_obj = movie_search_results[0]
Exemplo n.º 32
0
from imdb import IMDb
import json

ia = IMDb()
data = {}
seriesIds = [
    "0060028", "0092455", "5171438", "0106145", "0112178", "0244365", "0069637"
]
count = 0

for sId in seriesIds:
    m = ia.get_movie(sId)
    seriesTitle = m["title"]
    data[seriesTitle] = {}
    data[seriesTitle]["movieID"] = m.movieID
    data[seriesTitle]["seasons"] = []

    ia.update(m, 'episodes')
    for season in m["episodes"].values():
        data[seriesTitle]["seasons"].append([])
        for epNumber, ep in season.items():
            epDetails = ia.get_movie(ep.movieID)
            data[seriesTitle]["seasons"][-1].append({
                "title":
                ep["title"],
                "episodeID":
                ep.movieID,
                "plot":
                epDetails.get("plot", None),
                "imdbRating":
                epDetails.get("rating", None),
Exemplo n.º 33
0
class FromIMDB(object):
    """
    This plugin enables generating entries based on an entity, an entity being a person, character or company.
    It's based on IMDBpy which is required (pip install imdbpy). The basic config requires just the IMDB ID of the
    required entity.

    For example:

        from_imdb: ch0001354

    Schema description:
    Other than ID, all other properties are meant to filter the full list that the entity generates.

    id: string (or list of strings) that relates to a supported entity type. For example: 'nm0000375'. Required.
    job_types: a string or list with job types from job_types. Default is 'actor'.
    content_types: A string or list with content types from content_types. Default is 'movie'.
    max_entries: The maximum number of entries that can return. This value's purpose is basically flood protection
        against unruly configurations that will return too many results. Default is 200.
    match_type: 'strict' (default) refreshes every item and keeps it only when its kind is one of content_types;
        'loose' keeps every item.

    Advanced config example:
        dynamic_movie_queue:
            from_imdb:
              id: co0051941
              job_types:
                - actor
                - director
              content_types: tv series
            accept_all: yes
            movie_queue: add

    """
    job_types = ['actor', 'actress', 'director', 'producer', 'writer', 'self', 'editor', 'miscellaneous',
                 'editorial department', 'cinematographer', 'visual effects', 'thanks', 'music department',
                 'in development', 'archive footage', 'soundtrack']

    content_types = ['movie', 'tv series', 'tv mini series', 'video game', 'video movie', 'tv movie', 'episode']

    # Maps a configured content type to the suffix used in person filmography keys ("<job> <suffix>")
    content_type_conversion = {
        'movie': 'movie',
        'tv series': 'tv',
        'tv mini series': 'tv',
        'tv movie': 'tv',
        'episode': 'tv',
        'video movie': 'video',
        'video game': 'video game'
    }

    # Maps a configured content type to the key looked up on character objects
    character_content_type_conversion = {
        'movie': 'feature',
        'tv series': 'tv',
        'tv mini series': 'tv',
        'tv movie': 'tv',
        'episode': 'tv',
        'video movie': 'video',
        'video game': 'video-game',
    }

    # Job types whose filmography is read from the bare job key, without a content type suffix
    jobs_without_content_type = ['actor', 'actress', 'self', 'in development', 'archive footage']

    imdb_pattern = one_or_more({'type': 'string',
                                'pattern': r'(nm|co|ch)\d{7}',
                                'error_pattern': 'Get the id from the url of the person/company you want to use,'
                                                 ' e.g. http://imdb.com/text/<id here>/blah'}, unique_items=True)

    schema = {
        'oneOf': [
            imdb_pattern,
            {'type': 'object',
             'properties': {
                 'id': imdb_pattern,
                 'job_types': one_or_more({'type': 'string', 'enum': job_types}, unique_items=True),
                 'content_types': one_or_more({'type': 'string', 'enum': content_types}, unique_items=True),
                 'max_entries': {'type': 'integer'},
                 'match_type': {'type': 'string', 'enum': ['strict', 'loose']}
             },
             'required': ['id'],
             'additionalProperties': False
             }
        ],

    }

    def prepare_config(self, config):
        """
        Converts config to dict form and sets defaults if needed

        :param config: a single IMDB id string, a list of ids, or a dict matching ``schema``
        :return: a new dict in which 'id', 'content_types' and 'job_types' are lists and every optional key is set.
            The object passed in is left untouched.
        """
        if isinstance(config, basestring):
            config = {'id': [config]}
        elif isinstance(config, list):
            config = {'id': config}
        else:
            # Shallow copy so that the defaults filled in below do not leak into the caller's config
            config = dict(config)
        if not isinstance(config['id'], list):
            config['id'] = [config['id']]

        config.setdefault('content_types', [self.content_types[0]])
        config.setdefault('job_types', [self.job_types[0]])
        config.setdefault('max_entries', 200)
        config.setdefault('match_type', 'strict')

        if isinstance(config.get('content_types'), str_types):
            log.debug('Converted content type from string to list.')
            config['content_types'] = [config['content_types']]

        if isinstance(config['job_types'], str_types):
            log.debug('Converted job type from string to list.')
            config['job_types'] = [config['job_types']]
        else:
            # Copy before the possible append below so a user supplied list is not modified in place
            config['job_types'] = list(config['job_types'])
        # Special case in case user meant to add actress instead of actor (different job types in IMDB)
        if 'actor' in config['job_types'] and 'actress' not in config['job_types']:
            config['job_types'].append('actress')

        return config

    def get_items(self, config):
        """
        Resolve every configured id and collect the items it yields

        :param config: prepared config (see ``prepare_config``)
        :return: set of items gathered from all ids; ids that cannot be resolved are logged and skipped
        """
        items = []
        for imdb_id in config['id']:
            try:
                entity_type, entity_object = self.get_entity_type_and_object(imdb_id)
            except Exception as e:
                log.error(
                    'Could not resolve entity via ID: {}. '
                    'Either error in config or unsupported entity. Error:{}'.format(imdb_id, e))
                continue
            # A lookup can come back empty handed (None); treat that as "no items" instead of failing on +=
            items += self.get_items_by_entity(entity_type, entity_object, config.get('content_types'),
                                              config.get('job_types'), config.get('match_type')) or []
        return set(items)

    def get_entity_type_and_object(self, imdb_id):
        """
        Return a tuple of entity type and entity object
        :param imdb_id: string which contains IMDB id
        :return: entity type, entity object (person, company, etc.)
        :raises ValueError: when the id does not start with one of the supported prefixes (nm, co, ch)
        """
        if imdb_id.startswith('nm'):
            person = self.ia.get_person(imdb_id[2:])
            log.info('Starting to retrieve items for person: %s' % person)
            return 'Person', person
        elif imdb_id.startswith('co'):
            company = self.ia.get_company(imdb_id[2:])
            log.info('Starting to retrieve items for company: %s' % company)
            return 'Company', company
        elif imdb_id.startswith('ch'):
            character = self.ia.get_character(imdb_id[2:])
            log.info('Starting to retrieve items for Character: %s' % character)
            return 'Character', character
        # Falling through used to return None, which surfaced as an unrelated unpacking error in the caller
        raise ValueError('Unsupported IMDB id prefix in "%s"' % imdb_id)

    def get_items_by_entity(self, entity_type, entity_object, content_types, job_types, match_type):
        """
        Gets entity object and return movie list using relevant method

        :return: item list for the entity, or an empty list for an unknown entity type
        """
        if entity_type == 'Company':
            return self.items_by_company(entity_object)
        elif entity_type == 'Character':
            return self.items_by_character(entity_object, content_types, match_type)
        elif entity_type == 'Person':
            return self.items_by_person(entity_object, job_types, content_types, match_type)
        return []

    def flatten_list(self, _list):
        """
        Gets a list of lists and returns a flat list

        Generator: nested iterables are descended into recursively, strings are yielded as single elements.
        """
        # ``collections.Iterable`` no longer exists on Python 3.10+; the ABC lives in ``collections.abc``
        try:
            from collections.abc import Iterable
        except ImportError:  # Python 2
            from collections import Iterable

        for el in _list:
            if isinstance(el, Iterable) and not isinstance(el, basestring):
                for sub in self.flatten_list(el):
                    yield sub
            else:
                yield el

    def flat_list(self, non_flat_list, remove_none=False):
        """
        Flatten ``non_flat_list``

        :param remove_none: when True, falsy elements are dropped and a list is returned;
            otherwise the lazy generator from ``flatten_list`` is returned as is
        """
        flattened = self.flatten_list(non_flat_list)
        if remove_none:
            flattened = [_f for _f in flattened if _f]
        return flattened

    def filtered_items(self, unfiltered_items, content_types, match_type):
        """
        De-duplicate items and, for strict matching, keep only those whose kind is a requested content type

        :param unfiltered_items: iterable of items; duplicates are removed and the rest is processed in sorted order
        :param content_types: content types as defined in config
        :param match_type: 'strict' refreshes each item via ``self.ia.update`` and checks item['kind'];
            any other value accepts every item
        :return: list of accepted items
        """
        items = []
        unfiltered_items = set(unfiltered_items)
        for item in sorted(unfiltered_items):
            if match_type == 'strict':
                log.debug('Match type is strict, verifying item type to requested content types')
                self.ia.update(item)
                if item['kind'] in content_types:
                    log.verbose('Adding item "{}" to list. Item kind is "{}"'.format(item, item['kind']))
                    items.append(item)
                else:
                    log.verbose('Rejecting item "{}". Item kind is "{}"'.format(item, item['kind']))
            else:
                log.debug('Match type is loose, all items are being added')
                items.append(item)
        return items

    def items_by_person(self, person, job_types, content_types, match_type):
        """
        Return item list for a person object
        """
        unfiltered_items = self.flat_list(
            [self.items_by_job_type(person, job_type, content_types) for job_type in job_types],
            remove_none=True)

        return self.filtered_items(unfiltered_items, content_types, match_type)

    def items_by_content_type(self, person, job_type, content_type):
        """
        Return the non-empty entries stored on ``person`` under "<job_type> <converted content type>"
        """
        return [_f for _f in (person.get(job_type + ' ' + self.content_type_conversion[content_type], [])) if _f]

    def items_by_job_type(self, person, job_type, content_types):
        """
        Return the person's items for a single job type

        Jobs listed in ``jobs_without_content_type`` are read from the bare job key. For every other job one list
        per requested content type is produced; for 'movie' the "<job> documentary" and "<job> short" entries are
        included together with the movie entries.

        :return: list with empty entries removed (a flat item list, or a list of per content type lists)
        """
        if job_type in self.jobs_without_content_type:
            items = person.get(job_type, [])
        else:
            items = []
            for content_type in content_types:
                found = self.items_by_content_type(person, job_type, content_type)
                if content_type == 'movie':
                    # These were previously chained with ``and``, which evaluates to the first empty list and so
                    # discarded the movie credits unless documentary and short credits both existed.
                    found = (list(person.get(job_type + ' ' + 'documentary', [])) +
                             list(person.get(job_type + ' ' + 'short', [])) +
                             found)
                items.append(found)
        return [_f for _f in items if _f]

    def items_by_character(self, character, content_types, match_type):
        """
        Return items list for a character object
        :param character: character object
        :param content_types: content types as defined in config
        :param match_type: 'strict' or 'loose', see ``filtered_items``
        :return: filtered item list
        """
        unfiltered_items = self.flat_list(
            [character.get(self.character_content_type_conversion[content_type])
             for content_type in content_types], remove_none=True)

        return self.filtered_items(unfiltered_items, content_types, match_type)

    def items_by_company(self, company):
        """
        Return items list for a company object
        :param company: company object
        :return: company items list (empty when the company has no 'production companies' entry)
        """
        return company.get('production companies') or []

    @cached('from_imdb', persist='2 hours')
    def on_task_input(self, task, config):
        """
        Build the entry list for the task from the configured IMDB entities

        :return: list of entries, or None when IMDBPY is missing, nothing was found,
            or more than 'max_entries' entries were produced
        """
        try:
            from imdb import IMDb
            self.ia = IMDb()
        except ImportError:
            log.error('IMDBPY is required for this plugin. Please install using "pip install imdbpy"')
            return

        entries = []
        config = self.prepare_config(config)
        items = self.get_items(config)
        if not items:
            log.error('Could not get IMDB item list, check your configuration.')
            return
        for item in items:
            entry = Entry(title=item['title'],
                          imdb_id='tt' + self.ia.get_imdbID(item),
                          url='',
                          imdb_url=self.ia.get_imdbURL(item))

            if entry.isvalid():
                if entry not in entries:
                    entries.append(entry)
                    if entry and task.options.test:
                        log.info("Test mode. Entry includes:")
                        for key, value in list(entry.items()):
                            log.info('     {}: {}'.format(key.capitalize(), value))
            else:
                log.error('Invalid entry created? %s' % entry)

        max_entries = config.get('max_entries')
        if len(entries) <= max_entries:
            return entries
        # Flood protection: refuse to return anything rather than a silently truncated list
        log.warning(
            'Number of entries (%s) exceeds maximum allowed value %s. '
            'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
                len(entries), max_entries))
        return
Exemplo n.º 34
0
def getEps(title):
    """Collect the episodes that have not aired yet for the first TV series matching *title*.

    The series is looked up through IMDb, its episodes are walked newest first and
    every episode whose air date is today, in the future or unknown is reported.
    One line per reported episode is printed as a side effect.

    Returns a tuple ``(titles, descriptions, dates)`` of parallel lists: the series
    title, the " (SxxEyy)" tag (empty when the season is not an int) and the
    formatted air date (or 'unknown') of each reported episode. When no series or
    no episode info can be found, a plain message string is returned instead.
    """
    #strip title input by user
    title = title.strip()
    # Create IMDb object
    imdb_client = IMDb()
    #search for title
    search_results = imdb_client.search_movie(title)
    # Get first search result that is a TV series
    search_results = filter(lambda s: s['kind'] == 'tv series', search_results)
    search_results = list(islice(search_results, 0, 1))
    #if no result found
    if len(search_results) == 0:
        return 'No TV series matches were found for "%s".' % title
    series = search_results[0]
    imdb_client.update(series, 'episodes')
    s_title = series['long imdb title']
    #if no episode info found
    if (not series.has_key('episodes')) or len(series['episodes']) == 0:
        return 'Episode info is unavailable for %s.' % s_title
    episodes = sortedEpisodes(series)
    if len(episodes) == 0:
        return 'Episode info is unavailable for %s.' % s_title
    episodes.reverse()
    date_today = date.today()
    upcoming = []
    for ep in episodes:
        if ep.has_key('original air date'):
            air_date = ep['original air date']
            if len(air_date) == 4:
                # Only the year is known
                ep['date'] = strptime(air_date, '%Y')
            else:
                # Abbreviated month names may carry a trailing dot ("Feb."), which %b does not accept
                ep['date'] = strptime(air_date.replace('.', ''), '%d %b %Y')
        if ep.has_key('date'):
            ep['date'] = date(*ep['date'][0:3])
            # Positive age: the air date lies in the past
            ep['age'] = (date_today - ep['date']).days
            ep['has aired'] = ep['age'] > 0
        else:
            ep['has aired'] = False
        if not ep['has aired']:
            upcoming.append(ep)

    #function to get season episode format for description
    def getSE(ep):
        if not isinstance(ep['season'], int):
            return ''
        Sstr = 'S' + str(ep['season']).zfill(2)
        Estr = 'E' + str(ep['episode']).zfill(2)
        return ' (' + Sstr + Estr + ')'

    #function to get the absolute age of an episode in days, with locale specific grouping
    def getAge(ep):
        # locale.format() was removed in Python 3.12; format_string() takes the same arguments
        return locale.format_string('%i', abs(ep['age']), grouping=True)

    #function to get date of episode
    def getDate(ep):
        return ep['date'].strftime('%a, ') + str(
            ep['date'].day) + ep['date'].strftime(' %b %y')

    titles = []
    descriptions = []
    dates = []
    # The loop variable no longer rebinds the list being iterated nor the IMDb client
    for ep in upcoming:
        titles.append(s_title)
        descriptions.append(getSE(ep))
        e_out = 'The next upcoming episode ' + 'for ' + s_title + ' ' + 'is "' + ep[
            'title'] + '"' + getSE(ep) + '.'
        if ep.has_key('age'):
            e_schedule = 'in %s days' % getAge(ep)
            e_out = e_out + ' It airs ' + e_schedule + ', ' + getDate(
                ep) + '.'
            dates.append(getDate(ep))
        else:
            e_out = e_out + ' Its air date is unavailable.'
            dates.append('unknown')
        print(e_out)
    return titles, descriptions, dates
Exemplo n.º 35
0
# Create Series Class for SQL Database Table
class Series(Base):
    """Declarative model for one row of the ``series`` table."""
    __tablename__ = 'series'
    # Surrogate primary key
    id = Column(Integer, primary_key=True)
    # NOTE(review): presumably the IMDb title identifier; being an Integer column it
    # cannot keep leading zeros of an ID string -- confirm this is intended
    tconst = Column(Integer)
    # Series title, at most 255 characters
    title = Column(String(255))
    num_seasons = Column(Integer)
    avg_rating = Column(Float)
    num_votes = Column(Integer)


# Create Episode Class for SQL Database Table

# Create every table declared on Base, using ``conn``
Base.metadata.create_all(conn)

# IMDb client
ia = IMDb()

# Show Listing
# One ID string per show; kept as strings, so leading zeros are preserved
# (presumably the numeric part of the IMDb title ID -- confirm)
rick_n_morty = '2861424'
ren_n_stimpy = '0101178'
beevis_n_butthead = '0105950'
aeon_flux = '0111873'  # in case of re search copy Æon Flux
celeb_deathmatch = '0208614'
daria = '0118298'
south_park = '0121955'
fam_guy = '0182576'
american_dad = '0397306'
king_hill = '0118375'
space_ghost = '0108937'
futurama = '0149460'
aqua_thf = '0297494'
Exemplo n.º 36
0
import json, urllib.request, sqlite3, time
from imdb import IMDb
from flask import Flask, flash, redirect, render_template, request, session
from flask_session import Session
from datetime import datetime
from flask_paginate import Pagination, get_page_args

# Flask application object and a module-level IMDb client
app = Flask(__name__)
imdb = IMDb()

# Ensure templates are auto-reloaded
app.config["TEMPLATES_AUTO_RELOAD"] = True


# Main function to update and jsonify once a day
def main():
    """Run update() and then jsonify(), then block for 24 hours before returning."""
    seconds_per_day = 24 * 60 * 60
    update()
    jsonify()
    time.sleep(seconds_per_day)


# Parsing function
def update():
    """Update the movie list.

    Opens a SQLite connection to ``movies.db`` and prepares the state used for
    paging through the Rotten Tomatoes browse endpoint.
    """
    SQL = sqlite3.connect('movies.db')
    database = SQL.cursor()
    print("Starting the list update...\n")
    # First result page to request; the page number is meant to be appended to ``url``
    page_counter = 1
    url = "https://www.rottentomatoes.com/api/private/v2.0/browse?" \
          "maxTomato=100&maxPopcorn=100&certified&sortBy=release&type=cf-dvd-streaming-all&page="
    movies = []
    # NOTE(review): the meaning of 32 is not evident here -- presumably a page size; confirm
    count = 32
Exemplo n.º 37
0
def getEps(title, max_len=990, debug=False):  # 1024-32-2 = 990
    """Returns a text string containing schedule info for the last aired and the next upcoming episodes for the given TV series title

    title:   series title to search for. A trailing '/noplot' (any case) is stripped
             and suppresses the plot of the next upcoming episode.
    max_len: maximum length of the returned string; longer results are cut and end
             in '...'. Use -1, 0 or None to disable trimming.
    debug:   when true, retry failures and the 10 most recent listed episodes are printed.

    Every failure (no match, no episode info, lookup error) is reported through the
    returned string; only a non-string title raises (AssertionError).
    """

    # Validate title
    assert isinstance(title, str), 'A string input was not provided.'

    # Preprocess title
    title = title.strip()

    # Determine if the next upcoming episode's plot should be included if available (Default is True)
    if title.lower().endswith('/noplot'):
        title = title[:-len('/noplot')].rstrip()
        include_plot = False
    else:
        include_plot = True

    try:

        # Validate title further
        if len(title) == 0:
            return 'A title was not provided.'

        # Create IMDb object
        i = IMDb()

        # Get search results, retrying with a growing pause between attempts
        max_attempts = 3  # Set to anything greater than 1
        for attempt in range(1, max_attempts + 1):
            try:
                search_results = i.search_movie(title)
                break
            except Exception:
                failure = 'An error occurred while attempting to retrieve search results for "%s". %s attempts were made.' % (
                    title, attempt)
                if attempt < max_attempts:
                    if debug:
                        print(failure + '\n')
                    sleep(attempt * 2)
                else:
                    return failure

        # Get first search result that is a TV series
        search_results = filter(lambda s: s['kind'] == 'tv series',
                                search_results)
        search_results = list(islice(search_results, 0, 1))
        if len(search_results) == 0:
            return 'No TV series matches were found for "%s".' % title
        s = search_results[0]

        # Get episodes
        i.update(s, 'episodes')
        s_title = s['long imdb title']
        if ('episodes' not in s) or len(s['episodes']) == 0:
            return 'Episode info is unavailable for %s.' % s_title
        s = sortedEpisodes(s)
        if len(s) == 0:
            return 'Episode info is unavailable for %s.' % s_title

        # Sort episodes in approximately the desired order (most recent first).
        # If the episodes are not listed in proper order at the source, such as for
        # "Showtime Championship Boxing" (1987) as of 2/29/08, the error will be copied here.
        s.reverse()
        # Move a leading run of unknown-season episodes to the end. While this may not always produce
        # the most accurate results, it prevents such episodes from being thought of as most recent.
        s = list(dropwhile(lambda e: e['season'] == 'unknown', s)) + list(
            takewhile(lambda e: e['season'] == 'unknown', s))

        # Process date related info for episodes
        date_today = date.today()
        for ep in s:
            if 'original air date' in ep:
                try:
                    ep['date'] = strptime(ep['original air date'], '%d %B %Y')
                except (ValueError, TypeError):
                    # Air date is not in the expected "day month year" form; treat it as unknown
                    pass
            if 'date' in ep:
                ep['date'] = date(*ep['date'][0:3])
                # Age is date delta in days: negative for the past, 0 for today
                ep['age'] = (ep['date'] - date_today).days
                ep['has aired'] = ep['age'] < 0
            else:
                ep['has aired'] = False

        # Print last 10 listed episodes (if debugging)
        if debug:
            print('Last 10 listed episodes:\nS# Epi# Age   Episode Title')
            for e in s[:10]:
                age_column = str(e['age']).zfill(5) if 'age' in e else ' ' * 5
                print(
                    '%s %s %s %s' %
                    (str(e['season']).zfill(2)[:2], str(e['episode']).zfill(4),
                     age_column, e['title'].encode('latin-1')))
            print()

        # Declare convenient functions for use in generating output string
        def getSE(e):
            if not isinstance(e['season'], int):
                return ''
            Sstr = 'S' + str(e['season']).zfill(2)
            Estr = 'E' + str(e['episode']).zfill(2)
            return ' (' + Sstr + Estr + ')'

        def getAge(e):
            # locale.format() was removed in Python 3.12; format_string() takes the same arguments
            return locale.format_string('%i', abs(e['age']), grouping=True)

        def getDate(e):
            return 'i.e. on ' + e['date'].strftime('%a, ') + str(
                e['date'].day) + e['date'].strftime(' %b %y')

        # Determine last aired episode
        # (An episode that airs today is considered to be not yet aired)
        aired = list(islice(filter(lambda e: e['has aired'], s), 0, 1))
        if len(aired) > 0:
            e = aired[0]
            if e['age'] != -1:
                e_schedule = '%s days ago' % getAge(e)
            else:
                e_schedule = 'yesterday'

            # Generate output string when last aired episode is available
            e_out = 'The episode that aired last for ' + s_title + ' is "' + e[
                'title'] + '"' + getSE(
                    e) + '. It aired ' + e_schedule + ', ' + getDate(e) + '. '

        else:
            # Generate output string when last aired episode is unavailable
            e_out = ''

        # Determine next upcoming episode
        # (An episode that airs today is considered to be an upcoming episode)
        # The list is newest first, so the last element of the leading not-aired run is the next one to air.
        upcoming = list(takewhile(lambda e: not e['has aired'],
                                  s))  # Memory inefficient
        if len(upcoming) > 0:
            e = upcoming[-1]

            # Generate output string when next upcoming episode is available
            # (the series title is only repeated when it was not already mentioned above)
            series_part = ('for ' + s_title + ' ') if e_out == '' else ''
            e_out = e_out + 'The next upcoming episode ' + series_part + 'is "' + e[
                'title'] + '"' + getSE(e) + '.'

            if 'age' in e:
                # A dated episode that has not aired always has age >= 0
                if e['age'] > 1:
                    e_schedule = 'in %s days' % getAge(e)
                elif e['age'] == 1:
                    e_schedule = 'tomorrow'
                else:
                    e_schedule = 'today'
                e_out = e_out + ' It airs ' + e_schedule + ', ' + getDate(
                    e) + '.'
            else:
                e_out = e_out + ' Its air date is unavailable.'

            if include_plot:
                if 'plot' in e and e['plot'] != 'Related Links':
                    e_out = e_out + ' Its plot is: ' + e['plot']
                elif e_out.endswith('Its air date is unavailable.'):
                    e_out = e_out.replace(
                        'Its air date is unavailable.',
                        'Its air date and plot are unavailable.')
                else:
                    e_out = e_out + ' Its plot is unavailable.'

        else:
            if e_out != '':  # Last: available; Next: unavailable
                e_out = e_out + 'No upcoming episode is scheduled.'
            else:  # Last: unavailable; Next: unavailable
                e_out = 'Episode info is unavailable for %s.' % s_title

        # Conditionally trim output string. Only strings that really exceed max_len are cut;
        # the cut leaves room for the 3 character ellipsis.
        if (max_len not in [-1, 0, None]) and len(e_out) > max_len:
            e_out = e_out[:max_len - 3] + '...'

        # Return output string
        return e_out

    except Exception:
        # Best effort: any unexpected failure is reported as text. KeyboardInterrupt and
        # SystemExit are deliberately not intercepted.
        return 'An error occurred while attempting to retrieve episode info for "%s".' % title
Exemplo n.º 38
0
#PLEASE BE AWARE CODE IS A DUMMY AS OF THIS AND CAN ONLY TAKE 4 GENRES AS A USER INPUT WHILE FILTERING

from imdb import IMDb
import pandas as pd
import numpy as np

ia = IMDb()
top250Movies = ia.get_top250_movies()

# Re-fetch every title with get_movie, which returns a lot of details including genres.
# The slice keeps up to 250 entries; lower the bound to work with fewer movies.
top250Movies = [ia.get_movie(movie.movieID) for movie in top250Movies[:250]]
# Titles only, in the same order as top250Movies
dataset = [movie['title'] for movie in top250Movies]
"""
Full list of genre types on IMDB
    Action
    Adventure
    Animation
    Biography
    Comedy
    Crime
    Drama
    Family
    Fantasy
    Film-Noir
    History
    Horror
    Music
    Musical
    Mystery
    Romance
    Sci-Fi
Exemplo n.º 39
0
#!python2

from imdb import IMDb
from Tkinter import *
import ttk

# Module-level IMDb client used by search()
i = IMDb()

# Return a list of credits for an actor


def search(actor):
    """Return the acting credits of the first IMDb person matching *actor*.

    The person's 'actor' filmography is returned when present, otherwise the
    'actress' one.

    Raises IndexError when the name search has no results and KeyError when
    the person has neither filmography.
    """
    name = i.search_person(actor)[0]
    # Fetch the person once; both filmographies live on the same object
    person = i.get_person(name.personID)
    try:
        return person['actor']
    except KeyError:
        return person['actress']


'''
print('\nEnter the names of two actors')
print("We'll see what they've been in together")

actor_a = raw_input('\nActor 1: ')
actor_b = raw_input('\nActor 2: ')
'''


def match(*args):
    """Set the text of ``disp_match`` to a progress message.

    ``*args`` is accepted but not used by the statement below.
    """
    disp_match.configure(text='Fetching filmographies')
Exemplo n.º 40
0
from imdb import IMDb

# create an instance of the IMDb class
ia = IMDb()

# search for 'Stargate SG1' and, for every search result, print its details
the_matrix = ia.search_movie('Stargate SG1')
for movie in the_matrix:
    print(movie["title"])
    print(movie.movieID)
    # fetch the full record for the search result
    mv = ia.get_movie(movie.movieID)
    print(mv)
    # load the 'episodes' info set for this title
    ia.update(mv, 'episodes')
    # hard-coded lookup: index 4, then index 3, of the episodes mapping
    # NOTE(review): presumably season 4, episode 3; titles lacking that entry will raise here -- confirm
    episode = mv['episodes'][4][3]
    #print(episode)
    print(episode["plot"])
    print(episode["title"])
    #print(episode["synopsis"])
    # dump every info set currently loaded for the title; failures are printed and skipped
    for key in mv.current_info:
        try:
            print(mv[key])
        except Exception as e:
            print("except" + str(e))
            pass
    print()
"""
for director in the_matrix['directors']:
    print(director['name'])

# show all information that are currently available for a movie
print(sorted(the_matrix.keys()))
Exemplo n.º 41
0
                animation_movies.append(line[0][2:])
    print(i)
    i = i+1
tsv_file.close()

# create the final DB
workbook = xlsxwriter.Workbook('USA_DB.xlsx')
worksheet = workbook.add_worksheet()

# header row
worksheet.write('A1', 'Movie_ID')
worksheet.write('B1', 'Title')
worksheet.write('C1', 'Year')
# next free data row (row 0 holds the header)
row = 1

# connecting to IMDB
ia = IMDb()

j = 0
for movie_id in animation_movies:
    movie = ia.get_movie(movie_id)
    country = movie.get('country')
    # skip titles without country information; this covers both a missing key
    # and an empty list, which would otherwise fail on country[0]
    if not country:
        continue
    # only titles whose first listed country is the United States are exported
    if country[0] == "United States":
        content = [movie_id, movie.get('title'), movie.get('year')]
        column = 0
        for item in content:
            worksheet.write(row, column, item)
            column += 1
        row += 1
    print(j)
Exemplo n.º 42
0
# Dependencies -- run these in a shell, not inside Python:
# pip install imdbpy
#
# pip install bs4
#
# pip install pandas
# pip install requests

from imdb import IMDb
from bs4 import BeautifulSoup
from requests import get
import pandas as pd
import collections
import re
ia = IMDb()

# Accumulator lists, one per attribute to be collected; all start empty.
# NOTE(review): presumably filled in parallel (one element per movie) further down -- confirm
Movie_Names = []
Movie_actor_1_name = []
Movie_actor_2_name = []
Movie_actor_3_name = []
Movie_director = []
Movie_writter_names = []
Movie_music_names = []
Movie_cinematography_name = []
Movie_costume_name = []
Movie_years = []
Movie_ratings = []
Movie_awords_win = []
Movie_awords_nominations = []
Movie_awords_oscars = []
Exemplo n.º 43
0
from imdb import IMDb
ia = IMDb()

# get a movie by its ID and print its first listed runtime and its year
the_matrix = ia.get_movie('0133093')
# print(the_matrix['director'])
print(the_matrix['runtimes'][0])
print(the_matrix['year'])

# show all the information sets available for Movie objects
# print(ia.get_movie_infoset())
Exemplo n.º 44
0
class PlexHolidays():
    """Interactive helper that builds a Plex playlist from titles matching an IMDb plot keyword.

    Instantiating the class runs the whole workflow: it prompts for a keyword,
    scans every medium of the Plex section, looks each one up on IMDb, and
    offers to create a playlist from the media whose keywords contain the
    (lower-cased) input.
    """

    def __init__(self):
        self.plex = Plex()
        self.imdb = IMDb()
        keyword = input('Keyword (i.e. Holiday name): ')
        wanted = keyword.lower()
        keyword_matches = []

        print('Scanning', self.plex.section.title, '...')
        for plex_medium in tqdm(self.plex.media):
            imdb_medium = self.plex2imdb(plex_medium)

            if not imdb_medium:
                continue

            if wanted in self.get_keywords(imdb_medium):
                keyword_matches.append(plex_medium)

        if keyword_matches:
            print('Titles matching\"', keyword, '\" :')
            for match in keyword_matches:
                print('\t', match.title)
            self.plex.create_playlist(input('Playlist name: '),
                                      keyword_matches)
        else:
            print('No matches found. D:')

        print('Happy Holidays!')

    def plex2imdb(self, medium):
        """
            Get the IMDbPy object for a given Plex object.

            The Plex title is searched on IMDb and only results of a kind that
            fits the section type are considered. Returns the result whose year
            equals the Plex year, else the one with the closest year, else None.
            When Plex has no year the first result is returned.
        """
        # Set appropriate search method and acceptable results based on section type
        if self.plex.section.type == 'movie':
            kinds = {'movie', 'short', 'tv movie', 'tv short'}
            search_function = self.imdb.search_movie
        else:
            kinds = {'episode'}
            search_function = self.imdb.search_episode

        # Perform IMDb search for the Plex object, retrying for as long as it times out
        while True:
            try:
                results = [
                    _ for _ in search_function(medium.title)
                    if _['kind'] in kinds
                ]
                break
            # Time out, try again.
            except OSError:
                print('Timed out while downloading', medium.title)
                continue

        # No IMDb results whatsoever
        if not results:
            return None
        # Plex has no year listed, return first search result
        if not medium.year:
            return results[0]

        closest_result, closest_gap = None, None
        for result in results:
            year = result.get('year')
            # This result has no year listed, ignore it.
            if not year:
                continue

            # Exact match found
            if year == medium.year:
                return result

            # Track match with closest year in case exact match is not found.
            # The distance is absolute and the best distance is remembered, so a
            # later, more distant result cannot replace a closer one.
            gap = abs(medium.year - year)
            if closest_gap is None or gap < closest_gap:
                closest_result, closest_gap = result, gap

        # No exact match found, use result with closest year (None if no result had a year)
        return closest_result

    def get_keywords(self, imdb_obj):
        """
            Get the plot keywords for a given IMDbPy object.

            Returns an empty list when no object is given or no keywords are listed.
        """
        if not imdb_obj:
            return []

        data = self.imdb.get_movie_keywords(imdb_obj.movieID)['data']
        return data.get('keywords', [])
Exemplo n.º 45
0
from django.forms import model_to_dict
from django.template.defaultfilters import urlencode
from django.utils.datetime_safe import datetime
from django.utils.timezone import make_aware
from imdb import IMDb
from wikipedia import wikipedia, re

from OpenAlumni.Bot import Bot
from OpenAlumni.Tools import log, translate, load_page, in_dict, load_json, remove_html, fusion, remove_ponctuation, \
    equal_str, now, remove_accents, index_string
from OpenAlumni.settings import MOVIE_NATURE
from alumni.models import Profil, Work, PieceOfWork, Award, Festival

#from scipy import pdist

# Module-level IMDb client
ia = IMDb()


def extract_movie_from_cnca(title: str):
    """Placeholder for a CNC-RCA lookup: currently returns *title* unchanged.

    :param title: title of the work to look up
    :return: the title exactly as received
    """
    # TODO: POST the title to http://www.cnc-rca.fr/Pages/Page.aspx?view=RecOeuvre
    # (form field "Tbx_Titre" inside "RechercheOeuvre_1", User-Agent "Mozilla/5.0")
    # and parse the returned page.
    return title


def extract_movie_from_bdfci(pow: PieceOfWork, refresh_delay=31):
    """Search bdfci.info for ``pow.title`` and collect the ``<article>`` elements of the result page.

    :param pow: piece of work whose ``title`` is searched for
    :param refresh_delay: forwarded unchanged to ``load_page``
        (NOTE(review): presumably a cache age in days -- confirm against load_page)
    """
    # Spaces become "+" for the query string; no other escaping is applied to the title here
    title = pow.title.replace(" ", "+")
    page = load_page("https://www.bdfci.info/?q=" + title +
                     "&pa=f&d=f&page=search&src=bdfci&startFrom=1&offset=1",
                     refresh_delay=refresh_delay)
    articles = page.find_all("article")
Exemplo n.º 46
0
from flask import Flask, render_template, request
from imdb import IMDb

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
# Single IMDb access object shared by the request handlers.
instance = IMDb()


@app.route("/")
def home():
    """Serve the landing page."""
    landing_page = render_template("home.html")
    return landing_page


@app.route("/movies", methods=["GET", "POST"])
def movies():
    """Search IMDb for the submitted title and render the hits.

    A POST carrying a non-empty ``name`` form field renders ``movies.html``
    with two parallel lists: ``movie`` (the search hits) and ``movie_three``
    (the result of ``get_movie`` for each hit, in the same order).
    Any other request renders ``home.html``.
    """
    if request.method != "POST":
        return render_template("home.html")

    # A missing field used to be turned into the literal search term "None";
    # treat missing/blank input as "nothing to search" instead.
    search = (request.form.get("name") or "").strip()
    if not search:
        return render_template("home.html")

    movie = instance.search_movie(search)
    # One get_movie() lookup per hit to obtain the detailed record.
    movie_three = [instance.get_movie(hit.movieID) for hit in movie]

    return render_template("movies.html",
                           movie=movie,
                           movie_three=movie_three)
Exemplo n.º 47
0
    def getInfos(self, item, export):
        """Build the infoLabels dictionary for a single catalogue item.

        The dictionary returned by ``self.getAsins(item)`` is filled with
        title, plot, cast, artwork, dates and ratings taken from ``item``,
        then with series/season/episode details depending on the content
        type, and finally passed through ``self.getArtWork``.

        The 'Rating' entry is overwritten with the rating of the first IMDb
        search hit for the title; when IMDb returns no hit, it is set to 1.1
        if the catalogue rating was at least 8 and to 1 otherwise.

        :param item: dict describing one title; 'title' and 'contentType'
            are required, everything else is optional
        :param export: when falsy, default artwork is filled in and the
            display title of non-prime, non-series items is colour-marked
        :return: tuple ``(contentType, infoLabels)``
        """
        infoLabels = self.getAsins(item)
        infoLabels['DisplayTitle'] = infoLabels['Title'] = self.cleanTitle(
            item['title'])
        infoLabels['contentType'] = contentType = item['contentType'].lower()

        infoLabels['mediatype'] = 'movie'
        infoLabels['Plot'] = item.get('synopsis')
        infoLabels['Director'] = item.get('director')
        infoLabels['Studio'] = item.get('studioOrNetwork')
        infoLabels['Cast'] = item.get('starringCast', '').split(',')
        # NOTE(review): "/" yields a float on Python 3 ("90.0") and an int on
        # Python 2 ("90") -- confirm which form consumers expect.
        infoLabels['Duration'] = str(item['runtime']['valueMillis'] /
                                     1000) if 'runtime' in item else None
        infoLabels['TrailerAvailable'] = item.get('trailerAvailable', False)
        infoLabels['Fanart'] = item.get('heroUrl')
        infoLabels['isAdult'] = 1 if 'ageVerificationRequired' in str(
            item.get('restrictions')) else 0
        infoLabels['Genre'] = ' / '.join(item.get('genres', '')).replace(
            '_', ' & ').replace('Musikfilm & Tanz', 'Musikfilm, Tanz')
        # item.get() also covers an empty 'formats' list, which previously
        # raised IndexError outside the try block.
        if item.get('formats') and 'images' in item['formats'][0]:
            try:
                thumbnailUrl = item['formats'][0]['images'][0]['uri']
                thumbnailFilename = thumbnailUrl.split('/')[-1]
                thumbnailBase = thumbnailUrl.replace(thumbnailFilename, '')
                # Keep only the part before the first dot and force ".jpg".
                infoLabels['Thumb'] = thumbnailBase + thumbnailFilename.split(
                    '.')[0] + '.jpg'
            except Exception:
                # The thumbnail is best effort; a malformed image entry must
                # not abort the whole item.
                pass

        if 'releaseOrFirstAiringDate' in item:
            infoLabels['Premiered'] = item['releaseOrFirstAiringDate'][
                'valueFormatted'].split('T')[0]
            infoLabels['Year'] = int(infoLabels['Premiered'].split('-')[0])

        if 'regulatoryRating' in item:
            if item['regulatoryRating'] == 'not_checked' or not item[
                    'regulatoryRating']:
                infoLabels['MPAA'] = getString(30171)
            else:
                infoLabels['MPAA'] = AgeRestrictions().GetAgeRating(
                ) + item['regulatoryRating']

        if 'customerReviewCollection' in item:
            infoLabels['Rating'] = float(
                item['customerReviewCollection']['customerReviewSummary']
                ['averageOverallRating']) * 2
            infoLabels['Votes'] = str(
                item['customerReviewCollection']['customerReviewSummary']
                ['totalReviewCount'])
        elif 'amazonRating' in item:
            infoLabels['Rating'] = float(
                item['amazonRating']
                ['rating']) * 2 if 'rating' in item['amazonRating'] else None
            infoLabels['Votes'] = str(
                item['amazonRating']
                ['count']) if 'count' in item['amazonRating'] else None

        # The catalogue rating may be absent (no rating block at all) or None;
        # .get() avoids the KeyError the direct lookup raised in that case.
        stars = infoLabels.get('Rating')
        ia = IMDb()
        movs = ia.search_movie(infoLabels['Title'])
        if movs:
            ia.update(movs[0])
            infoLabels['Rating'] = movs[0].get('rating')
        else:
            # No IMDb hit: fall back to the marker values 1.1 / 1. A missing
            # catalogue rating counts as "below 8".
            if stars is not None and stars >= 8:
                infoLabels['Rating'] = 1.1
            else:
                infoLabels['Rating'] = 1

        if contentType == 'series':
            infoLabels['mediatype'] = 'tvshow'
            infoLabels['TVShowTitle'] = item['title']
            infoLabels['TotalSeasons'] = item['childTitles'][0][
                'size'] if 'childTitles' in item else None

        elif contentType == 'season':
            infoLabels['mediatype'] = 'season'
            infoLabels['Season'] = item['number']
            if item['ancestorTitles']:
                try:
                    infoLabels['TVShowTitle'] = item['ancestorTitles'][0][
                        'title']
                    infoLabels['SeriesAsin'] = item['ancestorTitles'][0][
                        'titleId']
                except Exception:
                    # Incomplete ancestor data is tolerated.
                    pass
            else:
                infoLabels['SeriesAsin'] = infoLabels['Asins'].split(',')[0]
                infoLabels['TVShowTitle'] = item['title']
            if 'childTitles' in item:
                infoLabels['TotalSeasons'] = 1
                infoLabels['Episode'] = item['childTitles'][0]['size']

        elif contentType == 'episode':
            infoLabels['mediatype'] = 'episode'
            if item['ancestorTitles']:
                # Initialised up front so the fallback below never hits an
                # unbound name when no SEASON ancestor was seen in the loop.
                seasontitle = None
                for content in item['ancestorTitles']:
                    if content['contentType'] == 'SERIES':
                        infoLabels['SeriesAsin'] = content[
                            'titleId'] if 'titleId' in content else None
                        infoLabels['TVShowTitle'] = content[
                            'title'] if 'title' in content else None
                    elif content['contentType'] == 'SEASON':
                        infoLabels['Season'] = content[
                            'number'] if 'number' in content else None
                        infoLabels['SeasonAsin'] = content[
                            'titleId'] if 'titleId' in content else None
                        seasontitle = content[
                            'title'] if 'title' in content else None
                # Without a series ancestor the season stands in for it.
                if 'SeriesAsin' not in infoLabels and 'SeasonAsin' in infoLabels:
                    infoLabels['SeriesAsin'] = infoLabels['SeasonAsin']
                    infoLabels['TVShowTitle'] = seasontitle
            else:
                infoLabels['SeriesAsin'] = ''

            if 'number' in item:
                infoLabels['Episode'] = item['number']
                if item['number'] > 0:
                    infoLabels['DisplayTitle'] = '%s - %s' % (
                        item['number'], infoLabels['Title'])
                else:
                    if ':' in infoLabels['Title']:
                        infoLabels['DisplayTitle'] = infoLabels['Title'].split(
                            ':')[1].strip()

        if 'TVShowTitle' in infoLabels:
            infoLabels['TVShowTitle'] = self.cleanTitle(
                infoLabels['TVShowTitle'])

        infoLabels = self.getArtWork(infoLabels, contentType)

        if not export:
            # 'Thumb' is only set above when the item carries image data, so
            # read it defensively before falling back to the default artwork.
            if not infoLabels.get('Thumb'):
                infoLabels['Thumb'] = self._s.DefaultFanart
            if not infoLabels.get('Fanart'):
                infoLabels['Fanart'] = self._s.DefaultFanart
            if not infoLabels['isPrime'] and contentType != 'series':
                infoLabels['DisplayTitle'] = '[COLOR %s]%s[/COLOR]' % (
                    self._g.PayCol, infoLabels['DisplayTitle'])

        return contentType, infoLabels
Exemplo n.º 48
0
class IMDBAPI:
    """Film catalogue read from the ``film.pkl`` cache and filtered in memory."""

    def __init__(self) -> None:
        """Create the IMDb client and load the cached film list."""
        self.films = []
        self.app = IMDb()
        # NOTE: film.pkl must already exist; save_file() is what writes it.
        self.load_file()

    def initialise(self):
        """Discard the current film list and reload it from the cache file."""
        print("Start fetching")
        self.films = []
        self.load_file()

    def save_file(self):
        """Fetch the leading Top 250 entries and pickle them to ``film.pkl``.

        The fetched films are appended to ``self.films`` before the whole
        list is written out.
        """
        top250 = self.app.get_top250_movies()
        for position, entry in enumerate(top250):
            # Entries 0..100 are kept (as before); stop instead of walking
            # the remaining entries only to skip them.
            if position > 100:
                break
            self.films.append(self.app.get_movie(entry.movieID))
        with open('film.pkl', 'wb') as output:
            pickle.dump(self.films, output, pickle.HIGHEST_PROTOCOL)

    def load_file(self):
        """Replace ``self.films`` with the list stored in ``film.pkl``."""
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # load a film.pkl that this application wrote itself.
        with open('film.pkl', 'rb') as cache_file:
            self.films = pickle.load(cache_file)

    def search_for_years(self, start_year, end_year):
        """Return the loaded films whose year lies in [start_year, end_year].

        Films for which ``is_null`` is true are ignored.
        """
        return [
            film for film in self.films
            if not is_null(film) and start_year <= get_year(film) <= end_year
        ]

    @staticmethod
    def search_for_country(country_category, films):
        """Filter ``films`` by country.

        ``"Other"`` selects films that list neither "Russia" nor
        "United States"; any other value selects films listing exactly that
        country.
        """
        if country_category == "Other":
            return [
                film for film in films
                if "Russia" not in get_country(film)
                and "United States" not in get_country(film)
            ]
        return [
            film for film in films
            for country in get_country(film)
            if country == country_category
        ]

    @staticmethod
    def search_for_genre(genre, films):
        """Return the films from ``films`` that list ``genre``."""
        return [
            film for film in films
            for film_genre in get_genres(film)
            if film_genre == genre
        ]

    def present_movie(self, start_year, end_year, country_category, genre):
        """Return the films matching all filters, as built by ``create_film_list``.

        The year range is applied first, then the country category, then the
        genre.
        """
        films = self.search_for_years(start_year, end_year)
        films = self.search_for_country(country_category, films)
        films = self.search_for_genre(genre, films)
        return create_film_list(films)
Exemplo n.º 49
0
import os
import sys
from imdb import IMDb
import numpy as np
import requests
import webbrowser
from contextlib import closing
from bs4 import BeautifulSoup
from selenium.webdriver import Firefox  # pip install selenium
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
import numpy as np
import urllib.request
import cv2
# Module-level IMDb access object, created once at import time.
ia = IMDb()


def get_image(link):
    """Render *link* in a headless browser, download one of its images and show it.

    The page is loaded with PhantomJS so that script-generated markup is
    present, the fourth ``<img class="pswp__img">`` element is picked, its
    ``src`` is downloaded and the decoded picture is displayed in an OpenCV
    window titled "Image".

    :param link: URL of the page to load
    :raises IndexError: if the page has fewer than four matching images
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get(link)  # load the web page
        innerHTML = driver.execute_script("return document.body.innerHTML")
    finally:
        # Always shut the browser down, even if loading or scripting fails;
        # quit() also ends the driver process, which close() leaves running.
        driver.quit()

    parsed = BeautifulSoup(innerHTML, "html.parser")
    mediastrip = parsed.find_all('img', class_='pswp__img')
    # NOTE(review): index 3 is a fixed position in the gallery markup --
    # confirm it is still the intended image.
    imgtags = mediastrip[3]
    imgurl = imgtags['src']

    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(imgurl) as resp:
        raw = resp.read()
    image = np.asarray(bytearray(raw), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    cv2.imshow("Image", image)
Exemplo n.º 50
0
# Blueprint object for this module.
main = Blueprint('main', __name__)

# local - use local mysql db
local = False

# enable_extra - loads poster and plot overview from tmdb for movie info
enable_extra = True

# posters_on_profile_page - load posters on the profile page
posters_on_profile_page = False

# Base URL for TMDb images (the path ends in the "w342" size segment).
tmdb_img_url = r'https://image.tmdb.org/t/p/w342'

if local:
    # NOTE(review): the database user and password are hard-coded in this
    # URI; consider reading it from configuration or the environment.
    ia = IMDb('s3', 'mysql+mysqldb://may:venom123@localhost/imdb')
else:
    ia = IMDb()


def db_fav_exists(tconst, user_id):
    """
    Tell whether `tconst` is already stored as a favorite of the user with
    id `user_id`.
    """
    favorites = fav.query.filter_by(user_id=user_id).all()
    # int(tconst) is evaluated per stored row, so it is never reached when the
    # user has no favorites yet.
    return any(entry.tconst == int(tconst) for entry in favorites)
Exemplo n.º 51
0
    m = re.search(r'data-tconst="tt(\d+?)">', line)
    if m:
        id = m.group(1)
        result.append(id)
# Download page 3 of the IMDb list and collect the numeric part of every
# data-tconst="tt..." attribute found in it.
top250_url = "https://www.imdb.com/list/ls005197923/?sort=list_order,asc&st_dt=&mode=detail&page=3"
r = requests.get(top250_url)
html = r.text.split("\n")
for line in html:
    line = line.rstrip("\n")
    m = re.search(r'data-tconst="tt(\d+?)">', line)
    if m:
        id = m.group(1)
        result.append(id)
# Drop duplicate ids before looking the titles up.
result = set(result)
print(len(result))
ia = IMDb()
import inspect
#print(inspect.getmembers(ia))
# The output file is opened in append mode, so repeated runs add to it.
with open('somefile.txt', 'a') as the_file:
    for v in result:
        st = []
        x = ia.get_movie(v)
        #  print(x)
        y = x.get('rating')
        z = x.get('title')
        # NOTE(review): unlike .get() above, this lookup presumably raises
        # KeyError for a title without cast data -- confirm.
        ac = x['cast']
        print(y)
        print(z)
        # Only titles with at least 15 cast entries are written out.
        if len(ac) >= 15:
            y = str(y)
            the_file.write(z)
Exemplo n.º 52
0
from bs4 import BeautifulSoup
import requests
import re
from imdb import IMDb
import json
from textblob import TextBlob

ia = IMDb()  # create an imdb instance

url = "https://www.filmsite.org/boxoffice.html"  # page the names of the top grossing movies are scraped from

url_request = requests.get(url)

soup = BeautifulSoup(url_request.content, 'html.parser')  # parse the downloaded page

scrape = soup.find_all("li")  # all <li> tags of the page

data = []
for names in scrape:  # keep just the text of each tag
    data.append(names.text)
# TODO: the collect / filter / clean steps below could be combined into one function
titles = []

for x in data:  # keep only entries containing four consecutive digits (taken to be titles with a year)
    if re.match(r">?.*\d{4}", x, re.DOTALL):
        titles.append(x)

clean = []
for title in titles:  # cleaning the movie title names
    # NOTE(review): the "." in the pattern is unescaped and matches any character.
    title = re.sub(r"Filmsite.org", "", title)
    # strip carriage returns and newlines
    title = re.sub(r"\r|\n", "", title)
Exemplo n.º 53
0
 def __init__(self) -> None:
     """Create the IMDb client, start with an empty film list and call load_file()."""
     self.app = IMDb()
     self.films = []
     # self.save_file()
     self.load_file()
Exemplo n.º 54
0
import pandas as pd

# read the ratings file
df_ratings = pd.read_csv('../data/Personal_Movie_Ratings_Updated.csv')

# keep only the rows that have a value in 'My Rating', renumbering the index
df_ratings = df_ratings[df_ratings['My Rating'].notnull()].reset_index(
    drop=True)

# read the UIDs for movies that do not match search results (UID kept as text)
df_uid = pd.read_csv('../data/Personal_Movies_Missing_UID_Updated.csv',
                     dtype={'UID': str})
# left join on the shared columns; validate enforces a one-to-one match
df_ratings = pd.merge(df_ratings, df_uid, how='left', validate='1:1')

# IMDb access object
ia = IMDb()

# initialize results container
results = {
    'UID': [],
    'My_Rating': [],
    'Budget': [],
    'Cumulative_Worldwide_Gross': [],
    'Production_Company1': [],
    'Synopsis': [],
    'Top_250_Rank': [],
    'Country1': [],
    'Director1': [],
    'Genre1': [],
    'Genre2': [],
    'Genre3': [],
Exemplo n.º 55
0
terms of the GNU General Public License as published by the Free Software Foundation,
either version 3 of the License, or (at your option) any later version.

This script is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this script.
If not, see http://www.gnu.org/licenses/.
"""

from imdb import IMDb
import pandas as pd
import re

# IMDb access object used for the per-movie lookups.
imdb_access = IMDb()
# Input table; it must provide an "IMDB_URL" column.
movie_data = pd.read_csv("movies.csv")

# Grab only the movie number out of the IMDB URL
# (every non-digit character is removed, leaving the digits of the URL).
movie_data["Movie_Number"] = movie_data["IMDB_URL"].apply(
    lambda x: re.sub("[^0-9]", "", x))

with open("film-death-counts-Python.csv", "wb") as out_file:
    out_file.write(
        "Film,Year,Body_Count,MPAA_Rating,Genre,Director,Actors,Length_Minutes,IMDB_Rating\n"
    )

    for movie_entry in movie_data.iterrows():
        # Use a try-catch on the loop to prevent temporary connection-related issues from stopping the scrape
        try:
            movie = imdb_access.get_movie(movie_entry[1]["Movie_Number"])
Exemplo n.º 56
0
    info = soup.find('div', class_='subtext')
    print('Restrição de idade: ' + info.contents[0].strip())
    try:
        print('Duração: ' + info.time.string.strip())
    except AttributeError:
        pass
    print('Géneros: ' + ', '.join([g.string for g in info.findAll('a')[:-1]]))
    print('Data de estreia: ' + info.findAll('a')[-1].string)


# Command-line option dispatch: each flag present in ``ops`` runs its action.
if '-b' in ops:
    build()

if '-s' in ops:
    # Ask for a title, look it up on IMDb and show the first hit's details.
    movie = input('Nome do filme: ')
    ia = IMDb()
    results = ia.search_movie(movie)
    if results:
        mv = results[0]
        URL = ia.get_imdbURL(mv)
        info_movie(URL)
    else:
        # An empty search used to crash with IndexError; report it the same
        # way the '-a' branch does.
        print('Não foram encontrados resultados.')

if '-a' in ops:
    # Pick a movie from the local database, then scrape, clean and analyse
    # its script.
    movies_db = build_movies_db()
    choosen_movie = get_movie_url(movies_db)
    if choosen_movie:
        FULL_SCRIPT = scrap_full_script(choosen_movie)
        FULL_SCRIPT_CLEAN = cleaning_data(FULL_SCRIPT)
        along_script_sent(FULL_SCRIPT_CLEAN)
    else:
        print('Não foram encontrados resultados.')
Exemplo n.º 57
0
from imdb import IMDb

# create an instance of the IMDb class
ia = IMDb()

# get a movie by its IMDb id and print the name of each of its directors
the_matrix = ia.get_movie('0133093')
for director in the_matrix['directors']:
    print(director['name'])

# show all the information keys currently available for the movie
print(sorted(the_matrix.keys()))

# show all information sets that can be fetched for a movie
print(ia.get_movie_infoset())

# update the Movie object with the 'technical' information set
ia.update(the_matrix, ['technical'])
# show which keys were added by the information set
print(the_matrix.infoset2keys['technical'])
# print one of the new keys
print(the_matrix.get('tech'))
Exemplo n.º 58
0
from imdb import IMDb
import sys
import json

# create an instance of the IMDb class
ia = IMDb()

# get the imdb id from the command line; fail with a usage hint instead of an
# IndexError traceback when it is missing
if len(sys.argv) < 2:
    sys.exit("usage: %s <imdb id>" % sys.argv[0])
imdbId = str(sys.argv[1])

# get movie keywords
movie = ia.get_movie(imdbId, info=['keywords'])

# convert the keywords to json; a title without keywords yields "[]".
# print() with a single argument gives the same output on Python 2 and 3.
print(json.dumps(movie.get('keywords', [])))
Exemplo n.º 59
0
import urllib
from imdb import IMDb

# IMDb access object
oa=IMDb()

# fetch the movie with IMDb id 0133093 and print what is stored under 'director'
# NOTE(review): the key name may differ between IMDbPY versions
# ('director' vs 'directors') -- confirm against the installed version.
thematrix=oa.get_movie('0133093')
print(thematrix['director'])
Exemplo n.º 60
0
__author__ = 'abhishekchoudhary'
from imdb import IMDb
ia = IMDb()

# Fetch the movie with IMDb id 0133093 and show its 'director' entry.
# print() with a single argument gives the same output on Python 2 and 3.
the_matrix = ia.get_movie('0133093')
print(the_matrix['director'])

# Print "<person id> <name>" for every person matching the search.
for person in ia.search_person('Salman Khan'):
    print('%s %s' % (person.personID, person['name']))