class CommonMetadataIMDB(object):
    """
    Class for interfacing with imdb

    Thin wrapper that owns a single ``Imdb`` client (``self.imdb``) and
    forwards lookups to it unchanged.
    """

    def __init__(self, cache=True, cache_dir=None):
        """
        Open the connection to imdb.

        cache     -- truthy to enable the client's cache; ``False``/``None``
                     creates a client with default settings.
        cache_dir -- optional cache directory; only passed on when caching
                     is enabled.
        """
        # Truthiness check (not ``is not None``) so cache=False really
        # disables caching instead of silently enabling it.
        if not cache:
            self.imdb = Imdb()
        elif cache_dir is not None:
            self.imdb = Imdb(cache=True, cache_dir=cache_dir)
        else:
            self.imdb = Imdb(cache=True)

    def com_imdb_title_search(self, media_title):
        """
        Search by title; returns the client's ``search_for_title`` result.
        """
        return self.imdb.search_for_title(media_title)

    def com_imdb_id_search(self, media_id):
        """
        Fetch title info by ttid; returns the client's ``get_title_by_id`` result.
        """
        return self.imdb.get_title_by_id(media_id)

    def com_imdb_person_by_id(self, person_id):
        """
        Fetch person info by id; returns the client's ``get_person_by_id`` result.
        """
        return self.imdb.get_person_by_id(person_id)

    def com_imdb_person_images_by_id(self, person_id):
        """
        Fetch person images by id; returns the client's ``get_person_images`` result.
        """
        return self.imdb.get_person_images(person_id)

    def com_imdb_title_review_by_id(self, media_id):
        """
        Fetch the title reviews; returns the client's ``get_title_reviews`` result.
        """
        return self.imdb.get_title_reviews(media_id)
'''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')''' .format( item['tconst'], single_quote(str(item['title'])), item['year'], title.release_date, item['rating'], single_quote(item['image']['url']), single_quote(str(title.plot_outline)), single_quote(str(trailer_url)), )) print("Insert movie:" + new_movie) insert(new_movie) except: continue for actor in title.cast_summary: try: person = imdb.get_person_by_id(actor.imdb_id) new_actor = ( 'INSERT INTO movie_actor VALUES (\'{}\',\'{}\',\'{}\')'.format( actor.imdb_id, actor.name, single_quote(str(person.photo_url)))) new_act = ( 'INSERT INTO movie_act(actorid_id, movieid_id) VALUES (\'{}\',\'{}\')' .format(actor.imdb_id, item['tconst'])) insert(new_act) insert(new_actor) except: continue
class IMDBGraph:
    """Build and query an undirected NetworkX graph of IMDb data.

    Nodes are keyed by display value (movie title, person name, genre,
    year) and carry a ``node`` attribute naming their type. Data is fetched
    through an ``Imdb`` client created with caching enabled.
    """

    def __init__(self, anonymize=True):
        """Create the IMDb client and an empty graph.

        anonymize -- forwarded unchanged to the ``Imdb`` constructor.
        """
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        """Add node ``name`` tagged ``node=nodetype`` unless it already exists.

        An existing node is left untouched, so the first type wins.
        """
        # Membership on the graph itself is a hash lookup; ``nodes()`` is a
        # list on NetworkX 1.x, which made this check O(n) per insert.
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        """Add a new actor/actress to the graph, looked up by IMDb id."""
        actor = self._imdb.get_person_by_id(idname)
        # Tag the node like every other person node in this class.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        """Add a movie node (keyed by title) for the given IMDb title id."""
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading %s", idname)

    def addMovieAndConnect(self, idname):
        """Add a movie plus its year, genres and people, and link them.

        Edges created: movie-genre (no attributes), movie-credit (with
        ``weight``, ``rating`` and ``votes``) and movie-cast (no attributes).
        """
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        # NOTE(review): the year node is created but never linked to the
        # movie -- confirm whether an edge was intended here.
        self._add_node(movie.year, 'year')
        logging.info("Loading %s", idname)

        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(movie.title, genre)

        # A missing rating or vote count (None) counts as 0 in the weight
        # instead of raising TypeError; the raw values are stored as-is.
        weight = (movie.rating or 0) + (movie.votes or 0)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(movie.title, person.name,
                                 weight=weight,
                                 rating=movie.rating,
                                 votes=movie.votes)

        for person in movie.cast_summary:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(movie.title, person.name)

    def addPopular(self, limit=2):
        """Add up to ``limit`` entries from the client's popular shows list."""
        shows = self._imdb.popular_shows()
        # Slicing already clamps to the list length.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        """Remove ``nodename`` from the graph."""
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        """Connect two existing nodes, storing ``prop`` on the edge.

        Raises ``Exception`` if either endpoint is not already in the graph.
        """
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        """Return the result of ``nx.connected_components`` for the graph.

        Previously the result was computed and discarded (returned None).
        """
        return nx.connected_components(self._graph)

    def avg_degree(self):
        """Return the result of ``nx.average_neighbor_degree`` for the graph."""
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        """Return the result of ``nx.average_degree_connectivity`` for the graph."""
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        """Return the result of ``nx.clustering`` for the graph."""
        return nx.clustering(self._graph)

    def get_item(self, item):
        """Return the attribute dict of node ``item``.

        Raises ``KeyError`` if the node is not in the graph.
        """
        # ``G.node`` is the NetworkX 1.x mapping and was removed in 2.4;
        # fall back to ``G.nodes`` when it is missing.
        node_table = getattr(self._graph, 'node', None)
        if node_table is None:
            node_table = self._graph.nodes
        return node_table[item]

    def filter_edges(self, param, func):
        """Yield ``(node, neighbour, value)`` for edges matching a predicate.

        param -- edge attribute name to inspect; edges without it are skipped.
        func  -- predicate called with the attribute value.

        Each undirected edge is visited once from each endpoint.
        """
        # ``G.adj`` exists on both NetworkX 1.x and 2.x, unlike
        # ``adjacency_iter()`` which was removed in 2.0.
        for node, neighbours in self._graph.adj.items():
            for neighbour, attrs in neighbours.items():
                if param not in attrs:
                    continue
                value = attrs[param]
                if func(value):
                    yield (node, neighbour, value)

    def cliques(self):
        """Return the result of ``nx.find_cliques`` for the graph."""
        return nx.find_cliques(self._graph)

    def stat(self):
        """Return basic statistics: node count, edge count and density."""
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph),
        }

    def save(self, outpath):
        """Save the graph to a file (not implemented yet; currently a no-op)."""
        pass
trailer_url = 'None' new_movie = ( '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''.format( item['tconst'], single_quote(str(item['title'])), item['year'], title.release_date, item['rating'], single_quote(item['image']['url']), single_quote(str(title.plot_outline)), single_quote(str(trailer_url)), )) print("Insert movie:" + new_movie) insert(new_movie) except: continue for actor in title.cast_summary: try: person = imdb.get_person_by_id(actor.imdb_id) new_actor = ('INSERT INTO movie_actor VALUES (\'{}\',\'{}\',\'{}\')'.format(actor.imdb_id, actor.name, single_quote( str(person.photo_url)))) new_act = ( 'INSERT INTO movie_act(actorid_id, movieid_id) VALUES (\'{}\',\'{}\')'.format(actor.imdb_id, item['tconst'])) insert(new_act) insert(new_actor) except: continue
elif len(str(id)) == 2: actor_id = 'nm' + '00000' + str(id) elif len(str(id)) == 3: actor_id = 'nm' + '0000' + str(id) elif len(str(id)) == 4: actor_id = 'nm' + '000' + str(id) elif len(str(id)) == 5: actor_id = 'nm' + '00' + str(id) elif len(str(id)) == 6: actor_id = 'nm' + '0' + str(id) elif len(str(id)) == 7: actor_id = 'nm' + str(id) else: print 'Check ID length' try: actor_name = imdb.get_person_by_id(actor_id).name except: print '----------- ERROR -----------' print '' print actor_id, ' is not a valid ID.' print '' continue save_image = str(actor_id) + '.jpg' actor_url = 'http://www.imdb.com/name/' + str(actor_id) actor_imdb_page = urllib.urlopen(actor_url) soup = BeautifulSoup(actor_imdb_page.read()) actor_picture = soup.find('img', {'id' : 'name-poster' } )['src'] actor_born = soup.find('time', {'itemprop' : 'birthDate' } )['datetime'] try: actor_death = soup.find('time', {'itemprop' : 'deathDate' } )['datetime'] age = int(actor_death[0:4]) - int(actor_born[0:4])
class IMDBGraph:
    """Undirected NetworkX graph of IMDb titles, people, genres and years.

    Every node is keyed by its display value and tagged with a ``node``
    attribute holding its type. Lookups go through an ``Imdb`` client that
    is created with caching enabled.
    """

    def __init__(self, anonymize=True):
        """Set up the IMDb client and an empty ``nx.Graph``.

        anonymize -- passed straight through to the ``Imdb`` constructor.
        """
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        """Insert node ``name`` with ``node=nodetype`` if it is not present.

        Nodes that already exist keep their original attributes.
        """
        # Ask the graph directly (hash lookup) rather than scanning the
        # list that ``nodes()`` returns on NetworkX 1.x.
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        """Look up a person by IMDb id and add them as an 'actor' node."""
        actor = self._imdb.get_person_by_id(idname)
        # Use the shared helper so person nodes are typed consistently.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        """Look up a title by IMDb id and add it as a 'movie' node."""
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading %s", idname)

    def addMovieAndConnect(self, idname):
        """Add a title with its year, genres, credits and cast summary.

        Creates movie-genre and movie-cast edges without attributes, and
        movie-credit edges carrying ``weight``, ``rating`` and ``votes``.
        """
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        # NOTE(review): no edge is created between the movie and its year
        # node -- verify whether that omission is deliberate.
        self._add_node(movie.year, 'year')
        logging.info("Loading %s", idname)

        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(movie.title, genre)

        # Treat a missing (None) rating or vote count as 0 when summing so
        # unrated titles no longer raise TypeError; raw values are kept.
        weight = (movie.rating or 0) + (movie.votes or 0)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(movie.title, person.name,
                                 weight=weight,
                                 rating=movie.rating,
                                 votes=movie.votes)

        for person in movie.cast_summary:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(movie.title, person.name)

    def addPopular(self, limit=2):
        """Add at most ``limit`` entries of the client's popular shows."""
        shows = self._imdb.popular_shows()
        # A slice never runs past the end, so no explicit clamp is needed.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        """Delete ``nodename`` from the graph."""
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        """Link two nodes that are already in the graph.

        ``prop`` is stored as the edge's ``prop`` attribute. Raises
        ``Exception`` when either endpoint is missing.
        """
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        """Return what ``nx.connected_components`` yields for the graph.

        The earlier version computed this and dropped it, returning None.
        """
        return nx.connected_components(self._graph)

    def avg_degree(self):
        """Return what ``nx.average_neighbor_degree`` computes for the graph."""
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        """Return what ``nx.average_degree_connectivity`` computes for the graph."""
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        """Return what ``nx.clustering`` computes for the graph."""
        return nx.clustering(self._graph)

    def get_item(self, item):
        """Return the attribute dict stored for node ``item``.

        Raises ``KeyError`` when the node does not exist.
        """
        # Prefer the 1.x ``G.node`` mapping when present; it was removed in
        # NetworkX 2.4, where ``G.nodes`` provides the same lookup.
        node_table = getattr(self._graph, 'node', None)
        if node_table is None:
            node_table = self._graph.nodes
        return node_table[item]

    def filter_edges(self, param, func):
        """Generate ``(node, neighbour, value)`` for edges accepted by ``func``.

        param -- name of the edge attribute to test; edges lacking it are
                 ignored.
        func  -- predicate applied to the attribute value.

        An undirected edge is reported from both of its endpoints.
        """
        # ``adjacency_iter()`` disappeared in NetworkX 2.0; ``G.adj`` is
        # available on both major versions.
        for node, neighbours in self._graph.adj.items():
            for neighbour, attrs in neighbours.items():
                if param not in attrs:
                    continue
                value = attrs[param]
                if func(value):
                    yield (node, neighbour, value)

    def cliques(self):
        """Return what ``nx.find_cliques`` produces for the graph."""
        return nx.find_cliques(self._graph)

    def stat(self):
        """Return a dict with the node count, edge count and density."""
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph),
        }

    def save(self, outpath):
        """Save the graph to a file (placeholder; does nothing yet)."""
        pass
elif len(str(id)) == 2: actor_id = 'nm' + '00000' + str(id) elif len(str(id)) == 3: actor_id = 'nm' + '0000' + str(id) elif len(str(id)) == 4: actor_id = 'nm' + '000' + str(id) elif len(str(id)) == 5: actor_id = 'nm' + '00' + str(id) elif len(str(id)) == 6: actor_id = 'nm' + '0' + str(id) elif len(str(id)) == 7: actor_id = 'nm' + str(id) else: print 'Check ID length' try: actor_name = imdb.get_person_by_id(actor_id).name except: print '----------- ERROR -----------' print '' print actor_id, ' is not a valid ID.' print '' continue save_image = str(actor_id) + '.jpg' actor_url = 'http://www.imdb.com/name/' + str(actor_id) actor_imdb_page = urllib.urlopen(actor_url) soup = BeautifulSoup(actor_imdb_page.read()) actor_picture = soup.find('img', {'id': 'name-poster'})['src'] actor_born = soup.find('time', {'itemprop': 'birthDate'})['datetime'] try: actor_death = soup.find('time', {'itemprop': 'deathDate'})['datetime']
def person_tests():
    """Print a ``('name', <value>)`` tuple for the module-level ``person``, twice.

    Takes no arguments: ``person`` is resolved as a global when the function
    is called, so it must be assigned before the call.
    """
    # Other attributes were probed here at some point and left disabled:
    # firstname, gender, directed, acted, filmography, type, tagline,
    # rating, certification, genres, runtime, writers_summary,
    # directors_summary, creators, cast_summary, credits.
    for _ in range(2):
        print(('name', person.name))


person = imdb.get_person_by_id("nm0000151")
person_tests()


# In[ ]:


# In[ ]: