class CommonMetadataIMDB(object):
    """
    Thin wrapper around an ``Imdb`` client.

    Every ``com_imdb_*`` method forwards its single argument to one method
    of the client stored in ``self.imdb`` and returns the result unchanged.
    """

    def __init__(self, cache=True, cache_dir=None):
        """
        Create the underlying ``Imdb`` client.

        :param cache: truthy builds the client with ``cache=True``; a falsy
            value (``False`` or ``None``) builds it with no arguments.
        :param cache_dir: optional value forwarded as ``cache_dir``; only
            used when caching is enabled.
        """
        # Test truthiness rather than "is not None": with the old check an
        # explicit cache=False still produced a caching client.
        client_kwargs = {}
        if cache:
            client_kwargs['cache'] = True
            if cache_dir is not None:
                client_kwargs['cache_dir'] = cache_dir
        self.imdb = Imdb(**client_kwargs)

    def com_imdb_title_search(self, media_title):
        """
        Return ``search_for_title(media_title)`` from the client.
        """
        return self.imdb.search_for_title(media_title)

    def com_imdb_id_search(self, media_id):
        """
        Return ``get_title_by_id(media_id)`` from the client (title id lookup).
        """
        return self.imdb.get_title_by_id(media_id)

    def com_imdb_person_by_id(self, person_id):
        """
        Return ``get_person_by_id(person_id)`` from the client.
        """
        return self.imdb.get_person_by_id(person_id)

    def com_imdb_person_images_by_id(self, person_id):
        """
        Return ``get_person_images(person_id)`` from the client.
        """
        return self.imdb.get_person_images(person_id)

    def com_imdb_title_review_by_id(self, media_id):
        """
        Return ``get_title_reviews(media_id)`` from the client.
        """
        return self.imdb.get_title_reviews(media_id)
Пример #2
0
            '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''
            .format(
                item['tconst'],
                single_quote(str(item['title'])),
                item['year'],
                title.release_date,
                item['rating'],
                single_quote(item['image']['url']),
                single_quote(str(title.plot_outline)),
                single_quote(str(trailer_url)),
            ))
        print("Insert movie:" + new_movie)
        insert(new_movie)
    except:
        continue

    # Store every cast member of the current title: one movie_actor row for
    # the person and one movie_act row linking the person to the title.
    # NOTE(review): the statements are assembled by string formatting; move
    # to parameterized queries if `insert` can accept bound parameters.
    for actor in title.cast_summary:
        try:
            person = imdb.get_person_by_id(actor.imdb_id)
            # The name goes through single_quote like every other text field
            # (presumably it escapes quotes - confirm); unescaped, a name
            # containing an apostrophe would end the SQL literal early.
            new_actor = (
                'INSERT INTO movie_actor VALUES (\'{}\',\'{}\',\'{}\')'.format(
                    actor.imdb_id, single_quote(actor.name),
                    single_quote(str(person.photo_url))))
            new_act = (
                'INSERT INTO movie_act(actorid_id, movieid_id) VALUES (\'{}\',\'{}\')'
                .format(actor.imdb_id, item['tconst']))
            # The link row is written first, as before, so it is still
            # attempted when the actor insert is the statement that fails.
            insert(new_act)
            insert(new_actor)
        except Exception:
            # Best effort: a failure on one cast member skips only that
            # member. Unlike a bare except, Ctrl-C and SystemExit propagate.
            continue
Пример #3
0
class IMDBGraph:
    ''' Undirected networkx graph built from IMDb lookups.

    Nodes are keyed by display value (title, person name, genre, year);
    nodes created through ``_add_node`` carry a ``node`` attribute naming
    their type.
    '''

    def __init__(self, anonymize=True):
        ''' Create the IMDb client (caching enabled) and an empty graph.

        anonymize -- forwarded unchanged to ``Imdb``.
        '''
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add node ``name`` tagged ``node=nodetype`` unless it already exists
        '''
        # Ask the graph itself: that is a hash lookup, whereas nodes()
        # returns a list in networkx 1.x and makes each insert a linear scan.
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' Add a new actor/actress to the graph, looked up by IMDb id
        '''
        actor = self._imdb.get_person_by_id(idname)
        # Go through _add_node so the person is tagged like the people
        # added by addMovieAndConnect.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        ''' Add the title with id ``idname`` as a 'movie' node (no edges)
        '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading %s", idname)

    def addMovieAndConnect(self, idname):
        ''' Add a title plus its year, genres and people, with edges.

        Genre and cast_summary edges carry no attributes; edges to people
        from ``movie.credits`` carry ``rating``, ``votes`` and
        ``weight = rating + votes``.
        '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        # The year becomes a node but no edge to it is created here.
        self._add_node(movie.year, 'year')
        logging.info("Loading %s", idname)
        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(movie.title, genre)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(movie.title,
                                 person.name,
                                 weight=movie.rating + movie.votes,
                                 rating=movie.rating,
                                 votes=movie.votes)
        for person in movie.cast_summary:
            self._add_node(person.name, "actor")
            self._graph.add_edge(movie.title, person.name)

    def addPopular(self, limit=2):
        ''' Add the first ``limit`` entries of ``popular_shows()`` via addMovie
        '''
        shows = self._imdb.popular_shows()
        # Slicing already stops at the end of the list, so no clamp is needed.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        ''' Remove ``nodename`` from the graph
        '''
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Connect two existing nodes, storing ``prop`` on the edge.

        Raises Exception when either endpoint is not in the graph.
        '''
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return ``(connected components, node degrees)`` from networkx.

        Both values used to be computed and then dropped, so callers
        always received None.
        '''
        comp = nx.connected_components(self._graph)
        degree = nx.degree(self._graph)
        return comp, degree

    def avg_degree(self):
        ''' Return the result of nx.average_neighbor_degree for the graph
        '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        ''' Return the result of nx.average_degree_connectivity for the graph
        '''
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Return the result of nx.clustering for the graph.

        This is the general nx.clustering call, not the bipartite variant.
        '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Return the attribute dict of node ``item``
        '''
        nodes = self._graph.nodes
        # networkx 2.x: ``nodes`` is a subscriptable view. networkx 1.x:
        # ``nodes`` is a plain method and the dict lives on ``node``.
        if hasattr(nodes, '__getitem__'):
            return nodes[item]
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield ``(node, neighbor, value)`` for edges whose attribute
        ``param`` satisfies ``func``.

        The adjacency is walked from both endpoints, so an edge between two
        distinct nodes is reported once per direction.
        '''
        # ``adj`` is available in both networkx 1.x and 2.x, unlike
        # adjacency_iter() which only exists in 1.x.
        for n, nbrs in self._graph.adj.items():
            for nbr, attr in nbrs.items():
                if param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' Return the cliques reported by nx.find_cliques
        '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph: node count, edge count, density
        '''
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph)
        }

    def save(self, outpath):
        ''' Placeholder for saving the graph to a file.

        Not implemented: ``outpath`` is ignored and nothing is written.
        '''
        pass
Пример #4
0
            trailer_url = 'None'
        new_movie = (
            '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''.format(
                item['tconst'],
                single_quote(str(item['title'])),
                item['year'],
                title.release_date,
                item['rating'],
                single_quote(item['image']['url']),
                single_quote(str(title.plot_outline)),
                single_quote(str(trailer_url)),
            ))
        print("Insert movie:" + new_movie)
        insert(new_movie)
    except:
        continue

    # Store every cast member of the current title: one movie_actor row for
    # the person and one movie_act row linking the person to the title.
    # NOTE(review): the statements are assembled by string formatting; move
    # to parameterized queries if `insert` can accept bound parameters.
    for actor in title.cast_summary:
        try:
            person = imdb.get_person_by_id(actor.imdb_id)
            # The name goes through single_quote like every other text field
            # (presumably it escapes quotes - confirm); unescaped, a name
            # containing an apostrophe would end the SQL literal early.
            new_actor = (
                'INSERT INTO movie_actor VALUES (\'{}\',\'{}\',\'{}\')'.format(
                    actor.imdb_id,
                    single_quote(actor.name),
                    single_quote(str(person.photo_url))))
            new_act = (
                'INSERT INTO movie_act(actorid_id, movieid_id) VALUES (\'{}\',\'{}\')'.format(
                    actor.imdb_id, item['tconst']))
            # The link row is written first, as before, so it is still
            # attempted when the actor insert is the statement that fails.
            insert(new_act)
            insert(new_actor)
        except Exception:
            # Best effort: a failure on one cast member skips only that
            # member. Unlike a bare except, Ctrl-C and SystemExit propagate.
            continue
Пример #5
0
		elif len(str(id)) == 2:
			actor_id = 'nm' + '00000' + str(id)
		elif len(str(id)) == 3:
			actor_id = 'nm' + '0000' + str(id)
		elif len(str(id)) == 4:
			actor_id = 'nm' + '000' + str(id)
		elif len(str(id)) == 5:
			actor_id = 'nm' + '00' + str(id)
		elif len(str(id)) == 6:
			actor_id = 'nm' + '0' + str(id)
		elif len(str(id)) == 7:
			actor_id = 'nm' + str(id)
		else:
			print 'Check ID length'
		# Check the id with the IMDb client: any exception from the lookup is
		# reported as an invalid id and the enclosing loop moves on.
		# actor_name is not used again in the visible part of this snippet.
		try:
			actor_name = imdb.get_person_by_id(actor_id).name
		except:
			print '----------- ERROR -----------'
			print ''
			print actor_id, ' is not a valid ID.'
			print ''
			continue 
		# File name for the portrait, derived from the id.
		save_image = str(actor_id) + '.jpg'
		# Fetch and parse the person's public IMDb page.
		actor_url = 'http://www.imdb.com/name/' + str(actor_id)
		actor_imdb_page = urllib.urlopen(actor_url)
		soup = BeautifulSoup(actor_imdb_page.read())
		# NOTE(review): soup.find returns None when nothing matches, so these
		# two subscripts raise TypeError on pages without a poster image or a
		# birth date; neither lookup is guarded here.
		actor_picture = soup.find('img', {'id' : 'name-poster' } )['src']
		actor_born = soup.find('time', {'itemprop' : 'birthDate' } )['datetime']
		try:
			actor_death = soup.find('time', {'itemprop' : 'deathDate' } )['datetime']
			age = int(actor_death[0:4]) - int(actor_born[0:4])
Пример #6
0
class IMDBGraph:
    ''' Undirected networkx graph built from IMDb lookups.

    Nodes are keyed by display value (title, person name, genre, year);
    nodes created through ``_add_node`` carry a ``node`` attribute naming
    their type.
    '''

    def __init__(self, anonymize=True):
        ''' Create the IMDb client (caching enabled) and an empty graph.

        anonymize -- forwarded unchanged to ``Imdb``.
        '''
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add node ``name`` tagged ``node=nodetype`` unless it already exists
        '''
        # Ask the graph itself: that is a hash lookup, whereas nodes()
        # returns a list in networkx 1.x and makes each insert a linear scan.
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' Add a new actor/actress to the graph, looked up by IMDb id
        '''
        actor = self._imdb.get_person_by_id(idname)
        # Go through _add_node so the person is tagged like the people
        # added by addMovieAndConnect.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        ''' Add the title with id ``idname`` as a 'movie' node (no edges)
        '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading %s", idname)

    def addMovieAndConnect(self, idname):
        ''' Add a title plus its year, genres and people, with edges.

        Genre and cast_summary edges carry no attributes; edges to people
        from ``movie.credits`` carry ``rating``, ``votes`` and
        ``weight = rating + votes``.
        '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        # The year becomes a node but no edge to it is created here.
        self._add_node(movie.year, 'year')
        logging.info("Loading %s", idname)
        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(movie.title, genre)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(movie.title, person.name, weight=movie.rating + movie.votes, rating=movie.rating, votes=movie.votes)
        for person in movie.cast_summary:
            self._add_node(person.name, "actor")
            self._graph.add_edge(movie.title, person.name)

    def addPopular(self, limit=2):
        ''' Add the first ``limit`` entries of ``popular_shows()`` via addMovie
        '''
        shows = self._imdb.popular_shows()
        # Slicing already stops at the end of the list, so no clamp is needed.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        ''' Remove ``nodename`` from the graph
        '''
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Connect two existing nodes, storing ``prop`` on the edge.

        Raises Exception when either endpoint is not in the graph.
        '''
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return ``(connected components, node degrees)`` from networkx.

        Both values used to be computed and then dropped, so callers
        always received None.
        '''
        comp = nx.connected_components(self._graph)
        degree = nx.degree(self._graph)
        return comp, degree

    def avg_degree(self):
        ''' Return the result of nx.average_neighbor_degree for the graph
        '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        ''' Return the result of nx.average_degree_connectivity for the graph
        '''
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Return the result of nx.clustering for the graph.

        This is the general nx.clustering call, not the bipartite variant.
        '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Return the attribute dict of node ``item``
        '''
        nodes = self._graph.nodes
        # networkx 2.x: ``nodes`` is a subscriptable view. networkx 1.x:
        # ``nodes`` is a plain method and the dict lives on ``node``.
        if hasattr(nodes, '__getitem__'):
            return nodes[item]
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield ``(node, neighbor, value)`` for edges whose attribute
        ``param`` satisfies ``func``.

        The adjacency is walked from both endpoints, so an edge between two
        distinct nodes is reported once per direction.
        '''
        # ``adj`` is available in both networkx 1.x and 2.x, unlike
        # adjacency_iter() which only exists in 1.x.
        for n, nbrs in self._graph.adj.items():
            for nbr, attr in nbrs.items():
                if param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' Return the cliques reported by nx.find_cliques
        '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph: node count, edge count, density
        '''
        return {'nodes': self._graph.number_of_nodes(), 'edges': self._graph.number_of_edges(), 'density': nx.density(self._graph)}

    def save(self, outpath):
        ''' Placeholder for saving the graph to a file.

        Not implemented: ``outpath`` is ignored and nothing is written.
        '''
        pass
Пример #7
0
 elif len(str(id)) == 2:
     actor_id = 'nm' + '00000' + str(id)
 elif len(str(id)) == 3:
     actor_id = 'nm' + '0000' + str(id)
 elif len(str(id)) == 4:
     actor_id = 'nm' + '000' + str(id)
 elif len(str(id)) == 5:
     actor_id = 'nm' + '00' + str(id)
 elif len(str(id)) == 6:
     actor_id = 'nm' + '0' + str(id)
 elif len(str(id)) == 7:
     actor_id = 'nm' + str(id)
 else:
     print 'Check ID length'
 # Check the id with the IMDb client: any exception from the lookup is
 # reported as an invalid id and the enclosing loop moves on.
 # actor_name is not used again in the visible part of this snippet.
 try:
     actor_name = imdb.get_person_by_id(actor_id).name
 except:
     print '----------- ERROR -----------'
     print ''
     print actor_id, ' is not a valid ID.'
     print ''
     continue
 # File name for the portrait, derived from the id.
 save_image = str(actor_id) + '.jpg'
 # Fetch and parse the person's public IMDb page.
 actor_url = 'http://www.imdb.com/name/' + str(actor_id)
 actor_imdb_page = urllib.urlopen(actor_url)
 soup = BeautifulSoup(actor_imdb_page.read())
 # NOTE(review): soup.find returns None when nothing matches, so these two
 # subscripts raise TypeError on pages without a poster image or a birth
 # date; neither lookup is guarded here.
 actor_picture = soup.find('img', {'id': 'name-poster'})['src']
 actor_born = soup.find('time', {'itemprop': 'birthDate'})['datetime']
 try:
     actor_death = soup.find('time',
                             {'itemprop': 'deathDate'})['datetime']
Пример #8
0
def person_tests():
    """Print the ``('name', person.name)`` tuple for the global ``person``.

    Reads the module-level ``person``; takes no arguments and returns None.
    NOTE(review): the row is printed twice, exactly as before - the repeat
    looks like a leftover; confirm before removing it.
    """
    for _ in range(2):
        print(('name', person.name))


#    print(('firstname',person.firstname))
#    print(('gender',person.gender))
#print(('directed',person.directed))
#print(('acted',person.acted))
#print(('filmography', person.filmography))
#print(('type', person.type))
#print(('tagline', person.tagline))
#print(('rating', person.rating))
#print(('certification', person.certification))
#print(('genres', person.genres))
#print(('runtime', person.runtime))
#print(('writers summary', person.writers_summary))
#print(('directors summary', person.directors_summary))
#print(('creators', person.creators))
#print(('cast summary', person.cast_summary))
#print(('full credits', person.credits))
#print(('cert', person.certification))

# Fetch one person record by id and run the printout on it. `person` has to
# be bound at module level because person_tests() reads it as a global.
person = imdb.get_person_by_id("nm0000151")
person_tests()

# In[ ]:

# In[ ]: