Exemplo n.º 1
0
def partition_components(decomposed):
    """Group actors, movies and years by their dominant latent component.

    ``decomposed[0]``, ``decomposed[1]`` and ``decomposed[2]`` are each
    iterated row by row; a row is assigned to partition
    ``np.argmax(row) + 1``. Presumably these are the actor, movie and year
    factor matrices of a rank-5 tensor decomposition -- confirm against the
    caller.

    The partitions are printed through ``util.print_partition`` and written
    through ``util.write_partition_output_file`` to the module-level
    ``output_file``. Nothing is returned.

    Raises:
        KeyError: if a row's dominant component falls outside 1..5.
        IndexError: if a factor has more rows than there are labels.
    """
    imdb_actor_info = util.read_imdb_actor_info()
    mlmovies = util.read_mlmovies()

    # One label per distinct id, in first-occurrence order. That is the same
    # order ``.unique()`` yields, so label j lines up with the j-th distinct
    # id even if the tables contain repeated ids.
    actor_list = imdb_actor_info.drop_duplicates(subset='id')['name'].tolist()
    movies_list = mlmovies.drop_duplicates(
        subset='movieid')['moviename'].tolist()
    year_list = mlmovies.year.unique()

    # Partitions are numbered 1..5 and always present, even when empty.
    partitions = {
        number: {'actor': [], 'movie': [], 'year': []}
        for number in range(1, 6)
    }

    labelled_factors = (
        ('actor', decomposed[0], actor_list),
        ('movie', decomposed[1], movies_list),
        ('year', decomposed[2], year_list),
    )
    for kind, factor, labels in labelled_factors:
        for j, vec in enumerate(factor):
            # argmax is 0-based, partition numbers are 1-based.
            partition_num = np.argmax(vec) + 1
            partitions[partition_num][kind].append(labels[j])

    util.print_partition(partitions)
    util.write_partition_output_file(partitions, output_file)
def main():
    """Print and save the latent actor concepts of a genre found by PCA.

    The genre is read from ``sys.argv[1]``. A 4-component ``PCA`` is fitted on
    the matrix returned by ``util.get_tf_idf_matrix(genre)``, whose column
    labels are matched against the ``id`` column of the actor table. Each
    concept is a list of ``(actor_name, weight)`` tuples sorted by descending
    absolute weight.

    The concepts are passed to ``util.print_output`` and
    ``util.write_output_file`` (using the module-level ``output_file``).
    If no genre argument is given, a message is printed and nothing else
    happens.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4
    imdb_actor_info = util.read_imdb_actor_info()

    tf_idf_matrix = util.get_tf_idf_matrix(genre)

    # Resolve names in the matrix's own column order. Filtering the actor
    # table with ``isin`` would return names in the table's row order, which
    # need not match the order of the PCA weights below.
    name_by_id = dict(zip(imdb_actor_info['id'], imdb_actor_info['name']))
    actor_list = [
        # Fall back to the raw id so an unknown actor cannot shift every
        # following name by one position.
        name_by_id.get(actor_id, str(actor_id))
        for actor_id in tf_idf_matrix.columns.values
    ]

    pca = PCA(n_components=no_of_components)
    pca.fit(tf_idf_matrix)

    concepts = []
    for i in range(no_of_components):
        # components_[i] holds one weight per input column.
        concept = sorted(
            zip(actor_list, pca.components_[i]),
            key=lambda tup: abs(tup[1]),
            reverse=True,
        )
        concepts.append(concept)
    util.print_output(genre, concepts)
    util.write_output_file(genre, concepts, output_file)
Exemplo n.º 3
0
def main():
    """Print and save the actors most similar to a movie they are not in.

    The movie id is read from ``sys.argv[1]`` and converted to ``int``. Every
    row of ``util.get_actor_tf_idf_matrix()`` is scored by its dot product
    with the movie's row of ``util.get_movie_tf_idf_matrix()``. Actors listed
    for that movie in ``util.read_movie_actor()`` are excluded. The first
    ``no_of_actors`` results (module-level setting), as
    ``(actor_id, actor_name, score)`` tuples in descending score order, go to
    ``util.print_output`` and ``util.write_output_file`` (module-level
    ``output_file``).

    If no movie id argument is given, a message is printed and nothing else
    happens.

    Raises:
        ValueError: if the argument is not an integer.
        IndexError: if the movie id is not present in the movie table.
        KeyError: if the movie id is missing from the movie matrix, or a
            scored actor id has no entry in the actor table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    movieid = int(sys.argv[1])
    mlmovies = util.read_mlmovies()
    movie_actors = util.read_movie_actor()
    imdb_actor_info = util.read_imdb_actor_info()

    input_movie = mlmovies[mlmovies['movieid'] == movieid]['moviename'].values[0]
    # A boolean mask keeps the ids' dtype and does not discard rows merely
    # because some unrelated column holds NaN.
    actors_of_movie = movie_actors.loc[
        movie_actors['movieid'] == movieid, 'actorid'].unique()

    movie_matrix = util.get_movie_tf_idf_matrix()
    actor_matrix = util.get_actor_tf_idf_matrix()
    # Single-column frame; the column is labelled with the movie id.
    input_movie_vector = pd.DataFrame(movie_matrix.loc[movieid])
    similarity_matrix = actor_matrix.dot(input_movie_vector)
    similarity_matrix = similarity_matrix[
        ~similarity_matrix.index.isin(actors_of_movie)]

    # Build the id -> name lookup once rather than scanning the actor table
    # for every scored row.
    name_by_id = dict(zip(imdb_actor_info['id'], imdb_actor_info['name']))
    actors = [
        (actor_id, name_by_id[actor_id], score)
        for actor_id, score in similarity_matrix[movieid].items()
    ]
    actors.sort(key=lambda tup: tup[2], reverse=True)

    util.print_output(movieid, input_movie, actors[:no_of_actors])
    util.write_output_file(movieid, input_movie, actors[:no_of_actors], output_file)
Exemplo n.º 4
0
def main():
    """Print and save the actors most similar to a given actor.

    The actor id is read from ``sys.argv[1]`` and converted to ``int``. Every
    other row of ``util.get_tf_idf_matrix()`` is scored against the input
    actor's row with ``1 - cosine(row, input_row)``. The first
    ``no_of_actors`` results (module-level setting), as
    ``(actor_id, actor_name, similarity)`` tuples in descending similarity
    order, go to ``util.print_output`` and ``util.write_output_file``
    (module-level ``output_file``).

    If no actor id argument is given, a message is printed and nothing else
    happens.

    Raises:
        ValueError: if the argument is not an integer.
        IndexError: if the actor id is not present in the actor table.
        KeyError: if the actor id is missing from the matrix, or a matrix row
            has no entry in the actor table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()
    input_actor = imdb_actor_info[imdb_actor_info['id'] ==
                                  actorid]['name'].values[0]

    tf_idf_matrix = util.get_tf_idf_matrix()
    input_actor_tf_idf = tf_idf_matrix.loc[actorid]

    # Build the id -> name lookup once rather than scanning the actor table
    # for every matrix row.
    name_by_id = dict(zip(imdb_actor_info['id'], imdb_actor_info['name']))

    # The input actor is skipped up front; it never appears in the output.
    other_actors = [
        (index, name_by_id[index], 1 - cosine(row, input_actor_tf_idf))
        for index, row in tf_idf_matrix.iterrows()
        if index != actorid
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    util.print_output(actorid, input_actor, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor, other_actors[:no_of_actors],
                           output_file)
def main():
    """Print and save the actors most similar to a given actor in SVD space.

    The actor id is read from ``sys.argv[1]`` and converted to ``int``.
    ``util.get_tf_idf_matrix()`` is reduced to ``no_of_components`` columns
    (module-level setting) with ``SVD``. Every other actor's reduced row is
    then scored against the input actor's reduced row with
    ``1 - cosine(row, input_row)``. The first ``no_of_actors`` results, as
    ``(actor_id, actor_name, similarity)`` tuples in descending similarity
    order, go to ``util.print_output`` and ``util.write_output_file``
    (module-level ``output_file``).

    If no actor id argument is given, a message is printed and nothing else
    happens.

    Raises:
        ValueError: if the argument is not an integer.
        IndexError: if the actor id is not present in the actor table.
        KeyError: if the actor id is missing from the matrix, or a matrix row
            has no entry in the actor table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()
    input_actor_name = imdb_actor_info[imdb_actor_info['id'] == actorid]['name'].values[0]

    tf_idf_matrix = util.get_tf_idf_matrix()

    svd = SVD(n_components=no_of_components)
    svd.fit(tf_idf_matrix)
    # Keep the actor ids as the index so reduced rows can be looked up by id.
    svd_df = pd.DataFrame(svd.transform(tf_idf_matrix), index=tf_idf_matrix.index)

    input_actor_row = svd_df.loc[actorid]

    # Build the id -> name lookup once rather than scanning the actor table
    # for every reduced row.
    name_by_id = dict(zip(imdb_actor_info['id'], imdb_actor_info['name']))

    # The input actor is skipped up front; it never appears in the output.
    other_actors = [
        (index, name_by_id[index], 1 - cosine(row, input_actor_row))
        for index, row in svd_df.iterrows()
        if index != actorid
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)
    util.print_output(actorid, input_actor_name, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor_name, other_actors[:no_of_actors], output_file)
Exemplo n.º 6
0
def main():
    """Build the binary actor x movie x year tensor and pickle it.

    The tensor is a nested list indexed ``[actor][movie][year]``, following
    the order of the distinct actor ids, movie ids and years in the source
    tables. A cell is ``1.0`` when the inner join of the movie-actor table
    with the movie table on ``movieid`` contains a row with that actor, movie
    and year, and ``0.0`` otherwise.

    The result is written to ``actor_movie_year_tensor.pkl`` in the current
    working directory. Nothing is returned.
    """
    mlmovies = util.read_mlmovies()
    imdb_actor_info = util.read_imdb_actor_info()
    movie_actor = util.read_movie_actor()
    movies_list = mlmovies.movieid.unique()
    year_list = mlmovies.year.unique()
    actor_list = imdb_actor_info.id.unique()

    actor_movie_year_grouped = pd.merge(
        movie_actor, mlmovies, on='movieid', how='inner')

    # Collect the observed (actor, movie, year) triples once. Membership in a
    # set is O(1), whereas filtering the merged frame for every cell costs a
    # full scan per cell.
    observed = set(zip(
        actor_movie_year_grouped.actorid,
        actor_movie_year_grouped.movieid,
        actor_movie_year_grouped.year,
    ))

    actor_movie_year_tensor = []
    for actor in actor_list:
        movie_year_matrix = [
            [1.0 if (actor, movie, year) in observed else 0.0
             for year in year_list]
            for movie in movies_list
        ]
        actor_movie_year_tensor.append(movie_year_matrix)

    # The context manager guarantees the file is flushed and closed.
    with open("actor_movie_year_tensor.pkl", "wb") as tensor_file:
        cPickle.dump(actor_movie_year_tensor, tensor_file)
Exemplo n.º 7
0
def latent_actor_semantics(actor_matrix):
    """Rank actors within each latent component and report the result.

    For each of the first ``no_of_components`` rows of the transposed
    ``actor_matrix`` (module-level setting), every entry is paired with the
    actor name at the same position in the actor table. The pairs are sorted
    by descending absolute value. The list of ranked concepts is handed to
    ``util.print_output`` and ``util.write_output_file`` (module-level
    ``output_file``) under the label ``'Actor'``. Nothing is returned.
    """
    actor_info = util.read_imdb_actor_info()
    known_ids = actor_info.id.unique()
    id_mask = actor_info['id'].isin(known_ids)
    names = actor_info[id_mask]['name'].tolist()

    concepts = []
    for component_idx in range(no_of_components):
        weights = np.transpose(actor_matrix)[component_idx]
        ranked = sorted(
            ((names[pos], weight) for pos, weight in enumerate(weights)),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        concepts.append(ranked)

    util.print_output(concepts, 'Actor')
    util.write_output_file(concepts, output_file, 'Actor')