def main():
    """Report the latent actor concepts of a genre using PCA.

    Reads the genre from ``sys.argv[1]``, fetches the genre's TF-IDF matrix
    from ``util.get_tf_idf_matrix`` (its columns are matched against the
    ``id`` column of the actor info table), fits a 4-component PCA on it and
    builds, for every component, a list of ``(actor_name, weight)`` tuples
    ordered by decreasing absolute weight. The result is handed to
    ``util.print_output`` and ``util.write_output_file``.

    Prints a message and returns early when no genre argument is given.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4
    imdb_actor_info = util.read_imdb_actor_info()

    tf_idf_matrix = util.get_tf_idf_matrix(genre)

    # Build the id -> name map once (first occurrence wins) and resolve the
    # names in *column order*.  Filtering the info table with ``isin`` keeps
    # the table's own row order, which would pair component weights with the
    # wrong actors whenever that order differs from the matrix columns.
    name_by_id = {}
    for actor_id, actor_name in zip(imdb_actor_info['id'],
                                    imdb_actor_info['name']):
        name_by_id.setdefault(actor_id, actor_name)
    # An id without a known name is shown as the id itself so that the
    # remaining labels stay aligned with their columns.
    actor_list = [name_by_id.get(actor_id, str(actor_id))
                  for actor_id in tf_idf_matrix.columns.values]

    pca = PCA(n_components=no_of_components)
    pca.fit(tf_idf_matrix)

    concepts = []
    for weights in pca.components_[:no_of_components]:
        # Rank by magnitude: strongly negative loadings matter as much as
        # strongly positive ones.
        concept = sorted(zip(actor_list, weights),
                         key=lambda pair: abs(pair[1]), reverse=True)
        concepts.append(concept)

    util.print_output(genre, concepts)
    # NOTE(review): ``output_file`` is not defined in this function;
    # presumably a module-level constant - confirm.
    util.write_output_file(genre, concepts, output_file)
예제 #2
0
def main():
    """Rank the actors most similar to a given actor by TF-IDF cosine.

    Reads the actor id from ``sys.argv[1]``, loads the TF-IDF matrix from
    ``util.get_tf_idf_matrix`` (indexed by actor id) and scores every other
    row as ``1 - cosine(row, input_row)``. The ``(id, name, similarity)``
    tuples are sorted by decreasing similarity and the first
    ``no_of_actors`` of them are passed to ``util.print_output`` and
    ``util.write_output_file``.

    Prints a message and returns early when the argument is missing or the
    id is not present in the actor info table.

    Raises:
        ValueError: if ``sys.argv[1]`` is not an integer.
        KeyError: if the actor id is not in the TF-IDF matrix index.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()

    # One pass to build the id -> name map (first occurrence wins) instead
    # of re-scanning the whole info table for every row of the matrix.
    name_by_id = {}
    for actor_id, actor_name in zip(imdb_actor_info['id'],
                                    imdb_actor_info['name']):
        name_by_id.setdefault(actor_id, actor_name)

    if actorid not in name_by_id:
        print('No actor found with id {}.'.format(actorid))
        return
    input_actor = name_by_id[actorid]

    tf_idf_matrix = util.get_tf_idf_matrix()
    input_actor_tf_idf = tf_idf_matrix.loc[actorid]

    # The input actor is skipped up front; comparing it with itself would
    # only be discarded afterwards.  Rows without a known name are labelled
    # with their id.
    other_actors = [
        (index, name_by_id.get(index, str(index)),
         1 - cosine(row, input_actor_tf_idf))
        for index, row in tf_idf_matrix.iterrows()
        if index != actorid
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    # NOTE(review): ``no_of_actors`` and ``output_file`` are not defined in
    # this function; presumably module-level constants - confirm.
    top_actors = other_actors[:no_of_actors]
    util.print_output(actorid, input_actor, top_actors)
    util.write_output_file(actorid, input_actor, top_actors, output_file)
예제 #3
0
def main():
    """Report the latent tag concepts of a genre using truncated SVD.

    Reads the genre from ``sys.argv[1]``, fetches the genre's TF-IDF matrix
    from ``util.get_tf_idf_matrix`` (its columns are matched against the
    ``tagId`` column of the genome tags table), fits a 4-component
    ``TruncatedSVD`` on it and builds, for every component, a list of
    ``(tag, weight)`` tuples ordered by decreasing absolute weight. The
    result is handed to ``util.print_output`` and ``util.write_output_file``.

    Prints a message and returns early when no genre argument is given.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4
    genome_tags = util.read_genome_tags()

    tf_idf_matrix = util.get_tf_idf_matrix(genre)

    # Build the tagId -> tag map once (first occurrence wins) and resolve
    # the labels in *column order*.  Filtering the tags table with ``isin``
    # keeps the table's own row order, which would pair component weights
    # with the wrong tags whenever that order differs from the columns.
    tag_by_id = {}
    for tag_id, tag in zip(genome_tags['tagId'], genome_tags['tag']):
        tag_by_id.setdefault(tag_id, tag)
    # A tag id without a known label is shown as the id itself so that the
    # remaining labels stay aligned with their columns.
    tag_list = [tag_by_id.get(tag_id, str(tag_id))
                for tag_id in tf_idf_matrix.columns.values]

    # This statement was indented with spaces inside a tab-indented body,
    # which Python 3 rejects with a TabError; indentation is now uniform.
    svd = TruncatedSVD(n_components=no_of_components, n_iter=100,
                       random_state=None)
    svd.fit(tf_idf_matrix)

    concepts = []
    for weights in svd.components_[:no_of_components]:
        # Rank by magnitude: strongly negative loadings matter as much as
        # strongly positive ones.
        concept = sorted(zip(tag_list, weights),
                         key=lambda pair: abs(pair[1]), reverse=True)
        concepts.append(concept)

    util.print_output(genre, concepts)
    # NOTE(review): ``output_file`` is not defined in this function;
    # presumably a module-level constant - confirm.
    util.write_output_file(genre, concepts, output_file)
def main():
    """Rank the actors most similar to a given actor in SVD space.

    Reads the actor id from ``sys.argv[1]``, loads the TF-IDF matrix from
    ``util.get_tf_idf_matrix`` (indexed by actor id), projects it with
    ``SVD(n_components=no_of_components)`` and scores every other projected
    row as ``1 - cosine(row, input_row)``. The ``(id, name, similarity)``
    tuples are sorted by decreasing similarity and the first
    ``no_of_actors`` of them are passed to ``util.print_output`` and
    ``util.write_output_file``.

    Prints a message and returns early when the argument is missing or the
    id is not present in the actor info table.

    Raises:
        ValueError: if ``sys.argv[1]`` is not an integer.
        KeyError: if the actor id is not in the TF-IDF matrix index.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()

    # One pass to build the id -> name map (first occurrence wins) instead
    # of re-scanning the whole info table for every row of the matrix.
    name_by_id = {}
    for actor_id, actor_name in zip(imdb_actor_info['id'],
                                    imdb_actor_info['name']):
        name_by_id.setdefault(actor_id, actor_name)

    if actorid not in name_by_id:
        print('No actor found with id {}.'.format(actorid))
        return
    input_actor_name = name_by_id[actorid]

    tf_idf_matrix = util.get_tf_idf_matrix()

    # NOTE(review): ``SVD``, ``no_of_components``, ``no_of_actors`` and
    # ``output_file`` are not defined in this function; presumably
    # module-level names - confirm.
    svd = SVD(n_components=no_of_components)
    svd.fit(tf_idf_matrix)
    # Keep the actor ids as the index so projected rows can be looked up
    # by id.
    svd_df = pd.DataFrame(svd.transform(tf_idf_matrix),
                          index=tf_idf_matrix.index)

    # svd_df shares the matrix index, so an id missing from the matrix
    # still surfaces as a KeyError here.
    input_actor_row = svd_df.loc[actorid]

    # The input actor is skipped up front; comparing it with itself would
    # only be discarded afterwards.  Rows without a known name are labelled
    # with their id.
    other_actors = [
        (index, name_by_id.get(index, str(index)),
         1 - cosine(row, input_actor_row))
        for index, row in svd_df.iterrows()
        if index != actorid
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    top_actors = other_actors[:no_of_actors]
    util.print_output(actorid, input_actor_name, top_actors)
    util.write_output_file(actorid, input_actor_name, top_actors,
                           output_file)