def task2b():
    """Task 2b: decompose the co-actor similarity matrix with SVD.

    Prints the top-3 latent semantics as actor-weight vectors, then
    partitions the actors into three groups from the left-singular
    matrix and prints each group's member names.

    NOTE(review): relies on project modules DataHandler, decompositions,
    formatter, tasksBusiness and the helper prettyPrintActorVector.
    Original indentation was lost in this file; loop-body extents below
    are the most plausible reconstruction — confirm against history.
    """
    DataHandler.vectors()
    DataHandler.createDictionaries1()
    similarity_df, _actor_list = DataHandler.coactor_siilarity_matrix()
    u, sigma, vt = decompositions.SVDDecomposition(similarity_df, 3)
    latent_semantics = np.matrix(vt).tolist()
    actor_info = DataHandler.actor_info_df
    matrix_actors = list(similarity_df.index)
    print("Top 3 semantics are:")
    for semantic_vector in latent_semantics:
        prettyPrintActorVector(semantic_vector, matrix_actors, actor_info)
        print("")
    indexed_split = formatter.splitGroup(u, 3)
    print("The three groupings are:")
    partitions = tasksBusiness.get_partition_on_ids(indexed_split, actor_info['name'])
    for group_id, members in partitions.items():
        print('Group ' + str(group_id + 1) + ' : ' + str(members))
        print(" ")
    print()
def PersnalizedPageRank_top10_SimilarCoActors(seed):
    """Print co-actors most similar to the given seed actors.

    Runs personalized PageRank (restart set = *seed*, damping =
    constants.ALPHA) over the co-actor similarity matrix, takes the
    15 highest-scoring rows, and prints name + score for every actor
    that is not itself a seed actor.

    seed -- iterable of actor ids forming the PPR restart set.
    NOTE(review): depends on project modules DataHandler, ppr and
    constants; presumably *seed* ids all appear in the matrix index —
    confirm with callers.
    """
    DataHandler.createDictionaries1()
    DataHandler.create_actor_actorid_map()
    coactor_matrix, _ = DataHandler.coactor_siilarity_matrix()
    id_to_name = DataHandler.actor_actorid_map
    scores = ppr.personalizedPageRank(coactor_matrix, seed, constants.ALPHA)
    name_col = pd.DataFrame(pd.Series(list(coactor_matrix.index)), columns=['Actor'])
    name_col['Actor'] = name_col['Actor'].map(id_to_name.get)
    ranked = pd.concat([scores, name_col], axis=1)
    # head(15) leaves headroom so ~10 survive after the seeds are skipped.
    top_rows = ranked.sort_values(by=0, ascending=False).head(15)
    seed_names = [id_to_name.get(actor_id) for actor_id in seed]
    print('Co Actors similar to the following seed actors: ' + str(seed_names))
    for row_idx in top_rows.index:
        if top_rows.loc[row_idx, 'Actor'] not in seed_names:
            print(top_rows.loc[row_idx, 'Actor'] + ' ' + str(top_rows.loc[row_idx, 0]))