Пример #1
0
def task2b():
    """Print three latent semantics of the co-actor matrix and a 3-way grouping.

    Steps, all driven through project helpers:

    1. Prime ``DataHandler`` (``vectors`` and ``createDictionaries1``).
    2. Fetch the co-actor similarity matrix and decompose it with
       ``decompositions.SVDDecomposition(..., 3)``.  The three returned
       values are presumably the ``U``, ``sigma`` and ``V^T`` factors --
       TODO confirm against the helper.
    3. Print every row of the third factor via ``prettyPrintActorVector``.
    4. Split the first factor into 3 groups with ``formatter.splitGroup``,
       resolve the groups through ``tasksBusiness.get_partition_on_ids``
       using the ``'name'`` column of ``DataHandler.actor_info_df``, and
       print each group.

    Takes no arguments and returns ``None``; output goes to stdout.
    """
    # These calls are made only for their effect on DataHandler's state.
    DataHandler.vectors()
    DataHandler.createDictionaries1()

    # The second element of the returned pair is not needed here.
    similarity_df, _ = DataHandler.coactor_siilarity_matrix()
    left_factor, _sigma, right_factor = decompositions.SVDDecomposition(
        similarity_df, 3)
    # Plain nested lists, one inner list per row of the third factor.
    semantic_rows = np.matrix(right_factor).tolist()

    actor_info = DataHandler.actor_info_df
    matrix_actor_ids = list(similarity_df.index)

    print("Top 3 semantics are:")
    for semantic_row in semantic_rows:
        prettyPrintActorVector(semantic_row, matrix_actor_ids, actor_info)
        print("")

    partition = formatter.splitGroup(left_factor, 3)

    print("The three groupings are:")
    named_groups = tasksBusiness.get_partition_on_ids(
        partition, actor_info['name'])
    for group_key, members in named_groups.items():
        # Group keys are shifted by one for display.
        print(f"Group {group_key + 1!s} : {members!s}")
        print(" ")
    print()
Пример #2
0
def PersnalizedPageRank_top10_SimilarCoActors(seed, top_k=10):
    """Print the co-actors ranked highest by personalized PageRank for ``seed``.

    The co-actor similarity matrix is obtained from ``DataHandler`` and passed,
    together with ``seed`` and ``constants.ALPHA``, to
    ``ppr.personalizedPageRank``.  The resulting scores are joined with the
    actor names, sorted in descending score order, and printed as
    ``"<name> <score>"`` lines.

    Args:
        seed: Iterable of actor ids used as the personalization set.  Each id
            is looked up in ``DataHandler.actor_actorid_map`` to get the
            display name; actors whose name matches a seed name are left out
            of the listing.
        top_k: Maximum number of non-seed actors to print.  Defaults to 10,
            matching the function name.

    Returns:
        None.  Output goes to stdout.

    Notes:
        The score column is assumed to be labelled ``0`` after concatenation,
        as the sort key relies on it -- TODO confirm against
        ``ppr.personalizedPageRank``.
    """
    DataHandler.createDictionaries1()
    DataHandler.create_actor_actorid_map()
    coactor_matrix, _ = DataHandler.coactor_siilarity_matrix()
    actor_name_by_id = DataHandler.actor_actorid_map

    scores = ppr.personalizedPageRank(coactor_matrix, seed, constants.ALPHA)

    # One row per actor in the matrix, holding the actor's display name.
    actor_df = pd.DataFrame(pd.Series(list(coactor_matrix.index)),
                            columns=['Actor'])
    actor_df['Actor'] = actor_df['Actor'].map(
        lambda actor_id: actor_name_by_id.get(actor_id))

    ranked = pd.concat([scores, actor_df], axis=1).sort_values(
        by=0, ascending=False)

    seed_names = [actor_name_by_id.get(actor_id) for actor_id in seed]
    print('Co Actors similar to the following seed actors: ' + str(seed_names))

    # Skip seeds *before* counting, so exactly ``top_k`` non-seed actors are
    # listed (when that many exist) regardless of where the seeds rank.
    printed = 0
    for name, score in zip(ranked['Actor'], ranked[0]):
        if printed >= top_k:
            break
        if name in seed_names:
            continue
        # f-string rather than ``+`` so an actor without a mapped name
        # (``None``/NaN) does not raise TypeError and abort the listing.
        print(f"{name} {score}")
        printed += 1