def with_pre_load_data_set():
    """Run the metadata experiments over the whole pre-loaded song set.

    The song set is loaded once and a ``PreferenceAnalytics`` instance is
    built and run. ``one_metadata_process`` is then called:

    * once per entry of ``METADATA_TO_PROCESS_LIST`` with the ``id`` column
      plus that single metadata column, labelled with the matching entry of
      ``METADATA_TO_PROCESS_LIST_PT``;
    * once with the ``id``, ``album`` and ``title`` columns (label
      ``|AL|+|TL|``);
    * once with ``album`` and ``title`` combined by
      ``concat_metadata_preserve_id`` into a column named ``AL+TL``
      (label ``AL+TL``).

    Afterwards the song/user statistics are printed, the graphics are made
    and ``make_evaluate_graphics`` is called.
    """
    logger = logging.getLogger(__name__)
    song_set_df = get_song_set_df()
    # NOTE(review): the sibling entry points in this file pass the song
    # DataFrame to get_users_preference_df() and a song_df= argument to
    # PreferenceAnalytics; neither is passed here - confirm this is intended.
    preference_statistic = PreferenceAnalytics(
        users_preferences_df=get_users_preference_df()
    )
    preference_statistic.run()

    banner = "*" * 60
    for metadata, pt_graph_name in zip(METADATA_TO_PROCESS_LIST,
                                       METADATA_TO_PROCESS_LIST_PT):
        logger.info(banner)
        logger.info("*\tProcessando o metadado - %s", metadata)
        logger.info(banner)
        one_metadata_process(
            song_set_df=song_set_df.filter(['id', metadata], axis=1),
            users_preferences_df=(
                preference_statistic.get_users_relevance_preferences_df(
                    user_top_n_relevance=USER_SIZE)),
            preference_statistic=preference_statistic,
            label=pt_graph_name)

    # Album and title kept as two separate columns.
    one_metadata_process(
        song_set_df=song_set_df.filter(['id', 'album', 'title'], axis=1),
        users_preferences_df=(
            preference_statistic.get_users_relevance_preferences_df(
                user_top_n_relevance=USER_SIZE)),
        preference_statistic=preference_statistic,
        label='|AL|+|TL|')

    # Album and title merged into one column. The label used to read
    # 'AL+AR' although the merged fields are album + title, so it now
    # matches the new column name.
    one_metadata_process(
        song_set_df=concat_metadata_preserve_id(
            df_list=song_set_df,
            metadata_to_process_list=['album', 'title'],
            new_column='AL+TL'),
        users_preferences_df=(
            preference_statistic.get_users_relevance_preferences_df(
                user_top_n_relevance=USER_SIZE)),
        preference_statistic=preference_statistic,
        label='AL+TL')

    preference_statistic.print_song_statistical()
    preference_statistic.print_user_statistical()
    preference_statistic.make_graphics()
    make_evaluate_graphics()
def data_analysis():
    """Build the preference analytics for the song set and report on it.

    Loads the song set, derives the users' preferences from it, runs a
    ``PreferenceAnalytics`` over both, then prints the song and user
    statistics and makes the graphics.
    """
    songs = get_song_set_df()
    users_preferences = get_users_preference_df(songs)

    analytics = PreferenceAnalytics(
        users_preferences_df=users_preferences,
        song_df=songs,
    )
    analytics.run()

    # Reporting steps, in the same order as before.
    analytics.print_song_statistical()
    analytics.print_user_statistical()
    analytics.make_graphics()
def _run_logged_experiment(logger, experiment, description, song_set_df,
                           preference_statistic, label):
    """Log an experiment banner, then run ``one_metadata_process`` for it.

    :param logger: logger that receives the three banner lines.
    :param experiment: experiment number shown in the banner.
    :param description: text shown after the experiment number.
    :param song_set_df: song DataFrame handed to ``one_metadata_process``.
    :param preference_statistic: analytics object; it is passed through and
        also asked for the users' relevance preferences.
    :param label: label handed to ``one_metadata_process``.
    """
    banner = "*" * 60
    logger.info(banner)
    logger.info("*\tEXPERIMENTO %s - %s", experiment, description)
    logger.info(banner)
    one_metadata_process(
        song_set_df=song_set_df,
        # Fetched again for every experiment, as the original code did.
        users_preferences_df=(
            preference_statistic.get_users_relevance_preferences_df(
                user_top_n_relevance=USER_SIZE)),
        preference_statistic=preference_statistic,
        label=label)


def pre_load_data_set_and_song_variation_all_combination():
    """Run every metadata experiment for each size in ``SONG_SET_SIZE_LIST``.

    A ``PreferenceAnalytics`` is first run over the full song set; its song
    relevance is used by ``song_select`` to build one reduced song set per
    size. For every reduced set a fresh ``PreferenceAnalytics`` is run and
    three groups of experiments are executed:

    1. each entry of ``METADATA_TO_PROCESS_LIST`` on its own;
    2. each pair of album/title/artist kept as separate columns;
    3. each pair of album/title/artist merged into a single column by
       ``concat_metadata_preserve_id``.

    After the experiments the per-size statistics are printed and the
    graphics are made. ``make_evaluate_graphics`` is called once all sizes
    have been processed.
    """
    logger = logging.getLogger(__name__)
    song_set_df = get_song_set_df()
    preference_statistic = PreferenceAnalytics(
        users_preferences_df=get_users_preference_df(song_set_df),
        song_df=song_set_df)
    preference_statistic.run()

    # Experiment 2: (banner description, label, metadata columns kept
    # next to 'id').
    separate_column_experiments = (
        ("album and title - |AL|+|TL|", '|AL|+|TL|', ['album', 'title']),
        ("album and artist - |AL|+|AR|", '|AL|+|AR|', ['album', 'artist']),
        ("title and artist - |TL|+|AR|", '|TL|+|AR|', ['title', 'artist']),
    )
    # Experiment 3: (banner description, label, metadata columns to merge).
    # The label doubles as the name of the merged column.
    merged_column_experiments = (
        ("title and album - AL+TL", 'AL+TL', ['album', 'title']),
        ("artist and album - AL+AR", 'AL+AR', ['album', 'artist']),
        ("title and artist - TL+AR", 'TL+AR', ['title', 'artist']),
    )
    # NOTE: Experiment 4 (title, artist and album together) is disabled.
    # To re-enable it, run the merged form with columns
    # ['title', 'artist', 'album'] and label 'TL+AR+AL', and the
    # separate-column form with the same columns and label
    # '|TL|+|AR|+|AL|'.

    for song_set_size in SONG_SET_SIZE_LIST:
        song_set_with_size_df = song_select(
            song_set_df, song_set_size,
            preference_statistic.get_song_relevance_df())
        preference_statistic_with_size = PreferenceAnalytics(
            users_preferences_df=get_users_preference_df(
                song_set_with_size_df),
            song_df=song_set_with_size_df)
        preference_statistic_with_size.run()

        # Experiment 1: one metadata column at a time. The metadata name is
        # also the label.
        # NOTE(review): with_pre_load_data_set takes its labels from
        # METADATA_TO_PROCESS_LIST_PT instead - confirm which is wanted.
        for metadata in METADATA_TO_PROCESS_LIST:
            gc.collect()
            _run_logged_experiment(
                logger, 1, metadata,
                song_set_with_size_df.filter(['id', metadata], axis=1),
                preference_statistic_with_size,
                metadata)

        gc.collect()
        for description, label, columns in separate_column_experiments:
            _run_logged_experiment(
                logger, 2, description,
                song_set_with_size_df.filter(['id'] + columns, axis=1),
                preference_statistic_with_size,
                label)

        gc.collect()
        for description, label, columns in merged_column_experiments:
            _run_logged_experiment(
                logger, 3, description,
                concat_metadata_preserve_id(
                    df_list=song_set_with_size_df,
                    metadata_to_process_list=columns,
                    new_column=label),
                preference_statistic_with_size,
                label)

        preference_statistic_with_size.print_song_statistical()
        preference_statistic_with_size.print_user_statistical()
        preference_statistic_with_size.make_graphics()
        gc.collect()

    make_evaluate_graphics()