def get_bar():
    """Build the style-tag bar chart from the cached ``"style_df"`` entry.

    The chart plots the ``style`` column on the x axis against the ``count``
    column on the y axis.

    Returns:
        Whatever ``plot`` produces for ``output_type='div'`` (presumably an
        HTML ``<div>`` snippet from plotly.offline -- confirm against the
        file's imports).
    """
    style_counts = cache.get("style_df")
    figure = px.bar(style_counts, x='style', y='count')

    layout = {
        'title_text': '10 Most Frequently Occurring Style Tags',
        'title_x': 0.5,  # centre the title horizontally
        'font_family': 'Arial',
    }
    figure.update_layout(**layout)

    return plot(figure, output_type='div')
def get_hm():
    """Build the album-to-album similarity heat map from cached data.

    Reads the ``"df"`` and ``"cosine_sim"`` cache entries, scales the
    similarity values by 100 so the colour bar reads as a percentage, and
    labels both axes with the ``release_name`` column (tick labels are hidden;
    the names remain available to the figure).

    Returns:
        Whatever ``plot`` produces for ``output_type='div'`` (presumably an
        HTML ``<div>`` snippet from plotly.offline -- confirm against the
        file's imports).
    """
    collection = cache.get("df")
    similarity_pct = cache.get("cosine_sim") * 100
    album_names = collection['release_name']

    figure = px.imshow(
        similarity_pct,
        labels={'color': 'Similarity (%)'},
        x=album_names,
        y=album_names,
        width=800,
        height=800,
    )

    # Hide the tick labels on both axes.
    figure.update_xaxes(showticklabels=False)
    figure.update_yaxes(showticklabels=False)

    layout = {
        'title_text': 'Heat Map Representing Cosine Similarity Between Albums',
        'title_x': 0.5,  # centre the title horizontally
        'font_family': 'Arial',
    }
    figure.update_layout(**layout)

    return plot(figure, output_type='div')
def get_pie():
    """Build the genre breakdown pie chart from the cached ``"genre_df"`` entry.

    Slice sizes come from the ``count`` column and slice names from the
    frame's index.

    Returns:
        Whatever ``plot`` produces for ``output_type='div'`` (presumably an
        HTML ``<div>`` snippet from plotly.offline -- confirm against the
        file's imports).
    """
    genres = cache.get("genre_df")
    figure = px.pie(values=genres['count'], names=genres.index)

    layout = {
        'title_text': 'Record Collection Breakdown by Genre',
        'title_x': 0.5,  # centre the title horizontally
        'font_family': 'Arial',
    }
    figure.update_layout(**layout)

    return plot(figure, output_type='div')
def get_similar(release_id, limit=9):
    """Return the albums most similar to a release, at most one per artist.

    Reads the ``"df"``, ``"indices"`` and ``"cosine_sim"`` cache entries,
    attaches the chosen release's similarity scores to the collection as a
    ``similarity`` column, sorts by that column in descending order, removes
    every album by the chosen release's own artist, and keeps only the
    best-scoring album of each remaining artist. The result is stored in the
    cache under ``"similar"`` and returned.

    Args:
        release_id: Key looked up in the cached ``indices`` mapping and value
            matched against the ``release_id`` column of the collection.
        limit: Maximum number of rows to return. Defaults to 9, the value that
            was previously hard-coded.

    Returns:
        A DataFrame with the collection's columns plus ``similarity``. It
        holds fewer than ``limit`` rows when the collection does not contain
        enough distinct other artists (previously this case raised
        ``IndexError``).

    A ``release_id`` that is absent from the cached data is not handled here;
    the lookup error propagates to the caller.
    """
    df = cache.get("df")

    # Row position of the chosen release inside the similarity matrix.
    idx = cache.get("indices")[release_id]

    # Artist of the chosen release, so their other albums can be excluded.
    artist_id = df.loc[df['release_id'] == release_id, 'artist_id'].iloc[0]

    # Similarity of every album against the chosen release.
    # Assumes df carries a default 0..n-1 index that lines up with the matrix
    # rows, since the merge below joins on index -- TODO confirm.
    sim_series = pd.Series(list(cache.get("cosine_sim")[idx]), name='similarity')

    similar = df.merge(sim_series, how='left', left_index=True, right_index=True)
    similar = similar.sort_values(by=['similarity'], ascending=False)

    # Drop the chosen artist's own albums (which includes the release itself).
    similar = similar[similar['artist_id'] != artist_id].reset_index(drop=True)

    # Keep only the highest-ranked album per artist. This replaces a manual
    # loop that indexed past the end of the frame when fewer than nine
    # distinct artists were left.
    # NOTE(review): drop_duplicates treats missing artist_id values as equal
    # to each other -- confirm artist_id is never missing.
    similar = similar.drop_duplicates(subset='artist_id', keep='first').head(limit)

    cache.set("similar", similar)
    return similar
def get_artist_df():
    """Return the artist DataFrame, loading it from disk on a cache miss.

    When the ``"artist_df"`` cache entry is ``None`` the frame is read from
    ``./data/artists.csv`` and stored in the cache before being returned.
    The ``member_list`` and ``group_list`` columns are converted from their
    text form into Python lists while reading.
    """
    cached = cache.get("artist_df")
    if cached is not None:
        return cached

    def _split_listing(raw):
        # Strip the first and last character (presumably the surrounding
        # brackets of a serialised list) and split the rest on ', '.
        return raw[1:-1].split(', ')

    csv_path = path.join('.', 'data', 'artists.csv')
    loaded = pd.read_csv(
        csv_path,
        header=0,
        low_memory=False,
        converters={
            'member_list': _split_listing,
            'group_list': _split_listing,
        },
    )
    cache.set("artist_df", loaded)
    return loaded
def get_dict():
    """Return the ``"release_dict"`` cache entry (whatever the cache holds)."""
    release_dict = cache.get("release_dict")
    return release_dict
def search(string):
    """Return the cached collection rows whose artist or release name contains *string*.

    The match is a case-insensitive, literal substring test against the
    ``artist_name`` and ``release_name`` columns of the ``"df"`` cache entry.

    Args:
        string: Text to look for. It is matched literally, not as a regular
            expression.

    Returns:
        The subset of the cached DataFrame where either column contains
        *string*.
    """
    df = cache.get("df")

    # regex=False: search text is matched literally, so characters such as
    # '(' or '+' neither raise re.error nor change what is matched.
    # na=False: rows with a missing name count as non-matches instead of
    # putting NaN into the boolean mask, which would make df[mask] fail.
    options = {'case': False, 'regex': False, 'na': False}
    in_artist = df['artist_name'].str.contains(string, **options)
    in_release = df['release_name'].str.contains(string, **options)

    return df[in_artist | in_release]
def get_top_ten():
    """Return the ``"top_ten"`` cache entry (whatever the cache holds)."""
    top_ten = cache.get("top_ten")
    return top_ten
def get_df():
    """Return the ``"df"`` cache entry, rebuilding it on a cache miss.

    When the entry is ``None``, ``transform_data`` is called with the result
    of ``get_dict()`` and the entry is read from the cache again.
    """
    frame = cache.get("df")
    if frame is not None:
        return frame

    # Cache miss: transform_data is expected to repopulate "df", so the
    # entry is re-read afterwards rather than taken from a return value.
    transform_data(get_dict())
    return cache.get("df")