예제 #1
0
def get_bar():
    """Build the style-tag bar chart and return its rendered ``div`` output.

    Reads the ``"style_df"`` entry from the cache, plots its ``style`` column
    on the x axis against its ``count`` column on the y axis, applies a
    centred title and the Arial font, and returns the result of
    ``plot(..., output_type='div')``.
    """
    style_counts = cache.get("style_df")
    figure = px.bar(style_counts, x='style', y='count')

    layout = {
        'title_text': '10 Most Frequently Occurring Style Tags',
        'title_x': 0.5,  # centre the title horizontally
        'font_family': 'Arial',
    }
    figure.update_layout(**layout)

    return plot(figure, output_type='div')
예제 #2
0
def get_hm():
    """Build the album-similarity heat map and return its rendered ``div``.

    Reads the ``"df"`` and ``"cosine_sim"`` entries from the cache, draws the
    similarity values (multiplied by 100) as an 800x800 image labelled on
    both axes by ``release_name``, hides the tick labels, and returns the
    result of ``plot(..., output_type='div')``.
    """
    collection = cache.get("df")
    similarity = cache.get("cosine_sim")

    # Scale to match the 'Similarity (%)' colour-bar label.
    percent_similarity = similarity * 100

    figure = px.imshow(
        percent_similarity,
        labels={'color': 'Similarity (%)'},
        x=collection['release_name'],
        y=collection['release_name'],
        width=800,
        height=800,
    )

    # Tick labels are switched off on both axes.
    figure.update_xaxes(showticklabels=False)
    figure.update_yaxes(showticklabels=False)

    figure.update_layout(
        title_text='Heat Map Representing Cosine Similarity Between Albums',
        title_x=0.5,
        font_family='Arial',
    )

    return plot(figure, output_type='div')
예제 #3
0
def get_pie():
    """Build the genre-breakdown pie chart and return its rendered ``div``.

    Reads the ``"genre_df"`` entry from the cache, uses its ``count`` column
    as slice values and its index as slice names, and returns the result of
    ``plot(..., output_type='div')``.
    """
    genre_counts = cache.get("genre_df")

    figure = px.pie(
        values=genre_counts['count'],
        names=genre_counts.index,
    )
    figure.update_layout(
        title_text='Record Collection Breakdown by Genre',
        title_x=0.5,  # centre the title horizontally
        font_family='Arial',
    )

    return plot(figure, output_type='div')
예제 #4
0
def get_similar(release_id):
    """Return up to nine releases most similar to ``release_id``.

    The similarity row for the chosen release is read from the cached
    ``"cosine_sim"`` matrix and joined onto the cached ``"df"`` collection by
    index. Releases by the chosen release's own artist are excluded, and at
    most one release per remaining artist is kept (the highest-scoring one).

    The result is also stored in the cache under ``"similar"``.

    Args:
        release_id: Key into the cached ``"indices"`` mapping and value to
            match in the ``release_id`` column of the collection.

    Returns:
        A DataFrame with a ``similarity`` column, sorted by descending
        similarity, holding at most nine rows. Fewer rows are returned when
        fewer than nine distinct other artists are available (the previous
        implementation raised ``IndexError`` in that situation).

    Raises:
        KeyError / IndexError: propagated from the ``"indices"`` lookup or
            the ``release_id`` match when the release is unknown.
    """
    top_n = 9

    df = cache.get("df")

    # Position of the chosen release in the similarity matrix.
    idx = cache.get("indices")[release_id]

    # Artist of the chosen release, so their other albums can be filtered out.
    artist_id = df[df['release_id'] == release_id]['artist_id'].iloc[0]

    # Similarity of every release against the chosen one. Going through
    # ``list`` gives the series a fresh 0..n-1 index for the index join below.
    sim_series = pd.Series(list(cache.get("cosine_sim")[idx]),
                           name='similarity')

    # Attach the scores to the full collection by index.
    similar = df.merge(sim_series,
                       how='left',
                       left_index=True,
                       right_index=True)

    # Highest similarity first.
    similar = similar.sort_values(by=['similarity'], ascending=False)

    # Drop every release by the chosen release's artist (including itself).
    similar = similar[similar['artist_id'] != artist_id]
    similar = similar.reset_index(drop=True)

    # Keep only the best-scoring release per artist, then take the top rows.
    # Unlike a positional loop, this cannot run past the end of the frame.
    similar = similar.drop_duplicates(subset='artist_id', keep='first')
    similar = similar.head(top_n)

    cache.set("similar", similar)

    return similar
예제 #5
0
def get_artist_df():
    """Return the artist DataFrame, loading it from CSV on a cache miss.

    Looks up ``"artist_df"`` in the cache first. When nothing is cached, the
    file ``./data/artists.csv`` is read, with the ``member_list`` and
    ``group_list`` columns converted from their string form into Python
    lists, and the resulting frame is stored back in the cache.
    """
    cached = cache.get("artist_df")
    if cached is not None:
        return cached

    def split_list_cell(cell):
        # Strip the first and last characters, then split on ', '.
        return cell[1:-1].split(', ')

    csv_path = path.join('.', 'data', 'artists.csv')
    loaded = pd.read_csv(
        csv_path,
        header=0,
        low_memory=False,
        converters={
            'member_list': split_list_cell,
            'group_list': split_list_cell,
        },
    )
    cache.set("artist_df", loaded)
    return loaded
예제 #6
0
def get_dict():
    """Return whatever is stored in the cache under ``"release_dict"``."""
    release_dict = cache.get("release_dict")
    return release_dict
예제 #7
0
def search(string):
    """Return collection rows whose artist or release name contains ``string``.

    The match is a case-insensitive *literal* substring test against the
    ``artist_name`` and ``release_name`` columns of the cached ``"df"``
    collection.

    Args:
        string: Text to look for. It is matched literally, so characters
            such as ``(``, ``+`` or ``.`` are not interpreted as regular
            expression syntax.

    Returns:
        The subset of the cached DataFrame where either column matches.
    """
    df = cache.get("df")

    # regex=False: treat the search term as plain text, so input like "c++"
    # or "(" cannot raise re.error and "." does not match everything.
    # na=False: a missing name counts as "no match" instead of putting NaN
    # into the boolean mask.
    options = {'case': False, 'regex': False, 'na': False}

    artist_hit = df['artist_name'].str.contains(string, **options)
    release_hit = df['release_name'].str.contains(string, **options)

    return df[artist_hit | release_hit]
예제 #8
0
def get_top_ten():
    """Return whatever is stored in the cache under ``"top_ten"``."""
    top_ten = cache.get("top_ten")
    return top_ten
예제 #9
0
def get_df():
    """Return the cached ``"df"`` entry, building it first on a cache miss.

    When the cache holds no ``"df"``, ``transform_data`` is called with the
    result of ``get_dict()`` and the cache is read again.
    """
    cached = cache.get("df")
    if cached is not None:
        return cached

    # The frame is re-read from the cache rather than taken from
    # transform_data's return value; presumably transform_data stores it
    # under "df" -- confirm against its definition.
    transform_data(get_dict())
    return cache.get("df")