Пример #1
0
def subsetDataFrame(df, tracks):
    """Return the rows of ``df`` whose ``spotify_id`` matches one of ``tracks``.

    Every entry of ``tracks`` is passed to ``sptfy.getSpotifyTrackIDs`` and the
    first element of each result is used as the id to look for in the
    ``spotify_id`` column.
    """
    wanted_ids = [sptfy.getSpotifyTrackIDs(entry)[0] for entry in tracks]
    id_mask = df.spotify_id.isin(wanted_ids)
    return df[id_mask]
Пример #2
0
def subsetDataFrame(df, tracks):
    ## keep only the rows of df whose spotify_id belongs to one of the tracks;
    ## the id of a track is the first element of sptfy.getSpotifyTrackIDs(track)
    resolved = (sptfy.getSpotifyTrackIDs(item) for item in tracks)
    keep = df.spotify_id.isin([ids[0] for ids in resolved])
    selected = df[keep]
    return selected
Пример #3
0
def _albumScore(ratings):
    """Compute the album score (capped at 1000) from a list of song ratings.

    ``ratings`` is the list of per-song ratings of one album; the caller
    guarantees it is not empty.
    """
    total = len(ratings)
    countNot3 = sum(1 for r in ratings if r != 3)
    countMoreThan2 = sum(1 for r in ratings if r > 2)
    countMoreThan3 = sum(1 for r in ratings if r > 3)

    ## weighted sum over every rating except 3 (a 3 contributes nothing)
    ## NOTE(review): these weights are on a 20-100 scale while adjMean below is
    ## compared against 3 and max1 is ~5.66 -- confirm the intended scale
    weights = {5: 100 * 1.0, 4: 80 * 1.2, 2: 40 * 1.2, 1: 20 * 1.0}
    scoreNot3 = sum(weights.get(r, 0) for r in ratings)

    std = np.std(ratings)
    if std == 0.0:
        std = 0.25

    if countNot3 == 0:
        adjMean = 3
    else:
        adjMean = float(scoreNot3) / countNot3
    ## float() keeps this a true proportion under Python 2 integer division
    prop4or5 = float(countMoreThan3) / total
    adj1 = (adjMean - 3) * prop4or5
    adj2 = adj1 + countMoreThan2 * 0.03

    score = np.mean(ratings) + adj2

    if prop4or5 == 0:
        adjSD = std * 0.05
    else:
        adjSD = std * prop4or5 / total

    score = score - adjSD

    ## min possible score: (mean of 1-star)
    min1 = 1.0
    ## max possible score: Radiohead "OK Computer"
    max1 = 5.662521
    min2 = -1.0
    max2 = -0.125

    scaledScore = (score - min1) / (max1 - min1)
    ## transform (curves the linear scores to inflate higher scores and reduce lower);
    ## -(8 ** -x) maps [0, 1] onto [min2, max2], which is why ** and not ^ is needed
    transformedScore = -1 * (8 ** (-1 * scaledScore))
    scaledScore = (transformedScore - min2) / (max2 - min2)
    album_score = (round(scaledScore * 1000)) / 1.0

    if album_score > 1000:
        album_score = 1000
    return album_score


def _albumRating(album_score):
    """Map an album score onto the 0.5-5.0 star scale."""
    if album_score >= 965:
        return 5.0
    if album_score >= 890:
        return 4.5
    if album_score >= 750:
        return 4.0
    if album_score >= 690:
        return 3.5
    if album_score >= 625:
        return 3.0
    if album_score >= 420:
        return 2.5
    if album_score > 325:
        return 2.0
    if album_score > 235:
        return 1.5
    if album_score >= 100:
        return 1.0
    return 0.5


def main():
    """Update the song and album rating databases from the rating playlists.

    Loads the five playlist files and both CSV databases, adds every playlist
    song that is not yet in the song database with the rating of its playlist,
    saves the song database, then scores every album that is not yet in the
    album database and has at least three rated songs, and saves the album
    database.
    """
    # ## load tracks in playlist
    fives = loadFile("input", "fives.txt")
    fours = loadFile("input", "fours.txt")
    threes = loadFile("input", "threes.txt")
    twos = loadFile("input", "twos.txt")
    ones = loadFile("input", "ones.txt")

    ## load database of song ratings
    db = loadFile("../Databases", "song_ratings_db.csv")
    album_ratings = loadFile("../Databases", "album_ratings_db.csv")

    ## NOTE(review): config is loaded but never used in this function
    config = loadFile("../config", "config.csv", True)
    token = sptfy.authSpotipy()

    ## playlists are walked from the highest rating down: a song is only added
    ## the first time it is seen, so a song that is in multiple lists ends up
    ## with its highest rating
    playlists = [(5, fives), (4, fours), (3, threes), (2, twos), (1, ones)]
    for rating, ls in playlists:
        for song in ls:
            track_id = sptfy.getSpotifyTrackIDs(song)[0]
            if lookupSongBySpotifyID(track_id, db):
                continue
            track = sptfy.pullSpotifyTrack(track_id, token=token)
            ## NOTE(review): DataFrame.append and DataFrame.sort only exist in
            ## older pandas releases; kept to match the version this file targets
            db = db.append([{
                'spotify_id': track_id,
                'artist': track['artist'],
                'album': track['album'],
                'spotify_album_id': track['spotify_album_id'],
                'song': track['title'],
                'rating': rating
            }])

    db = db.sort(['artist', 'album', 'rating'])
    saveDataFrame(db, "../Databases", "song_ratings_db.csv")

    ## unique (artist, album) pairs in order of first appearance
    albumPairs = []
    seenPairs = set()
    for pair in zip(db.artist, db.album):
        if pair not in seenPairs:
            seenPairs.add(pair)
            albumPairs.append(pair)

    for artist, album in albumPairs:
        ## NOTE(review): the album *name* is passed to a lookup that is named
        ## "BySpotifyID" -- confirm which key that helper expects
        if lookupAlbumBySpotifyID(album, album_ratings):
            continue

        ## a single combined mask: chaining db[mask1][mask2] applies a mask
        ## built on the full frame to an already filtered frame
        albumRows = db[(db.artist == artist) & (db.album == album)]
        ratings = albumRows['rating'].tolist()
        if len(ratings) < 3:
            ## don't make album ratings for singles (checked before the API call)
            continue

        album_id = pd.unique(albumRows['spotify_album_id'])[0]
        album_data = sptfy.pullSpotifyAlbum(album_id, token=token)

        album_score = _albumScore(ratings)
        album_rating = _albumRating(album_score)
        album_ratings = album_ratings.append([{
            'spotify_album_id': album_id,
            'artist': artist,
            'album': album,
            'year': album_data['year'],
            'album_rating': album_rating,
            'album_score': album_score
        }])

    saveDataFrame(album_ratings, "../Databases", "album_ratings_db.csv")
Пример #4
0
def lookupSongBySpotifyID(song, df):
    """Report whether ``df`` already contains a row for ``song``.

    ``song`` is resolved through ``sptfy.getSpotifyTrackIDs``; the first
    element of the result is compared against the ``spotify_id`` column.
    Returns True when at least one row matches, otherwise False.
    """
    resolved_ids = sptfy.getSpotifyTrackIDs(song)
    id_matches = df.spotify_id == resolved_ids[0]
    return any(id_matches)
Пример #5
0
def lookupSongBySpotifyID(song, df):
    ## True when some row of df carries the spotify id that `song` resolves to
    ## (the first element returned by sptfy.getSpotifyTrackIDs)
    wanted = sptfy.getSpotifyTrackIDs(song)[0]
    for is_match in df.spotify_id == wanted:
        if is_match:
            return True
    return False
Пример #6
0
def _normalizeEchoNestSong(song):
    """Drop unneeded fields from a flattened song dict and rename its id/artist keys in place."""
    ## pop off unneeded data
    song.pop('audio_md5', None)
    song.pop('analysis_url', None)
    song['echonest_artist_id'] = song.pop('artist_id')
    if 'artist_foreign_ids' in song:
        song.pop('artist_foreign_ids')
    ## rename keys as necessary
    song['echonest_id'] = song.pop('id')
    song['artist'] = song.pop('artist_name')
    return song


def pullEchoNestSong(auth, track, album=None, local_link=None):
    """Fetch the audio data of one track and return it as a flat dict.

    Args:
        auth: passed as ``headers`` to ``mhlpr.callAPI`` and, for non-Spotify
            ids, also formatted into the query string as the API key.
        track: a track identifier. When it contains ``'spotify'`` it is
            resolved with ``sptfy.getSpotifyTrackIDs``; otherwise it is used
            directly as the ``id`` query parameter.
        album: album name stored on the result for non-Spotify tracks.
        local_link: stored (stripped) as ``spotify_id`` for non-Spotify tracks.

    Returns:
        The normalized song dict, or ``None`` when the song cannot be found
        or the search result does not match the requested track.

    Raises:
        ValueError: when the response carries a status code other than 0 or 5.

    NOTE(review): ``url_base`` is a Spotify endpoint while the response is
    parsed as ``data['response']['status'] / ['songs']`` -- confirm that
    ``mhlpr.callAPI`` really returns that shape for this URL.
    """
    url_base = "https://api.spotify.com/v1/audio-features/"

    ## `track` used to be rebound to a metadata dict further down, which made
    ## later "'spotify' in track" checks test dict keys; decide once, up front
    is_spotify = 'spotify' in track
    track_id = None
    spotify_uri = None

    if is_spotify:
        track_id, spotify_uri = sptfy.getSpotifyTrackIDs(track)
        url_suffix = "%s" % sptfy.stripSpotifyURI(spotify_uri)
    else:
        ## it's an echonest id and can be accessed directly; the two bucket
        ## params are why the query string is built by hand instead of a payload
        url_suffix = "?api_key=%s&id=%s&bucket=audio_summary&bucket=id:spotify&format=json" % (
            auth, track)

    url = url_base + url_suffix
    data = mhlpr.callAPI(url, headers=auth)
    status = int(data['response']['status']['code'])

    ## if response is a success
    if status == 0 and len(data['response']['songs']) > 0:
        song = mhlpr.flattenDictCustom(data['response']['songs'][0])
        if is_spotify:
            track_info = sptfy.pullSpotifyTrack(track_id)
            song['album'] = track_info['album']
            song['spotify_artist_id'] = track_info['spotify_artist_id']
            ## add spotify uri to song data
            song['spotify_id'] = track_id
        else:
            song['album'] = album
            ## local_link defaults to None, which has no strip()
            song['spotify_id'] = local_link.strip() if local_link is not None else None
        return _normalizeEchoNestSong(song)

    if status == 5:
        ## the song cannot be found by the spotify id
        if not is_spotify:
            ## only Spotify tracks give us an artist/title to search with
            print("Song not found via EchoNest search: {}".format(track))
            return None

        url = "http://developer.echonest.com/api/v4/song/search"
        track_info = sptfy.pullSpotifyTrack(track_id)
        ## NOTE(review): api_key is not defined in this function; presumably a
        ## module-level name -- confirm, or whether `auth` was meant
        payload = {
            'api_key': api_key,
            'artist': track_info['artist'],
            'title': track_info['title'],
            'bucket': "audio_summary",
            'format': "json"
        }
        data = mhlpr.callAPI(url, payload)
        if len(data['response']['songs']) == 0:
            print("Song not found via EchoNest search: {}".format(spotify_uri))
            return None

        ## normalize first so that song['artist'] exists for the comparison
        song = _normalizeEchoNestSong(
            mhlpr.flattenDictCustom(data['response']['songs'][0]))
        ## check to be sure it's the correct song -- fuzzy string match of at least .75 levenshtein ratio
        if not fuzzyMatch(song['artist'], track_info['artist'], song['title'],
                          track_info['title']):
            print("Song not found via EchoNest search: {}".format(spotify_uri))
            return None
        song['album'] = track_info['album']
        song['spotify_artist_id'] = track_info['spotify_artist_id']
        ## add spotify uri to song data
        song['spotify_id'] = track_id
        return song

    ## neither success nor "not found": fail loudly instead of returning an
    ## unbound name
    raise ValueError("Unrecognized error code: {}".format(status))
Пример #7
0
def _normalizeEchoNestSong(song):
    """Drop unneeded fields from a flattened song dict and rename its id/artist keys in place."""
    ## pop off unneeded data
    song.pop('audio_md5', None)
    song.pop('analysis_url', None)
    song['echonest_artist_id'] = song.pop('artist_id')
    if 'artist_foreign_ids' in song:
        song.pop('artist_foreign_ids')
    ## rename keys as necessary
    song['echonest_id'] = song.pop('id')
    song['artist'] = song.pop('artist_name')
    return song


def pullEchoNestSong(auth, track, album = None, local_link = None):
    """Fetch the audio data of one track and return it as a flat dict.

    Args:
        auth: passed as ``headers`` to ``mhlpr.callAPI`` and, for non-Spotify
            ids, also formatted into the query string as the API key.
        track: a track identifier. When it contains ``'spotify'`` it is
            resolved with ``sptfy.getSpotifyTrackIDs``; otherwise it is used
            directly as the ``id`` query parameter.
        album: album name stored on the result for non-Spotify tracks.
        local_link: stored (stripped) as ``spotify_id`` for non-Spotify tracks.

    Returns:
        The normalized song dict, or ``None`` when the song cannot be found
        or the search result does not match the requested track.

    Raises:
        ValueError: when the response carries a status code other than 0 or 5.

    NOTE(review): ``url_base`` is a Spotify endpoint while the response is
    parsed as ``data['response']['status'] / ['songs']`` -- confirm that
    ``mhlpr.callAPI`` really returns that shape for this URL.
    """
    url_base = "https://api.spotify.com/v1/audio-features/"

    ## `track` used to be rebound to a metadata dict further down, which made
    ## later "'spotify' in track" checks test dict keys; decide once, up front
    is_spotify = 'spotify' in track
    track_id = None
    spotify_uri = None

    if is_spotify:
        track_id, spotify_uri = sptfy.getSpotifyTrackIDs(track)
        url_suffix = "%s" % sptfy.stripSpotifyURI(spotify_uri)
    else:
        ## it's an echonest id and can be accessed directly; the two bucket
        ## params are why the query string is built by hand instead of a payload
        url_suffix = "?api_key=%s&id=%s&bucket=audio_summary&bucket=id:spotify&format=json" % (auth, track)

    url = url_base + url_suffix
    data = mhlpr.callAPI(url, headers = auth)
    status = int(data['response']['status']['code'])

    ## if response is a success
    if status == 0 and len(data['response']['songs']) > 0:
        song = mhlpr.flattenDictCustom(data['response']['songs'][0])
        if is_spotify:
            track_info = sptfy.pullSpotifyTrack(track_id)
            song['album'] = track_info['album']
            song['spotify_artist_id'] = track_info['spotify_artist_id']
            ## add spotify uri to song data
            song['spotify_id'] = track_id
        else:
            song['album'] = album
            ## local_link defaults to None, which has no strip()
            song['spotify_id'] = local_link.strip() if local_link is not None else None
        return _normalizeEchoNestSong(song)

    if status == 5:
        ## the song cannot be found by the spotify id
        if not is_spotify:
            ## only Spotify tracks give us an artist/title to search with
            print("Song not found via EchoNest search: {}".format(track))
            return None

        url = "http://developer.echonest.com/api/v4/song/search"
        track_info = sptfy.pullSpotifyTrack(track_id)
        ## NOTE(review): api_key is not defined in this function; presumably a
        ## module-level name -- confirm, or whether `auth` was meant
        payload = {'api_key' : api_key, 'artist' : track_info['artist'], 'title' : track_info['title'], 'bucket' : "audio_summary", 'format' : "json"}
        data = mhlpr.callAPI(url, payload)
        if len(data['response']['songs']) == 0:
            print("Song not found via EchoNest search: {}".format(spotify_uri))
            return None

        ## normalize first so that song['artist'] exists for the comparison
        song = _normalizeEchoNestSong(mhlpr.flattenDictCustom(data['response']['songs'][0]))
        ## check to be sure it's the correct song -- fuzzy string match of at least .75 levenshtein ratio
        if not fuzzyMatch(song['artist'], track_info['artist'], song['title'], track_info['title']):
            print("Song not found via EchoNest search: {}".format(spotify_uri))
            return None
        song['album'] = track_info['album']
        song['spotify_artist_id'] = track_info['spotify_artist_id']
        ## add spotify uri to song data
        song['spotify_id'] = track_id
        return song

    ## neither success nor "not found": fail loudly instead of returning an
    ## unbound name
    raise ValueError("Unrecognized error code: {}".format(status))
Пример #8
0
def _albumScore(ratings):
    """Compute the album score (capped at 1000) from a list of song ratings.

    ``ratings`` is the list of per-song ratings of one album; the caller
    guarantees it is not empty.
    """
    total = len(ratings)
    countNot3 = sum(1 for r in ratings if r != 3)
    countMoreThan2 = sum(1 for r in ratings if r > 2)
    countMoreThan3 = sum(1 for r in ratings if r > 3)

    ## weighted sum over every rating except 3 (a 3 contributes nothing)
    ## NOTE(review): these weights are on a 20-100 scale while adjMean below is
    ## compared against 3 and max1 is ~5.66 -- confirm the intended scale
    weights = {5: 100 * 1.0, 4: 80 * 1.2, 2: 40 * 1.2, 1: 20 * 1.0}
    scoreNot3 = sum(weights.get(r, 0) for r in ratings)

    std = np.std(ratings)
    if std == 0.0:
        std = 0.25

    if countNot3 == 0:
        adjMean = 3
    else:
        adjMean = float(scoreNot3) / countNot3
    ## float() keeps this a true proportion under Python 2 integer division
    prop4or5 = float(countMoreThan3) / total
    adj1 = (adjMean - 3) * prop4or5
    adj2 = adj1 + countMoreThan2 * 0.03

    score = np.mean(ratings) + adj2

    if prop4or5 == 0:
        adjSD = std * 0.05
    else:
        adjSD = std * prop4or5 / total

    score = score - adjSD

    ## min possible score: (mean of 1-star)
    min1 = 1.0
    ## max possible score: Radiohead "OK Computer"
    max1 = 5.662521
    min2 = -1.0
    max2 = -0.125

    scaledScore = (score - min1) / (max1 - min1)
    ## transform (curves the linear scores to inflate higher scores and reduce lower);
    ## -(8 ** -x) maps [0, 1] onto [min2, max2], which is why ** and not ^ is needed
    transformedScore = -1 * (8 ** (-1 * scaledScore))
    scaledScore = (transformedScore - min2) / (max2 - min2)
    album_score = (round(scaledScore * 1000)) / 1.0

    if album_score > 1000:
        album_score = 1000
    return album_score


def _albumRating(album_score):
    """Map an album score onto the 0.5-5.0 star scale."""
    if album_score >= 965:
        return 5.0
    if album_score >= 890:
        return 4.5
    if album_score >= 750:
        return 4.0
    if album_score >= 690:
        return 3.5
    if album_score >= 625:
        return 3.0
    if album_score >= 420:
        return 2.5
    if album_score > 325:
        return 2.0
    if album_score > 235:
        return 1.5
    if album_score >= 100:
        return 1.0
    return 0.5


def main():
    """Update the song and album rating databases from the rating playlists.

    Loads the five playlist files and both CSV databases, adds every playlist
    song that is not yet in the song database with the rating of its playlist,
    saves the song database, then scores every album that is not yet in the
    album database and has at least three rated songs, and saves the album
    database.
    """
    # ## load tracks in playlist
    fives = loadFile("input", "fives.txt")
    fours = loadFile("input", "fours.txt")
    threes = loadFile("input", "threes.txt")
    twos = loadFile("input", "twos.txt")
    ones = loadFile("input", "ones.txt")

    ## load database of song ratings
    db = loadFile("../Databases", "song_ratings_db.csv")
    album_ratings = loadFile("../Databases", "album_ratings_db.csv")

    ## NOTE(review): config is loaded but never used in this function
    config = loadFile("../config", "config.csv", True)
    token = sptfy.authSpotipy()

    ## playlists are walked from the highest rating down: a song is only added
    ## the first time it is seen, so a song that is in multiple lists ends up
    ## with its highest rating
    playlists = [(5, fives), (4, fours), (3, threes), (2, twos), (1, ones)]
    for rating, ls in playlists:
        for song in ls:
            track_id = sptfy.getSpotifyTrackIDs(song)[0]
            if lookupSongBySpotifyID(track_id, db):
                continue
            track = sptfy.pullSpotifyTrack(track_id, token = token)
            ## NOTE(review): DataFrame.append and DataFrame.sort only exist in
            ## older pandas releases; kept to match the version this file targets
            db = db.append([{'spotify_id' : track_id, 'artist' : track['artist'], 'album' : track['album'], 'spotify_album_id' : track['spotify_album_id'], 'song' : track['title'], 'rating' : rating}])

    db = db.sort(['artist', 'album', 'rating'])
    saveDataFrame(db, "../Databases", "song_ratings_db.csv")

    ## unique (artist, album) pairs in order of first appearance
    albumPairs = []
    seenPairs = set()
    for pair in zip(db.artist, db.album):
        if pair not in seenPairs:
            seenPairs.add(pair)
            albumPairs.append(pair)

    for artist, album in albumPairs:
        ## NOTE(review): the album *name* is passed to a lookup that is named
        ## "BySpotifyID" -- confirm which key that helper expects
        if lookupAlbumBySpotifyID(album, album_ratings):
            continue

        ## a single combined mask: chaining db[mask1][mask2] applies a mask
        ## built on the full frame to an already filtered frame
        albumRows = db[(db.artist == artist) & (db.album == album)]
        ratings = albumRows['rating'].tolist()
        if len(ratings) < 3:
            ## don't make album ratings for singles (checked before the API call)
            continue

        album_id = pd.unique(albumRows['spotify_album_id'])[0]
        album_data = sptfy.pullSpotifyAlbum(album_id, token = token)

        album_score = _albumScore(ratings)
        album_rating = _albumRating(album_score)
        album_ratings = album_ratings.append([{'spotify_album_id' : album_id, 'artist' : artist, 'album' : album, 'year' : album_data['year'], 'album_rating' : album_rating, 'album_score' : album_score}])

    saveDataFrame(album_ratings, "../Databases", "album_ratings_db.csv")