def subsetDataFrame(df, tracks):
    """Return the rows of ``df`` whose ``spotify_id`` belongs to ``tracks``.

    Each entry of ``tracks`` is passed through ``sptfy.getSpotifyTrackIDs``
    and the first element of its result is used as the id to match against
    the ``spotify_id`` column.
    """
    ## resolve every requested track to the id stored in the dataframe
    wanted_ids = [sptfy.getSpotifyTrackIDs(entry)[0] for entry in tracks]
    ## boolean mask over the rows whose id was requested
    keep_mask = df.spotify_id.isin(wanted_ids)
    return df[keep_mask]
def _computeAlbumScore(ratings):
    """Return the album score (capped at 1000) for a list of track ratings.

    ``ratings`` is the list of per-track ratings (1-5) of one album. The
    score combines the mean rating, a bonus driven by the non-3 tracks and
    the share of 4/5-star tracks, and a penalty driven by the spread of the
    ratings; it is then rescaled and curved with ``8 ** -x``.
    """
    ## weighted points per rating; the entry for 3 is listed for completeness
    ## but never contributes because only non-3 tracks are summed below
    weights = {5: 100 * 1.0, 4: 80 * 1.2, 3: 60 * 1.0, 2: 40 * 1.2, 1: 20 * 1.0}
    ## float so that the proportions below are true divisions on Python 2 too
    trackCount = float(len(ratings))

    countNot3 = sum(1 for r in ratings if r != 3)
    countMoreThan2 = sum(1 for r in ratings if r > 2)
    countMoreThan3 = sum(1 for r in ratings if r > 3)
    ## ratings outside 1-5 add nothing, as in the original if/elif chain
    scoreNot3 = sum(weights.get(r, 0) for r in ratings if r != 3)

    std = np.std(ratings)
    if std == 0.0:
        std = 0.25

    if countNot3 == 0:
        adjMean = 3
    else:
        ## NOTE(review): the weights are on a 20-100 scale while 3 is
        ## subtracted from this mean below as if it were on the 1-5 scale;
        ## confirm the intended scale of the weights
        adjMean = scoreNot3 / countNot3

    prop4or5 = countMoreThan3 / trackCount
    adj1 = (adjMean - 3) * prop4or5
    adj2 = adj1 + countMoreThan2 * 0.03
    score = np.mean(ratings) + adj2

    if prop4or5 == 0:
        adjSD = std * 0.05
    else:
        adjSD = std * prop4or5 / trackCount
    score = score - adjSD

    ## min possible score: (mean of 1-star)
    min1 = 1.0
    ## max possible score: Radiohead "OK Computer"
    max1 = 5.662521
    ## range of -(8 ** -x) for x in [0, 1]
    min2 = -1.0
    max2 = -0.125

    scaledScore = (score - min1) / (max1 - min1)
    ## transform (curves the linear scores to inflate higher scores and reduce lower)
    transformedScore = -1 * (8 ** (-1 * scaledScore))
    scaledScore = (transformedScore - min2) / (max2 - min2)

    album_score = round(scaledScore * 1000) / 1.0
    if album_score > 1000:
        album_score = 1000
    return album_score


def _albumRatingFromScore(album_score):
    """Map an album score to a star rating between 0.5 and 5.0."""
    if album_score >= 965:
        return 5.0
    if album_score >= 890:
        return 4.5
    if album_score >= 750:
        return 4.0
    if album_score >= 690:
        return 3.5
    if album_score >= 625:
        return 3.0
    if album_score >= 420:
        return 2.5
    if album_score > 325:
        return 2.0
    if album_score > 235:
        return 1.5
    if album_score >= 100:
        return 1.0
    return 0.5


def main():
    """Add newly rated songs to the song database and rate their albums.

    Reads the five playlist files, adds every song that is not yet in
    ``song_ratings_db.csv`` with the rating of its playlist, saves the song
    database, then computes a score and rating for every album that has at
    least three rated songs and is not yet in ``album_ratings_db.csv``.
    """
    ## NOTE(review): `main` is defined more than once in this file; the
    ## definition that appears last is the one that stays bound.

    ## load tracks in playlist
    fives = loadFile("input", "fives.txt")
    fours = loadFile("input", "fours.txt")
    threes = loadFile("input", "threes.txt")
    twos = loadFile("input", "twos.txt")
    ones = loadFile("input", "ones.txt")

    ## load database of song ratings
    db = loadFile("../Databases", "song_ratings_db.csv")
    album_ratings = loadFile("../Databases", "album_ratings_db.csv")
    ## loaded as before, but nothing below reads it
    config = loadFile("../config", "config.csv", True)

    token = sptfy.authSpotipy()

    ## a song is only added the first time it is seen, so the playlists are
    ## walked from the highest rating down: a song that sits in several
    ## lists ends up with the highest rating
    playlists = [(5, fives), (4, fours), (3, threes), (2, twos), (1, ones)]
    for rating, playlist in playlists:
        for song in playlist:
            track_id = sptfy.getSpotifyTrackIDs(song)[0]
            if lookupSongBySpotifyID(track_id, db):
                continue
            track = sptfy.pullSpotifyTrack(track_id, token=token)
            new_row = pd.DataFrame([{
                'spotify_id': track_id,
                'artist': track['artist'],
                'album': track['album'],
                'spotify_album_id': track['spotify_album_id'],
                'song': track['title'],
                'rating': rating
            }])
            db = pd.concat([db, new_row])

    db = db.sort_values(['artist', 'album', 'rating'])
    saveDataFrame(db, "../Databases", "song_ratings_db.csv")

    ## one (artist, album) pair per album, in order of first appearance
    album_keys = db[['artist', 'album']].drop_duplicates()
    for artist, album in album_keys.itertuples(index=False):
        ## NOTE(review): the album *name* is passed to a lookup named
        ## "...BySpotifyID"; confirm which key that helper expects
        if lookupAlbumBySpotifyID(album, album_ratings):
            continue

        ## single combined mask: chaining two boolean masks would apply the
        ## second one to a frame it no longer lines up with
        album_rows = db[(db.artist == artist) & (db.album == album)]
        ratings = album_rows['rating'].tolist()
        if len(ratings) < 3:
            ## don't make album ratings for singles
            continue

        ## only hit the API once the album is known to need a rating
        album_id = album_rows['spotify_album_id'].iloc[0]
        album_data = sptfy.pullSpotifyAlbum(album_id, token=token)

        album_score = _computeAlbumScore(ratings)
        album_rating = _albumRatingFromScore(album_score)

        new_album = pd.DataFrame([{
            'spotify_album_id': album_id,
            'artist': artist,
            'album': album,
            'year': album_data['year'],
            'album_rating': album_rating,
            'album_score': album_score
        }])
        album_ratings = pd.concat([album_ratings, new_album])

    saveDataFrame(album_ratings, "../Databases", "album_ratings_db.csv")
def lookupSongBySpotifyID(song, df):
    """Tell whether ``song`` already has a row in ``df``.

    ``song`` is resolved with ``sptfy.getSpotifyTrackIDs`` and the first
    element of the result is compared against the ``spotify_id`` column.
    Returns True when at least one row matches, False otherwise.
    """
    resolved = sptfy.getSpotifyTrackIDs(song)
    ## element-wise comparison against the id column
    matches = df.spotify_id == resolved[0]
    return any(matches)
def pullEchoNestSong(auth, track, album=None, local_link=None):
    """Fetch the metadata of one song and return it as a flat dict.

    Parameters:
        auth: passed as ``headers`` to the first API call; also interpolated
            as the ``api_key`` query value when ``track`` is not a Spotify
            reference.
        track: string identifying the song. When it contains ``'spotify'``
            it is resolved with ``sptfy.getSpotifyTrackIDs``; otherwise it is
            placed in the query string as ``id``.
        album: stored under ``song['album']`` in the non-Spotify branches.
        local_link: stripped and stored under ``song['spotify_id']`` in the
            non-Spotify success branch.

    Returns:
        The flattened song dict with renamed keys, or None when the
        artist/title search returns no songs.

    NOTE(review): the nesting below was reconstructed from a copy of the
    file whose whitespace had been collapsed; confirm in particular which
    ``if`` the second cleanup ``else`` belongs to and that ``return song``
    sits at function level.
    NOTE(review): another definition with the same name and signature exists
    in this file; in Python the one defined last is the one that is used.
    """
    ## NOTE(review): this is the Spotify audio-features endpoint, yet the
    ## response is read below with 'response'/'status'/'code' keys --
    ## confirm the shape mhlpr.callAPI returns for this URL
    url_base = "https://api.spotify.com/v1/audio-features/"
    if 'spotify' in track:
        track_id, spotify_uri = sptfy.getSpotifyTrackIDs(track)
        ## due to echonest using 2 different bucket params and url encoding the ampersand, payload cannot be used
        # payload = {'api_key' : api_key, 'track_id' : spotify_uri, 'bucket' : "audio_summary&bucket=id:spotify", 'format' : "json"}
        url_suffix = "%s" % sptfy.stripSpotifyURI(spotify_uri)
    else:
        ## NOTE(review): debugger breakpoint left in this branch
        pdb.set_trace()
        ## it's an echonest id and can be accessed directly
        url_suffix = "?api_key=%s&id=%s&bucket=audio_summary&bucket=id:spotify&format=json" % (
            auth, track)
    url = url_base + url_suffix
    data = mhlpr.callAPI(url, headers=auth)
    ## if response is a success
    if int(data['response']['status']['code']) == 0 and len(
            data['response']['songs']) > 0:
        song = mhlpr.flattenDictCustom(data['response']['songs'][0])
        if 'spotify' in track:
            ## from here on `track` is whatever pullSpotifyTrack returns
            ## (indexed by key below), no longer the input string
            track = sptfy.pullSpotifyTrack(track_id)
            song['album'] = track['album']
            song['spotify_artist_id'] = track['spotify_artist_id']
            ## add spotify uri to song data
            song['spotify_id'] = track_id
        else:
            song['album'] = album
            ## raises AttributeError if local_link is left at its None default
            song['spotify_id'] = local_link.strip()
        ## pop off unneeded data and flatten dict
        song.pop('audio_md5', None)
        song.pop('analysis_url', None)
        song['echonest_artist_id'] = song.pop('artist_id')
        if 'artist_foreign_ids' in song:
            song.pop('artist_foreign_ids')
        ## rename keys as necessary
        song['echonest_id'] = song.pop('id')
        song['artist'] = song.pop('artist_name')
    elif int(data['response']['status']['code']) == 5:
        ## the song cannot be found by the spotify id
        url = "http://developer.echonest.com/api/v4/song/search"
        if 'spotify' in track:
            track = sptfy.pullSpotifyTrack(track_id)
            artist = track['artist']
            title = track['title']
        ## `artist` and `title` are only bound in the branch above;
        ## `api_key` is not defined in this function -- presumably a
        ## module-level name, TODO confirm
        payload = {
            'api_key': api_key,
            'artist': artist,
            'title': title,
            'bucket': "audio_summary",
            'format': "json"
        }
        data = mhlpr.callAPI(url, payload)
        if len(data['response']['songs']) > 0:
            ## pop off unneeded data and flatten dict
            song = mhlpr.flattenDictCustom(data['response']['songs'][0])
            ## NOTE(review): if `track` was rebound above, this membership
            ## test runs against the pullSpotifyTrack result, not against
            ## the original string -- confirm this is intended
            if 'spotify' in track:
                song['album'] = track['album']
                song['spotify_artist_id'] = track['spotify_artist_id']
                ## check to be sure it's the correct song -- fuzzy string match of at least .75 levenshtein ratio
                ## NOTE(review): song['artist'] is read here before the
                ## 'artist_name' -> 'artist' rename below; verify that
                ## flattenDictCustom already provides that key
                if fuzzyMatch(song['artist'], track['artist'], song['title'],
                              track['title']):
                    song.pop('audio_md5', None)
                    song.pop('analysis_url', None)
                    song['echonest_artist_id'] = song.pop('artist_id')
                    if 'artist_foreign_ids' in song:
                        song.pop('artist_foreign_ids')
                    ## add spotify uri to song data
                    song['spotify_id'] = track_id
                    ## rename keys as necessary
                    song['echonest_id'] = song.pop('id')
                    song['artist'] = song.pop('artist_name')
            else:
                ## pop off unneeded data and flatten dict
                song.pop('audio_md5', None)
                song.pop('analysis_url', None)
                song['echonest_artist_id'] = song.pop('artist_id')
                if 'artist_foreign_ids' in song:
                    song.pop('artist_foreign_ids')
                ## rename keys as necessary
                song['echonest_id'] = song.pop('id')
                song['album'] = album
                ## `track_id` is only bound when the input contained 'spotify'
                song['spotify_id'] = track_id
                song['artist'] = song.pop('artist_name')
        else:
            print "Song not found via EchoNest search: {}".format(spotify_uri)
            return None
    else:
        pdb.set_trace()
        ## bare string expression: evaluated and discarded, nothing is
        ## printed or raised
        "Unrecognized error code."
    ## `song` is not bound when the last `else` branch above was taken
    return song
def pullEchoNestSong(auth, track, album = None, local_link = None):
    """Fetch the metadata of one song and return it as a flat dict.

    Parameters:
        auth: passed as ``headers`` to the first API call; also interpolated
            as the ``api_key`` query value when ``track`` is not a Spotify
            reference.
        track: string identifying the song. When it contains ``'spotify'``
            it is resolved with ``sptfy.getSpotifyTrackIDs``; otherwise it is
            placed in the query string as ``id``.
        album: stored under ``song['album']`` in the non-Spotify branches.
        local_link: stripped and stored under ``song['spotify_id']`` in the
            non-Spotify success branch.

    Returns:
        The flattened song dict with renamed keys, or None when the
        artist/title search returns no songs.

    NOTE(review): the nesting below was reconstructed from a copy of the
    file whose whitespace had been collapsed; confirm in particular which
    ``if`` the second cleanup ``else`` belongs to and that ``return song``
    sits at function level.
    NOTE(review): another definition with the same name and signature exists
    in this file; in Python the one defined last is the one that is used.
    """
    ## NOTE(review): this is the Spotify audio-features endpoint, yet the
    ## response is read below with 'response'/'status'/'code' keys --
    ## confirm the shape mhlpr.callAPI returns for this URL
    url_base = "https://api.spotify.com/v1/audio-features/"
    if 'spotify' in track:
        track_id, spotify_uri = sptfy.getSpotifyTrackIDs(track)
        ## due to echonest using 2 different bucket params and url encoding the ampersand, payload cannot be used
        # payload = {'api_key' : api_key, 'track_id' : spotify_uri, 'bucket' : "audio_summary&bucket=id:spotify", 'format' : "json"}
        url_suffix = "%s" % sptfy.stripSpotifyURI(spotify_uri)
    else:
        ## NOTE(review): debugger breakpoint left in this branch
        pdb.set_trace()
        ## it's an echonest id and can be accessed directly
        url_suffix = "?api_key=%s&id=%s&bucket=audio_summary&bucket=id:spotify&format=json" % (auth, track)
    url = url_base + url_suffix
    data = mhlpr.callAPI(url, headers = auth)
    ## if response is a success
    if int(data['response']['status']['code']) == 0 and len(data['response']['songs']) > 0:
        song = mhlpr.flattenDictCustom(data['response']['songs'][0])
        if 'spotify' in track:
            ## from here on `track` is whatever pullSpotifyTrack returns
            ## (indexed by key below), no longer the input string
            track = sptfy.pullSpotifyTrack(track_id)
            song['album'] = track['album']
            song['spotify_artist_id'] = track['spotify_artist_id']
            ## add spotify uri to song data
            song['spotify_id'] = track_id
        else:
            song['album'] = album
            ## raises AttributeError if local_link is left at its None default
            song['spotify_id'] = local_link.strip()
        ## pop off unneeded data and flatten dict
        song.pop('audio_md5', None)
        song.pop('analysis_url', None)
        song['echonest_artist_id'] = song.pop('artist_id')
        if 'artist_foreign_ids' in song:
            song.pop('artist_foreign_ids')
        ## rename keys as necessary
        song['echonest_id'] = song.pop('id')
        song['artist'] = song.pop('artist_name')
    elif int(data['response']['status']['code']) == 5:
        ## the song cannot be found by the spotify id
        url = "http://developer.echonest.com/api/v4/song/search"
        if 'spotify' in track:
            track = sptfy.pullSpotifyTrack(track_id)
            artist = track['artist']
            title = track['title']
        ## `artist` and `title` are only bound in the branch above;
        ## `api_key` is not defined in this function -- presumably a
        ## module-level name, TODO confirm
        payload = {'api_key' : api_key, 'artist' : artist, 'title' : title, 'bucket' : "audio_summary", 'format' : "json"}
        data = mhlpr.callAPI(url, payload)
        if len(data['response']['songs']) > 0:
            ## pop off unneeded data and flatten dict
            song = mhlpr.flattenDictCustom(data['response']['songs'][0])
            ## NOTE(review): if `track` was rebound above, this membership
            ## test runs against the pullSpotifyTrack result, not against
            ## the original string -- confirm this is intended
            if 'spotify' in track:
                song['album'] = track['album']
                song['spotify_artist_id'] = track['spotify_artist_id']
                ## check to be sure it's the correct song -- fuzzy string match of at least .75 levenshtein ratio
                ## NOTE(review): song['artist'] is read here before the
                ## 'artist_name' -> 'artist' rename below; verify that
                ## flattenDictCustom already provides that key
                if fuzzyMatch(song['artist'], track['artist'], song['title'], track['title']):
                    song.pop('audio_md5', None)
                    song.pop('analysis_url', None)
                    song['echonest_artist_id'] = song.pop('artist_id')
                    if 'artist_foreign_ids' in song:
                        song.pop('artist_foreign_ids')
                    ## add spotify uri to song data
                    song['spotify_id'] = track_id
                    ## rename keys as necessary
                    song['echonest_id'] = song.pop('id')
                    song['artist'] = song.pop('artist_name')
            else:
                ## pop off unneeded data and flatten dict
                song.pop('audio_md5', None)
                song.pop('analysis_url', None)
                song['echonest_artist_id'] = song.pop('artist_id')
                if 'artist_foreign_ids' in song:
                    song.pop('artist_foreign_ids')
                ## rename keys as necessary
                song['echonest_id'] = song.pop('id')
                song['album'] = album
                ## `track_id` is only bound when the input contained 'spotify'
                song['spotify_id'] = track_id
                song['artist'] = song.pop('artist_name')
        else:
            print "Song not found via EchoNest search: {}".format(spotify_uri)
            return None
    else:
        pdb.set_trace()
        ## bare string expression: evaluated and discarded, nothing is
        ## printed or raised
        "Unrecognized error code."
    ## `song` is not bound when the last `else` branch above was taken
    return song
def _computeAlbumScore(ratings):
    """Return the album score (capped at 1000) for a list of track ratings.

    ``ratings`` is the list of per-track ratings (1-5) of one album. The
    score combines the mean rating, a bonus driven by the non-3 tracks and
    the share of 4/5-star tracks, and a penalty driven by the spread of the
    ratings; it is then rescaled and curved with ``8 ** -x``.
    """
    ## weighted points per rating; the entry for 3 is listed for completeness
    ## but never contributes because only non-3 tracks are summed below
    weights = {5: 100 * 1.0, 4: 80 * 1.2, 3: 60 * 1.0, 2: 40 * 1.2, 1: 20 * 1.0}
    ## float so that the proportions below are true divisions on Python 2 too
    trackCount = float(len(ratings))

    countNot3 = sum(1 for r in ratings if r != 3)
    countMoreThan2 = sum(1 for r in ratings if r > 2)
    countMoreThan3 = sum(1 for r in ratings if r > 3)
    ## ratings outside 1-5 add nothing, as in the original if/elif chain
    scoreNot3 = sum(weights.get(r, 0) for r in ratings if r != 3)

    std = np.std(ratings)
    if std == 0.0:
        std = 0.25

    if countNot3 == 0:
        adjMean = 3
    else:
        ## NOTE(review): the weights are on a 20-100 scale while 3 is
        ## subtracted from this mean below as if it were on the 1-5 scale;
        ## confirm the intended scale of the weights
        adjMean = scoreNot3 / countNot3

    prop4or5 = countMoreThan3 / trackCount
    adj1 = (adjMean - 3) * prop4or5
    adj2 = adj1 + countMoreThan2 * 0.03
    score = np.mean(ratings) + adj2

    if prop4or5 == 0:
        adjSD = std * 0.05
    else:
        adjSD = std * prop4or5 / trackCount
    score = score - adjSD

    ## min possible score: (mean of 1-star)
    min1 = 1.0
    ## max possible score: Radiohead "OK Computer"
    max1 = 5.662521
    ## range of -(8 ** -x) for x in [0, 1]
    min2 = -1.0
    max2 = -0.125

    scaledScore = (score - min1) / (max1 - min1)
    ## transform (curves the linear scores to inflate higher scores and reduce lower)
    transformedScore = -1 * (8 ** (-1 * scaledScore))
    scaledScore = (transformedScore - min2) / (max2 - min2)

    album_score = round(scaledScore * 1000) / 1.0
    if album_score > 1000:
        album_score = 1000
    return album_score


def _albumRatingFromScore(album_score):
    """Map an album score to a star rating between 0.5 and 5.0."""
    if album_score >= 965:
        return 5.0
    if album_score >= 890:
        return 4.5
    if album_score >= 750:
        return 4.0
    if album_score >= 690:
        return 3.5
    if album_score >= 625:
        return 3.0
    if album_score >= 420:
        return 2.5
    if album_score > 325:
        return 2.0
    if album_score > 235:
        return 1.5
    if album_score >= 100:
        return 1.0
    return 0.5


def main():
    """Add newly rated songs to the song database and rate their albums.

    Reads the five playlist files, adds every song that is not yet in
    ``song_ratings_db.csv`` with the rating of its playlist, saves the song
    database, then computes a score and rating for every album that has at
    least three rated songs and is not yet in ``album_ratings_db.csv``.
    """
    ## NOTE(review): `main` is defined more than once in this file; the
    ## definition that appears last is the one that stays bound.

    ## load tracks in playlist
    fives = loadFile("input", "fives.txt")
    fours = loadFile("input", "fours.txt")
    threes = loadFile("input", "threes.txt")
    twos = loadFile("input", "twos.txt")
    ones = loadFile("input", "ones.txt")

    ## load database of song ratings
    db = loadFile("../Databases", "song_ratings_db.csv")
    album_ratings = loadFile("../Databases", "album_ratings_db.csv")
    ## loaded as before, but nothing below reads it
    config = loadFile("../config", "config.csv", True)

    token = sptfy.authSpotipy()

    ## a song is only added the first time it is seen, so the playlists are
    ## walked from the highest rating down: a song that sits in several
    ## lists ends up with the highest rating
    playlists = [(5, fives), (4, fours), (3, threes), (2, twos), (1, ones)]
    for rating, playlist in playlists:
        for song in playlist:
            track_id = sptfy.getSpotifyTrackIDs(song)[0]
            if lookupSongBySpotifyID(track_id, db):
                continue
            track = sptfy.pullSpotifyTrack(track_id, token=token)
            new_row = pd.DataFrame([{
                'spotify_id': track_id,
                'artist': track['artist'],
                'album': track['album'],
                'spotify_album_id': track['spotify_album_id'],
                'song': track['title'],
                'rating': rating
            }])
            db = pd.concat([db, new_row])

    db = db.sort_values(['artist', 'album', 'rating'])
    saveDataFrame(db, "../Databases", "song_ratings_db.csv")

    ## one (artist, album) pair per album, in order of first appearance
    album_keys = db[['artist', 'album']].drop_duplicates()
    for artist, album in album_keys.itertuples(index=False):
        ## NOTE(review): the album *name* is passed to a lookup named
        ## "...BySpotifyID"; confirm which key that helper expects
        if lookupAlbumBySpotifyID(album, album_ratings):
            continue

        ## single combined mask: chaining two boolean masks would apply the
        ## second one to a frame it no longer lines up with
        album_rows = db[(db.artist == artist) & (db.album == album)]
        ratings = album_rows['rating'].tolist()
        if len(ratings) < 3:
            ## don't make album ratings for singles
            continue

        ## only hit the API once the album is known to need a rating
        album_id = album_rows['spotify_album_id'].iloc[0]
        album_data = sptfy.pullSpotifyAlbum(album_id, token=token)

        album_score = _computeAlbumScore(ratings)
        album_rating = _albumRatingFromScore(album_score)

        new_album = pd.DataFrame([{
            'spotify_album_id': album_id,
            'artist': artist,
            'album': album,
            'year': album_data['year'],
            'album_rating': album_rating,
            'album_score': album_score
        }])
        album_ratings = pd.concat([album_ratings, new_album])

    saveDataFrame(album_ratings, "../Databases", "album_ratings_db.csv")