def get_similar_artists(artist, limit=10, min_song_ct=5, artist_first=True):
    """Return the names of the artists most similar to ``artist``.

    Candidate artists are ranked by the number of similarity rows linking
    songs by ``artist`` to their songs, divided by the candidate's total
    song count.

    Args:
        artist: Artist name to find neighbours for.
        limit: Maximum number of rows fetched from the database.
        min_song_ct: Only artists with strictly more songs than this are
            considered (the query uses ``scounts.count > :count``).
        artist_first: When true, ``artist`` itself is placed at the head
            of the result (and dropped from any later position), so the
            list may hold up to ``limit + 1`` names.

    Returns:
        A list of artist names, best match first.
    """
    # Raw SQL is used instead of the ORM for performance reasons.
    query = text(
        ' SELECT song1.artist, song2.artist,'
        ' COUNT(sim.similarity) AS sim_count,'
        ' scounts.count AS song_count,'
        ' COUNT(sim.similarity) / CAST(scounts.count AS FLOAT) AS sim_count_norm'
        ' FROM songs AS song1'
        ' JOIN similarities AS sim ON song1.song_id=sim.song1_id'
        ' JOIN songs AS song2 ON sim.song2_id=song2.song_id'
        ' JOIN (SELECT s.artist AS artist, COUNT(s.id) AS count'
        ' FROM songs AS s GROUP BY s.artist) AS scounts'
        ' ON song2.artist=scounts.artist'
        ' WHERE song1.artist=:artist AND scounts.count > :count'
        ' GROUP BY song1.artist, song2.artist, scounts.artist, scounts.count'
        ' ORDER BY sim_count_norm DESC'
        ' LIMIT :limit; '
    )

    # The values below are bound parameters handed to the underlying DBAPI
    # rather than interpolated into the SQL text, which *should* make this
    # safe against injection:
    # http://docs.sqlalchemy.org/en/rel_0_9/orm/session_api.html
    #   #sqlalchemy.orm.session.Session.execute
    # http://docs.sqlalchemy.org/en/rel_0_9/core/sqlelement.html
    #   #sqlalchemy.sql.expression.text
    # http://docs.sqlalchemy.org/en/rel_0_9/core/sqlelement.html
    #   #sqlalchemy.sql.expression.bindparam
    bindings = dict(artist=artist, count=min_song_ct, limit=limit)

    # Each row is ('artist1', 'artist2', sim_ct, song_ct, norm_sim_ct);
    # only the second column (the similar artist) is kept.
    rows = DB_SESSION.execute(query, bindings)
    similar = [row[1] for row in rows]

    if not artist_first:
        return similar

    # Avoid listing the queried artist twice once it is put at the front.
    try:
        similar.remove(artist)
    except ValueError:
        pass
    return [artist] + similar
def get_similar_songs(artist, limit=10):
    """Return the songs most similar to the songs of ``artist``.

    Songs are ranked by the summed similarity between them and every song
    by ``artist``, highest total first.

    Args:
        artist: Artist name whose songs seed the search.
        limit: Maximum number of rows fetched from the database.

    Returns:
        A list of strings, each formatted as ``'<title> <artist>'`` (song
        title and its artist separated by a single space).
    """
    query = text(
        ' SELECT song1.artist, song2.title, song2.artist,'
        ' SUM(sim.similarity) AS total_sim'
        ' FROM songs AS song1'
        ' JOIN similarities AS sim ON song1.song_id=sim.song1_id'
        ' JOIN songs AS song2 ON sim.song2_id=song2.song_id'
        ' WHERE song1.artist=:artist'
        ' GROUP BY song1.artist, song2.title, song2.artist'
        ' ORDER BY total_sim DESC'
        ' LIMIT :limit; '
    )
    bindings = dict(artist=artist, limit=limit)

    # Each row is ('artist1', 'song2', 'artist2', sim); the similar song's
    # title and artist are joined into one display string.
    songs = []
    for row in DB_SESSION.execute(query, bindings):
        title = row[1]
        song_artist = row[2]
        songs.append('%s %s' % (title, song_artist))
    return songs