Example #1
def main():
    from model.conv_autoencoder import Autoencoder

    from typing import List

    from neomodel import config, db
    import pandas as pd
    import os
    from tqdm import tqdm

    from model.graph import connection_url
    from model.graph.spotify.track import Track
    from model.graph.spotify.playlist import Playlist

    config.DATABASE_URL = connection_url()
    db.set_connection(connection_url())

    # Segment analysis: flatten the fields we care about into one non-negative feature vector
    def segment_to_vector(segment):
        scalars = [segment['duration'], segment['loudness_start'],
                   segment['loudness_max_time'], segment['loudness_max']]
        return list(map(abs, scalars + segment['pitches'] + segment['timbre']))

    stopval = len(Track.nodes)
    print('Number of tracks:', stopval)
    print('Number of playlists:', len(Playlist.nodes))

    def get_minibatches(stopval, offset=0, interval=500):
        while offset < stopval:
            to_analyze: List[Track] = Track.get_songs_not_in_playlists(
                interval, offset=offset)
            if not to_analyze:  # nothing left to fetch; avoid looping forever
                break
            yield to_analyze

            offset += len(to_analyze)
            print(f'{offset}/{stopval}')

    for batch in get_minibatches(stopval):
        # Keep only tracks that actually have an audio analysis so the
        # feature arrays stay aligned with the URI column built below
        tracks = [a for a in batch if a.analysis]
        X = [a.analysis['segments'] for a in tracks]
        arrs = [list(map(segment_to_vector, sample)) for sample in X]
        print(tracks)
        # Resume from the most recent saved segment_analysis weights, if any
        latest = [f for f in os.listdir('models') if 'segment_analysis' in f]
        latest = os.path.join('models', max(latest)) if latest else None

        to_pad = 800
        arrs = Autoencoder.pad_and_normalize(arrs, to_pad=to_pad)
        auto = Autoencoder.train('segment_analysis',
                                 arrs,
                                 weights_path=latest,
                                 epochs=10)
        df = pd.DataFrame({'uri': [a.uri for a in tracks]})
        df = Autoencoder.store_zVectors(auto, arrs, df)
        for _, row in tqdm(df.iterrows(), total=len(df)):
            Track.add_zVector(row['uri'], row['zVector'].tolist())
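For reference, each vector produced by segment_to_vector above concatenates four duration/loudness scalars with the 12-dimensional pitch and 12-dimensional timbre vectors that Spotify's audio analysis attaches to every segment, so the autoencoder sees 28 non-negative values per segment. A minimal sanity-check sketch, assuming it runs inside main() after segment_to_vector is defined; the segment values are made up:

# Hypothetical segment shaped like Spotify's audio-analysis output (values are placeholders)
segment = {
    'duration': 0.25, 'loudness_start': -23.1,
    'loudness_max_time': 0.05, 'loudness_max': -11.4,
    'pitches': [0.1] * 12, 'timbre': [42.0] * 12,
}
vec = segment_to_vector(segment)
assert len(vec) == 4 + 12 + 12          # 28 features per segment
assert all(v >= 0 for v in vec)         # abs() keeps everything non-negative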
Example #2
    def add_zVector(cls, uri, data: List[List[float]]):
        '''
        Adds zVector data to the track

        :param uri: the Spotify URI of the track
        :param data: the zVector data as a list of lists, not a numpy array
        :return: the raw Cypher result; the query returns a snapshot of the node's properties taken before the update
        '''
        db.set_connection(connection_url())

        # Neo4j properties cannot hold nested lists, so serialize the
        # zVector to a JSON string and store it as a single property
        data = json.dumps({'zVector': data})

        query = '''
            MATCH (t:Track {uri:'%s'})
            WITH t, properties(t) as snapshot
            SET t.zVector = '%s'
            RETURN snapshot
            ''' % (uri, data)
        return db.cypher_query(query)
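Interpolating the URI and the JSON payload into the Cypher string with % works, but it breaks as soon as either contains a quote. As a non-authoritative alternative sketch, assuming neomodel's db.cypher_query accepts a params dict that it forwards to the driver, and using a hypothetical helper name alongside the same module-level imports as the method above:

    @classmethod
    def add_zVector_params(cls, uri, data: List[List[float]]):
        # Hypothetical variant of add_zVector using Cypher parameters ($uri, $payload)
        # instead of %-interpolation; the driver handles quoting and escaping
        db.set_connection(connection_url())
        query = '''
            MATCH (t:Track {uri: $uri})
            WITH t, properties(t) AS snapshot
            SET t.zVector = $payload
            RETURN snapshot
            '''
        return db.cypher_query(query, {'uri': uri,
                                       'payload': json.dumps({'zVector': data})})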
Example #3
def main():
    import pandas as pd
    import os
    from ast import literal_eval
    from scraper.genius import Genius
    from scraper.spotify import Spotify
    from model.graph import connection_url
    from model.graph.billboard.track import Track
    from neomodel import db, clear_neo4j_database, config

    url = connection_url()
    print(url)
    config.DATABASE_URL = url
    db.set_connection(url)
    print('connected')

    clear_neo4j_database(db)

    BILLBOARD_DIR = os.path.join('output', 'billboard')
    weeks = os.listdir(BILLBOARD_DIR)
    weeks.sort(reverse=True)
    for week in weeks:
        df = pd.read_csv(os.path.join(BILLBOARD_DIR, week, 'main.csv'))
        for i, row in df.iterrows():
            billboard_track = Track.inst(**dict(row))
            print(billboard_track)
            # Sort artists by appearance in the title
            artists = ', '.join(
                list(
                    map(
                        lambda x: x['artist_name'],
                        sorted(literal_eval(row['credited_artists']),
                               key=lambda x: x['ordinal']))))
            search_str = row['title'] + ' ' + artists
            genius_resp = Genius.search(search_str)
            print('\n\nSearching for:', search_str)

            if genius_resp['meta']['status'] != 200:
                raise ValueError(
                    'Probably exceeded Genius limits or invalid search')

            genius_resp = genius_resp['response']['hits']
            for hit in genius_resp:
                hit = hit['result']
                song_data = Genius.get_song(hit['id'])['response']['song']
                if 'spotify' in [a['provider'] for a in song_data['media']]:
                    print('Spotify exists!')
                    for a in song_data['media']:
                        print(a)
                        if a['provider'] == 'spotify':
                            print('Spotify Exists -', song_data['full_title'])
                            spotify_data = Spotify.get_track(a['native_uri'])
                            print(spotify_data)
                            break
        quit()  # stop after the first week while testing; remove to process every week
    print(weeks)
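The hard ValueError above on any non-200 status means a single rate-limit response kills the whole run. A small, purely illustrative retry wrapper; the helper name and delays are assumptions, not part of the original scraper:

from time import sleep

def genius_search_with_retry(search_str, attempts=3, delay=5):
    # Retry a few times before giving up, since a non-200 status from
    # Genius.search often just means a temporary rate limit
    for _ in range(attempts):
        resp = Genius.search(search_str)
        if resp['meta']['status'] == 200:
            return resp
        sleep(delay)
    raise ValueError('Probably exceeded Genius limits or invalid search')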
Example #4
    def get_playlists(spotify_id) -> List[Playlist]:
        '''
        Gets playlists the given track appears in
        :param spotify_id: the Spotify ID of the track
        :return: the list of playlists that the track appears in
        '''
        db.set_connection(connection_url())
        results, meta = db.cypher_query(
            'MATCH (t:Track {spotify_id: "%s"})-[:`FEATURED IN`]->(p:Playlist) RETURN p'
            % spotify_id)
        return [Playlist.inflate(row[0]) for row in results]
Example #5
    def find(cls, uri: str):
        '''
        Finds a track given its Spotify URI
        :param uri: the Spotify URI
        :return: the matching track nodes as a list
        '''
        db.set_connection(connection_url())
        results, meta = db.cypher_query(query='''
                    MATCH (t: Track {uri: "%s"}) RETURN t
                    ''' % uri)
        return [cls.inflate(track[0]) for track in results]
Example #6
    def get_album(track_id: str):
        '''
        Gets the album given the Spotify ID
        :param track_id: the Spotify ID of the track
        :return: the album it's on
        '''
        db.set_connection(connection_url())
        results, meta = db.cypher_query(query='''
            MATCH (t: Track {spotify_id: "%s"})-[:FROM]->(a: Album) RETURN a
            ''' % track_id)
        return [Album.inflate(album[0]) for album in results]
Example #7
    def get_tracks(cls, spotify_id):
        '''
        Gets the tracks featured in the given playlist as lightweight SmallTrack objects

        :param spotify_id: the Spotify ID of the playlist
        :return: the list of SmallTrack instances
        '''
        from time import time

        from model.graph.spotify.track import SmallTrack

        s = time()
        db.set_connection(connection_url())
        query = '''
        MATCH (p: Playlist {spotify_id: "%s"}) <-[r1: `FEATURED IN`]- (t: Track)
        RETURN t.uri, t.zVector, t.spotify_id, t.name
        ''' % spotify_id
        results, meta = db.cypher_query(query)
        print('Results fetched', time() - s)
        keys = ['uri', 'zVector', 'spotify_id', 'name']
        kwargs = [{keys[i]: val
                   for i, val in enumerate(result)} for result in results]

        return [SmallTrack(**result) for result in kwargs]
Example #8
    def get_associated_artists(cls, artist_id):
        '''
        Gets artists associated with the given artist

        :param artist_id: the Spotify ID of the given artist
        :return: the set of associated artists
        '''
        db.set_connection(connection_url())
        results, meta = db.cypher_query('''
        MATCH (t) -[:`FEATURED IN`]-> (p: Playlist)
        MATCH (a: Artist {spotify_id: "%s"}) <-[r1: BY]- 
            (t: Track) -[r2: BY]-> 
            (similar_artists: Artist)
        RETURN similar_artists
        ''' % artist_id)

        return {cls.inflate(artist[0]) for artist in results}
Example #9
    def get_similar_playlists(track_id: str,
                              playlist_id: str) -> List[Playlist]:
        '''
        Gets playlists similar to the given playlist
        :param track_id: the Spotify ID of the track currently playing
        :param playlist_id: the Spotify ID of the playlist currently playing
        :return: a list of similar playlists
        '''
        query = '''
            MATCH
                (t: Track {spotify_id: "%s"}) -[r1:`FEATURED IN`]->
                (p: Playlist {spotify_id: "%s"}) <-[r2:`FEATURED IN`]-
                (other_tracks: Track) -[r3:`FEATURED IN`]->
                (similar_playlists: Playlist)
            RETURN similar_playlists
            ''' % (track_id, playlist_id)
        db.set_connection(connection_url())
        results, meta = db.cypher_query(query=query)
        return [Playlist.inflate(playlist[0]) for playlist in results]
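Because the pattern above matches once per other_tracks node, the same playlist can come back many times. If duplicates are unwanted, a DISTINCT variant works; this is a sketch with a hypothetical method name, shaped like the method above:

    def get_similar_playlists_distinct(track_id: str,
                                       playlist_id: str) -> List[Playlist]:
        # Hypothetical variant: RETURN DISTINCT collapses the one-row-per-path duplicates
        query = '''
            MATCH
                (t: Track {spotify_id: "%s"}) -[:`FEATURED IN`]->
                (p: Playlist {spotify_id: "%s"}) <-[:`FEATURED IN`]-
                (: Track) -[:`FEATURED IN`]->
                (similar_playlists: Playlist)
            RETURN DISTINCT similar_playlists
            ''' % (track_id, playlist_id)
        db.set_connection(connection_url())
        results, meta = db.cypher_query(query=query)
        return [Playlist.inflate(playlist[0]) for playlist in results]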
Example #10
    def get_tracks_with_multiple_artists(cls, context, *artist_ids):
        '''
        Gets tracks that multiple artists appear on together

        :param context: the Spotify ID of the playlist the tracks must be featured in
        :param artist_ids: the given artist IDs (the query below only uses the first two)
        :return: the tracks connecting the two artists
        '''
        artists = ','.join([
            "(a%s:Artist { uri: '%s' })" % (i, a)
            for i, a in enumerate(artist_ids)
        ])
        query_constructor = '''MATCH %s, 
        p = allShortestPaths((a0)-[*]-(a1))
        WHERE EXISTS((a0) <-[:BY]- (:Track) -[:`FEATURED IN`]-> (:Playlist {spotify_id: "%s"}))
        RETURN nodes(p)
        ''' % (artists, context)
        from model.graph.spotify.track import Track
        db.set_connection(connection_url())
        results, meta = db.cypher_query(query_constructor)
        return [Track.inflate(result[0][1]) for result in results]
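For illustration only, a hedged call of the method above, assuming it lives on the Artist node class and using placeholder Spotify IDs; note that the generated MATCH/allShortestPaths pattern only references a0 and a1, so exactly two artist URIs are expected:

# Hypothetical usage; the class name and IDs below are placeholders
shared = Artist.get_tracks_with_multiple_artists(
    'PLAYLIST_SPOTIFY_ID',      # context: playlist the tracks must be featured in
    'spotify:artist:ID_ONE',    # becomes a0 in the query
    'spotify:artist:ID_TWO',    # becomes a1 in the query
)
print([t.name for t in shared])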
Example #11
def main():
    '''
    Gets all the albums in the database, loops through associated tracks, and searches Genius
        - if Spotify URI is included in Genius data, the node is updated to include Genius data
    :return:
    '''
    from time import sleep

    from neomodel import config

    from scraper.genius import Genius

    from model.graph import connection_url
    from model.graph.spotify.track import Track
    from model.graph.spotify.album import Album

    config.DATABASE_URL = connection_url()
    print(len(Album.nodes))
    for i, node in enumerate(Album.nodes):
        if i > 5:  # only sample the first few albums while testing
            break

        print('\n\nCurrent Album:', node.name)
        for track in node.tracks:
            to_search = f'{track.name} {track.artists[0].name}'
            print('\nSearching for', to_search)
            resp = Genius.search(to_search)
            print(resp)
            if resp['meta']['status'] == 200:
                resp = resp['response']
                if 'hits' in resp:
                    for hit in resp['hits']:
                        song_data = Genius.get_song(
                            hit['result']['id'])['response']['song']
                        print(song_data['media'])
                        if 'spotify' in [
                                a['provider'] for a in song_data['media']
                        ]:
                            print('Spotify exists!')
                            # Don't shadow the outer album index; iterate the media entries directly
                            for a in song_data['media']:
                                print(a)
                                if a['provider'] == 'spotify':
                                    uri = a['native_uri']
                                    in_db = Track.nodes.get_or_none(uri=uri)
                                    if in_db:
                                        print('Track exists:', in_db.name)
                                    else:
                                        print('Track is not in database')
                                    Track.add_genius(uri, song_data)
                                    new_track = Track.nodes.get_or_none(uri=uri)
                                    if new_track:
                                        print('Track updated',
                                              new_track.genius_data)
                                    else:
                                        print('Track not updated', new_track)
                        sleep(1)
Example #12
def main():
    from neomodel import config, db

    from model.graph import connection_url
    from model.graph.spotify.track import Track
    from model.graph.spotify.playlist import Playlist

    from typing import List

    from sklearn.preprocessing import MultiLabelBinarizer
    from sklearn.decomposition import LatentDirichletAllocation

    from joblib import dump, load
    from tqdm import tqdm
    import numpy as np
    from math import log10
    import os

    config.DATABASE_URL = connection_url()

    db.set_connection(connection_url())

    stopval = len(Track.nodes)
    print(stopval)

    print('Playlists', len(Playlist.nodes))
    playlists = Playlist.get_all()
    num_playlists = len(playlists)
    playlists = {node.uri: ind for ind, node in enumerate(playlists)}

    def get_minibatches(stopval, count=0, interval=20):
        while count < stopval:
            to_analyze: List[Track] = Track.get_songs_in_playlists(
                interval, count)
            if not to_analyze:  # nothing left to fetch; avoid looping forever
                break

            X = [a.get_song_features(as_list=True) for a in to_analyze]
            y = [[playlists[x.uri] for x in Track.get_playlists(a.spotify_id)]
                 for a in to_analyze]
            print(count, interval)
            yield np.array(list(
                map(lambda x: list(map(abs, x)),
                    X))), MultiLabelBinarizer().fit_transform(y)
            count += len(to_analyze)

    lda = LatentDirichletAllocation(n_components=num_playlists)

    # Resume from the most recent checkpoint if one exists; otherwise start at batch 0
    startval = 0
    checkpoints = os.listdir('trained_models')
    if checkpoints:
        latest = max(checkpoints)
        lda = load(os.path.join('trained_models', latest))
        startval = int(latest.split('.')[0])

    interval = 20
    for i, val in enumerate(
            tqdm(get_minibatches(stopval, count=startval * interval))):
        i += startval + 1
        X, y = val
        lda.partial_fit(X)
        dump(
            lda,
            os.path.join('trained_models',
                         f'{str(i).zfill(int(log10(stopval) + 1))}.joblib'))
        # Drop the previous checkpoint if it exists (it won't on the first batch)
        prev = os.path.join(
            'trained_models',
            f'{str(i - 1).zfill(int(log10(stopval) + 1))}.joblib')
        if os.path.exists(prev):
            os.remove(prev)
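The checkpoint names above zero-pad the batch index to the number of digits in stopval, so a plain lexicographic max() over the directory listing always picks the newest file. A small worked example of that scheme (the stopval value is made up):

from math import log10

stopval = 4321                                  # e.g. the number of tracks
width = int(log10(stopval) + 1)                 # 4 digits
name = f'{str(7).zfill(width)}.joblib'          # '0007.joblib'
print(name)
print(max(['0007.joblib', '0012.joblib']))      # '0012.joblib' -- the latest checkpoint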