def constructGraph( conn, p, song_features_table_name, artist_similarities_table_name, graph_table_name, debug_print=False ):
    """Rebuild the song graph table, linking every song to its nearby songs.

    For each song in ``song_features_table_name`` (the "focal" song) candidate
    neighbors are gathered from three sources:

    * songs whose ``artist_id`` equals the focal song's artist,
    * songs of every artist listed as ``similaro`` for the focal artist in
      ``artist_similarities_table_name``,
    * songs whose value for any column in ``p['relevant_features']`` lies
      inside a window centred on the focal song's value.

    Every candidate is scored with ``MX_traverse.calcDistance`` and kept only
    when its distance is below ``p['maximal_overall_distance']``.  Each kept
    candidate is written to ``graph_table_name`` as two rows, one per edge
    direction.

    Args:
        conn: open database connection; a cursor is created from it and the
            connection itself is handed to ``MX_common.resetTable``.
        p: parameter dict.  Keys read here: ``invkey`` (column name -> index
            into a feature row), ``relevant_features``, ``unit_vec``,
            ``construct_neighborhood_metric_diameter``,
            ``distance_type_construct`` and ``maximal_overall_distance``.
        song_features_table_name: table holding one row per song.
        artist_similarities_table_name: table with ``targeto``/``similaro``
            artist pairs.
        graph_table_name: output table; it is reset before being filled.
        debug_print: when true, print the focal song and the candidates found
            through artist relations.

    Returns:
        ``(node_stats, edge_stats)``: dicts of lists.  ``node_stats`` gets one
        entry per focal song for each of its keys; ``edge_stats`` gets one
        entry per inserted neighbor.
    """
    cur = conn.cursor()

    MX_common.resetTable( conn, graph_table_name, ['targeto text', 'similaro text, dist real'] )

    # NOTE(review): table names and values are spliced into the SQL text.  The
    # values come from the database itself, but an id containing a quote would
    # still break these statements; consider driver-side parameter binding.
    def fetch_song_id_rows(condition=None):
        # Return the raw result rows (1-tuples) so that all candidate lists
        # share one element shape and can be merged in a set.
        q = "SELECT song_id FROM " + song_features_table_name
        if condition is not None:
            q += " WHERE " + condition
        cur.execute(q + ";")
        return cur.fetchall()

    def fetch_feature_rows(song_id):
        cur.execute("SELECT * FROM " + song_features_table_name + " WHERE song_id='" + song_id + "';")
        return cur.fetchall()

    node_stats = {
        'neighbors_cur_artist': list(),
        'neighbors_topo': list(),
        'neighbors_metric': list(),
        'mean_dist': list(),
        'similar_artists': list(),
    }
    edge_stats = {
        'dist': list(),
        'vdist': list(),
    }

    # for each song (the focal song), find its neighbors
    for song_row in fetch_song_id_rows():
        focal_song_id = song_row[0]

        if debug_print:
            print("focal_song: " + focal_song_id)

        focal_song_f = fetch_feature_rows(focal_song_id)
        focal_row = focal_song_f[0]
        focal_artist_id = focal_row[p['invkey']['artist_id']]

        # TOPOLOGICAL candidates, part 1: all songs of the focal artist
        song_ids_cur_artist = fetch_song_id_rows("artist_id='" + focal_artist_id + "'")
        node_stats['neighbors_cur_artist'].append(len(song_ids_cur_artist))

        if debug_print:
            for song_id in song_ids_cur_artist:
                print("same artist songs: " + song_id[0])

        # TOPOLOGICAL candidates, part 2: all songs of artists similar to the focal artist
        cur.execute("SELECT similaro FROM " + artist_similarities_table_name
                    + " WHERE targeto='" + focal_artist_id + "';")
        artist_ids_topo = cur.fetchall()
        node_stats['similar_artists'].append(len(artist_ids_topo))

        song_ids_topo = list()
        for artist_id in artist_ids_topo:
            song_ids_topo.extend(fetch_song_id_rows("artist_id='" + artist_id[0] + "'"))
        # TODO: go deeper into the tree of artist similarities (needs a FIFO of
        # nodes or some sort of recursion).

        node_stats['neighbors_topo'].append(len(song_ids_topo))

        if debug_print:
            for song_id in song_ids_topo:
                print("similar artists songs: " + song_id[0])

        # METRIC candidates: songs close to the focal song in any single relevant feature
        song_ids_metric = list()
        for key in p['relevant_features']:
            # The focal value is read from the fetched row through p['invkey'],
            # the same way artist_id is read above.
            center = focal_row[p['invkey'][key]]
            # NOTE(review): assumes p['unit_vec'] is indexable by feature name -- confirm.
            # abs() keeps the window well-formed should a scale be negative.
            radius = abs(p['unit_vec'][key]) * p['construct_neighborhood_metric_diameter']
            window = key + ">=" + str(center - radius) + " AND " + key + "<=" + str(center + radius)
            # Keep every matching row; an empty result simply contributes nothing.
            song_ids_metric.extend(fetch_song_id_rows(window))

        node_stats['neighbors_metric'].append(len(song_ids_metric))

        # combine all candidate song IDs, dropping duplicates
        song_ids_combined = set(song_ids_cur_artist) | set(song_ids_topo) | set(song_ids_metric)

        # measure each candidate's distance from the focal song and drop the distant ones
        potential_neighbors = list()
        for neighbor_row in song_ids_combined:
            neighbor_song_id = neighbor_row[0]
            neighbor_song_f = fetch_feature_rows(neighbor_song_id)

            sdist = MX_traverse.calcDistance(focal_song_f[0], neighbor_song_f[0], p['distance_type_construct'])

            if sdist[0] < p['maximal_overall_distance']:
                potential_neighbors.append((neighbor_song_id, sdist[0], sdist[1]))

        if len(potential_neighbors) > 0:
            node_stats['mean_dist'].append( np.mean([x[1] for x in potential_neighbors]) )
        else:
            # -1 marks a focal song without any neighbor inside the distance limit
            node_stats['mean_dist'].append(-1)

        # Store both directions of every retained edge.
        # NOTE(review): no cap on the number of neighbors is applied here; the
        # candidates are held in one combined list, so the only filter is the
        # distance limit above.
        for neighbor in potential_neighbors:
            for source_id, target_id in ((focal_song_id, neighbor[0]), (neighbor[0], focal_song_id)):
                cur.execute("INSERT INTO " + graph_table_name + " (targeto, similaro, dist) VALUES ('"
                            + source_id + "', '" + target_id + "', '" + str(neighbor[1]) + "');")

            edge_stats['dist'].append( neighbor[1] )
            edge_stats['vdist'].append( neighbor[2] )

    return node_stats, edge_stats
def constructGraph( song_features_table_name, artist_similarities_table_name, graph_table_name, debug_print=False ): #graph_song_list_table_name, 
    """Rebuild the song graph table using the neighborhood types enabled in ``p``.

    Opens its own database connection (``MX_common.g_db_conn_command``, with
    autocommit on), resets ``graph_table_name`` and then, for every song in
    ``song_features_table_name`` (the "focal" song):

    * ``'topo'`` neighborhood: collects the songs of the focal artist and of
      every artist listed as ``similaro`` for it in
      ``artist_similarities_table_name``;
    * ``'features'`` neighborhood: for each column in
      ``p['relevant_features']`` collects a random sample of at most
      ``p['max_feature_neighborhood_size']`` songs whose value lies inside a
      window centred on the focal song's value;
    * scores every candidate with ``MX_common.calcDistance``, sorts each
      neighborhood by distance and inserts the closest
      ``p['max_MXG_neighbors'][type]`` candidates as two rows each (one per
      edge direction).

    Reads the module-level parameter dict ``p``.

    Args:
        song_features_table_name: table holding one row per song.
        artist_similarities_table_name: table with ``targeto``/``similaro``
            artist pairs.
        graph_table_name: output table; it is reset before being filled.
        debug_print: when true, print the focal song and its topological
            candidates.

    Returns:
        ``(SFstats, node_stats, edge_stats)``: ``SFstats`` is whatever
        ``MX_common.initUnitVec`` returned; the other two are dicts of lists
        with per-song and per-inserted-neighbor statistics.
    """
    global p

    SFstats = MX_common.initUnitVec( song_features_table_name )

    g_conn = psy.connect( MX_common.g_db_conn_command )
    try:
        g_conn.autocommit = True
        g_cur = g_conn.cursor()

        # NOTE(review): table names and values are spliced into the SQL text.
        # The values come from the database itself, but an id containing a
        # quote would still break these statements; consider parameter binding.
        def fetch_song_ids(tail=""):
            # Return the song_id column of the feature table as a flat list of ids.
            g_cur.execute("SELECT song_id FROM " + song_features_table_name + tail + ";")
            return [row[0] for row in g_cur.fetchall()]

        def fetch_feature_rows(song_id):
            g_cur.execute("SELECT * FROM " + song_features_table_name + " WHERE song_id='" + song_id + "';")
            return g_cur.fetchall()

        # read song_id list
        songs_list = fetch_song_ids()

        node_stats = {
            'neighbors_cur_artist': list(),
            'neighbors_topo': list(),
            'neighbors_metric': list(),
            'mean_dist': list(),
            'similar_artists': list(),
        }
        edge_stats = {
            'dist': list(),
            'vdist': list(),
        }

        MX_common.resetTable( g_cur, graph_table_name, ['targeto text', 'similaro text'] )

        # for each song (the focal song), find its neighbors
        for focal_song_id in songs_list:

            if debug_print:
                print("focal_song: " + focal_song_id)

            focal_song_f = fetch_feature_rows(focal_song_id)
            focal_row = focal_song_f[0]

            potential_neighbors = dict()
            potential_neighbors_dist = dict()

            # Reset per song so the summary line below never reports values
            # left over from a previous song, and never hits an unbound name
            # when the 'topo' neighborhood is disabled.
            song_ids_cur_artist = list()
            song_ids_topo = list()

            # songs that are TOPOLOGICALLY close (based on artist similarity)
            nbkey = 'topo'
            if nbkey in p['neighborhood_types']:
                focal_artist_id = focal_row[p['invkey']['artist_id']]

                # all songs of the focal artist
                song_ids_cur_artist = fetch_song_ids(" WHERE artist_id='" + focal_artist_id + "'")
                node_stats['neighbors_cur_artist'].append(len(song_ids_cur_artist))

                if debug_print:
                    for song_id in song_ids_cur_artist:
                        # ids are plain strings here, so print the whole id
                        print("same artist songs: " + song_id)

                # all artists similar to the focal artist
                g_cur.execute("SELECT similaro FROM " + artist_similarities_table_name
                              + " WHERE targeto='" + focal_artist_id + "';")
                artist_ids_topo = g_cur.fetchall()
                node_stats['similar_artists'].append(len(artist_ids_topo))

                # all songs of those similar artists
                for artist_id in artist_ids_topo:
                    song_ids_topo.extend(fetch_song_ids(" WHERE artist_id='" + artist_id[0] + "'"))
                # TBD - go deeper into the tree of artist similarities (needs a
                # FIFO of nodes or some sort of recursion).

                node_stats['neighbors_topo'].append(len(song_ids_topo))

                if debug_print:
                    for song_id in song_ids_topo:
                        print("similar artists songs: " + song_id)

                # combine all potential TOPOLOGICAL neighbors, dropping duplicates
                potential_neighbors[nbkey] = list(set(song_ids_cur_artist) | set(song_ids_topo))

            # songs that are METRICALLY close (based on song features)
            nbkey = 'features'
            if nbkey in p['neighborhood_types']:
                potential_neighbors[nbkey] = list()

                for key in p['relevant_features']:
                    column = p['invkey'][key]
                    radius = abs(p['unit_vec_inv'][column]) * p['construct_neighborhood_metric_diameter']
                    low = focal_row[column] - radius
                    high = focal_row[column] + radius
                    # random sample, bounded in size, of the songs inside the window
                    potential_neighbors[nbkey].extend(fetch_song_ids(
                        " WHERE " + key + ">=" + str(low) + " AND " + key + "<=" + str(high)
                        + " ORDER BY RANDOM() LIMIT " + str(p['max_feature_neighborhood_size'])))

                node_stats['neighbors_metric'].append(len(potential_neighbors[nbkey]))

            # for each potential neighbor, measure its distance from the focal song
            for nbkey in p['neighborhood_types']:
                scored = list()

                for neighbor_song_id in potential_neighbors[nbkey]:
                    neighbor_song_f = fetch_feature_rows(neighbor_song_id)
                    sdist = MX_common.calcDistance(focal_song_f[0], neighbor_song_f[0], p['distance_type_construct'])
                    scored.append((neighbor_song_id, sdist[0], sdist[1]))

                # closest candidates first, so the truncation below keeps the nearest ones
                scored.sort(key=lambda x: x[1])
                potential_neighbors_dist[nbkey] = scored

                if len(scored) > 0:
                    node_stats['mean_dist'].append( np.mean([x[1] for x in scored]) )
                else:
                    # -1 marks a neighborhood without any candidate
                    node_stats['mean_dist'].append(-1)

            # Report the 'features' count explicitly instead of whichever
            # neighborhood type happened to be processed last.
            print("#Feature NBs = " + str(len(potential_neighbors_dist.get('features', [])))
                  + ", Topo same artist NBs = " + str(len(song_ids_cur_artist))
                  + ", Topo similar artists NBs = " + str(len(song_ids_topo)))

            # add potential neighbors, truncated per neighborhood type to the closest ones
            for nbkey in p['neighborhood_types']:
                for neighbor in potential_neighbors_dist[nbkey][:p['max_MXG_neighbors'][nbkey]]:

                    # add both directions of the edge (later on a separate step filters duplicate edges)
                    for source_id, target_id in ((focal_song_id, neighbor[0]), (neighbor[0], focal_song_id)):
                        g_cur.execute("INSERT INTO " + graph_table_name + " (targeto, similaro) VALUES ('"
                                      + source_id + "', '" + target_id + "');")

                    edge_stats['dist'].append( neighbor[1] )
                    edge_stats['vdist'].append( neighbor[2] )
    finally:
        # release the connection even when a query or distance computation fails
        g_conn.close()

    return SFstats, node_stats, edge_stats
示例#3
0
def pickNextSongs( cur, p, cur_song_id, mag, conservation=None, debug_print=0 ):    
    """Suggest the next songs to play after ``cur_song_id``.

    Registers ``cur_song_id`` as the currently playing song (updating the
    history and avoid-replay FIFOs held in ``p``), reads its neighbors from the
    graph table ``MX_common.MXG_table_name``, drops those in
    ``p['avoid_recent']``, and scores the rest against the current song with
    ``MX_common.calcDistanceInner``.  Suggestions are then drawn at random,
    without repetition, with probability proportional to
    ``(distance + epsilon) ** p['noisyness']``.

    Side effects on ``p``: ``recently_played``, ``currently_playing``,
    ``avoid_recent`` and ``debug_neighbors`` are updated.  Progress
    information is printed unconditionally.

    Args:
        cur: open database cursor.
        p: parameter/state dict.  Besides the keys listed above it is read for
            ``recently_played_num``, ``recent_to_avoid_num``, ``unit_vec_inv``,
            ``invkey_distance_features``, ``distance_features``, ``noisyness``
            and ``suggestions_num``.
        cur_song_id: id of the song that is now playing.
        mag: unused by the current selection logic.
        conservation: optional mapping from each name in
            ``p['distance_features']`` to a weight passed on to the distance
            function; zeros are used when it is None.
        debug_print: unused.

    Returns:
        List of suggested song ids in the order they were drawn.  It holds at
        least one id whenever a candidate exists and is empty when the current
        song has no neighbor outside the avoid-replay list.
    """
    # NOTE(review): ids are spliced into the SQL text; consider parameter binding.
    features_query_head = "SELECT * FROM " + MX_common.song_features_table_name + " WHERE song_id='"

    # fetch current song features
    cur.execute(features_query_head + cur_song_id + "';")
    cur_song_f = cur.fetchall()

    cur_song_desc = printSongDescription( cur_song_f[0], 1 )

    # history FIFO: the previously playing song moves into the history
    p['recently_played'].appendleft( p['currently_playing'] )
    if len(p['recently_played']) > p['recently_played_num']:
        p['recently_played'].pop()
    p['currently_playing'] = (cur_song_id, cur_song_desc)

    # avoid-replay FIFO: prevents frequent repeats
    p['avoid_recent'].appendleft(cur_song_id)
    if len(p['avoid_recent']) > p['recent_to_avoid_num']:
        p['avoid_recent'].pop()

    t_scale = p['unit_vec_inv'][p['invkey_distance_features']]
    t_cur_f = np.array([cur_song_f[0][i] for i in p['invkey_distance_features']])

    if conservation is not None:
        t_cons_f = np.array([conservation[key] for key in p['distance_features']])
    else:
        t_cons_f = np.zeros(len(p['distance_features']))

    # fetch current song neighbors (= candidates for the next song)
    cur.execute("SELECT similaro FROM " + MX_common.MXG_table_name + " WHERE targeto='" + cur_song_id + "';")
    # the graph stores duplicate edges, so filter them out
    neighbor_songs = list(set(cur.fetchall()))
    print("#neighbors = " + str(len(neighbor_songs)))
    # filter out recently-played songs
    neighbor_songs = [x for x in neighbor_songs if x[0] not in p['avoid_recent']]
    print("#neighbors (- recent) = " + str(len(neighbor_songs)))

    # for each neighbor, fetch features and calc weighted distance to current song
    nb_disted = list()
    p['debug_neighbors'] = list()
    t_neighbor_f = None
    print("#neighbors: " + str(len(neighbor_songs)))
    for neighbor in neighbor_songs:
        cur.execute(features_query_head + neighbor[0] + "';")
        neighbor_song_f = cur.fetchall()

        t_neighbor_f = np.array([neighbor_song_f[0][i] for i in p['invkey_distance_features']])
        sdist = MX_common.calcDistanceInner(t_neighbor_f, t_cur_f, t_scale, t_cons_f, 'L2')

        # keep the neighbor's description for debug purposes
        nb_disted.append((sdist[0], neighbor[0], printSongDescription( neighbor_song_f[0], 1 )))

    if not nb_disted:
        # Nothing to sample from: a sampler over an empty support cannot be
        # built, and the debug dump below needs at least one neighbor.
        return []

    # debug dump: per distance feature, the weight and the values of the last
    # scored neighbor and of the current song
    trip = [
        ( p['invkey_distance_features'][i], p['distance_features'][i], t_cons_f[i], t_neighbor_f[i], t_cur_f[i] )
        for i in range(len(t_cons_f))
    ]
    print(trip)

    nb_disted = sorted(nb_disted)

    # pick the next songs at random, with probabilities derived from the
    # distance between the current song and each neighbor
    epsilon = 0.0001   # keeps a zero distance from producing a degenerate weight
    xk = np.arange(len(nb_disted))
    pk = (np.array([x[0] for x in nb_disted]) + epsilon) ** p['noisyness']
    pk /= pk.sum()
    print("%s %s" % (pk[:5], pk.sum()))

    # "0 <probability> <description>"; the leading 0 is replaced by the draw rank below
    p['debug_neighbors'] = [("0 %4.4f" % prob) + " " + entry[2] for prob, entry in zip(pk, nb_disted)]

    # draw without repetition: pick one, null its probability, renormalize, resample.
    # At least one song is always drawn, even when p['suggestions_num'] is below 1.
    draws = max(1, min(len(nb_disted), p['suggestions_num']))
    ii = list()
    for rank in range(draws):
        if rank > 0:
            pk[ii[-1]] = 0
            pk /= pk.sum()
        custm = stats.rv_discrete(name='custm', values=(xk, pk))
        # rvs() yields an array; reduce it to a plain int so it can index lists
        ii.append(int(np.ravel(custm.rvs(size=1))[0]))
        print("i" + str(rank) + " = " + str(xk[ii[-1]]) + " %s %s" % (pk[:5], pk.sum()))

        p['debug_neighbors'][ii[-1]] = str(rank + 1) + p['debug_neighbors'][ii[-1]][1:]

    next_song_ids = [nb_disted[i][1] for i in ii]

    return next_song_ids


    
示例#4
0
    dfsongs['valence:real'] = dfsongs['valence:real'].fillna(-1);
    dfsongs['instrumentalness:real'] = dfsongs['instrumentalness:real'].fillna(-1);
    


    # write to csv file as an intermediate
    pd.DataFrame.to_csv(dfsongs, tmp_csv_songs_file, index=False)    

    # open PostgreSQL
    conn_out = psy.connect("dbname='my_db_test' user='******' host='localhost' password='' port=8787")
    conn_out.autocommit = True

    cur = conn_out.cursor()

            
    MX_common.importFromDbCsv(conn_out, tmp_csv_songs_file, MX_common.song_features_table_name)

    cur.execute("CREATE INDEX "\
                    +MX_common.song_features_song_id_index_name +" \
                ON "\
                    +MX_common.song_features_table_name +" \
                (song_id);")  
    cur.execute("CREATE INDEX "\
                    +MX_common.song_features_track_id_index_name +" \
                ON "\
                    +MX_common.song_features_table_name +" \
                (track_id);")  
    cur.execute("CREATE INDEX "\
                    +MX_common.song_features_7digital_id_index_name +" \
                ON "\
                    +MX_common.song_features_table_name +" \