def scatterPlotSingleUser(model, embedding_dim, userIndex, numMovies, tsneIter, perplexity):
    '''
    Creates a visualisation of a single user along with all the items of the dataset (using latent factors).
    This shows the items that are most similar and least similar to that user's tastes.

    model: the prediction model built by spotlight (from which we get the item and user latent factors).
    embedding_dim: dimension of the latent factors in the model.
    userIndex: the user whose taste we wish to visualise.
    numMovies: total number of items (movies) in the dataset.
    tsneIter: number of iterations for the t-SNE optimisation.
    perplexity: perplexity setting for the t-SNE visualisation (see sources for more info).
    '''
    
    allLatentFactors = np.empty((numMovies+1,embedding_dim))

    # Fill the item latent factors, then append the chosen user's factors as the last row
    for i in range(numMovies):
        allLatentFactors[i,:] = model._net.item_embeddings.weight[i].detach()
    allLatentFactors[numMovies,:] = model._net.user_embeddings.weight[userIndex].detach()

    # PCA used to reduce the embedding dimension to 10 before t-SNE
    if embedding_dim > 10:
        pca = PCA(n_components=10)
        allLatentFactors = pca.fit_transform(allLatentFactors)

    dimReduc = tsne(tsneIter,allLatentFactors, 2, 10, perplexity)

    plot1 = plt.scatter(dimReduc[:numMovies, 0], dimReduc[:numMovies, 1], 10 ,'black')
    plot2 = plt.scatter(dimReduc[numMovies, 0], dimReduc[numMovies, 1], 20 ,'red','*')


    plt.legend([plot1,plot2],['items','user '+str(userIndex)],bbox_to_anchor=(1.1, 1.05))

    return (dimReduc, plot1)
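
# A minimal usage sketch (the hyper-parameter values are illustrative and the
# snippet assumes the custom tsne helper and the numpy/matplotlib/PCA imports
# used above are in scope):
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.factorization.explicit import ExplicitFactorizationModel

dataset = get_movielens_dataset(variant='100K')
model = ExplicitFactorizationModel(embedding_dim=32, n_iter=5)
model.fit(dataset)

dimReduc, plot1 = scatterPlotSingleUser(model, 32, userIndex=5,
                                        numMovies=dataset.num_items,
                                        tsneIter=500, perplexity=30)
plt.show()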
Example #2
def main():
    data_dir = '../../../shared-data/'
    img_dir = '../../data/text-imgs/'

    dimensionality = 10
    img_res = 48
    data_dump_path = data_dir + 'tsne_dump_{0}d_{1}px.json'.format(
        dimensionality, img_res)

    grid_plot_count = 30  # No. of images per axis in plot of the images in the 2-D space
    img_count_limit = 0

    use_stored_data = False
    img_matrix, img_names = read_img_matrix(img_dir, img_res, img_count_limit)

    if use_stored_data:
        data_dict = load_data(data_dump_path)
        Z = np.asarray(data_dict['Z'])
    else:
        max_iter = 500
        num_pcs = 300
        perplexity = 20.0  # Originally 20.0
        Z = tsne(img_matrix, dimensionality, num_pcs, perplexity, max_iter)
        dump_data(data_dump_path, Z, img_res, img_count_limit, img_names)

    if dimensionality == 2:
        plot_tsne_grid(img_matrix, img_res, Z, grid_plot_count, data_dir)
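
# main() relies on dump_data/load_data helpers that are not shown in this snippet.
# A minimal sketch, assuming a plain JSON file whose 'Z' key matches what main()
# reads back above (the exact set of stored fields is an assumption):
import json
import numpy as np

def dump_data(path, Z, img_res, img_count_limit, img_names):
    # Store the t-SNE embedding together with the settings used to produce it.
    data = {'Z': np.asarray(Z).tolist(),
            'img_res': img_res,
            'img_count_limit': img_count_limit,
            'img_names': list(img_names)}
    with open(path, 'w') as f:
        json.dump(data, f)

def load_data(path):
    # Read the stored dictionary back; main() converts 'Z' to an array itself.
    with open(path) as f:
        return json.load(f)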
Example #3
def visualization(feature, label, save_dir, nameStr):
    '''t-SNE visualization for visual features'''
    assert feature.shape[0] == label.shape[0]
    X = feature
    labels = label
    Y = tsne(X, 2, 50, 20.0)
    plt.scatter(Y[:, 0], Y[:, 1], 20, labels)
    save_path = os.path.join(save_dir, nameStr + '.png')
    plt.savefig(save_path)
    print('visualization results are saved in %s' % save_dir)
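
# A minimal usage sketch with synthetic data (shapes and class count are
# illustrative; in practice `feature` holds extracted visual features):
import numpy as np

feature = np.random.randn(200, 128)          # 200 samples, 128-dim features
label = np.random.randint(0, 5, size=200)    # 5 classes, used as point colours
visualization(feature, label, save_dir='.', nameStr='tsne_demo')
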
def scatterPlotEntireModel(modelPredict, tsneIter, perplexity, labels):
    '''
    Creates a scatter plot of all the movies along with a legend (one legend entry per label, prioritising alphabetical order).
    Each movie is plotted separately, and a legend entry is added only the first time its label is plotted.
    Movies with IDs not found in the MovieLens dataset are assigned a None label.

    modelPredict: matrix containing predicted ratings for all user/item combinations. Shape is items x users.
    tsneIter: number of iterations for the t-SNE optimisation.
    perplexity: perplexity setting for the t-SNE visualisation (see sources for more info).
    labels: array of colour values, one per item, encoding each item's genre.
    '''

    # Predictions.shape = (1683,2)
    pca = PCA(n_components=10)
    modelPredict = pca.fit_transform(modelPredict)
    
    predictions = tsne(tsneIter,modelPredict, 2, 10, perplexity)
    assignSingleLabels(predictions,labels)
    return False
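
# assignSingleLabels is not defined in this snippet. A minimal sketch of the
# behaviour the docstring describes (one legend entry per label, added the first
# time that label is plotted); the body below is an assumption, and it assumes
# the labels are matplotlib colour values as noted above:
def assignSingleLabels(predictions, labels):
    seen = set()
    for (x, y), lab in zip(predictions, labels):
        if lab not in seen:
            # First occurrence of this label: plot it with a legend entry.
            plt.scatter(x, y, 10, lab, label=str(lab))
            seen.add(lab)
        else:
            plt.scatter(x, y, 10, lab)
    plt.legend(bbox_to_anchor=(1.1, 1.05))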
Example #5
def tsne_viz(X,
             vocab,
             output_filename,
             colors=None,
             no_dims=2,
             initial_dims=50,
             perplexity=30.0):
    """Plot a 2-dimensional graph by applying t-SNE on the embedding matrix X to represent the vocabulary.
    It also saves the figure.

    Parameters
    ----------
    X : array-like
        The embedding matrix.
    vocab : list
        The list of all tokens in the data. It is alphabetically sorted.
    output_filename : str
        The name of the figure.
    colors : list
        A list with the same length as the first dimension of X, giving the annotation color of each token.
        If it is None, then the default color black is used for each token.
    no_dims : int
        The output dimension of t-SNE.
    initial_dims : int
        The output dimension of the PCA that is applied to X before t-SNE.
    perplexity : float
        The t-SNE perplexity.
    """
    assert X.shape[0] == len(
        vocab), "Error: X and vocab must have same dimensions."

    if colors is None:
        colors = ['black' for _ in range(len(X))]
    # Run t-SNE on the word representation matrix
    Y = tsne(X, no_dims, initial_dims, perplexity)
    # Plotting:
    xvals, yvals = Y[:, 0], Y[:, 1]
    plt.figure(figsize=(100, 100))
    # Invisible plot (no marker, no line): it only sets the axis limits;
    # the words themselves are drawn as text annotations below.
    plt.plot(xvals, yvals, marker='', linestyle='')
    # Text labels:
    for word, x, y, color in zip(vocab, xvals, yvals, colors):
        plt.annotate(word, (x, y), fontsize=0.1, color=color)
    plt.savefig(output_filename, bbox_inches='tight', format="svg", dpi=1200)
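
# A minimal usage sketch with a synthetic embedding matrix (the vocabulary and
# dimensions are illustrative only; the custom tsne helper is assumed in scope):
import numpy as np

vocab = ['tok{:03d}'.format(i) for i in range(100)]   # already alphabetically sorted
X = np.random.randn(len(vocab), 50)
tsne_viz(X, vocab, 'embeddings.svg')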
def scatterPlotAllUsers(model, embedding_dim, userIndex, numUsers, pointNum, tsneIter, perplexity, previousClosest=["0"]):
    '''
    Creates a visualisation of all users from the dataset.
    This is useful to find neighbouring users of a specific user.

    model: the prediction model built by spotlight (from which we get the item and user latent factors).
    embedding_dim: dimension of the latent factors in the model.
    userIndex: the user for which we will plot the closest points.
    numUsers: number of total users in the model.
    pointNum: number of closest points (or users) we want to represent.
    tsneIter: number of iterations for the t-SNE optimisation.
    perplexity: perplexity setting for the t-SNE visualisation (see sources for more info).
    previousClosest: indexes of the neighbours found in a previous call (plotted for comparison); the default ["0"] means there are no previous neighbours.
    '''
    
    allUserFactors = np.empty((numUsers,embedding_dim))

    for i in range (numUsers):
        allUserFactors[i,:] = model._net.user_embeddings.weight[i].detach()

    #PCA used to reduce from 32 to 10
    if(embedding_dim>10):
        pca = PCA(n_components=10)
        allUserFactors = pca.fit_transform(allUserFactors)
    
    allUsersReduction = tsne(tsneIter,allUserFactors, 2, 10, perplexity)

    userX = allUsersReduction[userIndex,0]
    userY = allUsersReduction[userIndex,1]
    distances = []

    for index in range(numUsers):
        pointX = allUsersReduction[index,0]
        pointY = allUsersReduction[index,1]
        dist = math.sqrt((pointX-userX)**2+(pointY-userY)**2)
        distances += [dist]

    distIndexes = np.argsort(distances)
    #The first index will be the index of the chosen user (distance to itself is 0)
    distSmallestIndexes = distIndexes[1:pointNum+1]
    closestPoints = np.empty((pointNum,2))

    counter = 0
    for index in distSmallestIndexes:
        closestPoints[counter] = allUsersReduction[index,:]
        counter += 1
    
    plot1 = plt.scatter(allUsersReduction[:, 0], allUsersReduction[:, 1], 10 ,'black')
    plot2 = plt.scatter(closestPoints[:, 0], closestPoints[:, 1], 10 ,'lime')
    plot3 = plt.scatter(allUsersReduction[userIndex, 0], allUsersReduction[userIndex, 1], 20 ,'red','*')
    if "0" in previousClosest:
        plt.legend([plot1,plot2,plot3],['Other Users',
                                        'Closest '+str(pointNum)+' Users',
                                        'user '+str(userIndex)],bbox_to_anchor=(1.1, 1.05))
        
    else:
        previousClosestPoints = np.empty((pointNum,2))
        counter = 0
        for index in previousClosest:
            previousClosestPoints[counter]= allUsersReduction[index,:]
            counter += 1
            
        plot4 = plt.scatter(previousClosestPoints[:, 0], previousClosestPoints[:, 1], 10 ,'deeppink')
        plt.legend([plot1,plot2,plot3,plot4],['Other Users',
                                              'Current neighbours',
                                              'user '+str(userIndex),
                                              'Previous neighbours'],bbox_to_anchor=(1.1, 1.05))

    print("The users most similar to user",userIndex,"are:",distSmallestIndexes)
    return (distSmallestIndexes, False)
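
# A minimal usage sketch (illustrative values), re-using the trained spotlight
# `model` and `dataset` from the sketch after scatterPlotSingleUser. The second
# call passes the neighbours returned by the first call so that the plot also
# shows the previous neighbourhood for comparison:
neighbours, _ = scatterPlotAllUsers(model, 32, userIndex=5,
                                    numUsers=dataset.num_users, pointNum=10,
                                    tsneIter=500, perplexity=30)
plt.show()

neighbours2, _ = scatterPlotAllUsers(model, 32, userIndex=5,
                                     numUsers=dataset.num_users, pointNum=10,
                                     tsneIter=500, perplexity=30,
                                     previousClosest=neighbours)
plt.show()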