Exemplo n.º 1
0
    def hclust(self):
        """Display a single-linkage dendrogram for the sub-model.

        The matrix returned by ``functions.getSimilaritiesFromModel`` for
        ``self._subModel`` is converted with ``dist.squareform`` and handed
        to ``hier.linkage`` (``method='single'``). Leaves are labelled with
        the names that ``functions.getMoviesNames`` returns for the
        ``self._mostRated`` entries of ``self._original_movieIDs``, and the
        resulting plot is shown with ``plt.show()``.
        """
        distances = dist.squareform(
            functions.getSimilaritiesFromModel(self._subModel))
        tree = hier.linkage(distances, method='single')

        movie_names = functions.getMoviesNames(
            self._moviesInformations,
            self._original_movieIDs[self._mostRated])

        # Leaf labels are rotated so that long movie titles stay readable.
        hier.dendrogram(tree, labels=movie_names, leaf_rotation=90)
        plt.show()
Exemplo n.º 2
0
    def graph(self):
        """Draw the sub-model's movies as a networkx graph.

        The matrix returned by ``functions.getSimilaritiesFromModel`` for
        ``self._subModel`` is subtracted from its own maximum, converted
        with ``dist.squareform`` and used as the adjacency matrix of the
        graph. Nodes are then relabelled with the names that
        ``functions.getMoviesNames`` returns for the ``self._mostRated``
        entries of ``self._original_movieIDs`` before the graph is drawn and
        shown with ``plt.show()``.
        """
        dissimilarity = functions.getSimilaritiesFromModel(self._subModel)
        # Flip the scale: entries equal to the maximum become 0.
        adjacency = dist.squareform(np.amax(dissimilarity) - dissimilarity)
        network = nx.from_numpy_matrix(adjacency)

        names = functions.getMoviesNames(
            self._moviesInformations,
            self._original_movieIDs[self._mostRated])
        # Pair the integer node ids 0..n-1 positionally with the movie names.
        index_to_name = dict(zip(range(len(network.nodes())), names))
        network = nx.relabel_nodes(network, index_to_name)

        nx.draw(network, edge_color="blue", font_weight="bold")
        plt.show()
Exemplo n.º 3
0
    def pca_movies(self, color):
        """Plot PCA projections of the movies, one colour per movie.

        Three figures are produced, each followed by ``plt.show()``:

        1. a 3D projection of ``self._model.u`` (all movies);
        2. a 3D projection of ``self._subModel``;
        3. a 2D projection of ``self._subModel`` annotated with the names
           returned by ``functions.getMoviesNames``.

        The explained variance ratio of the 2D projection is printed.

        Args:
            color: ``0`` to colour each movie by the first column of its
                ``self._movieMean`` row (NaN is treated as 0); ``1`` to
                colour each movie by the genre that ``functions.getGenres``
                gives for its original movie ID.

        Raises:
            ValueError: if ``color`` is neither ``0`` nor ``1``.
            KeyError: if a mean or genre has no entry in the colour table
                (see the notes in the body).
        """
        # Reject unknown modes up front; previously they only failed later
        # with a NameError, after the first figure had been created.
        if color not in (0, 1):
            raise ValueError(
                "color must be 0 (mean rating) or 1 (genre), got %r" % (color,))

        if color == 0:
            # Colour by the mean rating given to the movie.
            moyennes = [row[0] for row in self._movieMean]
            moyennes = [0 if math.isnan(x) else x for x in moyennes]
            colors = ['blue', 'green', 'cyan', 'yellow', 'magenta', 'red']
            categories = np.unique(moyennes)
            # NOTE(review): only six colours are available and zip() stops at
            # the shorter input, so more than six distinct means makes the
            # lookup below raise KeyError. Presumably the means are expected
            # to be rounded ratings -- confirm against the caller.
            colordict = dict(zip(categories, colors))
            listColors = [colordict[x] for x in moyennes]
        else:
            # Colour by the genre of the movie.
            genres = functions.getGenres(self._moviesInformations)
            categories = ['Action', 'Adventure', 'Animation', 'Children\'s',
                          'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                          'Film-Noir', 'Horror', 'Musical', 'Mystery',
                          'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
            colors = ['IndianRed', 'Red', 'Pink', 'PaleVioletRed',
                      'LightSalmon', 'Orange', 'Gold', 'Violet', 'Purple',
                      'DarkSlateBlue', 'GreenYellow', 'DarkOliveGreen',
                      'MediumAquamarine', 'DarkCyan', 'CornflowerBlue', 'Navy',
                      'MistyRose', 'Peru', 'Maroon']
            colordict = dict(zip(categories, colors))

            # Map each row of the model to its original movie ID and from
            # there to its genre. This lookup used to sit outside the branch,
            # which broke the color == 0 mode (``genres`` was undefined).
            genres2 = [genres[str(self._original_movieIDs[i])]
                       for i in range(self._model.u.shape[0])]
            listColors = [colordict[x] for x in genres2]

        # --- Figure 1: 3D PCA of all movies -------------------------------
        pca = PCA(n_components=3)
        # Compute the projection from the data, then apply it to the data.
        pca.fit(self._model.u)
        newMovies = pca.transform(self._model.u)

        # Figure holding the 3D point cloud.
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(newMovies[:, 0], newMovies[:, 1], newMovies[:, 2],
                   c=listColors)
        plt.title('3D PCA of movies from model')
        plt.show()

        # --- Figure 2: 3D PCA of the sub-model ----------------------------
        pca.fit(self._subModel)
        newMovies = pca.transform(self._subModel)

        fig = plt.figure()
        ax = p3.Axes3D(fig)

        # Keep only the colours of the rows listed in self._mostRated.
        # NOTE(review): colours are collected in ascending row order; this
        # presumably matches the row order of self._subModel -- confirm.
        listColors2 = [x for i, x in enumerate(listColors)
                       if i in self._mostRated]
        ax.scatter3D(newMovies[:, 0], newMovies[:, 1], newMovies[:, 2],
                     c=listColors2)

        fig.add_axes(ax)
        plt.show()

        # --- Figure 3: labelled 2D PCA of the sub-model -------------------
        pca2 = PCA(n_components=2)
        pca2.fit(self._subModel)
        newMovies = pca2.transform(self._subModel)

        # Percentage of variance explained by each component. Written as a
        # single formatted string so the line parses on Python 2 and 3 and
        # prints exactly what the former print statement printed.
        print("explained variance ratio (first two components):  %s"
              % (pca2.explained_variance_ratio_,))

        labels = functions.getMoviesNames(
            self._moviesInformations,
            self._original_movieIDs[self._mostRated])

        plt.figure()
        plt.scatter(newMovies[:, 0], newMovies[:, 1], c=listColors2)
        plt.legend()
        plt.title('2D PCA of most rated movies from model')
        for label, x, y in zip(labels, newMovies[:, 0], newMovies[:, 1]):
            # Labels are decoded from UTF-8 before being drawn; assumes they
            # are byte strings -- TODO confirm.
            plt.annotate(
                label.decode('utf-8'),
                xy=(x, y), xytext=(30, 10),
                textcoords='offset points', ha='right', va='bottom',
                arrowprops=dict(arrowstyle='-'))

        plt.show()
Exemplo n.º 4
0
# Project the cluster centroids with the same 2D PCA as the movies.
center = pca2.transform(centroids)

# One colour per cluster index (0-9). The former single plt.plot() call mixed
# positional arguments after keyword arguments and repeated color=/marker=,
# which is a SyntaxError, so each cluster is now drawn by its own call.
# NOTE(review): the ninth colour was "#b34ee" (five hex digits, not a valid
# colour); "#b34eee" is a guess at the intended value -- confirm.
cluster_colors = ['c', 'r', 'b', 'm', 'y', 'k',
                  '#aff666', '#efe986', '#b34eee', '#bbbccc']
for cluster, cluster_color in enumerate(cluster_colors):
    # Boolean mask selecting the points assigned to this cluster.
    members = idx == cluster
    plt.plot(newMovies[members, 0], newMovies[members, 1],
             linestyle='None', marker='o', color=cluster_color)

# Centroids are drawn as green squares on top of the cluster points.
plt.plot(center[:, 0], center[:, 1], 'sg', markersize=8)
plt.title(dis)

# Annotate every point with its movie name.
labels = functions.getMoviesNames(a._moviesInformations, a._original_movieIDs)
for label, x, y in zip(labels, newMovies[:, 0], newMovies[:, 1]):
    plt.annotate(
        label.decode('utf-8'),
        xy=(x, y), xytext=(30, 10),
        textcoords='offset points', ha='right', va='bottom',
        arrowprops=dict(arrowstyle='-'))

plt.show()
# Parenthesised form prints the same text on Python 2 and parses on Python 3.
print(dis)


    def bestK(self):
        all_dis = dict()
        for k in range(1,100):
            centroids,dis = kmeans(self._model.u,k)