def hclust(self):
    """Show a single-linkage dendrogram for the movies of the sub-model.

    The values returned by ``functions.getSimilaritiesFromModel`` for
    ``self._subModel`` are passed through ``squareform`` and clustered with
    the ``'single'`` linkage method.  Leaves are labelled with the names of
    the movies selected by ``self._mostRated`` and rotated by 90 degrees.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    pairwise = functions.getSimilaritiesFromModel(self._subModel)
    # NOTE(review): ``linkage`` receives whatever ``squareform`` yields for
    # this input -- confirm that it is the condensed form linkage expects.
    tree = hier.linkage(dist.squareform(pairwise), method='single')

    # Leaf labels: names looked up from the ids of the selected movies.
    shown_ids = self._original_movieIDs[self._mostRated]
    leaf_names = functions.getMoviesNames(self._moviesInformations, shown_ids)

    hier.dendrogram(tree, labels=leaf_names, leaf_rotation=90)
    plt.show()
def graph(self):
    """Draw the movies of the sub-model as a weighted graph.

    The values returned by ``functions.getSimilaritiesFromModel`` for
    ``self._subModel`` are subtracted from their maximum, passed through
    ``squareform`` and used as the adjacency matrix of a NetworkX graph.
    Nodes are relabelled with the names of the movies selected by
    ``self._mostRated`` and the graph is displayed with ``plt.show()``.
    Nothing is returned.
    """
    dissim2 = functions.getSimilaritiesFromModel(self._subModel)
    # Flip the scale: the largest input value becomes 0 and smaller values
    # become larger weights.  NetworkX only creates edges for non-zero
    # adjacency entries, so the maximal pairs end up unconnected.
    adjacency = dist.squareform(np.amax(dissim2) - dissim2)

    # ``from_numpy_matrix`` was removed in NetworkX 3.0.  Prefer its
    # replacement and fall back to the old name on releases that predate it.
    build_graph = getattr(nx, "from_numpy_array", None)
    if build_graph is None:
        build_graph = nx.from_numpy_matrix
    G = build_graph(adjacency)

    # Nodes are created as 0..n-1; map each index to the movie name.
    movieList = functions.getMoviesNames(
        self._moviesInformations,
        self._original_movieIDs[self._mostRated],
    )
    G = nx.relabel_nodes(G, dict(zip(range(G.number_of_nodes()), movieList)))

    nx.draw(G, edge_color="blue", font_weight="bold")
    plt.show()
def pca_movies(self, color):
    """Display three PCA scatter plots of the movie factors.

    The figures are shown one after the other with ``plt.show()``:

    1. a 3-D projection of every row of ``self._model.u``;
    2. a 3-D projection of ``self._subModel``;
    3. a 2-D projection of ``self._subModel`` in which every point is
       annotated with its movie name.  The explained variance ratio of the
       two components is printed before this figure is drawn.

    Args:
        color: colouring mode.  ``0`` colours each movie by the first
            column of ``self._movieMean`` (NaN is treated as 0); ``1``
            colours it by the genre returned by ``functions.getGenres``.

    Raises:
        ValueError: if ``color`` is neither 0 nor 1.  Previously this
            surfaced as a ``NameError`` after the first PCA had been run.
    """
    # Resolve the colours first so an unsupported mode fails before any
    # projection is computed or any figure is created.
    if color == 0:
        # Colour by the mean given to the movie.
        moyennes = [
            0 if math.isnan(row[0]) else row[0] for row in self._movieMean
        ]
        colors = ['blue', 'green', 'cyan', 'yellow', 'magenta', 'red']
        # NOTE(review): only six colours are available; more than six
        # distinct values would make the lookup below raise KeyError.
        colordict = dict(zip(np.unique(moyennes), colors))
        listColors = [colordict[x] for x in moyennes]
    elif color == 1:
        # Colour by the genre of the movie.
        genres = functions.getGenres(self._moviesInformations)
        categories = ['Action', 'Adventure', 'Animation', 'Children\'s',
                      'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                      'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance',
                      'Sci-Fi', 'Thriller', 'War', 'Western']
        colors = ['IndianRed', 'Red', 'Pink', 'PaleVioletRed', 'LightSalmon',
                  'Orange', 'Gold', 'Violet', 'Purple', 'DarkSlateBlue',
                  'GreenYellow', 'DarkOliveGreen', 'MediumAquamarine',
                  'DarkCyan', 'CornflowerBlue', 'Navy', 'MistyRose', 'Peru',
                  'Maroon']
        colordict = dict(zip(categories, colors))
        # Match each model row to its genre through the original movie id.
        listColors = [
            colordict[genres[str(self._original_movieIDs[i])]]
            for i in range(self._model.u.shape[0])
        ]
    else:
        raise ValueError(
            "color must be 0 (mean) or 1 (genre), got %r" % (color,)
        )

    # Figure 1: 3-D projection of all movies.
    pca = PCA(n_components=3)
    newMovies = pca.fit(self._model.u).transform(self._model.u)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(newMovies[:, 0], newMovies[:, 1], newMovies[:, 2],
               c=listColors)
    plt.title('3D PCA of movies from model')
    plt.show()

    # Figure 2: 3-D projection of the sub-model, refitting the same PCA.
    newMovies = pca.fit(self._subModel).transform(self._subModel)
    # Keep only the colours whose row index appears in ``self._mostRated``.
    listColors2 = [
        shade for i, shade in enumerate(listColors) if i in self._mostRated
    ]
    fig = plt.figure()
    ax = p3.Axes3D(fig)
    ax.scatter3D(newMovies[:, 0], newMovies[:, 1], newMovies[:, 2],
                 c=listColors2)
    fig.add_axes(ax)
    plt.show()

    # Figure 3: labelled 2-D projection of the sub-model.
    pca2 = PCA(n_components=2)
    newMovies = pca2.fit(self._subModel).transform(self._subModel)

    # Single-argument print: produces the same text under the Python 2
    # print statement and the Python 3 print function.
    print(" ".join([
        "explained variance ratio (first two components): ",
        str(pca2.explained_variance_ratio_),
    ]))

    labels = functions.getMoviesNames(
        self._moviesInformations,
        self._original_movieIDs[self._mostRated],
    )
    plt.figure()
    plt.scatter(newMovies[:, 0], newMovies[:, 1], c=listColors2)
    plt.legend()
    plt.title('2D PCA of most rated movies from model')
    for label, x, y in zip(labels, newMovies[:, 0], newMovies[:, 1]):
        # Only byte strings need decoding; text labels are used as they are.
        text = label.decode('utf-8') if isinstance(label, bytes) else label
        plt.annotate(
            text,
            xy=(x, y), xytext=(30, 10),
            textcoords='offset points', ha='right', va='bottom',
            arrowprops=dict(arrowstyle='-'))
    plt.show()
center = pca2.transform(centroids) # some plotting using numpy's logical indexing plt.plot(newMovies[idx==0,0],newMovies[idx==0,1],'oc', newMovies[idx==1,0],newMovies[idx==1,1],'or', newMovies[idx==2,0],newMovies[idx==2,1],'ob', newMovies[idx==3,0],newMovies[idx==3,1],'om', newMovies[idx==4,0],newMovies[idx==4,1],'oy', newMovies[idx==5,0],newMovies[idx==5,1],'ok', newMovies[idx==6,0],newMovies[idx==6,1],color="#aff666", marker="o", newMovies[idx==7,0],newMovies[idx==7,1],color="#efe986", marker="o", newMovies[idx==8,0],newMovies[idx==8,1],color="#b34ee", marker="o", newMovies[idx==9,0],newMovies[idx==9,1],color="#bbbccc", marker="o") plt.plot(center[:,0],center[:,1],'sg',markersize=8) plt.title(dis) #plot with labels labels = functions.getMoviesNames(a._moviesInformations,a._original_movieIDs) for label, x, y in zip(labels, newMovies[:, 0], newMovies[:, 1]): plt.annotate( label.decode('utf-8'), xy = (x, y), xytext = (30, 10), textcoords = 'offset points', ha = 'right', va = 'bottom', arrowprops = dict(arrowstyle = '-')) plt.show() print dis def bestK(self): all_dis = dict() for k in range(1,100): centroids,dis = kmeans(self._model.u,k)