def plot_nmf_faces():
    """利用越来越多的非负分量对三张人脸图像进行重建"""
    # 下面这张图计算时间比较长,需要耐心等等
    people = load_people()
    image_shape = people.images[0].shape
    X_train, X_test, y_train, y_test = load_train_test_faces()
    mglearn.plots.plot_nmf_faces(X_train, X_test, image_shape=image_shape)
    plt.suptitle("图3-14 利用越来越多的非负分量对三张人脸图像进行重建")
def knn_classify_pca_faces():
    people = load_people()
    X_train, X_test, y_train, y_test = load_train_test_faces()

    # Count how many images there are of each target
    counts = np.bincount(people.target)
    # Print the counts together with the target names
    print('{0:25} {1:5}'.format("Name", "Count"))
    for i, (count, name) in enumerate(zip(counts, people.target_names)):
        print("{0:25} {1:3}".format(name, count), end='\t\t')
        if (i + 1) % 3 == 0:
            print()
    print()

    # 2) Train and test KNN on the PCA-whitened data.
    # The reference values below correspond to n_components=100 (the code below uses 75, so the shapes differ):
    # X_train_pca.shape: (1341, 100)
    # Test set score of 1-nn: 0.35
    # pca.components_.shape: (100, 5655)
    from sklearn.decomposition import PCA
    pca = PCA(n_components=75, whiten=True, random_state=seed)
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)

    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(X_train_pca, y_train)
    print('=' * 20)
    print("-- 使用KNN训练和测试经过PCA白化的数据 --")
    print('PCA主成分的形状: {}'.format(pca.components_.shape))
    print('经过PCA白化的数据的形状: {}'.format(X_train_pca.shape))
    print('PCA白化的数据经过KNN训练后测试集的精度: {:.2f}'.format(knn.score(X_test_pca, y_test)))
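    # Optional check (a small sketch, not part of the original output): how much of the
    # total variance the retained components capture on the training set.
    print('Variance captured by {} components: {:.2f}'.format(
        pca.n_components_, pca.explained_variance_ratio_.sum()))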

    image_shape = people.images[0].shape
    fig, axes = plt.subplots(3, 5, figsize=(20, 10), subplot_kw={'xticks': (), 'yticks': ()})
    for i, (component, ax) in enumerate(zip(pca.components_, axes.ravel())):
        ax.imshow(component.reshape(image_shape), cmap='viridis')
        ax.set_title('{}. component'.format(i + 1))
    plt.suptitle("Figure 3-9: Component vectors of the first 15 principal components of the faces dataset")

    # Figure 3-10: face image ≈ Σ_i x_i * components_i
    # i.e. every face image is a weighted sum of the principal components.
    mglearn.plots.plot_pca_faces(X_train, X_test, image_shape)
    plt.suptitle("Figure 3-11: Reconstructing three face images using increasing numbers of principal components")

    plt.figure()
    mglearn.discrete_scatter(X_train_pca[:, 0], X_train_pca[:, 1], y_train)
    plt.xlabel('First principal component')
    plt.ylabel('Second principal component')
    plt.suptitle("Scatter plot of the first two principal components\nthe classes do not separate into visible clusters")
def knn_classify_nmf_faces():
    people = load_people()
    image_shape = people.images[0].shape
    X_train, X_test, y_train, y_test = load_train_test_faces()

    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(X_train, y_train)
    print('=' * 20)
    print("-- 使用KNN训练和测试原始数据 --")
    print('原始数据的形状: {}'.format(X_train.shape))
    print('原始数据经过KNN训练后测试集的精度: {:.2f}'.format(knn.score(X_test, y_test)))

    # With too few components the accuracy is poor, but adding more components does not
    # necessarily improve the accuracy either; it mainly increases the computation time
    # (see the nmf_components_sweep sketch defined after this function).
    from sklearn.decomposition import NMF
    nmf = NMF(n_components=100, max_iter=200, random_state=seed)
    nmf.fit(X_train)
    X_train_nmf = nmf.transform(X_train)
    X_test_nmf = nmf.transform(X_test)

    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(X_train_nmf, y_train)

    print('=' * 20)
    print("-- 使用KNN训练和测试经过NMF处理的数据 --")
    print('NMF成分的形状: {}'.format(nmf.components_.shape))
    print('经过NMF的数据的形状: {}'.format(X_train_nmf.shape))
    print('NMF的数据经过KNN训练后测试集的精度: {:.2f}'.format(knn.score(X_test_nmf, y_test)))

    fig, axes = plt.subplots(3, 5, figsize=(20, 10), subplot_kw={'xticks': (), 'yticks': ()})
    plt.suptitle("图3-15 使用15个分量的NMF在人脸数据集上找到的15个分量")
    for i, (component, ax) in enumerate(zip(nmf.components_, axes.ravel())):
        ax.imshow(component.reshape(image_shape), cmap='viridis')
        ax.set_title('{}.component'.format((i + 1)))
        pass

    component = 3  # faces with a large coefficient for a given component share visual traits
    indexes = np.argsort(X_train_nmf[:, component])[::-1]
    fig, axes = plt.subplots(2, 5, figsize=(20, 10), subplot_kw={'xticks': (), 'yticks': ()})
    plt.suptitle("Figure 3-16: Faces with large coefficients for component 3")
    for index, ax in zip(indexes, axes.ravel()):
        ax.imshow(X_train[index].reshape(image_shape))

    component = 7  # again, faces selected by this component share visual traits
    indexes = np.argsort(X_train_nmf[:, component])[::-1]
    fig, axes = plt.subplots(2, 5, figsize=(20, 10), subplot_kw={'xticks': (), 'yticks': ()})
    plt.suptitle("Figure 3-17: Faces with large coefficients for component 7")
    for index, ax in zip(indexes, axes.ravel()):
        ax.imshow(X_train[index].reshape(image_shape))
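def nmf_components_sweep():
    """Sketch: vary the number of NMF components and compare KNN test accuracy.

    This is an illustrative helper, not one of the original figures; the exact
    scores depend on the random seed and the train/test split, and fitting NMF
    several times on the faces data is slow.
    """
    X_train, X_test, y_train, y_test = load_train_test_faces()

    from sklearn.decomposition import NMF
    from sklearn.neighbors import KNeighborsClassifier
    for n in [15, 50, 100]:
        nmf = NMF(n_components=n, max_iter=200, random_state=seed)
        X_train_nmf = nmf.fit_transform(X_train)
        X_test_nmf = nmf.transform(X_test)
        knn = KNeighborsClassifier(n_neighbors=1)
        knn.fit(X_train_nmf, y_train)
        print('n_components={:3d}  test accuracy: {:.2f}'.format(n, knn.score(X_test_nmf, y_test)))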
def knn_classify_original_faces():
    X_train, X_test, y_train, y_test = load_train_test_faces()

    # 1) Train and test KNN on the raw pixel data.
    # Each image has 87 * 65 = 5655 pixels.
    # X_train.shape: (1341, 5655)
    # Test set score of 1-nn: 0.27
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(X_train, y_train)
    print('=' * 20)
    print("-- 使用KNN训练和测试原始数据 --")
    print('原始数据的形状: {}'.format(X_train.shape))
    print('原始数据经过KNN训练后测试集的精度: {:.2f}'.format(knn.score(X_test, y_test)))
def kmeans_vector_quantization():
    people = load_people()
    image_shape = people.images[0].shape
    X_train, X_test, y_train, y_test = load_train_test_faces()

    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=100, random_state=seed)
    kmeans.fit(X_train)
    from sklearn.decomposition import PCA
    pca = PCA(n_components=100, random_state=seed)
    pca.fit(X_train)
    from sklearn.decomposition import NMF
    nmf = NMF(n_components=100, random_state=seed)
    nmf.fit(X_train)
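    # All three models yield 100 "components", each a vector of 87 * 65 = 5655 pixels:
    # kmeans.cluster_centers_, pca.components_ and nmf.components_ all have shape (100, 5655).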

    fig, axes = plt.subplots(3,
                             15,
                             figsize=(20, 10),
                             subplot_kw={
                                 'xticks': (),
                                 'yticks': ()
                             })
    fig.suptitle('Figure 3-30: Comparing cluster centers found by KMeans with components found by PCA and NMF')
    # KMeans centers capture what the faces have in common, PCA components capture the
    # directions of largest variation, and NMF components capture basic building blocks of the images.
    for ax, comp_kmeans, comp_pca, comp_nmf in zip(axes.T,
                                                   kmeans.cluster_centers_,
                                                   pca.components_,
                                                   nmf.components_):
        ax[0].imshow(comp_kmeans.reshape(image_shape))
        ax[1].imshow(comp_pca.reshape(image_shape), cmap='viridis')
        ax[2].imshow(comp_nmf.reshape(image_shape))

    axes[0, 0].set_ylabel('kmeans')
    axes[1, 0].set_ylabel('pca')
    axes[2, 0].set_ylabel('nmf')

    X_reconstructed_kmeans = kmeans.cluster_centers_[kmeans.predict(X_test)]
    X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
    X_reconstructed_nmf = np.dot(nmf.transform(X_test), nmf.components_)
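    # X_reconstructed_kmeans replaces every test image with its single nearest cluster
    # center (vector quantization), while the PCA and NMF reconstructions are weighted
    # sums of all 100 components.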

    fig, axes = plt.subplots(4,
                             5,
                             figsize=(20, 10),
                             subplot_kw={
                                 'xticks': (),
                                 'yticks': ()
                             })
    fig.suptitle('Figure 3-31: Comparing image reconstructions using KMeans, PCA and NMF with 100 components (or cluster centers); KMeans uses only a single cluster center per image')
    # The KMeans reconstructions look worse than the other two: KMeans uses a cluster
    # average, but each face has its own traits rather than the average of all traits.
    for ax, orig, rec_kmeans, rec_pca, rec_nmf in zip(axes.T, X_test,
                                                      X_reconstructed_kmeans,
                                                      X_reconstructed_pca,
                                                      X_reconstructed_nmf):
        ax[0].imshow(orig.reshape(image_shape))
        ax[1].imshow(rec_kmeans.reshape(image_shape))
        ax[2].imshow(rec_pca.reshape(image_shape))
        ax[3].imshow(rec_nmf.reshape(image_shape))

    axes[0, 0].set_ylabel('original')
    axes[1, 0].set_ylabel('kmeans')
    axes[2, 0].set_ylabel('pca')
    axes[3, 0].set_ylabel('nmf')