def main():
    """Fit a ProbabilisticPCA on the training data and project train and test sets.

    Command line (five arguments after the program name):
    n_components, train input, test input, train output, test output.
    With any other argument count the usage text is printed and the
    process exits with status 1.

    Both projected arrays are written with ``save`` once both
    transformations have finished; timing and resulting shapes are printed
    along the way.
    """
    if len(argv) != 6:
        print_usage(basename(argv[0]))
        exit(1)

    verbose = True
    fit_report = ('PCA fitting to {0} with {1} components to save '
                  '(with transformation): {2[0]:d}m {2[1]:02d}s')
    transform_report = ('PCA transformation of {0} with {1} components '
                        'to save: {2[0]:d}m {2[1]:02d}s')

    def minutes_seconds(seconds):
        # Whole minutes and the remaining whole seconds of a duration.
        return (int(seconds // 60), int(seconds % 60))

    n_components = int(argv[1])
    train_in = load(argv[2])
    test_in = load(argv[3])
    train_out_path, test_out_path = argv[4], argv[5]

    # The timed span covers estimator construction plus fit_transform.
    started = time()
    pca = ProbabilisticPCA(n_components=n_components)
    train_out = pca.fit_transform(train_in)
    elapsed = time() - started
    if verbose:
        # Single-argument print(...) behaves the same as the print statement.
        print(fit_report.format(train_in.shape, n_components,
                                minutes_seconds(elapsed)))
        print('New shape: {}'.format(train_out.shape))

    started = time()
    test_out = pca.transform(test_in)
    elapsed = time() - started
    if verbose:
        print(transform_report.format(test_in.shape, n_components,
                                      minutes_seconds(elapsed)))
        print('New shape: {}'.format(test_out.shape))

    save(train_out_path, train_out)
    save(test_out_path, test_out)
def test_probabilistic_pca_vs_pca():
    """PCA.score_samples and ProbabilisticPCA.score must agree on the same data.

    Both estimators keep two components and are fitted on one sample of
    100 points with 3 features.
    """
    n_samples, n_features = 100, 3
    rng = np.random.RandomState(0)
    center = np.array([3, 4, 5])
    X = rng.randn(n_samples, n_features) * .1 + center

    # Fit order is kept: plain PCA first, then the probabilistic variant.
    plain = PCA(n_components=2)
    plain = plain.fit(X)
    probabilistic = ProbabilisticPCA(n_components=2)
    probabilistic = probabilistic.fit(X)

    assert_array_almost_equal(plain.score_samples(X),
                              probabilistic.score(X))
def test_probabilistic_pca_2():
    """Training data must score higher on average than a wider-spread sample."""
    n_samples, n_features = 100, 3
    rng = np.random.RandomState(0)
    center = np.array([3, 4, 5])

    X_train = rng.randn(n_samples, n_features) * .1 + center
    model = ProbabilisticPCA(n_components=2)
    model.fit(X_train)
    train_scores = model.score(X_train)

    # Second sample has twice the spread; it is drawn after the training
    # sample so the random stream is consumed in the same order.
    X_wide = rng.randn(n_samples, n_features) * .2 + center
    wide_scores = model.score(X_wide)

    assert_greater(train_scores.mean(), wide_scores.mean())
def test_probabilistic_pca_1():
    """The mean score on the training data must be close to a Gaussian reference."""
    n_samples, n_features = 1000, 3
    noise_std = .1
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * noise_std + np.array([3, 4, 5])

    model = ProbabilisticPCA(n_components=2)
    model.fit(X)
    mean_score = model.score(X).mean()

    # Reference value: -0.5 * log(2 * pi * e * std**2), once per feature.
    h = -0.5 * np.log(2 * np.pi * np.exp(1) * noise_std ** 2) * n_features

    # decimal=0: the ratio only has to round to 1.
    np.testing.assert_almost_equal(mean_score / h, 1, 0)
def test_probabilistic_pca_1():
    """Sanity-check the magnitude of the probabilistic PCA score.

    Fits a two-component model on 1000 three-feature points and compares
    the mean score against ``-0.5 * log(2 * pi * e * 0.1**2) * p``; the
    ratio of the two must equal 1 to zero decimals.
    """
    n, p = 1000, 3
    rng = np.random.RandomState(0)
    offset = np.array([3, 4, 5])
    X = offset + rng.randn(n, p) * .1

    ppca = ProbabilisticPCA(n_components=2)
    ppca.fit(X)
    scores = ppca.score(X)

    reference = -0.5 * np.log(2 * np.pi * np.exp(1) * 0.1 ** 2) * p
    ratio = scores.mean() / reference
    np.testing.assert_almost_equal(ratio, 1, decimal=0)
def test_probabilistic_pca_4():
    """The one-component model must get the best mean score on held-out data.

    Models with 0 .. p-1 components are fitted on one sample and scored on
    a second sample generated the same way.
    """
    n, p = 200, 3
    rng = np.random.RandomState(0)
    scale = np.array([3, 4, 5])
    shift = np.array([1, 0, 7])

    def draw():
        # Noise is drawn before the shared factor, as in the expression
        # this replaces, so the random stream is unchanged.
        noise = rng.randn(n, p)
        factor = rng.randn(n, 1)
        return noise + factor * scale + shift

    Xl = draw()
    Xt = draw()

    def held_out_score(k):
        model = ProbabilisticPCA(n_components=k)
        model.fit(Xl)
        return model.score(Xt).mean()

    ll = np.array([held_out_score(k) for k in range(p)])
    assert_true(ll.argmax() == 1)
def test_probabilistic_pca_4():
    """Model selection check: n_components=1 must win on the test sample."""
    n_samples, n_features = 200, 3
    rng = np.random.RandomState(0)

    # Train sample: per-feature noise plus one shared factor, then an offset.
    train_noise = rng.randn(n_samples, n_features)
    train_factor = rng.randn(n_samples, 1)
    X_train = train_noise + train_factor * np.array([3, 4, 5]) + np.array([1, 0, 7])

    # Test sample: generated the same way from the continuing random stream.
    test_noise = rng.randn(n_samples, n_features)
    test_factor = rng.randn(n_samples, 1)
    X_test = test_noise + test_factor * np.array([3, 4, 5]) + np.array([1, 0, 7])

    mean_scores = np.zeros(n_features)
    k = 0
    while k < n_features:
        candidate = ProbabilisticPCA(n_components=k)
        candidate.fit(X_train)
        mean_scores[k] = candidate.score(X_test).mean()
        k += 1

    best_k = mean_scores.argmax()
    assert_true(best_k == 1)
def pcaImgWrdMat(highDim, lowDim):
    """Reduce every category's image-word matrix with five PCA variants.

    The dataset name comes from the command-line options
    (``options.dataset``). For each category in ``getCatMap(dataset)``:

    1. load the positive matrix and the 'NEG'-prefixed negative matrix,
    2. stack them, positives first,
    3. fit and apply PCA, ProbabilisticPCA, RandomizedPCA, KernelPCA and
       MiniBatchSparsePCA (``n_iter=100``), in that order,
    4. append a label column (1 for positive rows, 0 for negative rows),
    5. save each result as
       ``<category><highDim><lowDim>.<pca|ppca|rpca|kpca|spca>``
       under ``rootDir + dataset + outputDir``.

    Parameters
    ----------
    highDim
        Used to build the input and output file names and in the progress
        line (formatted with ``%d``).
    lowDim
        ``n_components`` given to every estimator; also part of the output
        file names.
    """
    options, _ = parser.parse_args(sys.argv[1:])
    dataset = options.dataset

    # (progress label / output extension, estimator class, extra kwargs),
    # listed in the order the results are produced.
    reducers = (
        ('pca', PCA, {}),
        ('ppca', ProbabilisticPCA, {}),
        ('rpca', RandomizedPCA, {}),
        ('kpca', KernelPCA, {}),
        ('spca', MiniBatchSparsePCA, {'n_iter': 100}),
    )

    inputPrefix = rootDir + dataset + iwmDir
    outputPrefix = rootDir + dataset + outputDir
    highDimTag = str(highDim)

    for catName in getCatMap(dataset).keys():
        print('%s : %d : %d\n' % (catName, highDim, lowDim))

        # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt
        # returns a 1-D array and shape[0] would count columns, not images.
        # Builtin int replaces the removed np.int alias (same dtype).
        catPosData = np.loadtxt(inputPrefix + catName + highDimTag + iwmext,
                                dtype=int, delimiter=' ', ndmin=2)
        nPosImages = catPosData.shape[0]
        catNegData = np.loadtxt(
            inputPrefix + 'NEG' + catName + highDimTag + iwmext,
            dtype=int, delimiter=' ', ndmin=2)
        nNegImages = catNegData.shape[0]

        catData = np.vstack((catPosData, catNegData))
        labels = np.vstack((np.ones((nPosImages, 1), int),
                            np.zeros((nNegImages, 1), int)))

        outputStem = outputPrefix + catName + highDimTag + str(lowDim)
        for tag, reducerClass, extraArgs in reducers:
            print('%s...' % tag)
            reducer = reducerClass(n_components=lowDim, **extraArgs)
            # fit(...).transform(...) is kept rather than fit_transform so
            # each estimator produces exactly what it did before.
            reduced = reducer.fit(catData).transform(catData)
            np.savetxt(outputStem + '.' + tag, np.hstack((reduced, labels)),
                       fmt='%f', delimiter=' ')
def test_probabilistic_pca_3():
    """The homoscedastic model should work slightly worse than the
    heteroscedastic one in over-fitting condition
    """
    n, p = 100, 3
    rng = np.random.RandomState(0)
    X = rng.randn(n, p) * .1 + np.array([3, 4, 5])
    ppca = ProbabilisticPCA(n_components=2)

    # First fit uses the default settings; per the docstring this is the
    # homoscedastic model.
    ppca.fit(X)
    ll1 = ppca.score(X)

    # Refit the same estimator on the same data with homoscedastic=False.
    ppca.fit(X, homoscedastic=False)
    ll2 = ppca.score(X)

    # Previously both scores were computed but never compared, so the test
    # could not fail. The training-data score of the heteroscedastic fit
    # must exceed that of the homoscedastic fit.
    assert_greater(ll2.mean(), ll1.mean())