def generatePSDDictionary(images, patch_size, num_samples, num_features):
    # Predictive Sparse Decomposition (Kavukcuoglu, Ranzato & LeCun):
    # https://cs.nyu.edu/~yann/research/sparse/index.html
    # Energy per sample: ||y - B z||^2 + lmbda ||z||_1 + alpha ||z - f(y)||^2,
    # where the encoder f(y) = G tanh(W y + D) learns to predict the code z.
    video_patches, _ = generateVideoPatches(patch_size, images)
    samples = samplePatches(num_samples, video_patches)
    samples = samples.reshape(samples.shape[0], samples.shape[1]**2)

    n = samples.shape[1]   # patch dimension
    m = num_features       # code dimension; usually m > n (overcomplete)
    Z = npr.random(size=(m, num_samples))  # sparse codes, one column per sample
    B = npr.random(size=(n, m))            # decoder dictionary, atoms as columns
    B = B / np.linalg.norm(B, axis=0)      # unit-norm atoms
    W = npr.random(size=(m, n))            # encoder weights
    D = npr.random(size=m)                 # encoder bias
    G = np.diag(npr.random(size=m))        # diagonal encoder gain
    Y = samples.T                          # n by num_samples

    lmbda = 1.0
    alpha = 1.0
    lr = 1e-6
    for _ in range(200):
        # Keep G, D, W and B fixed; gradient descent on the codes Z.
        F = np.matmul(G, np.tanh(np.matmul(W, Y).T + D).T)
        for _ in range(1000):
            dJ = (2 * np.matmul(B.T, np.matmul(B, Z) - Y)
                  + lmbda * np.sign(Z)
                  + 2 * alpha * (Z - F))
            Z = Z - lr * dJ

        # One step of stochastic gradient descent on G, D, W and B.
        i = npr.randint(num_samples)
        z = Z[:, i]
        y = Y[:, i]
        h = np.tanh(np.matmul(W, y) + D)   # encoder hidden activations
        f = np.matmul(G, h)                # predicted code
        err = z - f                        # code-prediction error
        # d/dG ||z - f||^2 = -2 (z - f) h^T, kept diagonal to match G.
        G += 0.001 * lr * 2 * alpha * np.diag(err * h)
        # Backprop through the tanh: delta = (1 - h^2) * (G^T (z - f)).
        delta = (1 - h**2) * np.matmul(G.T, err)
        D += lr * 2 * alpha * delta
        W += lr * 2 * alpha * np.outer(delta, y)
        # d/dB ||y - B z||^2 = -2 (y - B z) z^T.
        B -= 0.001 * lr * np.outer(np.matmul(B, z) - y, z)

    B = B / np.linalg.norm(B, axis=0)  # renormalize atoms before returning
    return B.T.reshape((num_features, patch_size, patch_size))
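# Illustrative sketch (not in the original code): the payoff of PSD is that,
# once trained, the encoder predicts a sparse code in a single feed-forward
# pass, with no per-patch optimization. A hypothetical helper using the
# learned parameters G, W, D from generatePSDDictionary might look like:
def psdEncode(G, W, D, y):
    """Predict the sparse code for a flattened patch y via G * tanh(W y + D)."""
    return np.matmul(G, np.tanh(np.matmul(W, y) + D))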
def generateKMeansDictionary(images, patch_size, num_samples, num_features):
    video_patches, _ = generateVideoPatches(patch_size, images)
    samples = samplePatches(num_samples, video_patches)
    samples = samples.reshape(samples.shape[0], samples.shape[1]**2)
    kmeans = KMeans(n_clusters=num_features).fit(samples)
    centers = kmeans.cluster_centers_
    # Normalize each centroid to unit norm so the atoms are comparable.
    centers = (centers.T / np.linalg.norm(centers, axis=1)).T
    return centers.reshape((num_features, patch_size, patch_size))
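# Usage sketch (illustrative only; assumes generateVideoPatches and
# samplePatches accept a stack of grayscale frames of shape
# (num_frames, height, width)):
#
#   frames = npr.random(size=(10, 64, 64))
#   atoms = generateKMeansDictionary(frames, patch_size=8,
#                                    num_samples=1000, num_features=64)
#   # atoms.shape == (64, 8, 8): one unit-norm centroid per dictionary atom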
def generateOptSparseDictionary(images, patch_size, num_samples, num_features):
    video_patches, _ = generateVideoPatches(patch_size, images)
    samples = samplePatches(num_samples, video_patches)
    filter_size = samples.shape[1]
    # Flatten each patch into a vector before fitting.
    alg = DictionaryLearning(n_components=num_features)
    alg.fit(samples.reshape(samples.shape[0], filter_size**2))
    features = alg.components_
    # Normalize each atom to unit norm.
    features = (features.T / np.linalg.norm(features, axis=1)).T
    return features.reshape(features.shape[0], filter_size, filter_size)
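# Illustrative sketch (an assumption, not something the function above
# returns): a fitted DictionaryLearning model can also sparse-code new
# patches, and codes @ components_ reconstructs them in flattened-pixel space.
def reconstructPatches(alg, flat_patches):
    """Encode flattened patches with a fitted DictionaryLearning model
    and reconstruct them from the learned dictionary."""
    codes = alg.transform(flat_patches)       # sparse coefficients
    return np.matmul(codes, alg.components_)  # reconstruction, same shape as input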
def generatePCADictionary(images, patch_size, num_samples, num_features):
    video_patches, _ = generateVideoPatches(patch_size, images)
    samples = samplePatches(num_samples, video_patches)
    return computePCA(num_features, samples)
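# computePCA is defined elsewhere in the repo. A minimal sketch of what it
# plausibly does (an assumption, not the repo's implementation): PCA on the
# flattened patches, returning the top components as patch-shaped atoms.
def computePCASketch(num_features, patches):
    flat = patches.reshape(patches.shape[0], -1)
    flat = flat - flat.mean(axis=0)  # center the data before the SVD
    _, _, Vt = np.linalg.svd(flat, full_matrices=False)
    side = patches.shape[1]
    return Vt[:num_features].reshape(num_features, side, side)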