def rpca_denoise(D, p_local, p_global):
    """Run ``pcp`` on ``D`` with the settings stored under ``p_global['rpca']``.

    Reads the keys ``max_iter``, ``verbose``, ``pca_method``, ``delta`` and
    ``mu`` from ``p_global['rpca']`` and forwards them to ``pcp``.

    Returns a 2-tuple: element 0 of whatever ``pcp`` returns, followed by
    ``p_local`` passed through untouched.
    """
    settings = p_global['rpca']
    # Keys are read in this fixed order so a missing key fails on the same
    # name as before.
    max_iter, verbose, svd_method, delta, mu = (
        settings[key]
        for key in ('max_iter', 'verbose', 'pca_method', 'delta', 'mu')
    )
    result = pcp(
        D,
        mu=mu,
        delta=delta,
        maxiter=max_iter,
        verbose=verbose,
        svd_method=svd_method,
    )
    return (result[0], p_local)
def fit(self, X, y):
    """Fit the three wrapped regressors on ``X``.

    ``y`` is converted to an array and split by ``pcp`` (30 iterations,
    approximate SVD, not verbose) into two parts, which are flattened with
    ``np.ravel``:

    * ``regressorA`` is fitted on the second ``pcp`` output,
    * ``regressorB`` is fitted on the first ``pcp`` output,
    * ``baseRegressor`` is fitted on the untouched ``y``.
    """
    first_part, second_part, (_u, _sigma, _v) = pcp(
        np.array(y), maxiter=30, verbose=False, svd_method="approximate"
    )
    first_flat, second_flat = np.ravel(first_part), np.ravel(second_part)

    self.regressorA.fit(X, second_flat)
    self.regressorB.fit(X, first_flat)
    self.baseRegressor.fit(X, y)
def do_pcp(M=None, y_true=None, L=None, S=None, mu=None, lam=None, report=False):
    """Decompose ``M`` with ``pcp`` and optionally report against ground truth.

    When ``M`` is not given it is built as ``L + S``. The incoming ``L`` and
    ``S`` are kept as the reference ("true") components and handed to
    ``gen_report`` when ``report`` is true.

    ``mu`` only appears in the report banner and ``lam`` is not used by this
    function; neither is passed on to ``pcp``.

    Returns ``(L, S, obj)``: the first three values produced by ``pcp``.
    """
    reference_L, reference_S = L, S
    observed = L + S if M is None else M

    if report:
        rule = '-' * 8
        print((rule + 'Dan PCA for mu = %s' + rule) % mu)

    est_L, est_S, objective, error = pcp(
        observed, verbose=True, svd_method='exact', maxiter=100
    )

    if report:
        gen_report(
            observed,
            'Dan PCP',
            objective,
            error,
            L_true=reference_L,
            S_true=reference_S,
            L_test=est_L,
            S_test=est_S,
            y_true=y_true,
        )
    return est_L, est_S, objective
# this is too terse?
def tui_menu(lst):
    """Print a numbered menu for ``lst`` and return the user's pick.

    Entries are listed starting at 1, followed by a ``q`` entry for quitting.
    The prompt is repeated until the user enters ``q`` or a number that
    matches one of the listed entries, so a typo no longer crashes with
    ``ValueError`` and ``0`` or a negative number can no longer select an
    entry from the end of the list.

    Returns the zero-based index of the chosen entry as a string.
    Exits the interpreter via ``sys.exit()`` when the user enters ``q``.
    """
    import sys  # local import: the file's import block is outside this snippet

    def opt_fmtr(argv1, argv2):
        # Left-pad the key to a fixed width so the labels line up.
        return str(argv1).ljust(5) + str(argv2)

    entry_count = 0
    for count, item in enumerate(lst, start=1):
        print(opt_fmtr(count, item))
        entry_count = count
    print(opt_fmtr('q', 'quit'))

    while True:
        choice = input("pick an option: ").strip()
        if choice == 'q':
            sys.exit()
        try:
            number = int(choice)
        except ValueError:
            number = 0  # falls through to the range check below
        if 1 <= number <= entry_count:
            return str(number - 1)
        print("invalid choice: %r" % (choice,))


# Build the data source once and reuse it instead of constructing it for
# every attribute.
_source = pcp()
data = _source.data
names = _source.names  # names of provinces
provinces = _source.provinces  # dataframes of provinces

# ask user to choose a province
userinput = tui_menu(names)
selected_name = names[int(userinput)]
print(selected_name)

# ask user to choose a data set
datas = data.axes[1][4:]  # axes[0] is row index, axes[1] is column index
userinput2 = tui_menu(datas)
selected_column = datas[int(userinput2)]
print(selected_column)

# graph the data
data[data.prname == selected_name].plot(x='date', y=selected_column)
return arr DD = 23 HH = 02 MM = 02 fn = "/home/vighnesh/data/Twitter_volume_AAPL.csv" tweet_data = np.loadtxt(open(fn,"r"),delimiter=",", skiprows=1, usecols=[1]) plt.plot(tweet_data) print("Original shape = ", tweet_data.shape) M = make_windows_non_overlap(tweet_data, 30) print("NNZ original = ", np.count_nonzero(M)) print("Matrix shape = ", M.shape) L, S, _ = pcp(M, verbose=True, delta=1e-4, svd_method='approximate') L = L.flatten() S = S.flatten() outlier_values = tweet_data[OUTLIER_IDX] plt.scatter(OUTLIER_IDX, outlier_values, color='red') plt.xlim(0, 16000) plt.ylim(0, 16000) plt.figure() N = np.abs(S) plt.plot(N, label='Mag of low rank') plt.figure() plt.plot(L, color='orange')
import numpy as np
from pcp import pcp
from util import *

# Problem size: an n x n matrix with rank r and k sampled positions.
n = 500
# Both values are used as array sizes / sample counts, which NumPy requires
# to be integers; the original float products raise TypeError there.
r = int(round(0.05 * n))
k = int(round(0.05 * (n ** 2)))

with Timer('Creating matrix ...'):
    # Product of an (n x r) and an (r x n) Gaussian factor.
    L0 = np.dot(
        np.random.normal(0.0, 1.0 / n, size=(n, r)),
        np.random.normal(0.0, 1.0 / n, size=(r, n)),
    )
    # Indicator vector with k positions drawn without replacement.
    # NOTE(review): P_omg is never applied to S0 below, so S0's support comes
    # only from the randint(2) draw - confirm whether the mask was meant to
    # be used.
    P_omg = np.zeros(L0.size)
    P_omg[np.random.choice(L0.size, k, False)] = 1
    # Random +/-1 signs multiplied elementwise by a random 0/1 matrix.
    S0 = np.random.choice([1, -1], size=(n, n)) * np.random.randint(2, size=(n, n))
    M = L0 + S0

# Single-argument print calls produce the same text on Python 2 and 3.
print("rank(L0) = %s" % (np.linalg.matrix_rank(L0),))
print("||S0||_0 = %s" % (np.count_nonzero(S0),))

with Timer('Principal Component Pursuit ...'):
    L, S, (u, s, v) = pcp(M, maxiter=50, verbose=True, svd_method="exact")
def do_plot(ax, img, shape):
    """Clear ``ax`` and draw ``img`` on it as a grayscale image.

    ``img`` is reshaped to ``shape`` before drawing; nearest-neighbour
    interpolation is used and the tick labels on both axes are blanked.
    """
    ax.cla()
    ax.imshow(img.reshape(shape), cmap="gray", interpolation="nearest")
    ax.set_xticklabels([])
    ax.set_yticklabels([])


if __name__ == "__main__":
    import sys
    import glob
    import matplotlib.pyplot as pl

    if "--test" in sys.argv:
        # Self-check on a small matrix: the two parts must add back up to M.
        M = (10*np.ones((10, 10))) + (-5 * np.eye(10))
        L, S, svd = pcp(M, verbose=True, svd_method="exact")
        # Explicit raise instead of ``assert`` so the check still runs
        # under ``python -O``.
        if not np.allclose(M, L + S):
            raise AssertionError("Failed")
        print("passed")
        sys.exit(0)

    # glob returns paths in arbitrary order; sort first so that the
    # every-other-file slice picks the same files on every run.
    gl = sorted(glob.glob("/home/vighnesh/images/Escalator/*.bmp"))[:2000:2]
    M, shape = bitmap_to_mat(gl)
    print(M.shape)
    L, S, (u, s, v) = pcp(M, delta=1e-3, maxiter=50, verbose=True,
                          svd_method="approximate")

    fig, axes = pl.subplots(1, 3, figsize=(10, 4))
    fig.subplots_adjust(left=0, right=1, hspace=0, wspace=0.01)
    i = 0
    do_plot(axes[0], M[i], shape)
def _to_uint8_image(flat_rgb, dimension):
    """Turn one flattened RGB row into an 8-bit image of the given size.

    The row is reshaped to ``(dimension[0], dimension[1], 3)``. Values are
    clipped to ``[0, 255]`` before the cast because converting out-of-range
    floats to ``uint8`` does not saturate.
    """
    pixels = np.reshape(flat_rgb, (dimension[0], dimension[1], 3))
    return Image.fromarray(np.clip(pixels, 0, 255).astype(np.uint8))


def doRpca(image_filenames, ref_file):
    """Decompose a set of images with ``pcp`` and save the parts as PNGs.

    For every input image, three files are written to ``out_dir``:
    ``<name>-ori.png`` (the input), ``<name>-lowrank.png`` (first ``pcp``
    output) and ``<name>-sparse.png`` (second ``pcp`` output). The MSE of
    each low-rank row against the reference image ``ref_file`` is logged,
    followed by the sum, the average and the total run time.

    Relies on the module-level ``num_iter``, ``rpca_method``, ``out_dir``,
    ``logger``, ``process_image`` and ``mse``. Returns ``None``.
    """
    start = time.time()
    M, dimension = process_image(image_filenames)

    # shape[0] is the number of images in the dataset
    # shape[1] is the pixel values for either r,g or b (width x height, flattened)
    # shape[2] is 3, because it consists of r,g,b
    M = np.reshape(M, (M.shape[0], M.shape[1] * M.shape[2]))
    logger.info(M.shape)
    logger.info(dimension)

    L, S, _svd = pcp(M, maxiter=num_iter, verbose=True, svd_method=rpca_method)
    # Uncomment this section and comment the line above if you want to run ALM instead of PCP ADM
    # L, S, _svd = alm(M, maxiter=num_iter, verbose=True, svd_method=rpca_method)

    # Restore the trailing channel axis that was folded in before pcp.
    M = np.reshape(M, (M.shape[0], M.shape[1] // 3, 3))
    L = np.reshape(L, (L.shape[0], L.shape[1] // 3, 3))
    S = np.reshape(S, (S.shape[0], S.shape[1] // 3, 3))

    ref = Image.open(ref_file).convert("RGB")
    # Flatten the reference once; it is the same for every image.
    ref_flat = np.array(ref.getdata()).flatten()

    sum_mse = 0.0
    for i, (orig_row, low_rank_row, sparse_row) in enumerate(zip(M, L, S)):
        base = os.path.basename(image_filenames[i])
        current_filename, ext = os.path.splitext(base)

        for row, suffix in (
            (orig_row, "-ori.png"),
            (low_rank_row, "-lowrank.png"),
            (sparse_row, "-sparse.png"),
        ):
            target = os.path.join(out_dir, current_filename + suffix)
            _to_uint8_image(row, dimension).save(target)

        # The MSE is taken on the unclipped low-rank values.
        current_mse = mse(ref_flat, low_rank_row.flatten())
        logger.info("%s%s Low-rank MSE : %s", current_filename, ext, current_mse)
        sum_mse = sum_mse + current_mse

    logger.info("Sum MSE of all low-rank images = %s", sum_mse)
    logger.info("Average MSE of all low-rank images = %s", sum_mse / len(M))
    logger.info("RPCA completed!")
    rpca_time = time.time() - start
    logger.info("RPCA total time taken = %.3f seconds", rpca_time)
def do_plot(ax, img, shape):
    """Clear ``ax`` and draw ``img`` on it as a grayscale image.

    ``img`` is reshaped to ``shape`` before drawing; nearest-neighbour
    interpolation is used and the tick labels on both axes are blanked.
    """
    ax.cla()
    ax.imshow(img.reshape(shape), cmap="gray", interpolation="nearest")
    ax.set_xticklabels([])
    ax.set_yticklabels([])


if __name__ == "__main__":
    import sys
    import glob
    import matplotlib.pyplot as pl

    if "--test" in sys.argv:
        # Self-check on a small matrix: the two parts must add back up to M.
        M = (10*np.ones((10, 10))) + (-5 * np.eye(10))
        L, S, svd = pcp(M, verbose=True, svd_method="exact")
        # Explicit raise instead of ``assert`` so the check still runs
        # under ``python -O``.
        if not np.allclose(M, L + S):
            raise AssertionError("Failed")
        print("passed")
        sys.exit(0)

    # glob returns paths in arbitrary order; sort first so that the
    # every-other-file slice picks the same files on every run.
    frame_paths = sorted(glob.glob("test_data/Escalator/*.bmp"))[:2000:2]
    M, shape = bitmap_to_mat(frame_paths)
    print(M.shape)
    L, S, (u, s, v) = pcp(M, maxiter=50, verbose=True, svd_method="exact")

    fig, axes = pl.subplots(1, 3, figsize=(10, 4))
    fig.subplots_adjust(left=0, right=1, hspace=0, wspace=0.01)
    # Step through at most the first 500 rows, redrawing the panels each time.
    for i in range(min(len(M), 500)):
        do_plot(axes[0], M[i], shape)
        axes[0].set_title("raw")

        do_plot(axes[1], L[i], shape)
        axes[1].set_title("low rank")
# Window of 14 days ending at end_day, which is 15 days after act_day.
end_day = act_day + dt.timedelta(days=15)
future_features = data.get_features_for_prev_days(end_day, dt.timedelta(days=14))
future_data_set = data.flatten_features(future_features)  # for predicting
future_target = data.get_target_for_prev_days(end_day, dt.timedelta(days=14))
future_target_data_set = data.flatten_features(future_target)  # for testing

# Single-argument print calls behave identically on Python 2 and Python 3.
print("Start day = " + day)
print("End day = " + str(end_day))

# Experiment
# RPCA Tests
# Split the target series with pcp and flatten both parts.
L, S, (u, s, v) = pcp(np.array(target_data_set), maxiter=30, verbose=False, svd_method="approximate")
L = np.ravel(L)
S = np.ravel(S)
# Same split for the historic feature set, this time with the exact SVD.
LD, SD, (uD, sD, vD) = pcp(np.array(historic_data_set), maxiter=30, verbose=False, svd_method="exact")

# plt.figure(5)
# plt.plot([item[3] for item in historic_data_set], label="historic_data_set")
# plt.plot([item[3] for item in LD], label="low_dimension")
# plt.plot([item[3] for item in SD], label="sparse")
# plt.legend()
# plt.show()
def rpca_singvals(d):
    """Return ``pca_singvals`` of the first ``pcp`` output for ``d``.

    ``pcp`` is run verbosely for at most 5 iterations with the
    ``'randomized'`` SVD method.
    """
    decomposition = pcp(d, maxiter=5, verbose=True, svd_method='randomized')
    first_component = decomposition[0]
    return pca_singvals(first_component)