Пример #1
0
def rpca_denoise(D, p_local, p_global):
    """Run ``pcp`` on *D* with the settings stored under ``p_global['rpca']``.

    The ``max_iter``, ``verbose``, ``pca_method``, ``delta`` and ``mu``
    entries of that mapping are forwarded to ``pcp`` as ``maxiter``,
    ``verbose``, ``svd_method``, ``delta`` and ``mu`` respectively.

    Returns:
        A 2-tuple ``(first_pcp_output, p_local)``. The first ``pcp`` output
        is presumably the low-rank component (confirm against ``pcp``);
        *p_local* is passed through unchanged.
    """
    cfg = p_global['rpca']
    # Keys are read in the same order as before so a missing key fails the
    # same way.
    max_iter, verbose, svd_method, delta, mu = (
        cfg[key]
        for key in ('max_iter', 'verbose', 'pca_method', 'delta', 'mu')
    )
    result = pcp(D, mu=mu, delta=delta, maxiter=max_iter,
                 verbose=verbose, svd_method=svd_method)
    return (result[0], p_local)
Пример #2
0
 def fit(self, X, y):
     """Fit the three wrapped regressors on *X*.

     *y* is decomposed with ``pcp`` (30 iterations, approximate SVD). The
     flattened second output trains ``regressorA``, the flattened first
     output trains ``regressorB``, and ``baseRegressor`` is trained on the
     untouched *y*. Nothing is returned.
     """
     decomposition = pcp(np.array(y),
                         maxiter=30,
                         verbose=False,
                         svd_method="approximate")
     # The third element must still unpack into three values, as before;
     # none of them is used afterwards.
     first_part, second_part, (_u, _s, _v) = decomposition
     first_flat = np.ravel(first_part)
     second_flat = np.ravel(second_part)

     self.regressorA.fit(X, second_flat)
     self.regressorB.fit(X, first_flat)
     self.baseRegressor.fit(X, y)
Пример #3
0
def do_pcp(M=None, y_true=None, L=None, S=None, mu=None,
           lam=None, report=False):
    """Decompose a matrix with ``pcp`` and optionally print a report.

    Args:
        M: Matrix to decompose. When ``None`` it is built as ``L + S``.
        y_true: Forwarded to ``gen_report`` when *report* is true.
        L, S: Reference components; used to build *M* when it is missing
            and passed to ``gen_report`` as ``L_true`` / ``S_true``.
        mu: Only interpolated into the report banner.
        lam: Accepted but not used by this function.
        report: When true, print a banner before the run and call
            ``gen_report`` after it.

    Returns:
        The first three of the four values returned by ``pcp``
        (bound here as the two recovered components and ``obj``).
    """
    observed = L + S if M is None else M
    # Keep the caller's reference components separate from the recovered ones.
    reference_l, reference_s = L, S

    if report:
        rule = '-'*8
        print((rule + 'Dan PCA for mu = %s' + rule) % mu)

    recovered_l, recovered_s, obj, err = pcp(observed, verbose=True,
                                             svd_method='exact', maxiter=100)

    if report:
        gen_report(observed, 'Dan PCP', obj, err,
                   L_true=reference_l, S_true=reference_s,
                   L_test=recovered_l, S_test=recovered_s, y_true=y_true)
    return recovered_l, recovered_s, obj
Пример #4
0
# this is too terse?
def tui_menu(lst):
    """Print a numbered menu for *lst* and return the user's choice.

    Items are listed starting at 1, followed by a ``q`` (quit) entry. The
    prompt is repeated until the user enters a number that refers to a
    listed item, so ``0``, out-of-range numbers and non-numeric text can no
    longer crash the program or wrap around to the last item.

    Args:
        lst: Iterable of items to offer; each is shown via ``str()``.

    Returns:
        The zero-based index of the chosen item, as a string (callers
        convert it back with ``int()``).

    Raises:
        SystemExit: When the user enters ``q``.
    """
    # Local import: the file's import block is not part of this block.
    import sys

    def opt_fmtr(argv1, argv2):
        # Left-align the option key in a 5-character column.
        return str(argv1).ljust(5) + str(argv2)

    # Count while printing so *lst* only needs to be iterable.
    total = 0
    for total, item in enumerate(lst, start=1):
        print(opt_fmtr(total, item))
    print(opt_fmtr('q', 'quit'))

    while True:
        choice = input("pick an option: ")
        if choice == 'q':
            # sys.exit() rather than the interactive-shell helper exit().
            sys.exit()
        try:
            index = int(choice) - 1
        except ValueError:
            print("invalid option, try again")
            continue
        if 0 <= index < total:
            return str(index)
        print("invalid option, try again")


# NOTE(review): pcp() is called once per attribute, exactly as before;
# whether a single instance could be shared depends on what pcp() does.
data = pcp().data
names = pcp().names  # names of provinces
provinces = pcp().provinces  # dataframes of provinces

# Ask the user to choose a province.
userinput = tui_menu(names)
chosen_province = names[int(userinput)]
print(chosen_province)

# Ask the user to choose a data set.
# axes[0] is the row index, axes[1] is the column index; the first four
# columns are not offered.
datas = data.axes[1][4:]
userinput2 = tui_menu(datas)
chosen_column = datas[int(userinput2)]
print(chosen_column)

# Graph the chosen column against the date for the chosen province.
province_rows = data[data.prname == chosen_province]
province_rows.plot(x='date', y=chosen_column)
Пример #5
0
    return arr

DD = 23
# Plain decimal literals: a leading zero (``02``) is a SyntaxError on
# Python 3 and an octal literal on Python 2; the value is 2 either way.
HH = 2
MM = 2

fn = "/home/vighnesh/data/Twitter_volume_AAPL.csv"
# Pass the path itself so loadtxt opens and closes the file; the previous
# open(fn, "r") handle was never closed. Only column 1 is read and the
# header row is skipped.
tweet_data = np.loadtxt(fn, delimiter=",", skiprows=1, usecols=[1])
plt.plot(tweet_data)
print("Original shape = ", tweet_data.shape)

# Cut the series into windows of 30 samples before decomposing it.
M = make_windows_non_overlap(tweet_data, 30)
print("NNZ original = ", np.count_nonzero(M))
print("Matrix shape = ", M.shape)

L, S, _ = pcp(M, verbose=True, delta=1e-4, svd_method='approximate')
# Back to one-dimensional series for plotting.
L = L.flatten()
S = S.flatten()


# NOTE(review): OUTLIER_IDX is not defined in the visible part of the file.
outlier_values = tweet_data[OUTLIER_IDX]
plt.scatter(OUTLIER_IDX, outlier_values, color='red')

plt.xlim(0, 16000)
plt.ylim(0, 16000)
plt.figure()
N = np.abs(S)
# NOTE(review): N is |S| (second pcp output) but the label says "low rank";
# confirm which component this figure is meant to show.
plt.plot(N, label='Mag of low rank')
plt.figure()
plt.plot(L, color='orange')
Пример #6
0
import numpy as np
from pcp import pcp
from util import *
n = 500
# r is used as an array dimension and k as a sample count, so both must be
# integers; float sizes are rejected by current NumPy.
r = int(round(0.05 * n))
k = int(round(0.05 * (n ** 2)))
M = np.empty([n, n])
with Timer('Creating matrix ...'):
    # Product of an (n, r) and an (r, n) Gaussian factor.
    L0 = np.dot(np.random.normal(0.0, 1.0/n, size=(n, r)),
                np.random.normal(0.0, 1.0/n, size=(r, n)))
    # NOTE(review): P_omg marks k random positions but is never used below;
    # S0 is drawn independently of it. Confirm whether S0 should be masked.
    P_omg = np.zeros(L0.size)
    P_omg[np.random.choice(L0.size, k, False)] = 1
    S0 = np.random.choice([1, -1], size=(n, n))*np.random.randint(2, size=(n, n))
    M = L0 + S0
# Single-argument print calls produce the same text on Python 2 and 3.
print("rank(L0) = %s" % np.linalg.matrix_rank(L0))
print("||S0||_0 = %s" % np.count_nonzero(S0))
with Timer('Principal Component Pursuit ...'):
    L, S, (u, s, v) = pcp(M, maxiter=50, verbose=True, svd_method="exact")

Пример #7
0
def do_plot(ax, img, shape):
    """Redraw *ax* with *img* shown as a grayscale image.

    The axes are cleared first, *img* is reshaped to *shape* and drawn with
    nearest-neighbour interpolation, and the tick labels on both axes are
    replaced with empty lists.
    """
    ax.cla()
    picture = img.reshape(shape)
    render_options = {"cmap": "gray", "interpolation": "nearest"}
    ax.imshow(picture, **render_options)
    # Same order as before: x labels first, then y labels.
    for clear_labels in (ax.set_xticklabels, ax.set_yticklabels):
        clear_labels([])


if __name__ == "__main__":
    import sys
    import glob
    import matplotlib.pyplot as pl

    # Self-test mode: decompose a small fixed matrix, check that the two
    # components add back up to it, then exit without touching image files.
    run_self_test = "--test" in sys.argv
    if run_self_test:
        M = 10*np.ones((10, 10)) - 5*np.eye(10)
        L, S, svd = pcp(M, verbose=True, svd_method="exact")
        reconstructed = L + S
        assert np.allclose(M, reconstructed), "Failed"
        print("passed")
        sys.exit(0)

    # Every second bitmap among the first 2000 matches.
    all_bitmaps = glob.glob("/home/vighnesh/images/Escalator/*.bmp")
    gl = all_bitmaps[:2000:2]
    M, shape = bitmap_to_mat(gl)
    print(M.shape)

    pcp_options = dict(delta=1e-3, maxiter=50, verbose=True,
                       svd_method="approximate")
    L, S, (u, s, v) = pcp(M, **pcp_options)

    # One row of three panels with almost no spacing between them.
    fig, axes = pl.subplots(1, 3, figsize=(10, 4))
    fig.subplots_adjust(left=0, right=1, hspace=0, wspace=0.01)

    # Show the first row of M in the left-most panel.
    i = 0
    do_plot(axes[0], M[i], shape)
Пример #8
0
def _save_component(pixels, dimension, path):
    """Reshape one image's pixels to (dimension[0], dimension[1], 3) and save to *path*."""
    frame = np.reshape(pixels, (dimension[0], dimension[1], 3))
    # NOTE(review): np.uint8 does not clip; values outside 0..255 are not
    # preserved by this cast. Confirm that is acceptable for these outputs.
    Image.fromarray(np.uint8(frame)).save(path)


def doRpca(image_filenames, ref_file):
    """Decompose a set of images with ``pcp`` and write the results to disk.

    For every input image three PNG files are written into the module-level
    ``out_dir``: ``<name>-ori.png`` (input), ``<name>-lowrank.png`` (first
    ``pcp`` output) and ``<name>-sparse.png`` (second ``pcp`` output). The
    MSE between each low-rank image and the reference image is logged,
    followed by the sum, the average and the total running time.

    Relies on the module-level ``num_iter``, ``rpca_method``, ``out_dir``
    and ``logger``, and on the helpers ``process_image``, ``pcp`` and
    ``mse``.

    Args:
        image_filenames: Paths of the images to process; also used to name
            the output files.
        ref_file: Path of the reference image used for the MSE.

    Returns:
        None.
    """
    start = time.time()
    M, dimension = process_image(image_filenames)

    # shape[0] is the number of images in the dataset
    # shape[1] is the pixel values for either r,g or b (width x height, flattened)
    # shape[2] is 3, because it consists of r,g,b
    # Merge the last two axes so that each image becomes a single row.
    M = np.reshape(M, (M.shape[0], M.shape[1] * M.shape[2]))

    logger.info(M.shape)
    logger.info(dimension)

    L, S, (u, s, v) = pcp(M,
                          maxiter=num_iter,
                          verbose=True,
                          svd_method=rpca_method)
    # Uncomment this section and comment the line above if you want to run ALM instead of PCP ADM
    # L, S, (u, s, v) = alm(M, maxiter=num_iter, verbose=True, svd_method=rpca_method)

    # Split each row back into (pixels, 3) now that the decomposition is done.
    M = np.reshape(M, (M.shape[0], M.shape[1] // 3, 3))
    L = np.reshape(L, (L.shape[0], L.shape[1] // 3, 3))
    S = np.reshape(S, (S.shape[0], S.shape[1] // 3, 3))

    ref = Image.open(ref_file).convert("RGB")
    ref = np.array(ref.getdata())
    # The reference does not change between images; flatten it once.
    ref_flat = ref.flatten()

    sum_mse = 0.0

    for i, original in enumerate(M):
        base = os.path.basename(image_filenames[i])
        current_filename, ext = os.path.splitext(base)
        out_prefix = out_dir + "/" + current_filename

        _save_component(original, dimension, out_prefix + "-ori.png")
        _save_component(L[i], dimension, out_prefix + "-lowrank.png")
        _save_component(S[i], dimension, out_prefix + "-sparse.png")

        # logger.info(current_filename + ext + " Original MSE : " + str(mse(ref.flatten(), M[i].flatten())))
        current_mse = mse(ref_flat, L[i].flatten())
        logger.info(current_filename + ext + " Low-rank MSE : " +
                    str(current_mse))
        sum_mse = sum_mse + current_mse

    logger.info("Sum MSE of all low-rank images = " + str(sum_mse))
    logger.info("Average MSE of all low-rank images = " +
                str(sum_mse / len(M)))

    logger.info("RPCA completed!")
    rpca_time = time.time() - start
    logger.info("RPCA total time taken = %.3f seconds" % rpca_time)
Пример #9
0
def do_plot(ax, img, shape):
    """Clear *ax* and draw *img*, reshaped to *shape*, as a grayscale image.

    Nearest-neighbour interpolation is used and the tick labels of both
    axes are set to empty lists.
    """
    ax.cla()
    image_2d = img.reshape(shape)
    ax.imshow(image_2d, interpolation="nearest", cmap="gray")
    ax.set_xticklabels([])
    ax.set_yticklabels([])


if __name__ == "__main__":
    import sys
    import glob
    import matplotlib.pyplot as pl

    # Self-test mode: decompose a small fixed matrix, check that the two
    # components add back up to it, then exit without touching image files.
    run_self_test = "--test" in sys.argv
    if run_self_test:
        M = 10*np.ones((10, 10)) - 5*np.eye(10)
        L, S, svd = pcp(M, verbose=True, svd_method="exact")
        reconstructed = L + S
        assert np.allclose(M, reconstructed), "Failed"
        print("passed")
        sys.exit(0)

    # Every second bitmap among the first 2000 matches.
    bitmap_paths = glob.glob("test_data/Escalator/*.bmp")[:2000:2]
    M, shape = bitmap_to_mat(bitmap_paths)
    print(M.shape)

    pcp_options = dict(maxiter=50, verbose=True, svd_method="exact")
    L, S, (u, s, v) = pcp(M, **pcp_options)

    fig, axes = pl.subplots(1, 3, figsize=(10, 4))
    fig.subplots_adjust(left=0, right=1, hspace=0, wspace=0.01)

    # Left panel shows rows of M, middle panel the matching rows of L.
    panels = ((axes[0], M, "raw"), (axes[1], L, "low rank"))
    frame_count = min(len(M), 500)
    for i in range(frame_count):
        for panel, rows, title in panels:
            do_plot(panel, rows[i], shape)
            panel.set_title(title)
Пример #10
0
# The evaluation window ends 15 days after act_day.
end_day = act_day + dt.timedelta(days=15)

# NOTE(review): presumably the 14 days preceding end_day; confirm against
# get_features_for_prev_days / get_target_for_prev_days.
future_features = data.get_features_for_prev_days(end_day,
                                                  dt.timedelta(days=14))
future_data_set = data.flatten_features(future_features)  # for predicting

future_target = data.get_target_for_prev_days(end_day, dt.timedelta(days=14))
future_target_data_set = data.flatten_features(future_target)  # for testing

# Single-argument print calls: identical output on Python 2 and valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Start day = " + day)
print("End day = " + str(end_day))

# Experiment
# RPCA Tests
# Decompose the targets (approximate SVD) and flatten both components.
L, S, (u, s, v) = pcp(np.array(target_data_set),
                      maxiter=30,
                      verbose=False,
                      svd_method="approximate")
L = np.ravel(L)
S = np.ravel(S)
# Decompose the historic data set with the exact SVD; results are kept 2-D.
LD, SD, (uD, sD, vD) = pcp(np.array(historic_data_set),
                           maxiter=30,
                           verbose=False,
                           svd_method="exact")

# plt.figure(5)
# plt.plot([item[3] for item in historic_data_set], label="historic_data_set")
# plt.plot([item[3] for item in LD], label="low_dimension")
# plt.plot([item[3] for item in SD], label="sparse")
# plt.legend()
# plt.show()
def rpca_singvals(d):
    """Return ``pca_singvals`` of the first ``pcp`` output for *d*.

    ``pcp`` is run for at most 5 iterations with the ``'randomized'`` SVD
    method and verbose output enabled. Its first output is presumably the
    low-rank component; confirm against ``pcp``.
    """
    first_component = pcp(d, maxiter=5, verbose=True,
                          svd_method='randomized')[0]
    return pca_singvals(first_component)