def test_stride_DCA(lorenz_dataset):
    """Check that deterministic and random strides work for DCA."""
    X = lorenz_dataset

    def _cross_covs(**kwargs):
        # Build a T=1 model, fit its statistics, and return the
        # cross-covariance stack as a numpy array.
        m = DCA(T=1, **kwargs)
        m.estimate_data_statistics(X)
        return m.cross_covs.numpy()

    full = _cross_covs()

    # A deterministic stride perturbs the estimate slightly, but it stays close.
    strided = _cross_covs(stride=2)
    assert not np.allclose(full, strided)
    assert_allclose(full, strided, atol=5e-2)

    # A fractional (random) stride also perturbs the estimate slightly.
    sampled_seed0 = _cross_covs(stride=.5, rng_or_seed=0)
    assert not np.allclose(full, sampled_seed0)
    assert_allclose(full, sampled_seed0, atol=5e-2)

    # Different seeds give different, but close, estimates.
    sampled_seed1 = _cross_covs(stride=.5, rng_or_seed=1)
    assert not np.allclose(sampled_seed1, sampled_seed0)
    assert_allclose(sampled_seed1, sampled_seed0, atol=5e-2)

    # The same seed reproduces the estimate exactly.
    repeat_seed1 = _cross_covs(stride=.5, rng_or_seed=1)
    assert_allclose(sampled_seed1, repeat_seed1)
def test_DCA_short(noise_dataset):
    """Test that a DCA model raises an error when T would break chunking."""
    data = noise_dataset

    # Without chunking, T=20 fits fine.
    DCA(d=3, T=20).fit(data)

    # Asking for more chunks than the data supports at this T must fail.
    with pytest.raises(ValueError):
        DCA(d=3, T=20, chunk_cov_estimate=10).fit(data)

    # A small number of chunks still works.
    DCA(d=3, T=20, chunk_cov_estimate=2).fit(data)
def test_DCA_variable_d(noise_dataset):
    """Test that the DCA projection can be refit with different d."""
    dca = DCA(d=3, T=10)
    dca.estimate_data_statistics(noise_dataset)

    # Default projection uses the d given at construction.
    dca.fit_projection()
    assert dca.coef_.shape[1] == 3
    assert dca.d_fit == 3

    # The projection can be refit with a different dimensionality
    # without re-estimating the data statistics.
    dca.fit_projection(d=2)
    assert dca.coef_.shape[1] == 2
    assert dca.d_fit == 2
def test_DCA(noise_dataset):
    """Test that a DCA model can be fit with no errors."""
    # NOTE(review): a second ``test_DCA`` later in this file shadows this
    # definition, so pytest only collects that one — confirm which is intended.
    data = noise_dataset

    dca = DCA(d=3, T=10)
    dca.fit(data)
    # The fitted mean must match the empirical mean of the data.
    assert_allclose(data.mean(axis=0, keepdims=True), dca.mean_)
    dca.transform(data)
    dca.fit_transform(data)
    dca.score()

    # Multiple random restarts, scoring with and without fresh data.
    dca = DCA(d=3, T=10, n_init=2)
    dca.fit(data)
    dca.score()
    dca.score(data)

    # Remaining constructor options just need to fit without errors.
    DCA(d=3, T=10, verbose=True).fit(data)
    DCA(d=3, T=10, block_toeplitz=False).fit(data)
def test_input_type():
    """Test that a list of 2d arrays or a 3d array work."""
    # NOTE(review): a second ``test_input_type`` later in this file shadows
    # this definition, so pytest only collects that one — confirm intent.
    dca = DCA(d=3, T=10)

    # A list of 2d (time, features) trial arrays.
    trials = [np.random.randn(1000, 10) for _ in range(3)]
    dca.fit(trials)
    dca.transform(trials)
    dca.fit_transform(trials)

    # A single 3d (trials, time, features) array.
    stacked = np.random.randn(3, 1000, 10)
    dca.fit(stacked)
    dca.transform(stacked)
    dca.fit_transform(stacked)
def test_DCA(noise_dataset):
    """Test that a DCA model can be fit with no errors."""
    # NOTE(review): this redefinition shadows an earlier ``test_DCA`` in this
    # file; only this version is collected by pytest — confirm intent.
    data = noise_dataset

    dca = DCA(d=3, T=10)
    dca.fit(data)
    dca.transform(data)
    dca.fit_transform(data)
    dca.score()

    # Multiple random restarts, scoring with and without fresh data.
    dca = DCA(d=3, T=10, n_init=2)
    dca.fit(data)
    dca.score()
    dca.score(data)

    # Remaining constructor options just need to fit without errors.
    DCA(d=3, T=10, use_scipy=False).fit(data)
    DCA(d=3, T=10, verbose=True).fit(data)
    DCA(d=3, T=10, block_toeplitz=False).fit(data)
def test_input_type():
    """Test that a list of 2d arrays or a 3d array work."""
    # NOTE(review): this redefinition shadows an earlier ``test_input_type``
    # in this file; only this version is collected by pytest — confirm intent.
    dca = DCA(d=3, T=10)

    # A list of 2d (time, features) trial arrays; the fitted mean must match
    # the pooled mean across all trials.
    trials = [np.random.randn(1000, 10) for _ in range(3)]
    dca.fit(trials)
    assert_allclose(np.concatenate(trials).mean(axis=0, keepdims=True), dca.mean_)
    dca.transform(trials)
    dca.fit_transform(trials)

    # A single 3d (trials, time, features) array.
    stacked = np.random.randn(3, 1000, 10)
    dca.fit(stacked)
    dca.transform(stacked)
    dca.fit_transform(stacked)
def test_DCA_variable_T(noise_dataset):
    """Test that the DCA projection can be refit with a different T.

    The docstring previously said "different d" (copy-paste from the
    variable-d test); this test actually exercises refitting with a
    different window length T. Seeding ``model.rng`` identically before
    each fit makes runs with the same T reproducible, so any difference
    after refitting with a smaller T is attributable to T alone.
    """
    X = noise_dataset
    model = DCA(d=3, T=10)
    model.estimate_data_statistics(X)

    # Two fits with the same seed and the default T must agree exactly.
    model.rng = np.random.RandomState(0)
    model.fit_projection()
    assert model.T_fit == 10
    c0 = model.coef_.copy()

    model.rng = np.random.RandomState(0)
    model.fit_projection()
    c1 = model.coef_.copy()

    # Refitting with a smaller T yields a different projection.
    model.rng = np.random.RandomState(0)
    model.fit_projection(T=5)
    assert model.T_fit == 5
    c2 = model.coef_.copy()

    assert_allclose(c0, c1)
    assert not np.allclose(c0, c2)

    # T larger than the one used to estimate the statistics is invalid.
    with pytest.raises(ValueError):
        model.fit_projection(T=11)
def test_init(noise_dataset):
    """Fit DCA under each supported projection-initialization scheme."""
    for scheme in ('random', 'uniform'):
        DCA(d=3, T=10, init=scheme).fit(noise_dataset)
def main(args):
    """Run the Lorenz benchmark comparing linear DCA against DAPC.

    Generates Lorenz dynamics, lifts them through a DNN into a noisy
    high-dimensional observation space at several SNRs, fits both a linear
    DCA model and a DAPC model on the noisy data, matches each learned
    representation back to the ground-truth dynamics, and records/plots
    the resulting R^2 scores.

    :param args: command-line argument list to parse (as for argparse).
    """
    parser = get_parser()
    parser = DAPC.add_arguments(parser)
    args = parser.parse_args(args)
    # Seed numpy and torch so data generation and training are reproducible.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Handle multiple gpu issues.
    T = args.T
    fdim = args.fdim
    encoder_name = args.encoder_type
    params = ''
    print(params)

    idim = 30  # lift projection dim
    noise_dim = 7  # noisify raw DCA
    split_rate = args.split_rate  # train/valid split
    snr_vals = [0.3, 1.0, 5.0]  # signal-to-noise ratios
    num_samples = 10000  # samples to collect from the lorenz system

    print("Generating ground truth dynamics ...")
    X_dynamics = gen_lorenz_data(num_samples)  # 10000 * 3
    noisy_model = DNN(X_dynamics.shape[1], idim, dropout=0.5)  # DNN lift projection: 3 -> 30 for d-DCA
    use_gpu = False
    if use_gpu:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    dca_recons = []
    dapc_recons = []
    r2_vals = np.zeros((len(snr_vals), 2))  # obtain R2 scores for DCA and dDCA
    for snr_idx, snr in enumerate(snr_vals):
        print("Generating noisy data with snr=%.2f ..." % snr)
        X_clean, X_noisy = gen_nonlinear_noisy_lorenz(idim, T, snr,
                                                      X_dynamics=X_dynamics,
                                                      noisy_model=noisy_model,
                                                      seed=args.seed)
        # Center the noisy observations.
        X_noisy = X_noisy - X_noisy.mean(axis=0)

        X_clean_train, X_clean_val = split(X_clean, split_rate)
        X_noisy_train, X_noisy_val = split(X_noisy, split_rate)
        X_dyn_train, X_dyn_val = split(X_dynamics, split_rate)

        if not os.path.exists("runs"):
            os.mkdir("runs")

        # Chop the long sequences into fixed-size chunks for training.
        chunk_size = 500
        X_train_seqs, L_train = chunk_long_seq(X_noisy_train, 30, chunk_size)
        X_valid_seqs, L_valid = chunk_long_seq(X_noisy_val, 30, chunk_size)
        X_clean_seqs, L_clean = chunk_long_seq(X_clean_val, 30, chunk_size)
        X_dyn_seqs, L_dyn = chunk_long_seq(X_dyn_val, 30, chunk_size)
        print(X_train_seqs[0].shape)

        # 0:500 test, 1000:1500 valid
        X_match = torch.from_numpy(_context_concat(X_noisy_val[1000:1500], 0)).float().to(device)
        Y_match = X_dyn_val[1000:1500]

        # Linear DCA
        print("Training {}".format(args.base_encoder_type))
        dca_model = DCA(d=fdim, T=T)
        dca_model.fit(X_train_seqs + X_valid_seqs[:1])
        X_dca = dca_model.transform(X_noisy_val[:500])
        # Project to 3d for matching/plotting when fdim > 3.
        if X_dca.shape[1] > 3:
            X_dca = TSNE(n_components=3).fit_transform(X_dca)

        # deep DCA
        print("Training {}".format(encoder_name))
        dapc_model = DAPC(args.obj, idim, fdim, T,
                          encoder_type=args.encoder_type,
                          ortho_lambda=args.ortho_lambda,
                          recon_lambda=args.recon_lambda,
                          dropout=args.dropout,
                          masked_recon=args.masked_recon,
                          args=args, device=device)
        dapc_model = fit_dapc(dapc_model, X_train_seqs, L_train,
                              X_valid_seqs, L_valid, None, args.lr, use_gpu,
                              batch_size=args.batchsize, max_epochs=args.epochs,
                              device=device, snapshot=params + ".cpt",
                              X_match=X_match, Y_match=Y_match, use_writer=False)

        X_dapc = dapc_model.encode(
            torch.from_numpy(_context_concat(X_noisy_val[:500], dapc_model.input_context)).float().to(device, dtype=dapc_model.dtype)).cpu().numpy()
        if X_dapc.shape[1] > 3:
            X_dapc = TSNE(n_components=3).fit_transform(X_dapc)
        # Print the (centered) second-moment matrix of the DAPC features.
        print(np.matmul((X_dapc - X_dapc.mean(0)).T, (X_dapc - X_dapc.mean(0))) / X_dapc.shape[0])

        if not os.path.exists("pngs"):
            os.mkdir("pngs")

        # match DCA with ground-truth
        if not os.path.exists("npys"):
            os.mkdir("npys")
        np.save("npys/dapc_bases_{}.npy".format(params), X_dapc)

        print("Matching {}".format(args.base_encoder_type))
        X_dca_recon, _ = match(X_dca, X_dyn_val[:500], 15000, device)
        # match DAPC with ground-truth
        print("Matching {}".format(encoder_name))
        X_dapc_recon, _ = match(X_dapc, X_dyn_val[:500], 15000, device)

        # R2 of dca
        r2_dca = 1 - np.sum((X_dca_recon - X_dyn_val[:500]) ** 2) / np.sum(
            (X_dyn_val[:500] - np.mean(X_dyn_val[:500], axis=0)) ** 2)
        print("\nr2_dca:", r2_dca)
        # R2 of dapc
        r2_dapc = 1 - np.sum((X_dapc_recon - X_dyn_val[:500]) ** 2) / np.sum(
            (X_dyn_val[:500] - np.mean(X_dyn_val[:500], axis=0)) ** 2)
        print("r2_dapc:", r2_dapc)

        # store R2's
        r2_vals[snr_idx] = [r2_dca, r2_dapc]
        # store reconstructed signals
        dca_recons.append(X_dca_recon)
        dapc_recons.append(X_dapc_recon)

    # NOTE(review): the plotting section is placed after the SNR loop since it
    # consumes the accumulated reconstruction lists — the original file's
    # indentation was lost, so confirm this boundary against upstream.
    if not os.path.exists("plots"):
        os.mkdir("plots")
    if not os.path.exists("plots/{}".format(params)):
        os.mkdir("plots/{}".format(params))
    plot_figs(dca_recons, dapc_recons, X_dyn_val[:500], X_clean_val[:500],
              X_noisy_val[:500], r2_vals, snr_vals,
              args.base_encoder_type, encoder_name, "plots/{}".format(params))