class TestNeffCriterion(tf.test.TestCase):
    """Tests the effective-sample-size (Neff) resampling criterion in both
    linear-weight and log-weight space."""

    def setUp(self):
        # Row 0 is perfectly balanced, row 1 is degenerate (one particle
        # holds 90% of the mass) — the expected flags below follow this.
        balanced_and_degenerate = [[1 / 3, 1 / 3, 1 / 3], [0.05, 0.05, 0.9]]
        self.weights = tf.constant(balanced_and_degenerate)
        self.log_weights = tf.math.log(self.weights)
        self.state = MockState(self.weights)
        # Unnormalized variants: each row sums to 3 instead of 1.
        self._scaled_weights = 3. * self.weights
        self._scaled_log_weights = self.log_weights + math.log(3)
        # Same threshold; one instance consumes log-weights, the other linear
        # weights (presumably (threshold, is_relative, on_log, ...) — confirm
        # against NeffCriterion's signature).
        self.neff_log_instance = NeffCriterion(0.5, True, True, True)
        self.neff_instance = NeffCriterion(0.5, True, False, True)

    def test_neff_normalized(self):
        """Linear- and log-space results agree on normalized weights."""
        linear_flag, _ = neff(self.weights, True, False, 0.5 * 3)
        log_flag, _ = neff(self.log_weights, True, True, 0.5 * 3)
        self.assertAllEqual(linear_flag, log_flag)
        self.assertAllEqual(linear_flag, [False, True])

    def test_neff_unnormalized(self):
        """Linear- and log-space results agree on unnormalized weights."""
        linear_flag, _ = neff(self._scaled_weights, False, False, 0.5 * 3)
        log_flag, _ = neff(self._scaled_log_weights, False, True, 0.5 * 3)
        self.assertAllEqual(linear_flag, log_flag)
        self.assertAllEqual(linear_flag, [False, True])

    def test_neff(self):
        """The NeffCriterion wrapper matches the functional `neff` behavior."""
        log_space_flags, _ = self.neff_log_instance.apply(self.state)
        linear_space_flags, _ = self.neff_instance.apply(self.state)
        self.assertAllEqual(log_space_flags, linear_space_flags)
        self.assertAllEqual(linear_space_flags, [False, True])
def setUp(self):
    """Create the weight fixtures shared by the Neff-criterion tests.

    Builds one balanced and one degenerate row of particle weights, their
    log-space counterparts, unnormalized (x3) variants, and two criterion
    instances — presumably (threshold, is_relative, on_log, ...); confirm
    against NeffCriterion's signature.
    """
    weight_rows = [[1 / 3, 1 / 3, 1 / 3], [0.05, 0.05, 0.9]]
    self.weights = tf.constant(weight_rows)
    self.log_weights = tf.math.log(self.weights)
    self.state = MockState(self.weights)
    # Unnormalized versions: each row now sums to 3 rather than 1.
    self._scaled_weights = 3. * self.weights
    self._scaled_log_weights = self.log_weights + math.log(3)
    # One criterion consumes log-weights, the other linear weights.
    self.neff_log_instance = NeffCriterion(0.5, True, True, True)
    self.neff_instance = NeffCriterion(0.5, True, False, True)
def setUp(self):
    """Build a bootstrap particle filter fixture for a 1-D random walk.

    Creates a uniform-weight initial ``State`` of 4 batches x 10 particles,
    a linear-Gaussian observation model, a random-walk transition model,
    and an ``SMC`` filter with systematic resampling, plus a synthetic
    random-walk observation dataset of length 100.
    """
    N = 10
    n_particles = tf.constant(N)
    dimension = tf.constant(1)
    batch_size = tf.constant(4)
    # Uniform weights: 1/N for each particle in every batch.
    weights = tf.ones((batch_size, n_particles), dtype=float) / tf.cast(
        n_particles, float)
    initial_particles = tf.random.uniform(
        (batch_size, n_particles, dimension), -1, 1)
    log_likelihoods = tf.zeros((batch_size), dtype=float)
    self.initial_state = State(particles=initial_particles,
                               log_weights=tf.math.log(weights),
                               weights=weights,
                               log_likelihoods=log_likelihoods,
                               ancestor_indices=None,
                               resampling_correction=None)
    # Observation noise: 1-D Gaussian with variance 0.5.
    error_variance = tf.constant([0.5], dtype=tf.float32)
    error_rv = tfp.distributions.MultivariateNormalDiag(
        tf.constant([0.]), error_variance)
    # Transition noise: 1-D Gaussian with variance 0.5.
    noise_variance = tf.constant([0.5])
    noise_rv = tfp.distributions.MultivariateNormalDiag(
        tf.constant([0.]), noise_variance)
    observation_model = LinearObservationModel(tf.constant([[1.]]), error_rv)
    transition_matrix = tf.constant([[1.]])
    transition_model = RandomWalkModel(transition_matrix, noise_rv)
    # Bootstrap proposal: propose directly from the transition model.
    bootstrap = BootstrapProposalModel(transition_model)
    resampling_criterion = NeffCriterion(tf.constant(0.5),
                                         is_relative=tf.constant(True))
    systematic_resampling_method = SystematicResampler()
    self.bootstrap_filter = SMC(observation_model, transition_model,
                                bootstrap, resampling_criterion,
                                systematic_resampling_method)
    # TODO: Let's change this using an instance of StateSpaceModel
    self.n = 100
    # Synthetic data: a standard-normal random walk of length self.n,
    # shaped [1, 1, 1] per step.
    observation = np.array([[[0.]]]).astype(np.float32)
    observations = []
    for _ in range(self.n):
        observations.append(observation)
        observation = observation + np.random.normal(0., 1., [1, 1, 1])
    self.observation_dataset = tf.data.Dataset.from_tensor_slices(
        observations)
def main(resampling_method_value,
         resampling_neff,
         resampling_kwargs=None,
         T=150,
         batch_size=50,
         n_particles=25,
         data_seed=0,
         filter_seed=555,
         savefig=False):
    """Run an SMC filter on a 2-D linear-Gaussian model and collect the
    per-state standard deviations.

    :param resampling_method_value: int value of a ResamplingMethodsEnum member.
    :param resampling_neff: Neff threshold; 0. -> never resample,
        1. -> always resample, otherwise a relative NeffCriterion.
    :param resampling_kwargs: extra kwargs forwarded to the resampler.
    :param T: number of time steps of synthetic data.
    :param data_seed / filter_seed: RNG seeds for data generation / filtering.
    :param savefig: forwarded to kalman_main when the KALMAN method is chosen.
    """
    # Fixed 2-D linear-Gaussian state-space model.
    transition_matrix = 0.5 * np.eye(2, dtype=np.float32)
    transition_covariance = np.eye(2, dtype=np.float32)
    observation_matrix = np.eye(2, dtype=np.float32)
    observation_covariance = 0.1 * np.eye(2, dtype=np.float32)
    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)
    np_random_state = np.random.RandomState(seed=data_seed)
    data, kf = get_data(transition_matrix, observation_matrix,
                        transition_covariance, observation_covariance, T,
                        np_random_state)
    observation_dataset = tf.data.Dataset.from_tensor_slices(data)
    # Kalman baseline short-circuits the particle-filter path entirely.
    if resampling_method_enum == ResamplingMethodsEnum.KALMAN:
        return kalman_main(kf, data, savefig)
    if resampling_kwargs is None:
        resampling_kwargs = {}
    # Map the Neff threshold to a resampling criterion (0/1 are sentinels).
    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)
    # Select the resampling scheme from the enum.
    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        # NOTE(review): the inner pop removes 'learning_rate' even when 'lr'
        # is present (both keys are consumed) — presumably intentional so
        # neither reaches SinkhornLoss; confirm.
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))
        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    # Cholesky factors are what make_filter consumes.
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)
    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(tf.constant(initial_particles))
    smc = make_filter(observation_matrix, transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion)
    states = get_states(smc, initial_state, observation_dataset,
                        tf.constant(T), tf.constant(filter_seed))
    stddevs = std(states, keepdims=False).numpy()
    # NOTE(review): the name suggests a DataFrame was intended here; this
    # currently just aliases the ndarray — possibly truncated code; confirm.
    stddevs_df = stddevs
def main(resampling_method_value,
         resampling_neff,
         learning_rates=(1e-4, 1e-3),
         resampling_kwargs=None,
         alpha=0.42,
         dx=10,
         dy=3,
         observation_covariance=1.,
         dense=False,
         T=20,
         batch_size=1,
         n_particles=25,
         data_seed=0,
         n_data=50,
         n_iter=50,
         savefig=False,
         filter_seed=0,
         use_xla=False,
         change_seed=True):
    """Compare learning rates for variational proposal parameters (phi) of an
    SMC filter on a dx-dimensional linear-Gaussian model, against an optimal
    (multinomial) filter baseline, and plot losses vs. ESS profiles.

    :param resampling_method_value: int value of a ResamplingMethodsEnum member.
    :param resampling_neff: Neff threshold; 0. -> never resample,
        1. -> always resample, otherwise a relative NeffCriterion.
    :param learning_rates: learning rates compared per dataset.
    :param n_data: number of independent synthetic datasets.
    :param n_iter: optimization steps per learning rate.
    :param change_seed: forwarded to compare_learning_rates.
    """
    transition_matrix = get_transition_matrix(alpha, dx)
    transition_covariance = get_transition_covariance(dx)
    observation_matrix = get_observation_matrix(dx, dy, dense)
    observation_covariance = get_observation_covariance(
        observation_covariance, dy)
    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)
    np_random_state = np.random.RandomState(seed=data_seed)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    # make_filter / make_optimal_filter consume Cholesky factors.
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)
    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, dx]).astype(np.float32)
    initial_state = State(initial_particles)
    # Map the Neff threshold to a resampling criterion (0/1 are sentinels).
    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)
    # Baseline filter with the optimal proposal and multinomial resampling.
    optimal_smc = make_optimal_filter(observation_matrix, transition_matrix,
                                      observation_covariance_chol,
                                      transition_covariance_chol,
                                      MultinomialResampler(),
                                      resampling_criterion)
    if resampling_kwargs is None:
        resampling_kwargs = {}
    resampling_method = resampling_method_factory(resampling_method_enum,
                                                  resampling_kwargs)
    datas = []
    lls = []
    observation_datasets = []
    optimal_lls = []
    # Initial proposal parameters: log phi_x = 0 (phi_x = 1), phi_y = 0.
    log_phi_x_0 = tf.ones(dx)
    phi_y_0 = tf.zeros(dy)
    for _ in range(n_data):
        data, ll = get_data(transition_matrix, observation_matrix,
                            transition_covariance, observation_covariance, T,
                            np_random_state)
        datas.append(data)
        lls.append(ll / T)
        observation_dataset = tf.data.Dataset.from_tensor_slices(data)
        observation_datasets.append(observation_dataset)
        # Per-timestep log-likelihood under the optimal filter, as baseline.
        final_state = optimal_smc(initial_state, observation_dataset, T, None,
                                  True, filter_seed)
        optimal_lls.append(final_state.log_likelihoods.numpy().mean() / T)
    log_phi_x = tf.Variable(log_phi_x_0, trainable=True)
    phi_y = tf.Variable(phi_y_0, trainable=True)
    smc = make_filter(observation_matrix, transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion, log_phi_x,
                      phi_y)

    def optimizer_maker(learning_rate):
        # tf.function doesn't like creating variables. This is a way to create
        # them outside the graph. We can't reuse the same optimizer because it
        # would be giving a warmed-up momentum to the ones run later.
        optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
        return optimizer

    initial_values = [log_phi_x_0, phi_y_0]
    losses_list = []
    ess_profiles_list = []
    mean_errors = []
    for observation_dataset in observation_datasets:
        try:
            losses, ess_profiles = compare_learning_rates(
                smc, initial_state, observation_dataset, T, log_phi_x, phi_y,
                initial_values, n_iter, optimizer_maker, learning_rates,
                filter_seed, use_xla, change_seed)
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; keep the best-effort skip but narrow it.
            print('one dataset failed, ignoring')
            continue
        losses_df = pd.DataFrame(np.stack(losses).T,
                                 columns=np.log10(learning_rates))
        ess_df = pd.DataFrame(np.stack(ess_profiles).T,
                              columns=np.log10(learning_rates))
        losses_df.columns.name = 'log learning rate'
        losses_df.columns.epoch = 'epoch'
        ess_df.columns.name = 'log learning rate'
        ess_df.columns.epoch = 'epoch'
        losses_list.append(losses_df)
        ess_profiles_list.append(ess_df)
        # Error between the learned diagonal proposal covariance and the
        # optimal one (accesses private attributes of the optimal filter).
        delta_phi_m_1 = tf.linalg.diag(tf.exp(-log_phi_x))
        diff_cov = (optimal_smc._proposal_model._sigma -
                    delta_phi_m_1 @ transition_covariance)
        approx_error = tf.linalg.diag_part(diff_cov).numpy()
        mean_error = np.sqrt(np.nanmean(approx_error**2))
        mean_errors.append(mean_error)
    losses_data = pd.concat(losses_list, axis=1)
    ess_data = pd.concat(ess_profiles_list, axis=1)
    mean_data = pd.DataFrame(
        [[np.mean(mean_errors)]],
        index=pd.MultiIndex.from_tuples([(batch_size, n_particles)]),
        columns=pd.MultiIndex.from_tuples([
            (resampling_method_enum.name, change_seed)
        ]))
    # Average across datasets per learning rate.
    # NOTE(review): groupby(axis=1) is deprecated in pandas >= 2.1; kept for
    # behavior parity with the pinned pandas version.
    losses_data = losses_data.groupby(axis=1, level=0).mean()
    ess_data = ess_data.groupby(axis=1, level=0).mean()
    # plot_losses(losses_df, resampling_method_enum.name, savefig, dx, dy, dense, T, change_seed)
    plot_losses_vs_ess(losses_data, ess_data, resampling_method_enum.name,
                       savefig, dx, dy, dense, T, n_particles, change_seed,
                       batch_size, np.mean(optimal_lls), np.mean(lls), n_iter,
                       mean_data, n_data)
    print(tf.exp(log_phi_x))
def main(resampling_method_value,
         resampling_neff,
         resampling_kwargs=None,
         T=150,
         batch_size=50,
         n_particles=25,
         data_seed=0,
         values=(0.25, 0.5, 0.75),
         filter_seed=555,
         savefig=False):
    """Tabulate ELBO estimates of an SMC filter on a 2-D linear-Gaussian
    model for several transition-matrix diagonal values, as a LaTeX table.

    :param resampling_method_value: int value of a ResamplingMethodsEnum member.
    :param resampling_neff: Neff threshold; 0. -> never resample,
        1. -> always resample, otherwise a relative NeffCriterion.
    :param values: diagonal theta values at which the ELBO is evaluated.
    :param savefig: write the table to ./tables/ instead of printing it.
    """
    # Fixed 2-D linear-Gaussian state-space model.
    transition_matrix = 0.5 * np.eye(2, dtype=np.float32)
    transition_covariance = np.eye(2, dtype=np.float32)
    observation_matrix = np.eye(2, dtype=np.float32)
    observation_covariance = 0.1 * np.eye(2, dtype=np.float32)
    # Each scalar theta becomes the pair (theta, theta) on the diagonal.
    values = np.array(list(zip(values, values)), dtype=np.float32)
    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)
    np_random_state = np.random.RandomState(seed=data_seed)
    data, kf = get_data(transition_matrix, observation_matrix,
                        transition_covariance, observation_covariance, T,
                        np_random_state)
    observation_dataset = tf.data.Dataset.from_tensor_slices(data)
    # FIX: was `resampling_method_enum == 6` — comparing an Enum member to a
    # bare int is always False for a plain enum, so the Kalman branch was
    # unreachable; use the named member like the sibling scripts do.
    if resampling_method_enum == ResamplingMethodsEnum.KALMAN:
        return kalman_main(kf, data, values, T, savefig)
    if resampling_kwargs is None:
        resampling_kwargs = {}
    # Map the Neff threshold to a resampling criterion (0/1 are sentinels).
    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)
    # Select the resampling scheme from the enum.
    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        # Both 'lr' and 'learning_rate' are consumed so neither reaches
        # SinkhornLoss; 'lr' wins when both are given.
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))
        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )
    # Trainable transition matrix initialized near the true value.
    init_transition_matrix = (0.5 * np.eye(2) +
                              0.1 * np_random_state.randn(2, 2)).astype(
                                  np.float32)
    modifiable_transition_matrix = tf.Variable(init_transition_matrix,
                                               trainable=True)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    # make_filter consumes Cholesky factors.
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)
    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(tf.constant(initial_particles))
    smc = make_filter(observation_matrix, modifiable_transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion)
    elbos = get_elbos(smc, initial_state, observation_dataset, tf.constant(T),
                      modifiable_transition_matrix, tf.constant(values),
                      tf.constant(filter_seed))
    elbos_df = pd.DataFrame(
        elbos.numpy(), pd.Index(values[:, 0], name=r'$\theta_1$, $\theta_2$'))
    # Keep only mean/std across repeats, one row per theta.
    elbos_df = elbos_df.T.describe().T[['mean', 'std']].reset_index()
    if savefig:
        filename = f"{resampling_method_enum.name}_batchsize_{batch_size}_N_{n_particles}_epsilon_{resampling_kwargs.get('epsilon')}_likelihoods_values.tex"
        elbos_df.to_latex(buf=os.path.join('./tables/', filename),
                          float_format='{:,.3f}'.format,
                          escape=False,
                          index=False)
    else:
        print(
            elbos_df.to_latex(float_format='{:,.3f}'.format,
                              escape=False,
                              index=False))
def main(resampling_method_value,
         resampling_neff,
         resampling_kwargs=None,
         T=100,
         batch_size=1,
         n_particles=25,
         phi=0.5,
         data_seed=0,
         filter_seed=1,
         learning_rate=0.001,
         n_iter=50,
         savefig=False,
         use_xla=False,
         batch_data=1,
         assume_differentiable=False,
         change_seed=False):
    """Estimate the transition-matrix diagonal of a 2-D linear-Gaussian model
    by gradient descent through an SMC filter, and compare the result with a
    Kalman-filter maximum-likelihood estimate on the same datasets.

    :param resampling_method_value: int value of a ResamplingMethodsEnum member.
    :param resampling_neff: Neff threshold; 0. -> never resample,
        1. -> always resample, otherwise a relative NeffCriterion.
    :param phi: true transition diagonal; the optimizer starts at 2*phi/3.
    :param batch_data: number of independent datasets (must be > 0).
    :param assume_differentiable: force the reparameterized gradient path even
        when the resampler does not declare itself differentiable.
    """
    # True 2-D linear-Gaussian state-space model.
    transition_matrix = phi * np.eye(2, dtype=np.float32)
    transition_covariance = 0.5 * np.eye(2, dtype=np.float32)
    observation_matrix = np.eye(2, dtype=np.float32)
    observation_covariance = 0.1 * np.eye(2, dtype=np.float32)
    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)
    np_random_state = np.random.RandomState(seed=data_seed)
    data = []
    np_data = []
    assert batch_data > 0
    # Generate batch_data independent observation sequences; kf keeps the
    # Kalman filter of the last one (all share the same model parameters).
    for _ in range(batch_data):
        a_data, kf = get_data(transition_matrix, observation_matrix,
                              transition_covariance, observation_covariance,
                              T, np_random_state)
        data.append(tf.data.Dataset.from_tensor_slices(a_data))
        np_data.append(a_data)
    if resampling_kwargs is None:
        resampling_kwargs = {}
    # Map the Neff threshold to a resampling criterion (0/1 are sentinels).
    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)
    # Select the resampling scheme from the enum.
    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        # Both 'lr' and 'learning_rate' are consumed so neither reaches
        # SinkhornLoss; 'lr' wins when both are given.
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate', 0.1))
        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )
    modifiable_transition_matrix = tf.Variable(transition_matrix,
                                               trainable=True)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    # make_filter consumes Cholesky factors.
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)
    initial_particles = np_random_state.normal(
        0., 1., [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(initial_particles)
    smc = make_filter(observation_matrix, modifiable_transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion)
    # Start the optimization away from the truth (at 2/3 of phi).
    x0 = np.array([2 * phi / 3] * 2).astype(np.float32)
    print(x0)
    # Reparameterized gradients when the resampler supports them, otherwise
    # finite differences.
    if resampling_method.DIFFERENTIABLE or assume_differentiable:
        loss_fun = lambda x, observation_dataset, seed: values_and_gradient(
            x, modifiable_transition_matrix, smc, initial_state,
            observation_dataset, T, seed)
    else:
        loss_fun = lambda x, observation_dataset, seed: values_and_gradient_finite_diff(
            x, modifiable_transition_matrix, smc, initial_state,
            observation_dataset, T, seed)
    final_values = []
    losses = []
    kalman_params = []

    def kf_likelihood_fun(val, data):
        # Negative Kalman log-likelihood as a function of the transition
        # diagonal, for the scipy-style `minimize` baseline below.
        import copy
        kf_copy = copy.copy(kf)
        kf_copy.transition_matrices = np.diag(val)
        return -kf_loglikelihood(kf_copy, data)

    fun = tf.function(loss_fun, experimental_compile=use_xla)
    for observation_dataset, np_dataset in tqdm(zip(data, np_data),
                                                total=batch_data):
        final_value, loss = gradient_descent(fun, x0, observation_dataset,
                                             tf.constant(learning_rate),
                                             tf.constant(n_iter),
                                             tf.constant(filter_seed),
                                             tf.constant(change_seed))
        final_values.append(final_value.numpy())
        losses.append(loss.numpy())
        # Maximum-likelihood baseline on the same dataset.
        kf_params = minimize(kf_likelihood_fun, x0, args=(np_dataset, ))
        kalman_params.append(kf_params.x)
    losses = np.array(losses).T
    plt.plot(losses)
    plt.show()
    final_values = np.vstack(final_values)
    kalman_params = np.vstack(kalman_params)
    # NOTE(review): these LaTeX labels are missing the closing '$'
    # (r'$\theta_1' / r'$\theta_2') — likely a typo; confirm before fixing
    # as downstream files may key on the exact column names.
    df = pd.DataFrame(final_values - kalman_params,
                      columns=[r'$\theta_1', r'$\theta_2'])
    parameters_diff = np.mean(np.square(df), 0)
    if savefig:
        filename = f'theta_diff_{resampling_method_enum.name}_batch_size_{batch_size}_N_{n_particles}_batch_data_{batch_data}_changeseed_{change_seed}.csv'
        df.to_csv(os.path.join('./tables/', filename), float_format='%.5f')
    else:
        print(parameters_diff.to_latex(float_format='%.5f'))
def main(run_method,
         latent_size=10,
         latent_encoded_size=32,
         batch_size=1,
         n_particles=25,
         epsilon=0.5,
         scaling=0.9,
         neff=0.9,
         max_iter=1000,
         additional_variables_are_state=False,
         convergence_threshold=1e-3,
         n_iter=100,
         initial_lr=0.01,
         decay=0.5,
         steps=100,
         warmup=100,
         data_seed=0,
         filter_seed=1,
         fixed_seed=False,
         out_dir='./',
         data_fp='../data/data/piano_data/jsb.pkl'):
    """Train a VRNN particle filter on a piano-roll dataset with either
    multinomial or regularized (OT) resampling, logging losses, gradients,
    ESS and timing to pickles and plots in ``out_dir``.

    :param run_method: 'mult' or 'reg' — which SMC variant to train.
    :param neff: relative Neff threshold for the resampling criterion.
    :param epsilon / scaling / max_iter / convergence_threshold: parameters
        of the RegularisedTransform (Sinkhorn) resampler.
    :param fixed_seed: reuse filter_seed every step instead of the step index.
    :param data_fp: path to the pickled piano-roll dataset.
    """
    inputs_tensor, targets_tensor, lens, mean = create_pianoroll_dataset(
        data_fp, split='train', batch_size=1)
    # Sequence length and observation dimensionality come from the data.
    T = targets_tensor.shape.as_list()[0]
    observation_size = targets_tensor.shape.as_list()[-1]
    encoded_data_size = latent_size
    rnn_hidden_size = latent_size // 2
    latent_encoder_layers = [32]
    latent_encoder = snt.nets.MLP(
        output_sizes=latent_encoder_layers + [latent_encoded_size],
        name="latent_encoder")
    # store observations
    dimension = latent_size
    # Insert a particle axis: [T, 1, obs] so from_tensor_slices yields per-step
    # tensors broadcastable over particles.
    inputs_tensor = tf.expand_dims(inputs_tensor, 1)
    targets_tensor = tf.expand_dims(targets_tensor, 1)
    obs_data = tf.data.Dataset.from_tensor_slices(targets_tensor)
    inputs_data = tf.data.Dataset.from_tensor_slices(inputs_tensor)
    transition_model = VRNNTransitionModel(rnn_hidden_size, latent_encoder,
                                           latent_size)
    observation_model = VRNNBernoulliObservationModel(latent_encoder,
                                                      observation_size)
    proposal_model = VRNNProposalModel(rnn_hidden_size, latent_encoder,
                                       latent_size)
    # TEST* variants are used for held-out evaluation inside the train loop.
    test_transition_model = TESTVRNNTransitionModel(rnn_hidden_size,
                                                    latent_encoder,
                                                    latent_size)
    test_proposal_model = TESTVRNNProposalModel(rnn_hidden_size,
                                                latent_encoder, latent_size)
    # initial state
    tf.random.set_seed(data_seed)
    normal_dist = tfp.distributions.Normal(0., 1.)
    initial_latent_state = tf.zeros([batch_size, n_particles, dimension])
    initial_latent_state = tf.cast(initial_latent_state, dtype=float)
    latent_encoded = transition_model.latent_encoder(initial_latent_state)
    # initial rnn_state: two identical normal samples concatenated
    # (presumably LSTM (h, c) halves — confirm against VRNNTransitionModel).
    initial_rnn_state = [
        normal_dist.sample([batch_size, n_particles, rnn_hidden_size],
                           seed=data_seed)
    ] * 2
    initial_rnn_state = tf.concat(initial_rnn_state, axis=-1)
    # rnn_out
    initial_rnn_out = tf.zeros([batch_size, n_particles, rnn_hidden_size])
    # Uniform initial particle weights.
    initial_weights = tf.ones(
        (batch_size, n_particles), dtype=float) / tf.cast(n_particles, float)
    log_likelihoods = tf.zeros(batch_size, dtype=float)
    init_state = VRNNState(particles=initial_latent_state,
                           log_weights=tf.math.log(initial_weights),
                           weights=initial_weights,
                           obs_likelihood=log_likelihoods,
                           log_likelihoods=log_likelihoods,
                           rnn_state=initial_rnn_state,
                           rnn_out=initial_rnn_out,
                           latent_encoded=latent_encoded)
    # record loss: a larger, fixed-size state used only for evaluation runs.
    LARGE_B = 50
    N = 25
    # initial state
    large_initial_latent_state = tf.zeros([LARGE_B, N, dimension])
    large_initial_latent_state = tf.cast(large_initial_latent_state,
                                         dtype=float)
    large_latent_encoded = transition_model.latent_encoder(
        large_initial_latent_state)
    # initial rnn_state
    large_initial_rnn_state = [
        normal_dist.sample([LARGE_B, N, rnn_hidden_size])
    ] * 2
    large_initial_rnn_state = tf.concat(large_initial_rnn_state, axis=-1)
    # rnn_out
    large_initial_rnn_out = tf.zeros([LARGE_B, N, rnn_hidden_size])
    obs_likelihood = tf.zeros(LARGE_B, dtype=float)
    large_init_state = VRNNState(particles=large_initial_latent_state,
                                 obs_likelihood=obs_likelihood,
                                 rnn_state=large_initial_rnn_state,
                                 rnn_out=large_initial_rnn_out,
                                 latent_encoded=large_latent_encoded)
    ## Check variables
    # snt networks initiated on first call
    t_samp = transition_model.sample(init_state, inputs_tensor[0],
                                     seed=data_seed)
    obs_samp = observation_model.sample(init_state, seed=data_seed)
    # for var in transition_model.variables:
    #     print(var.name)
    # for var in observation_model.variables:
    #     print(var.name)
    ## Particle Filter
    trainable_variables = (transition_model.variables +
                           observation_model.variables)
    # Snapshot of initial variable values, restored when reset=True below.
    init_values = [v.value() for v in trainable_variables]
    resampling_criterion = NeffCriterion(tf.constant(neff), tf.constant(True))
    # resampling_criterion = AlwaysResample()
    resampling_method = MultinomialResampler()
    epsilon = tf.constant(epsilon)
    scaling = tf.constant(scaling)
    regularized = RegularisedTransform(
        epsilon,
        scaling=scaling,
        max_iter=max_iter,
        convergence_threshold=convergence_threshold,
        additional_variables_are_state=additional_variables_are_state)
    multinomial_smc = VRNNSMC(observation_model, transition_model,
                              proposal_model, resampling_criterion,
                              MultinomialResampler())
    regularized_smc = VRNNSMC(observation_model, transition_model,
                              proposal_model, resampling_criterion,
                              regularized)
    test_reg = VRNNSMC(observation_model, test_transition_model,
                       test_proposal_model, resampling_criterion, regularized)
    test_mul = VRNNSMC(observation_model, test_transition_model,
                       test_proposal_model, resampling_criterion,
                       MultinomialResampler())

    def run_smc(smc, optimizer, n_iter, seed=filter_seed):
        """Train `smc` for n_iter steps; returns stacked per-step metrics."""
        # print(optimizer.weights)  # check

        @tf.function
        def smc_routine(smc, state, use_correction_term=False, seed=seed):
            # One full filtering pass; returns mean log-likelihood, ESS,
            # an (optional) resampling correction, and observation likelihood.
            final_state = smc(state,
                              obs_data,
                              n_observations=T,
                              inputs_series=inputs_data,
                              return_final=True,
                              seed=seed)
            res = tf.reduce_mean(final_state.log_likelihoods)
            obs_likelihood = tf.reduce_mean(final_state.obs_likelihood)
            ess = final_state.ess
            if use_correction_term:
                return res, tf.reduce_mean(final_state.resampling_correction)
            return res, ess, tf.constant(0.), obs_likelihood

        @tf.function
        def run_one_step(smc, use_correction_term, init_state, seed=seed):
            # Forward + backward pass; loss is the negative (corrected)
            # log-likelihood.
            with tf.GradientTape() as tape:
                tape.watch(trainable_variables)
                real_ll, ess, correction, obs_likelihood = smc_routine(
                    smc, init_state, use_correction_term, seed)
                loss = -(real_ll + correction)
            grads_loss = tape.gradient(loss, trainable_variables)
            return real_ll, grads_loss, ess, obs_likelihood

        @tf.function
        def train_one_step(smc, use_correction_term, seed=seed):
            real_ll, grads_loss, ess, obs_likelihood = run_one_step(
                smc, use_correction_term, init_state, seed)
            # Gradient clipping to [-500, 500] element-wise.
            capped_gvs = [
                tf.clip_by_value(grad, -500., 500.) for grad in grads_loss
            ]
            optimizer.apply_gradients(zip(capped_gvs, trainable_variables))
            return -real_ll, capped_gvs, ess, obs_likelihood

        @tf.function
        def train_niter(smc,
                        num_steps=100,
                        use_correction_term=False,
                        reset=True,
                        seed=seed,
                        fixed_seed=fixed_seed):
            # Full training loop, capturing per-step metrics in TensorArrays.
            if reset:
                reset_operations = [
                    v.assign(init)
                    for v, init in zip(trainable_variables, init_values)
                ]
            else:
                reset_operations = []
            obs_lik_tensor_array = tf.TensorArray(dtype=tf.float32,
                                                  size=num_steps,
                                                  dynamic_size=False,
                                                  element_shape=[])
            multi_loss_tensor_array = tf.TensorArray(dtype=tf.float32,
                                                     size=num_steps,
                                                     dynamic_size=False,
                                                     element_shape=[])
            test_reg_tensor_array = tf.TensorArray(dtype=tf.float32,
                                                   size=num_steps,
                                                   dynamic_size=False,
                                                   element_shape=[])
            test_mul_tensor_array = tf.TensorArray(dtype=tf.float32,
                                                   size=num_steps,
                                                   dynamic_size=False,
                                                   element_shape=[])
            loss_tensor_array = tf.TensorArray(dtype=tf.float32,
                                               size=num_steps,
                                               dynamic_size=False,
                                               element_shape=[])
            ess_tensor_array = tf.TensorArray(dtype=tf.float32,
                                              size=num_steps,
                                              dynamic_size=False,
                                              element_shape=[])
            grad_tensor_array = tf.TensorArray(dtype=tf.float32,
                                               size=num_steps,
                                               dynamic_size=False,
                                               element_shape=[])
            time_tensor_array = tf.TensorArray(dtype=tf.float64,
                                               size=num_steps,
                                               dynamic_size=False,
                                               element_shape=[])
            with tf.control_dependencies(reset_operations):
                toc = tf.constant(0., dtype=tf.float64)
                tic = tf.timestamp()
                for step in tf.range(1, num_steps + 1):
                    # Either keep the same seed every step or vary with step.
                    if fixed_seed:
                        seed = seed
                    else:
                        seed = step
                    tic_loss = tf.timestamp()
                    with tf.control_dependencies([tic_loss]):
                        loss, grads, ess_run, obs_likelihood = train_one_step(
                            smc, use_correction_term, seed)
                    with tf.control_dependencies([loss]):
                        toc_loss = tf.timestamp()
                    # Evaluation passes on the large fixed state (not trained).
                    multi_loss_state = multinomial_smc(
                        large_init_state,
                        obs_data,
                        n_observations=T,
                        inputs_series=inputs_data,
                        return_final=True,
                        seed=seed)
                    test_reg_state = test_reg(large_init_state,
                                              obs_data,
                                              n_observations=T,
                                              inputs_series=inputs_data,
                                              return_final=True,
                                              seed=seed)
                    test_mul_state = test_mul(large_init_state,
                                              obs_data,
                                              n_observations=T,
                                              inputs_series=inputs_data,
                                              return_final=True,
                                              seed=seed)
                    test_reg_loss = -tf.reduce_mean(
                        test_reg_state.log_likelihoods)
                    test_mul_loss = -tf.reduce_mean(
                        test_mul_state.log_likelihoods)
                    multi_loss = -tf.reduce_mean(
                        multi_loss_state.log_likelihoods)
                    ess = multi_loss_state.ess
                    # Only the training step time is accumulated.
                    toc += toc_loss - tic_loss
                    max_grad = tf.reduce_max(
                        [tf.reduce_max(tf.abs(grad)) for grad in grads])
                    # NOTE(review): print_step is 0 when num_steps < 10, which
                    # would make the modulo below fail — confirm callers.
                    print_step = num_steps // 10
                    if step % print_step == 0:
                        tf.print('Step', step, '/', num_steps,
                                 ', obs_likelihood = ', obs_likelihood,
                                 ', loss = ', loss, ', test_reg = ',
                                 test_reg_loss, ', test_mul = ',
                                 test_mul_loss, ', multi_loss= ', multi_loss,
                                 ': ms per step= ',
                                 1000. * toc / tf.cast(step, tf.float64),
                                 end='\r')
                    test_reg_tensor_array = test_reg_tensor_array.write(
                        step - 1, test_reg_loss)
                    test_mul_tensor_array = test_mul_tensor_array.write(
                        step - 1, test_mul_loss)
                    obs_lik_tensor_array = obs_lik_tensor_array.write(
                        step - 1, obs_likelihood)
                    multi_loss_tensor_array = multi_loss_tensor_array.write(
                        step - 1, multi_loss)
                    ess_tensor_array = ess_tensor_array.write(
                        step - 1, ess[0])
                    loss_tensor_array = loss_tensor_array.write(
                        step - 1, loss)
                    grad_tensor_array = grad_tensor_array.write(
                        step - 1, max_grad)
                    time_tensor_array = time_tensor_array.write(step - 1, toc)
            return (loss_tensor_array.stack(), grad_tensor_array.stack(),
                    time_tensor_array.stack(), ess_tensor_array.stack(),
                    multi_loss_tensor_array.stack(),
                    obs_lik_tensor_array.stack(),
                    test_reg_tensor_array.stack(),
                    test_mul_tensor_array.stack())

        return train_niter(smc, tf.constant(n_iter))

    def run_block(smc,
                  method,
                  n_iter,
                  initial_lr,
                  decay,
                  steps,
                  out_dir,
                  col='blue',
                  warnup=100,
                  force=False,
                  data_name=None):
        """Train one SMC variant and persist all metric arrays and plots.

        NOTE(review): `warnup` (sic) and `force` are unused in the live code
        path — `warnup` looks like a typo for `warmup`, which the commented
        plot below reads from the enclosing scope instead.
        """
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        optimizer = make_optimizer(initial_learning_rate=initial_lr,
                                   decay_steps=steps,
                                   decay_rate=decay,
                                   staircase=True)
        key = fn_identifier(initial_lr, decay, steps, method, data_name)
        filename = "vrnn_loss_{0}.pkl".format(key)
        # NOTE(review): filepath is computed but never used; pickle_obj below
        # recomputes the same join.
        filepath = os.path.join(out_dir, filename)
        print("\n {0}".format(method))
        print(key)
        (loss_array, grad_array, time_array, ess_array, multi_loss_array,
         obs_lik_array, test_reg_array,
         test_mul_array) = run_smc(smc, optimizer, n_iter, seed=filter_seed)
        obs_lik_array = obs_lik_array.numpy()
        loss_array = loss_array.numpy()
        grad_array = grad_array.numpy()
        time_array = time_array.numpy()
        ess_array = ess_array.numpy()
        test_reg_array = test_reg_array.numpy()
        test_mul_array = test_mul_array.numpy()
        multi_loss_array = multi_loss_array.numpy()
        # Persist every metric under a method/hyperparameter-derived key.
        pickle_obj(loss_array, os.path.join(out_dir, filename))
        filename_test_loss = "vrnn_reg_tloss_{0}.pkl".format(key)
        pickle_obj(test_reg_array, os.path.join(out_dir, filename_test_loss))
        filename_test_loss = "vrnn_mul_tloss_{0}.pkl".format(key)
        pickle_obj(test_mul_array, os.path.join(out_dir, filename_test_loss))
        filename_olik = "vrnn_olik_{0}.pkl".format(key)
        pickle_obj(obs_lik_array, os.path.join(out_dir, filename_olik))
        filename_mloss = "vrnn_mloss_{0}.pkl".format(key)
        pickle_obj(multi_loss_array, os.path.join(out_dir, filename_mloss))
        filename_ess = "vrnn_ess_{0}.pkl".format(key)
        pickle_obj(ess_array, os.path.join(out_dir, filename_ess))
        filename_grad = "vrnn_grad_{0}.pkl".format(key)
        pickle_obj(grad_array, os.path.join(out_dir, filename_grad))
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(ess_array, color=col)
        fig.savefig(os.path.join(out_dir, 'vrnn_ess_{0}.png'.format(key)))
        plt.close()
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(grad_array, color=col)
        fig.savefig(os.path.join(out_dir, 'vrnn_grad_{0}.png'.format(key)))
        plt.close()
        # fig, ax = plt.subplots(figsize=(10, 5))
        # ax.plot(loss_array[warmup:], color=col)
        # fig.savefig(os.path.join(out_dir, 'vrnn_loss_{0}.png'.format(key)))
        # plt.close()
        return multi_loss_array

    print(run_method)
    # Dataset name (file stem) keys the output artifacts.
    data_name = os.path.splitext(os.path.basename(data_fp))[0]
    if run_method == 'mult':
        multi_array = run_block(multinomial_smc,
                                'mult',
                                n_iter,
                                initial_lr,
                                decay,
                                steps,
                                out_dir,
                                col='blue',
                                data_name=data_name)
    if run_method == 'reg':
        print(resampling_method)
        reg_array = run_block(regularized_smc,
                              'reg',
                              n_iter,
                              initial_lr,
                              decay,
                              steps,
                              out_dir,
                              col='green',
                              data_name=data_name)
def main(resampling_method_value, resampling_neff, resampling_kwargs=None,
         T=100, batch_size=1, n_particles=25, data_seed=0, filter_seed=1,
         mesh_size=10, savefig=True, use_tqdm=False, use_xla=False,
         diff_epsilon=1e-1, optimal_proposal=False):
    """Plot the SMC log-likelihood surface and its gradient field for a 2-D
    linear-Gaussian state-space model, over a mesh of transition-matrix values.

    Simulates ``T`` steps of data from a fixed constant-velocity-style model,
    builds an SMC filter with the requested resampling scheme, evaluates
    ``get_surface`` on a ``mesh_size x mesh_size`` grid of parameter values in
    [0.95, 1]^2, and saves/plots the resulting surface and vector field.

    Args:
        resampling_method_value: Integer value of a ``ResamplingMethodsEnum``
            member selecting the resampler (KALMAN short-circuits to
            ``kalman_main``).
        resampling_neff: Effective-sample-size threshold. ``0.`` means never
            resample, ``1.`` means always resample, anything else builds a
            ``NeffCriterion`` (second positional arg is ``is_relative``,
            matching usage elsewhere in the project).
        resampling_kwargs: Extra kwargs for the regularized / optimized
            resamplers (e.g. ``epsilon``). NOTE: mutated in place — the
            OPTIMIZED branch pops ``'lr'``/``'learning_rate'`` before reusing
            the dict for ``SinkhornLoss`` and ``RegularisedTransform``.
        T: Number of simulated observation timesteps.
        batch_size: Leading batch dimension of the particle tensor.
        n_particles: Number of particles per batch element.
        data_seed: Seed for the NumPy RandomState used for data generation
            and the initial particles.
        filter_seed: Seed forwarded to the filtering routine.
        mesh_size: Number of grid points per axis.
        savefig: Forwarded to the plotting helpers.
        use_tqdm: Forwarded to ``get_surface`` / ``kalman_main``.
        use_xla: If True, compile ``get_surface`` with XLA.
        diff_epsilon: Finite-difference step; only used by the currently
            commented-out finite-difference path below.
        optimal_proposal: Forwarded to ``make_filter``.

    Returns:
        Whatever ``kalman_main`` returns for the KALMAN method; otherwise
        ``None`` (side effect: plots are produced/saved).
    """
    # Model noise scales: v drives the transition noise, t the observation noise.
    v = 1.
    t = .1
    transition_matrix = np.array([[1., 1.], [0., 1.]], dtype=np.float32)
    # Integrated-noise covariance of the (position, velocity) transition.
    transition_covariance = v**2 * np.array([[1 / 3, 1 / 2], [1 / 2, 1.]],
                                            dtype=np.float32)
    observation_matrix = np.array([[1., 0]], dtype=np.float32)
    observation_covariance = np.array([[t**2]], dtype=np.float32)
    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)
    # Cartesian grid of candidate parameter values; the (x, y) pairs are fed
    # to get_surface together with the modifiable transition matrix
    # (presumably perturbing two of its entries — confirm in get_surface).
    x_linspace = np.linspace(0.95, 1., mesh_size).astype(np.float32)
    y_linspace = np.linspace(0.95, 1., mesh_size).astype(np.float32)
    mesh = np.asanyarray([(x, y) for x in x_linspace for y in y_linspace])
    # Single RandomState reused below for the initial particles, so draw
    # order matters.
    np_random_state = np.random.RandomState(seed=data_seed)
    data, kf = get_data(transition_matrix, observation_matrix,
                        transition_covariance, observation_covariance, T,
                        np_random_state)
    if resampling_method_enum == ResamplingMethodsEnum.KALMAN:
        # Exact Kalman baseline: no SMC machinery needed.
        return kalman_main(kf, data, mesh, mesh_size, 1e-2, use_tqdm, savefig)
    observation_dataset = tf.data.Dataset.from_tensor_slices(data)
    if resampling_kwargs is None:
        resampling_kwargs = {}
    # Resampling criterion: 0. / 1. are the degenerate never/always cases.
    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)
    # Resampler selection.
    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        # Accept either 'lr' or 'learning_rate'; both are removed from
        # resampling_kwargs so they are not forwarded to the constructors
        # below. Note the inner pop always runs, so 'learning_rate' is
        # removed even when 'lr' is present.
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate',
                                                         0.1))
        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )
    # The transition matrix is a (non-trainable) Variable so get_surface can
    # assign mesh values into it between evaluations.
    modifiable_transition_matrix = tf.Variable(transition_matrix,
                                               trainable=False)
    observation_matrix = tf.convert_to_tensor(observation_matrix)
    transition_covariance_chol = tf.linalg.cholesky(transition_covariance)
    observation_covariance_chol = tf.linalg.cholesky(observation_covariance)
    # Particles start tightly clustered around the origin.
    initial_particles = np_random_state.normal(
        0., .01, [batch_size, n_particles, 2]).astype(np.float32)
    initial_state = State(initial_particles)
    smc = make_filter(observation_matrix, modifiable_transition_matrix,
                      observation_covariance_chol, transition_covariance_chol,
                      resampling_method, resampling_criterion,
                      optimal_proposal=optimal_proposal)
    # if resampling_method.DIFFERENTIABLE:
    get_method = tf.function(get_surface, experimental_compile=use_xla)
    # else:
    #     fun = partial(get_surface_finite_difference, diff_epsilon=diff_epsilon)
    #     get_method = tf.function(fun, experimental_compile=use_xla)
    log_likelihoods, gradients = get_method(mesh, modifiable_transition_matrix,
                                            smc, initial_state, False,
                                            observation_dataset, T,
                                            filter_seed, use_tqdm)
    plot_surface(mesh, mesh_size, log_likelihoods.numpy(),
                 resampling_method_enum.name, resampling_kwargs, n_particles,
                 savefig)
    plot_vector_field(mesh, mesh_size, log_likelihoods.numpy(),
                      gradients.numpy(), resampling_method_enum.name,
                      resampling_kwargs, n_particles, savefig)
def main(resampling_method_value, resampling_neff, learning_rates=(1e-4, 1e-3),
         resampling_kwargs=None, currencies=('EUR', 'GBP', 'CAD'),
         batch_size=1, n_particles=25, api_key='', start_date="2019-09-02",
         end_date="2020-01-02", n_iter=50, savefig=False, filter_seed=0,
         use_xla=False, change_seed=True):
    """Compare SMC parameter learning across learning rates on FX-rate data.

    Fetches exchange-rate data for ``currencies``, builds an SMC filter with
    trainable model parameters (mean, transition matrix, transition
    covariance), runs ``compare_learning_rates`` over the given learning
    rates, and plots loss vs. effective sample size.

    Args:
        resampling_method_value: Integer value of a ``ResamplingMethodsEnum``
            member selecting the resampler.
        resampling_neff: Effective-sample-size threshold; ``0.`` never
            resamples, ``1.`` always resamples, otherwise a ``NeffCriterion``
            is used (second positional arg is ``is_relative``, matching usage
            elsewhere in the project).
        learning_rates: Learning rates to compare (one training run each).
        resampling_kwargs: Extra kwargs for regularized / optimized
            resamplers; mutated in place by the OPTIMIZED branch and read
            again at the end for its ``"epsilon"`` entry.
        currencies: Currency codes; their count M sets the state dimension.
        batch_size: Leading batch dimension of the particle tensor.
        n_particles: Number of particles per batch element.
        api_key: Key for the data provider used by ``get_data``.
        start_date: First date of the data window (ISO format).
        end_date: Last date of the data window (ISO format).
        n_iter: Number of training iterations per learning rate.
        savefig: Forwarded to ``plot_losses_vs_ess``.
        filter_seed: Seed forwarded to ``compare_learning_rates``.
        use_xla: NOTE(review): accepted but apparently unused in this body —
            confirm whether it should be forwarded somewhere.
        change_seed: Forwarded to ``compare_learning_rates``.

    Returns:
        None; side effects are plots and printed final/initial parameters.
    """
    data = get_data(currencies, api_key, start_date, end_date)
    M = len(currencies)  # state dimension = number of currencies
    T = len(data)        # number of observation timesteps
    resampling_method_enum = ResamplingMethodsEnum(resampling_method_value)
    observation_dataset = tf.data.Dataset.from_tensor_slices(data)
    if resampling_kwargs is None:
        resampling_kwargs = {}
    # Resampling criterion: 0. / 1. are the degenerate never/always cases.
    if resampling_neff == 0.:
        resampling_criterion = NeverResample()
    elif resampling_neff == 1.:
        resampling_criterion = AlwaysResample()
    else:
        resampling_criterion = NeffCriterion(resampling_neff, True)
    # Resampler selection.
    if resampling_method_enum == ResamplingMethodsEnum.MULTINOMIAL:
        resampling_method = MultinomialResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.SYSTEMATIC:
        resampling_method = SystematicResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.STRATIFIED:
        resampling_method = StratifiedResampler()
    elif resampling_method_enum == ResamplingMethodsEnum.REGULARIZED:
        resampling_method = RegularisedTransform(**resampling_kwargs)
    elif resampling_method_enum == ResamplingMethodsEnum.VARIANCE_CORRECTED:
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = PartiallyCorrectedRegularizedTransform(
            regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.OPTIMIZED:
        # Accept either 'lr' or 'learning_rate'; both are removed from
        # resampling_kwargs so they are not forwarded to the constructors
        # below. Note the inner pop always runs, so 'learning_rate' is
        # removed even when 'lr' is present.
        lr = resampling_kwargs.pop('lr',
                                   resampling_kwargs.pop('learning_rate',
                                                         0.1))
        loss = SinkhornLoss(**resampling_kwargs, symmetric=True)
        optimizer = SGD(loss, lr=lr, decay=0.95)
        regularized_resampler = RegularisedTransform(**resampling_kwargs)
        resampling_method = OptimizedPointCloud(
            optimizer, intermediate_resampler=regularized_resampler)
    elif resampling_method_enum == ResamplingMethodsEnum.CORRECTED:
        resampling_method = CorrectedRegularizedTransform(**resampling_kwargs)
    else:
        raise ValueError(
            f'resampling_method_name {resampling_method_enum} is not a valid ResamplingMethodsEnum'
        )
    # NOTE(review): hard-coded seed (no data_seed parameter here, unlike the
    # other experiment scripts) — confirm this is intentional. Draw order on
    # this RandomState matters: small state first, then the large one.
    np_random_state = np.random.RandomState(seed=555)
    initial_particles = np_random_state.normal(
        1., 0.5, [batch_size, n_particles, M]).astype(np.float32)
    initial_state = State(initial_particles)
    # A larger 25-batch state, presumably for evaluation inside
    # compare_learning_rates — confirm against its signature.
    large_initial_particles = np_random_state.normal(
        1., 0.5, [25, n_particles, M]).astype(np.float32)
    large_initial_state = State(large_initial_particles)
    # Initial values of the trainable model parameters.
    mu_init = -5. * tf.ones(M)
    F_init = 0.9 * tf.eye(M)
    transition_cov_init = 0.35 * tf.eye(M)
    observation_cov_init = 1. * tf.eye(M)
    mu = tf.Variable(mu_init, trainable=True)
    F = tf.Variable(F_init, trainable=True)
    transition_cov = tf.Variable(transition_cov_init, trainable=True)
    # Observation covariance is kept fixed during training.
    observation_cov = tf.Variable(observation_cov_init, trainable=False)
    smc = make_filter(mu, F, transition_cov, observation_cov,
                      resampling_method, resampling_criterion)
    # Systematic-resampling surrogate filter sharing the same variables.
    surrogate_smc = make_filter(mu, F, transition_cov, observation_cov,
                                SystematicResampler(), resampling_criterion)

    def optimizer_maker(learning_rate):
        # tf.function doesn't like creating variables; this factory creates
        # them outside the graph. A fresh optimizer per run is required —
        # reusing one would hand warmed-up momentum to later runs.
        optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
        return optimizer

    variables = [mu, F, transition_cov]
    initial_values = [mu_init, F_init, transition_cov_init]
    losses, ess_profiles = compare_learning_rates(
        smc, initial_state, observation_dataset, T, variables, initial_values,
        n_iter, optimizer_maker, learning_rates, filter_seed, change_seed,
        large_initial_state, surrogate_smc)
    # One column per learning rate (on a log10 scale), one row per epoch.
    losses_df = pd.DataFrame(np.stack(losses).T,
                             columns=np.log10(learning_rates))
    ess_df = pd.DataFrame(np.stack(ess_profiles).T,
                          columns=np.log10(learning_rates))
    losses_df.columns.name = 'log learning rate'
    # NOTE(review): `.columns.epoch` sets an ad-hoc attribute on the column
    # Index — likely intended to be `losses_df.index.name = 'epoch'`; verify.
    losses_df.columns.epoch = 'epoch'
    ess_df.columns.name = 'log learning rate'
    ess_df.columns.epoch = 'epoch'
    # plot_losses(losses_df, resampling_method_enum.name, savefig, dx, dy, dense, T, change_seed)
    plot_losses_vs_ess(losses_df, ess_df, resampling_method_enum.name, savefig,
                       M, n_particles, change_seed, batch_size, n_iter,
                       resampling_kwargs.get("epsilon"))
    # Print learned parameters followed by their initial values for comparison.
    print(mu)
    print(F)
    print(transition_cov)
    print(mu_init)
    print(F_init)
    print(transition_cov_init)