def test_prediction_long_term_hard_brake(self):
    """Smoke test: train AsyncTD on the heuristic risk env, then print the
    value prediction for the initial state of the environment."""
    config = TestConfig()
    config.n_global_steps = 20000
    config.env_id = 'HeuristicRiskEnv-v0'
    config.discount = 0.  # 599. / 600
    config.max_timesteps = 10000
    config.prime_timesteps = 50
    config.learning_rate = 1e-3
    config.adam_beta1 = .995
    config.adam_beta2 = .999
    config.dropout_keep_prob = 1.
    config.l2_reg = 0.
    config.local_steps_per_update = 20
    config.hidden_layer_sizes = [32, 16]
    config.hard_brake_threshold = -3.
    config.hard_brake_n_past_frames = 1
    config.target_loss_index = 3
    config.loss_type = 'mse'

    env = build_envs.create_env(config)
    test_state = env.reset()
    summary_writer = tf.summary.FileWriter('/tmp/test')
    with tf.Session() as sess:
        trainer = async_td.AsyncTD(env, 0, config)
        sess.run(tf.global_variables_initializer())
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        c, h = trainer.network.get_initial_features()
        while global_step < config.n_global_steps:
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)
        # NOTE(review): original indentation was lost; the readout below is
        # assumed to run once after training completes — confirm vs history.
        value = trainer.network.value(test_state, c, h)
        print(value)
def test_validate_const_reward_discounted_env(self):
    """Train on an env that pays a constant reward every step and run
    periodic validation against the analytic value target.

    NOTE(review): `avg_loss` is computed but never asserted on, so this is
    a smoke/inspection test rather than a pass/fail check.
    """
    # config
    config = TestConfig()
    config.n_global_steps = 50000
    config.env_id = 'RandObsConstRewardEnv-v0'
    config.discount = .9
    config.value_dim = 2
    config.adam_beta1 = .9
    config.local_steps_per_update = 1000
    config.hidden_layer_sizes = [256]
    config.learning_rate = 1e-3
    config.learning_rate_end = 1e-5
    config.loss_type = 'mse'
    config.target_loss_index = None

    # build env
    const_reward = .01
    horizon = 10000000
    rand_obs = False
    env = debug_envs.RandObsConstRewardEnv(
        horizon=horizon,
        reward=const_reward,
        value_dim=config.value_dim,
        rand_obs=rand_obs)
    env.spec = gym.envs.registration.EnvSpec(
        id='RandObsConstRewardEnv-v0',
        tags={'wrapper_config.TimeLimit.max_episode_steps': horizon + 1})

    # build validation set
    n_samples = 2
    n_timesteps = 10  # predict after seeing this many timesteps
    n_prediction_timesteps = 10  # determines discount
    input_dim = 1
    obs_gen = np.random.randn if rand_obs else np.ones
    x = obs_gen(np.prod((n_samples, n_timesteps, input_dim))).reshape(
        (n_samples, n_timesteps, input_dim))
    # target: constant reward accumulated over the prediction window
    y = (const_reward * np.ones(
        (n_samples, config.value_dim)) * n_prediction_timesteps)
    w = np.ones((n_samples, 1))
    dataset = validation.Dataset(x, y, w)

    # run it
    summary_writer = tf.summary.FileWriter('/tmp/test')
    avg_loss = -1
    with tf.Session() as sess:
        trainer = async_td.AsyncTD(env, 0, config)
        sess.run(tf.global_variables_initializer())
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        while global_step < config.n_global_steps:
            trainer.process(sess)
            if global_step % 10 == 0:
                avg_loss = trainer.validate(sess, dataset)
            global_step = sess.run(trainer.global_step)
def test_validate(self):
    """Train on SeqSumDebugEnv and assert the validation loss drops
    below .1 on a tiny hand-built dataset of constant sequences."""
    # config
    config = TestConfig()
    config.n_global_steps = 5000
    config.env_id = 'SeqSumDebugEnv-v0'
    config.discount = 1.
    config.value_dim = 1
    config.adam_beta1 = .99
    config.local_steps_per_update = 100000
    config.hidden_layer_sizes = [128]
    config.learning_rate = 5e-4
    config.learning_rate_end = 5e-6
    config.loss_type = 'mse'
    config.target_loss_index = None

    # build env
    env = gym.make(config.env_id)

    # build validation set
    # sequences of either all 1s or all -1s (constant per sequence), e.g.
    # horizon = 4 seeing 1s: [1 1 1 1]; after seeing [1 1] the value from
    # that point is the reward still expected to accrue
    horizon = 4
    n_samples = 2
    n_timesteps = 2  # predict after seeing this many timesteps
    input_dim = 1
    # half ones and half neg ones
    x = np.ones((n_samples, n_timesteps, input_dim))
    x[int(n_samples / 2):] = -1
    # expected value: remaining timesteps scaled by the sequence sign
    # NOTE(review): the +1 means the current timestep's reward is counted
    # as well — confirm this matches the env's reward timing
    y = x[:, 0, :] * (horizon - n_timesteps + 1)
    w = np.ones(n_samples)
    dataset = validation.Dataset(x, y, w)

    # run it
    summary_writer = tf.summary.FileWriter('/tmp/test')
    avg_loss = -1
    with tf.Session() as sess:
        trainer = async_td.AsyncTD(env, 0, config)
        sess.run(tf.global_variables_initializer())
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        while global_step < config.n_global_steps:
            trainer.process(sess)
            if global_step % 10 == 0:
                avg_loss = trainer.validate(sess, dataset)
            global_step = sess.run(trainer.global_step)
    self.assertTrue(avg_loss < .1)
def test_model_losses(self):
    """Fit LSTMPredictor to a constant 0.5 per-timestep target under both
    supported loss types ('mse' and 'ce') and assert the prediction
    converges to 0.5 on a held-out sample.
    """
    config = TestConfig()
    config.hidden_layer_sizes = [32]
    config.value_dim = 1
    config.learning_rate = 5e-3

    # simple dataset: every timestep's target is the constant 0.5
    n_samples = 10
    n_timesteps = 4
    input_dim = 1
    x = np.random.rand(n_samples, n_timesteps, input_dim)
    y = np.ones((n_samples, n_timesteps)) * 0.5

    for loss_type in ['mse', 'ce']:
        tf.reset_default_graph()
        with tf.Session() as session:
            predictor = model.LSTMPredictor((input_dim, ), config)
            target_placeholder = tf.placeholder(tf.float32, [None, 1],
                                                'target')
            if loss_type == 'mse':
                loss = tf.reduce_sum(
                    (predictor.vf - target_placeholder)**2)
            elif loss_type == 'ce':
                # manual sigmoid cross entropy over the network logits
                yt = target_placeholder
                yp = predictor.vf
                loss = tf.reduce_sum(
                    (yt * -tf.log(tf.nn.sigmoid(yp)) +
                     (1 - yt) * -tf.log(1 - tf.nn.sigmoid(yp))))
            opt = tf.train.AdamOptimizer(config.learning_rate)
            train_op = opt.minimize(loss)
            session.run(tf.global_variables_initializer())

            def run_sample(p, x, y, state_in, train=True):
                # Run a single sequence through the network, optionally
                # applying one gradient step; returns the loss value.
                feed_dict = {
                    p.x: x,
                    target_placeholder: y,
                    p.dropout_keep_prob_ph: 1.,
                    p.state_in[0]: state_in[0],
                    p.state_in[1]: state_in[1],
                }
                outputs_list = [loss]
                if train:
                    outputs_list += [train_op]
                fetched = session.run(outputs_list, feed_dict=feed_dict)
                if train:
                    val_loss, _ = fetched
                else:
                    val_loss = fetched[0]
                return val_loss

            n_epochs = 100
            n_train = int(n_samples * .8)
            n_val = n_samples - n_train
            verbose = False
            for epoch in range(n_epochs):
                # train on the first n_train samples
                train_loss_mean = 0
                for sidx in range(n_train):
                    train_loss = run_sample(predictor, x[sidx, :, :],
                                            y[sidx].reshape(-1, 1),
                                            predictor.state_init)
                    train_loss_mean += train_loss / n_train
                # validate on the held-out samples
                # bug fix: previously iterated range(n_val), which re-used
                # the first n_val *training* samples as the validation set
                val_loss_mean = 0
                for sidx in range(n_train, n_samples):
                    val_loss = run_sample(predictor, x[sidx, :, :],
                                          y[sidx].reshape(-1, 1),
                                          predictor.state_init,
                                          train=False)
                    val_loss_mean += val_loss / n_val
                    value = predictor.value(x[sidx, :, :],
                                            predictor.state_init[0],
                                            predictor.state_init[1],
                                            sequence=True)
                    if loss_type == 'ce':
                        # under 'ce' the network emits logits; squash to a
                        # probability before comparing with the .5 target
                        value = 1 / (1 + np.exp(-value))
                # report
                if verbose:
                    print('epoch: {} / {}\ttrain loss: {}\tval loss: {}'.
                          format(epoch, n_epochs, train_loss_mean,
                                 val_loss_mean))
            self.assertTrue(np.abs(value - .5) < 1e-2)
def test_full_sequence_prediction(self):
    """Train LSTMPredictor to output the sum of each input sequence at the
    final timestep; assert train and held-out losses converge below 1e-2.
    """
    config = TestConfig()
    config.hidden_layer_sizes = [32, 32]
    config.value_dim = 1
    config.learning_rate = 1e-3

    # simple dataset, learn to output the sum
    n_samples = 100
    n_timesteps = 4
    input_dim = 1
    x = np.random.rand(n_samples, n_timesteps, input_dim)
    y = np.sum(x, axis=(1, 2)).reshape(-1, 1)

    with tf.Session() as session:
        predictor = model.LSTMPredictor((input_dim, ), config)
        target_placeholder = tf.placeholder(tf.float32, [None, 1],
                                            'target')
        # only the final timestep's prediction is penalized
        loss = tf.reduce_sum((predictor.vf[-1] - target_placeholder)**2)
        opt = tf.train.AdamOptimizer(config.learning_rate)
        train_op = opt.minimize(loss)
        session.run(tf.global_variables_initializer())

        def run_sample(p, x, y, state_in, train=True):
            # Run one sequence; returns loss (plus the final-step value
            # prediction when train=False).
            feed_dict = {
                p.x: x,
                target_placeholder: y,
                p.dropout_keep_prob_ph: 1.,
                p.state_in[0]: state_in[0],
                p.state_in[1]: state_in[1],
            }
            outputs_list = [loss]
            if train:
                outputs_list += [train_op]
            else:
                outputs_list += [p.vf[-1]]
            fetched = session.run(outputs_list, feed_dict=feed_dict)
            if train:
                val_loss, _ = fetched
                return val_loss
            else:
                val_loss, val_vf = fetched
                return val_loss, val_vf

        n_epochs = 10
        n_train = int(n_samples * .8)
        n_val = n_samples - n_train
        verbose = False
        for epoch in range(n_epochs):
            # train
            train_loss_mean = 0
            for sidx in range(n_train):
                train_loss = run_sample(predictor, x[sidx, :, :],
                                        y[sidx].reshape(1, -1),
                                        predictor.state_init)
                train_loss_mean += train_loss / n_train
            # val
            # bug fix: previously iterated range(n_val), which evaluated on
            # the first n_val *training* samples instead of held-out ones
            val_loss_mean = 0
            for sidx in range(n_train, n_samples):
                val_loss, val_vf = run_sample(predictor, x[sidx, :, :],
                                              y[sidx].reshape(1, -1),
                                              predictor.state_init,
                                              train=False)
                val_loss_mean += val_loss / n_val
            # report
            if verbose:
                print(
                    'epoch: {} / {}\ttrain loss: {}\tval loss: {}'.format(
                        epoch, n_epochs, train_loss_mean, val_loss_mean))
    self.assertTrue(train_loss_mean < 1e-2)
    self.assertTrue(val_loss_mean < 1e-2)
def test_heuristic_deterministic_case(self):
    """Train on the Bayes-net risk env against a precomputed validation
    dataset, validating every `validate_every` global steps.

    NOTE(review): depends on absolute, machine-specific dataset paths, so
    it only runs where those files exist.
    """
    config = TestConfig()
    config.n_global_steps = 50000
    config.max_timesteps = 50
    config.env_id = 'BayesNetRiskEnv-v0'
    config.discount = 1.  # 49. / 50
    config.value_dim = 5
    config.adam_beta1 = .9
    config.local_steps_per_update = 100
    config.hidden_layer_sizes = [128]
    config.learning_rate = 1e-3
    config.learning_rate_end = 5e-6
    config.loss_type = 'mse'
    config.target_loss_index = 1
    config.validation_dataset_filepath = '/Users/wulfebw/Dropbox/School/Stanford/research/risk/risk_prediction/data/experiments/heuristic_determinstic_1_lane_5_sec/data/subselect_proposal_prediction_data.h5'
    config.max_validation_samples = 1
    config.validate_every = 1000
    config.visualize_every = 10000
    config.summarize_features = True
    validation.transfer_dataset_settings_to_config(
        config.validation_dataset_filepath, config)
    config.base_bn_filepath = '/Users/wulfebw/Dropbox/School/Stanford/research/risk/risk_prediction/data/experiments/heuristic_determinstic_1_lane_5_sec/data/base_bn_filepath.h5'
    config.base_prop_filepath = '/Users/wulfebw/Dropbox/School/Stanford/research/risk/risk_prediction/data/experiments/heuristic_determinstic_1_lane_5_sec/data/prop_bn_filepath.h5'
    # NOTE(review): overrides the max_validation_samples = 1 set above —
    # confirm which value is intended
    config.max_validation_samples = 1000
    # config.roadway_radius = 400.
    # config.roadway_length = 100.
    # config.lon_accel_std_dev = 0.
    # config.lat_accel_std_dev = 0.
    # config.overall_response_time = .2
    # config.lon_response_time = .0
    # config.err_p_a_to_i = .15
    # config.err_p_i_to_a = .3
    # config.max_num_vehicles = 50
    # config.min_num_vehicles = 50
    # config.hard_brake_threshold = -3.
    # config.hard_brake_n_past_frames = 2
    # config.min_base_speed = 30.
    # config.max_base_speed = 30.
    # config.min_vehicle_length = 5.
    # config.max_vehicle_length = 5.
    # config.min_vehicle_width = 2.5
    # config.max_vehicle_width = 2.5
    # config.min_init_dist = 10.
    # config.heuristic_behavior_type = "normal"

    # build env
    env = build_envs.create_env(config)
    dataset = validation.build_dataset(config, env)
    print('mean validation targets: {}'.format(np.mean(dataset.y, axis=0)))

    # run it
    summary_writer = tf.summary.FileWriter('/tmp/test')
    avg_loss = -1
    last_global_step_val = 0
    with tf.Session() as sess:
        trainer = async_td.AsyncTD(env, 0, config)
        sess.run(tf.global_variables_initializer())
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        while global_step < config.n_global_steps:
            trainer.process(sess)
            if (global_step - last_global_step_val) > config.validate_every:
                avg_loss = trainer.validate(sess, dataset)
                last_global_step_val = global_step
            global_step = sess.run(trainer.global_step)
def test_prediction_across_target_variance(val_x,
                                           val_y,
                                           sigma=0,
                                           n_mc=1,
                                           hidden_layer_sizes=None,
                                           n_epochs=50,
                                           n_samples=100,
                                           input_dim=1,
                                           n_timesteps=2):
    """Train an LSTMPredictor on data generated with target noise `sigma`
    and track validation loss per epoch.

    Args:
        val_x: validation inputs, indexed as val_x[sample, timestep, dim]
        val_y: validation targets, one row per sample
        sigma: target noise level forwarded to generate_data
        n_mc: monte-carlo sample count forwarded to generate_data
        hidden_layer_sizes: network layer sizes; defaults to [32, 32]
        n_epochs: number of training epochs
        n_samples: number of generated training samples
        input_dim: observation dimensionality
        n_timesteps: sequence length

    Returns:
        list of mean validation losses, one entry per epoch
    """
    # bug fix: the default was a mutable list ([32, 32]) shared across
    # calls; use a None sentinel instead
    if hidden_layer_sizes is None:
        hidden_layer_sizes = [32, 32]
    config = TestConfig()
    config.hidden_layer_sizes = hidden_layer_sizes
    config.value_dim = 1
    config.learning_rate = 1e-3
    config.n_epochs = n_epochs

    # simple dataset
    x, y = generate_data(sigma, n_mc, n_samples, n_timesteps, input_dim)
    val_losses = []
    with tf.Session() as session:
        predictor = model.LSTMPredictor((input_dim, ), config)
        target_placeholder = tf.placeholder(tf.float32, [None, 1],
                                            'target')
        # only the final timestep's prediction is penalized
        loss = tf.reduce_sum((predictor.vf[-1] - target_placeholder)**2)
        opt = tf.train.AdamOptimizer(config.learning_rate)
        train_op = opt.minimize(loss)
        session.run(tf.global_variables_initializer())

        def run_sample(p, x, y, state_in, train=True):
            # Run one sequence; returns loss (plus the final-step value
            # prediction when train=False).
            feed_dict = {
                p.x: x,
                target_placeholder: y,
                p.dropout_keep_prob_ph: 1.,
                p.state_in[0]: state_in[0],
                p.state_in[1]: state_in[1],
            }
            outputs_list = [loss]
            if train:
                outputs_list += [train_op]
            else:
                outputs_list += [p.vf[-1]]
            fetched = session.run(outputs_list, feed_dict=feed_dict)
            if train:
                val_loss, _ = fetched
                return val_loss
            else:
                val_loss, val_vf = fetched
                return val_loss, val_vf

        n_val = len(val_x)
        for epoch in range(n_epochs):
            # train
            train_loss_mean = 0
            for sidx in range(n_samples):
                train_loss = run_sample(predictor, x[sidx, :, :],
                                        y[sidx].reshape(1, -1),
                                        predictor.state_init)
                train_loss_mean += train_loss / n_samples
            # val (on the caller-supplied held-out set)
            val_loss_mean = 0
            for sidx in range(n_val):
                val_loss, val_vf = run_sample(predictor,
                                              val_x[sidx, :, :],
                                              val_y[sidx].reshape(1, -1),
                                              predictor.state_init,
                                              train=False)
                val_loss_mean += val_loss / n_val
            # report, track
            val_losses.append(val_loss_mean)
            print('epoch: {} / {}\ttrain loss: {}\tval loss: {}'.format(
                epoch, n_epochs, train_loss_mean, val_loss_mean))
    return val_losses