示例#1
0
 def test_prediction_long_term_hard_brake(self):
     config = TestConfig()
     config.n_global_steps = 20000
     config.env_id = 'HeuristicRiskEnv-v0'
     config.discount = 0.  # 599. / 600
     config.max_timesteps = 10000
     config.prime_timesteps = 50
     config.learning_rate = 1e-3
     config.adam_beta1 = .995
     config.adam_beta2 = .999
     config.dropout_keep_prob = 1.
     config.l2_reg = 0.
     config.local_steps_per_update = 20
     config.hidden_layer_sizes = [32, 16]
     config.hard_brake_threshold = -3.
     config.hard_brake_n_past_frames = 1
     config.target_loss_index = 3
     config.loss_type = 'mse'
     env = build_envs.create_env(config)
     test_state = env.reset()
     summary_writer = tf.summary.FileWriter('/tmp/test')
     with tf.Session() as sess:
         trainer = async_td.AsyncTD(env, 0, config)
         sess.run(tf.global_variables_initializer())
         sess.run(trainer.sync)
         trainer.start(sess, summary_writer)
         global_step = sess.run(trainer.global_step)
         c, h = trainer.network.get_initial_features()
         while global_step < config.n_global_steps:
             trainer.process(sess)
             global_step = sess.run(trainer.global_step)
             value = trainer.network.value(test_state, c, h)
             print(value)
示例#2
0
    def test_validate_const_reward_discounted_env(self):
        # config
        config = TestConfig()
        config.n_global_steps = 50000
        config.env_id = 'RandObsConstRewardEnv-v0'
        config.discount = .9
        config.value_dim = 2
        config.adam_beta1 = .9
        config.local_steps_per_update = 1000
        config.hidden_layer_sizes = [256]
        config.learning_rate = 1e-3
        config.learning_rate_end = 1e-5
        config.loss_type = 'mse'
        config.target_loss_index = None

        # build env
        const_reward = .01
        horizon = 10000000
        rand_obs = False
        env = debug_envs.RandObsConstRewardEnv(horizon=horizon,
                                               reward=const_reward,
                                               value_dim=config.value_dim,
                                               rand_obs=rand_obs)
        env.spec = gym.envs.registration.EnvSpec(
            id='RandObsConstRewardEnv-v0',
            tags={'wrapper_config.TimeLimit.max_episode_steps': horizon + 1})

        n_samples = 2
        n_timesteps = 10  # predict after seeing this many timesteps
        n_prediction_timesteps = 10  # determines discount
        input_dim = 1
        obs_gen = np.random.randn if rand_obs else np.ones
        x = obs_gen(np.prod((n_samples, n_timesteps, input_dim))).reshape(
            (n_samples, n_timesteps, input_dim))
        y = (const_reward * np.ones(
            (n_samples, config.value_dim)) * n_prediction_timesteps)
        w = np.ones((n_samples, 1))
        dataset = validation.Dataset(x, y, w)

        # run it
        summary_writer = tf.summary.FileWriter('/tmp/test')
        avg_loss = -1
        with tf.Session() as sess:
            trainer = async_td.AsyncTD(env, 0, config)
            sess.run(tf.global_variables_initializer())
            sess.run(trainer.sync)
            trainer.start(sess, summary_writer)
            global_step = sess.run(trainer.global_step)
            while global_step < config.n_global_steps:
                trainer.process(sess)
                if global_step % 10 == 0:
                    avg_loss = trainer.validate(sess, dataset)
                global_step = sess.run(trainer.global_step)
示例#3
0
    def test_validate(self):
        # config
        config = TestConfig()
        config.n_global_steps = 5000
        config.env_id = 'SeqSumDebugEnv-v0'
        config.discount = 1.
        config.value_dim = 1
        config.adam_beta1 = .99
        config.local_steps_per_update = 100000
        config.hidden_layer_sizes = [128]
        config.learning_rate = 5e-4
        config.learning_rate_end = 5e-6
        config.loss_type = 'mse'
        config.target_loss_index = None

        # build env
        env = gym.make(config.env_id)

        # build validation set
        # in this case just sequences of either 1s or 0s (const per sequence)
        # e.g.,
        # horizon = 4, and seeing 1s: [1 1 1 1]
        # then after seeing [1 1], should predict a value from this point of 2
        # because that is the amount of reward expect to accrue in the future
        horizon = 4
        n_samples = 2
        n_timesteps = 2  # predict after seeing this many timesteps
        input_dim = 1
        # half ones and half neg ones
        x = np.ones((n_samples, n_timesteps, input_dim))
        x[int(n_samples / 2):] = -1
        # expect value to be how many timesteps have left * -1 or 1
        y = x[:, 0, :] * (horizon - n_timesteps + 1)
        w = np.ones(n_samples)
        dataset = validation.Dataset(x, y, w)

        # run it
        summary_writer = tf.summary.FileWriter('/tmp/test')
        avg_loss = -1
        with tf.Session() as sess:
            trainer = async_td.AsyncTD(env, 0, config)
            sess.run(tf.global_variables_initializer())
            sess.run(trainer.sync)
            trainer.start(sess, summary_writer)
            global_step = sess.run(trainer.global_step)
            while global_step < config.n_global_steps:
                trainer.process(sess)
                if global_step % 10 == 0:
                    avg_loss = trainer.validate(sess, dataset)
                global_step = sess.run(trainer.global_step)

        self.assertTrue(avg_loss < .1)
    def test_model_losses(self):
        config = TestConfig()
        config.hidden_layer_sizes = [32]
        config.value_dim = 1
        config.learning_rate = 5e-3

        # simple dataset, learn to output the sum
        n_samples = 10
        n_timesteps = 4
        input_dim = 1
        x = np.random.rand(n_samples, n_timesteps, input_dim)
        y = np.ones((n_samples, n_timesteps)) * 0.5

        for loss_type in ['mse', 'ce']:
            tf.reset_default_graph()
            with tf.Session() as session:
                predictor = model.LSTMPredictor((input_dim, ), config)
                target_placeholder = tf.placeholder(tf.float32, [None, 1],
                                                    'target')
                if loss_type == 'mse':
                    loss = tf.reduce_sum(
                        (predictor.vf - target_placeholder)**2)
                elif loss_type == 'ce':
                    yt = target_placeholder
                    yp = predictor.vf
                    loss = tf.reduce_sum(
                        (yt * -tf.log(tf.nn.sigmoid(yp)) +
                         (1 - yt) * -tf.log(1 - tf.nn.sigmoid(yp))))
                opt = tf.train.AdamOptimizer(config.learning_rate)
                train_op = opt.minimize(loss)
                session.run(tf.global_variables_initializer())

                def run_sample(p, x, y, state_in, train=True):
                    feed_dict = {
                        p.x: x,
                        target_placeholder: y,
                        p.dropout_keep_prob_ph: 1.,
                        p.state_in[0]: state_in[0],
                        p.state_in[1]: state_in[1],
                    }
                    outputs_list = [loss]
                    if train:
                        outputs_list += [train_op]
                    fetched = session.run(outputs_list, feed_dict=feed_dict)

                    if train:
                        val_loss, _ = fetched
                    else:
                        val_loss = fetched[0]
                    return val_loss

                n_epochs = 100
                n_train = int(n_samples * .8)
                n_val = n_samples - n_train
                verbose = False
                for epoch in range(n_epochs):

                    # train
                    train_loss_mean = 0
                    for sidx in range(n_train):
                        train_loss = run_sample(predictor, x[sidx, :, :],
                                                y[sidx].reshape(-1, 1),
                                                predictor.state_init)
                        train_loss_mean += train_loss / n_train

                    # val
                    val_loss_mean = 0
                    for sidx in range(n_val):
                        val_loss = run_sample(predictor,
                                              x[sidx, :, :],
                                              y[sidx].reshape(-1, 1),
                                              predictor.state_init,
                                              train=False)
                        val_loss_mean += val_loss / n_val

                        value = predictor.value(x[sidx, :, :],
                                                predictor.state_init[0],
                                                predictor.state_init[1],
                                                sequence=True)
                        if loss_type == 'ce':
                            value = 1 / (1 + np.exp(-value))
                        # print('x: {}\ny: {}\ny_pred: {}\n'.format(
                        #     x[sidx,:,:], y[sidx].reshape(-1,1), value))
                        # input()

                    # report
                    if verbose:
                        print('epoch: {} / {}\ttrain loss: {}\tval loss: {}'.
                              format(epoch, n_epochs, train_loss_mean,
                                     val_loss_mean))

                self.assertTrue(np.abs(value - .5) < 1e-2)
    def test_full_sequence_prediction(self):
        config = TestConfig()
        config.hidden_layer_sizes = [32, 32]
        config.value_dim = 1
        config.learning_rate = 1e-3

        # simple dataset, learn to output the sum
        n_samples = 100
        n_timesteps = 4
        input_dim = 1
        x = np.random.rand(n_samples, n_timesteps, input_dim)
        y = np.sum(x, axis=(1, 2)).reshape(-1, 1)

        with tf.Session() as session:
            predictor = model.LSTMPredictor((input_dim, ), config)
            target_placeholder = tf.placeholder(tf.float32, [None, 1],
                                                'target')
            loss = tf.reduce_sum((predictor.vf[-1] - target_placeholder)**2)
            opt = tf.train.AdamOptimizer(config.learning_rate)
            train_op = opt.minimize(loss)
            session.run(tf.global_variables_initializer())

            def run_sample(p, x, y, state_in, train=True):
                feed_dict = {
                    p.x: x,
                    target_placeholder: y,
                    p.dropout_keep_prob_ph: 1.,
                    p.state_in[0]: state_in[0],
                    p.state_in[1]: state_in[1],
                }
                outputs_list = [loss]
                if train:
                    outputs_list += [train_op]
                else:
                    outputs_list += [p.vf[-1]]
                fetched = session.run(outputs_list, feed_dict=feed_dict)

                if train:
                    val_loss, _ = fetched
                    return val_loss
                else:
                    val_loss, val_vf = fetched
                    return val_loss, val_vf

            n_epochs = 10
            n_train = int(n_samples * .8)
            n_val = n_samples - n_train
            verbose = False
            for epoch in range(n_epochs):

                # train
                train_loss_mean = 0
                for sidx in range(n_train):
                    train_loss = run_sample(predictor, x[sidx, :, :],
                                            y[sidx].reshape(1, -1),
                                            predictor.state_init)
                    train_loss_mean += train_loss / n_train

                # val
                val_loss_mean = 0
                for sidx in range(n_val):
                    val_loss, val_vf = run_sample(predictor,
                                                  x[sidx, :, :],
                                                  y[sidx].reshape(1, -1),
                                                  predictor.state_init,
                                                  train=False)
                    val_loss_mean += val_loss / n_val
                    # print('x: {}\ny: {}\ny_pred: {}'.format(
                    #     x[sidx,:,:], y[sidx].reshape(1,-1), val_vf))
                    # input()

                # report
                if verbose:
                    print(
                        'epoch: {} / {}\ttrain loss: {}\tval loss: {}'.format(
                            epoch, n_epochs, train_loss_mean, val_loss_mean))

            self.assertTrue(train_loss_mean < 1e-2)
            self.assertTrue(val_loss_mean < 1e-2)
示例#6
0
    def test_heuristic_deterministic_case(self):

        config = TestConfig()
        config.n_global_steps = 50000
        config.max_timesteps = 50
        config.env_id = 'BayesNetRiskEnv-v0'
        config.discount = 1.  # 49. / 50
        config.value_dim = 5
        config.adam_beta1 = .9
        config.local_steps_per_update = 100
        config.hidden_layer_sizes = [128]
        config.learning_rate = 1e-3
        config.learning_rate_end = 5e-6
        config.loss_type = 'mse'
        config.target_loss_index = 1
        config.validation_dataset_filepath = '/Users/wulfebw/Dropbox/School/Stanford/research/risk/risk_prediction/data/experiments/heuristic_determinstic_1_lane_5_sec/data/subselect_proposal_prediction_data.h5'
        config.max_validation_samples = 1
        config.validate_every = 1000
        config.visualize_every = 10000
        config.summarize_features = True

        validation.transfer_dataset_settings_to_config(
            config.validation_dataset_filepath, config)

        config.base_bn_filepath = '/Users/wulfebw/Dropbox/School/Stanford/research/risk/risk_prediction/data/experiments/heuristic_determinstic_1_lane_5_sec/data/base_bn_filepath.h5'
        config.base_prop_filepath = '/Users/wulfebw/Dropbox/School/Stanford/research/risk/risk_prediction/data/experiments/heuristic_determinstic_1_lane_5_sec/data/prop_bn_filepath.h5'
        config.max_validation_samples = 1000

        # config.roadway_radius = 400.
        # config.roadway_length = 100.
        # config.lon_accel_std_dev = 0.
        # config.lat_accel_std_dev = 0.
        # config.overall_response_time = .2
        # config.lon_response_time = .0
        # config.err_p_a_to_i = .15
        # config.err_p_i_to_a = .3
        # config.max_num_vehicles = 50
        # config.min_num_vehicles = 50
        # config.hard_brake_threshold = -3.
        # config.hard_brake_n_past_frames = 2
        # config.min_base_speed = 30.
        # config.max_base_speed = 30.
        # config.min_vehicle_length = 5.
        # config.max_vehicle_length = 5.
        # config.min_vehicle_width = 2.5
        # config.max_vehicle_width = 2.5
        # config.min_init_dist = 10.
        # config.heuristic_behavior_type = "normal"

        # build env
        env = build_envs.create_env(config)
        dataset = validation.build_dataset(config, env)
        print('mean validation targets: {}'.format(np.mean(dataset.y, axis=0)))

        # run it
        summary_writer = tf.summary.FileWriter('/tmp/test')
        avg_loss = -1
        last_global_step_val = 0
        with tf.Session() as sess:
            trainer = async_td.AsyncTD(env, 0, config)
            sess.run(tf.global_variables_initializer())
            sess.run(trainer.sync)
            trainer.start(sess, summary_writer)
            global_step = sess.run(trainer.global_step)
            while global_step < config.n_global_steps:
                trainer.process(sess)
                if (global_step -
                        last_global_step_val) > config.validate_every:
                    avg_loss = trainer.validate(sess, dataset)
                    last_global_step_val = global_step
                global_step = sess.run(trainer.global_step)
示例#7
0
def test_prediction_across_target_variance(val_x,
                                           val_y,
                                           sigma=0,
                                           n_mc=1,
                                           hidden_layer_sizes=[32, 32],
                                           n_epochs=50,
                                           n_samples=100,
                                           input_dim=1,
                                           n_timesteps=2):
    config = TestConfig()
    config.hidden_layer_sizes = hidden_layer_sizes
    config.value_dim = 1
    config.learning_rate = 1e-3
    config.n_epochs = n_epochs

    # simple dataset
    x, y = generate_data(sigma, n_mc, n_samples, n_timesteps, input_dim)
    val_losses = []
    with tf.Session() as session:
        predictor = model.LSTMPredictor((input_dim, ), config)
        target_placeholder = tf.placeholder(tf.float32, [None, 1], 'target')
        loss = tf.reduce_sum((predictor.vf[-1] - target_placeholder)**2)
        opt = tf.train.AdamOptimizer(config.learning_rate)
        train_op = opt.minimize(loss)
        session.run(tf.global_variables_initializer())

        def run_sample(p, x, y, state_in, train=True):
            feed_dict = {
                p.x: x,
                target_placeholder: y,
                p.dropout_keep_prob_ph: 1.,
                p.state_in[0]: state_in[0],
                p.state_in[1]: state_in[1],
            }
            outputs_list = [loss]
            if train:
                outputs_list += [train_op]
            else:
                outputs_list += [p.vf[-1]]
            fetched = session.run(outputs_list, feed_dict=feed_dict)

            if train:
                val_loss, _ = fetched
                return val_loss
            else:
                val_loss, val_vf = fetched
                return val_loss, val_vf

        n_val = len(val_x)
        for epoch in range(n_epochs):

            # train
            train_loss_mean = 0
            for sidx in range(n_samples):
                train_loss = run_sample(predictor, x[sidx, :, :],
                                        y[sidx].reshape(1, -1),
                                        predictor.state_init)
                train_loss_mean += train_loss / n_samples

            # val
            val_loss_mean = 0
            for sidx in range(n_val):
                val_loss, val_vf = run_sample(predictor,
                                              val_x[sidx, :, :],
                                              val_y[sidx].reshape(1, -1),
                                              predictor.state_init,
                                              train=False)
                val_loss_mean += val_loss / n_val
                # print('x: {}\ny: {}\ny_pred: {}'.format(
                #     x[sidx,:,:], y[sidx].reshape(1,-1), val_vf))
                # input()

            # report, track
            val_losses.append(val_loss_mean)
            print('epoch: {} / {}\ttrain loss: {}\tval loss: {}'.format(
                epoch, n_epochs, train_loss_mean, val_loss_mean))

    return val_losses