import numpy as np
import tensorflow as tf


def plot_forward(_encoder, _inverse_model, _forward_model, env, policy, sess):
    from railrl.misc.pyhelper_fns.vis_utils import MyAnimationMulti
    vis_tool = MyAnimationMulti(None,
                                numPlots=2,
                                isIm=[1, 0],
                                axTitles=["", "Forward loss"])
    # Rebuild models
    act_space = env.action_space
    obs_space = env.observation_space

    s1_ph = tf.placeholder(tf.float32, [None] + list(obs_space.shape))
    s2_ph = tf.placeholder(tf.float32, [None] + list(obs_space.shape))
    a_ph = tf.placeholder(tf.float32, [None, act_space.flat_dim])

    encoder1 = _encoder.get_weight_tied_copy(observation_input=s1_ph)
    encoder2 = _encoder.get_weight_tied_copy(observation_input=s2_ph)
    inverse_model = _inverse_model.get_weight_tied_copy(
        feature_input1=encoder1.output, feature_input2=encoder2.output)
    forward_model = _forward_model.get_weight_tied_copy(
        feature_input=encoder1.output, action_input=a_ph)

    # Build the forward-model loss op once; creating new ops inside the
    # helper on every call would keep growing the TF graph.
    forward_loss_op = tf.reduce_mean(
        tf.square(encoder2.output - forward_model.output), axis=1)

    def get_forward_loss(obs, next_obs, actions):
        return sess.run(forward_loss_op,
                        feed_dict={
                            s1_ph: obs,
                            s2_ph: next_obs,
                            a_ph: actions
                        })

    # Roll out the policy and plot the per-step forward-model loss
    while True:
        ob = env.reset()
        next_ob = None
        x = []
        y = []
        for t in range(
                env.wrapped_env._wrapped_env.env.spec.max_episode_steps):
            print()
            action, _ = policy.get_action(ob)
            next_ob, reward, done, env_infos = env.step(action)
            # import pdb; pdb.set_trace()
            image = env._wrapped_env._wrapped_env.env.env.render(
                mode='rgb_array')
            forward_loss = get_forward_loss([ob], [next_ob], [action])
            if done:
                ob = env.reset()
            else:
                ob = next_ob

            x.append(t)
            y.append(forward_loss)
            vis_tool._display([image, y])
        encoder = data['encoder']
        inverse_model = data['inverse_model']
        forward_model = data['forward_model']
        if args.shooting:
            ForwardModelPlanner = CEMPlanner
        controller = ForwardModelPlanner(forward_model,
                                         encoder,
                                         env,
                                         sess=sess,
                                         pos_only=True)
        env.reset()
        success_rate = []
        store_first_test_data = True

        if args.vis:
            vis_tool = MyAnimationMulti(
                None, height=400, width=400, numPlots=5, isIm=[1, 1, 1, 0, 0],
                axTitles=["S_init", "S_goal", "S_current",
                          "arm_l2(real_steps:{}) red real, green predicted",
                          "box_l2, red real, green predicted"])
            planner_vis = MyAnimationMulti(None, numPlots=15, isIm=np.ones(15))
        for j in range(args.num_test):
            #choose the index first
            # index_list = np.random.randint(0,399, size = args.pairs)
            # index_list = [12,34,25]
            index_list = range(4, 20)
            succeed_case = 0
            counter = 1
            # import pdb; pdb.set_trace()
            for index in index_list:
                print("start test{}, traj{}".format(j + 1, counter))
                now = time.time()
                S_init_state = S_init_state_list[index]
                S_init_pixel = S_init_pixel_list[index]
                S_goal_state = S_goal_state_list[index]
Example #3
            arm_l2_dis_list = data["arm_l2_dis_list"]
            final_distance = arm_l2_dis_list[-1]
            if args.success_case and (final_distance < 0.05):
                #video

                S_init_pixel = data['S_init_pixel']
                S_goal_pixel = data['S_goal_pixel']
                S_current_im_list = data['S_current_im_list']
                arm_predicted_l2_dis_list = data['arm_predicted_l2_dis_list']
                box_l2_dis_list = data['box_l2_dis_list']
                box_predicted_l2_dis_list = data['box_predicted_l2_dis_list']
                real_step = data['real_steps']
                print("#############real step is {}###############".format(
                    real_step))
                vis_tool = MyAnimationMulti(
                    None, height=200, width=200, numPlots=5, isIm=[1, 1, 1, 0, 0],
                    axTitles=["S_init", "S_goal", "S_current",
                              "arm_l2 red real, green predicted, real step:{}".format(real_step),
                              "box_l2, red real, green predicted"],
                    large_figure=True)
                arm_threshold = []
                box_threshold = []
                for j in range(len(S_current_im_list)):
                    arm_threshold.append(0.05)
                    box_threshold.append(0.01)
                    vis_tool._display([
                        S_init_pixel, S_goal_pixel, S_current_im_list[j],
                        [range(j + 1), arm_l2_dis_list[:j + 1], 'r',
                         range(j + 1), arm_predicted_l2_dis_list[:j + 1], 'g',
                         range(j + 1), arm_threshold, 'b'],
                        [range(j + 1), box_l2_dis_list[:j + 1], 'r',
                         range(j + 1), box_predicted_l2_dis_list[:j + 1], 'g',
                         range(j + 1), box_threshold, 'b']])
                vis_tool.__del__()
            elif args.failure_case and (final_distance > 0.05):
                #video
class FastClippedSgdShootingForwardModelPlanner(object):
    def __init__(
        self,
        dynamic_model,
        encoder,
        env,
        init_lr=0.5,
        sess=None,
        pos_only=False,
        max_length=15,
    ):
        self.env = env
        if sess is None:
            sess = tf.get_default_session()
        self.sess = sess
        self.init_lr = init_lr

        self.max_length = max_length
        self.action_ph = tf.placeholder(tf.float32, [max_length, 1, 4])
        self.forward_model_list = []
        # unroll the forward model for max_length steps using weight-tied copies
        self.S_init_ph = tf.placeholder(tf.float32,
                                        list(env.observation_space.shape))
        self.S_goal_ph = tf.placeholder(tf.float32,
                                        list(env.observation_space.shape))
        #only two feature encoders
        self.encoder1 = encoder.get_weight_tied_copy(
            observation_input=[self.S_init_ph])
        self.encoder2 = encoder.get_weight_tied_copy(
            observation_input=[self.S_goal_ph])
        forward_model = dynamic_model.get_weight_tied_copy(
            feature_input=self.encoder1.output, action_input=self.action_ph[0])
        self.forward_model_list.append(forward_model)
        self.forward_model_output_list = [forward_model.output]
        for i in range(1, max_length):
            forward_model = dynamic_model.get_weight_tied_copy(
                feature_input=forward_model.output,
                action_input=self.action_ph[i])
            self.forward_model_list.append(forward_model)
            self.forward_model_output_list.append(forward_model.output)

        ## objective
        self.objective_list = []
        self.arm_loss_list = []
        self.box_loss_list = []
        if pos_only:
            for forward_model in self.forward_model_list:
                self.objective_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0][:6] -
                                  self.encoder2.output[0][:6])))
                self.arm_loss_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0][:4] -
                                  self.encoder2.output[0][:4])))
                self.box_loss_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0][4:6] -
                                  self.encoder2.output[0][4:6])))
        else:
            for forward_model in self.forward_model_list:
                self.objective_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0] -
                                  self.encoder2.output[0])))

        self.action_grad_list = []
        for obj in self.objective_list:
            # tail entries of action_ph beyond this objective's horizon receive zero gradient
            self.action_grad_list.append(tf.gradients(obj, self.action_ph))
        self.vis_tool = MyAnimationMulti(
            None,
            numPlots=2,
            isIm=[0, 0],
            axTitles=['(S1-S_goal)^2', '(S_n-S_goal)^2'])
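
    # get_action below optimizes the whole action sequence by projected
    # gradient descent: it follows the gradient of objective_list[steps - 1]
    # w.r.t. action_ph, clips the gradient norm to 4 * steps, applies a
    # decayed learning-rate step, and clips actions back into [-1, 1].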

    def get_action(self, S_init, S_goal, steps=None, plot_loss=False):
        if steps is None:
            steps = 1  # greedy planner
        else:
            assert (steps <= self.max_length)
        action = np.zeros([self.max_length, 1, 4])
        action_grad = self.action_grad_list[steps - 1]
        # TODO: find a good stopping criterion
        now = time.time()
        S1_loss_list = []
        Sn_loss_list = []
        for i in range(0, 51):
            feed_dict = {
                self.S_init_ph: S_init,
                self.S_goal_ph: S_goal,
                self.action_ph: action
            }
            S1_loss, Sn_loss = self.sess.run(
                [self.box_loss_list[0], self.box_loss_list[steps - 1]],
                feed_dict=feed_dict)
            S1_loss_list.append(S1_loss)
            Sn_loss_list.append(Sn_loss)
            if plot_loss and i % 1 == 0:
                self.vis_tool._display([[range(i + 1), S1_loss_list],
                                        [range(i + 1), Sn_loss_list]])

            gradient = np.array(
                self.sess.run(action_grad, feed_dict=feed_dict)[0])
            if np.isnan(gradient).any():
                # NaN gradient: re-initialize the action sequence randomly in [-0.5, 0.5]
                action = np.random.rand(self.max_length, 1, 4) - 0.5
                print('nan gradient step{}'.format(i))
                import pdb
                pdb.set_trace()
            else:
                # clip the gradient norm to 4 per planned step
                if np.linalg.norm(gradient) > steps * 4:
                    gradient = gradient / np.linalg.norm(gradient) * 4 * steps
                # decayed gradient step, then keep actions in the valid range [-1, 1]
                action -= gradient / (1. + i * 0.05) * self.init_lr
                action = np.clip(action, -1, 1)
        arm_loss, box_loss, forward_models_outputs = self.sess.run(
            [self.arm_loss_list[0], self.box_loss_list[0],
             self.forward_model_output_list], feed_dict)
        return action[0][0], planner_info(arm_loss, box_loss,
                                          forward_models_outputs[:steps])
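

# --- Hedged usage sketch (not part of the original source) ------------------
# A minimal closed-loop driver for the shooting planner defined above. It
# assumes a pre-trained `forward_model` and `encoder` pair, an active TF
# session `sess`, a gym-style `env` whose observations match S_init/S_goal,
# and a goal observation `S_goal` obtained elsewhere; these names, as well as
# `horizon` and `max_env_steps`, are illustrative assumptions.
def run_shooting_planner(forward_model, encoder, env, sess, S_goal,
                         horizon=10, max_env_steps=50):
    planner = FastClippedSgdShootingForwardModelPlanner(
        forward_model, encoder, env, sess=sess, pos_only=True)
    ob = env.reset()
    for t in range(max_env_steps):
        # re-plan from the current observation toward the goal at every step (MPC-style)
        action, info = planner.get_action(ob, S_goal, steps=horizon)
        ob, reward, done, env_info = env.step(action)
        if done:
            break
    return ob
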
class FastClippedSgdShootingForwardModelPlanner_cumulated_obj(object):
    def __init__(
        self,
        dynamic_model,
        encoder,
        env,
        init_lr=0.5,
        sess=None,
        pos_only=False,
        max_length=15,
    ):
        if sess is None:
            sess = tf.get_default_session()
        self.sess = sess
        self.init_lr = init_lr

        self.max_length = max_length
        self.action_ph = tf.placeholder(tf.float32, [max_length, 1, 4])
        self.forward_model_list = []
        # unroll the forward model for max_length steps using weight-tied copies
        self.S_init_ph = tf.placeholder(tf.float32,
                                        list(env.observation_space.shape))
        self.S_goal_ph = tf.placeholder(tf.float32,
                                        list(env.observation_space.shape))
        #only two feature encoders
        self.encoder1 = encoder.get_weight_tied_copy(
            observation_input=[self.S_init_ph])
        self.encoder2 = encoder.get_weight_tied_copy(
            observation_input=[self.S_goal_ph])
        forward_model = dynamic_model.get_weight_tied_copy(
            feature_input=self.encoder1.output, action_input=self.action_ph[0])
        self.forward_model_list.append(forward_model)
        for i in range(1, max_length):
            forward_model = dynamic_model.get_weight_tied_copy(
                feature_input=forward_model.output,
                action_input=self.action_ph[i])
            self.forward_model_list.append(forward_model)

        ## objective
        self.objective_list = []
        self.forward_model_loss_list = []
        self.arm_loss_list = []
        self.box_loss_list = []
        objective = 0
        factor = 1
        if pos_only:
            for forward_model in self.forward_model_list:
                factor = factor * 0.4
                self.forward_model_loss_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0][:6] -
                                  self.encoder2.output[0][:6])))

                objective += factor * tf.reduce_sum(
                    tf.square(forward_model.output[0][:6] -
                              self.encoder2.output[0][:6]))
                self.objective_list.append(objective)
                self.arm_loss_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0][:4] -
                                  self.encoder2.output[0][:4])))
                self.box_loss_list.append(
                    tf.reduce_sum(
                        tf.square(forward_model.output[0][4:6] -
                                  self.encoder2.output[0][4:6])))
        else:
            for forward_model in self.forward_model_list:
                objective += tf.reduce_sum(
                    tf.square(forward_model.output[0] -
                              self.encoder2.output[0]))
                self.objective_list.append(objective)

        self.action_grad_list = []
        for obj in self.objective_list:
            # tail entries of action_ph beyond this objective's horizon receive zero gradient
            self.action_grad_list.append(tf.gradients(obj, self.action_ph))
        self.vis_tool = MyAnimationMulti(
            None,
            numPlots=2,
            isIm=[0, 0],
            axTitles=['(S1-S_goal)^2', 'sum(S_i-S_goal)^2'])

    def get_action(self, S_init, S_goal, steps=None, plot_loss=False):
        if steps is None:
            steps = 1  # greedy planner
        else:
            assert (steps <= self.max_length)
        action = np.zeros([self.max_length, 1, 4])
        action_grad = self.action_grad_list[steps - 1]
        # TODO: find a good stopping criterion
        now = time.time()
        S1_loss_list = []
        Sn_loss_list = []
        for i in range(0, 101):
            feed_dict = {
                self.S_init_ph: S_init,
                self.S_goal_ph: S_goal,
                self.action_ph: action
            }
            S1_loss, Sn_loss = self.sess.run(
                [self.objective_list[0], self.objective_list[steps - 1]],
                feed_dict=feed_dict)
            S1_loss_list.append(S1_loss)
            Sn_loss_list.append(Sn_loss)
            if plot_loss and i % 20 == 0:
                self.vis_tool._display([[range(i + 1), S1_loss_list],
                                        [range(i + 1), Sn_loss_list]])

            gradient = np.array(
                self.sess.run(action_grad, feed_dict=feed_dict)[0])
            if np.isnan(gradient).any():
                action = np.random.rand(self.max_length, 1, 4) - 0.5
                print('nan gradient step{}'.format(i))
                import pdb
                pdb.set_trace()
            else:
                if np.linalg.norm(gradient) > steps * 4:
                    gradient = gradient / np.linalg.norm(gradient) * 4 * steps

                action -= gradient / 1.0 * self.init_lr
                action = np.clip(action, -1, 1)

            # if i %200 == 0:
            # 	print("#########Optimizing action#########")
            # 	action_loss, predicted_next_state = self.sess.run([self.objective_list[steps-1], self.forward_model_list[steps-1].output], feed_dict = feed_dict)
            # 	box_loss = np.sum(np.square(predicted_next_state[0][4:6] - S_goal[4:6]))
            # 	arm_loss = np.sum(np.square(predicted_next_state[0][0:4] - S_goal[0:4]))
            # 	print("action_loss(sum_square_error(S_goal, S_next)) is {}, box_loss is {}, arm_loss is {}".format(action_loss, box_loss, arm_loss))
            # 	print("current_action is {}".format(action[0][0]))
            # 	# print("current s_next is {}".format(self.sess.run(self.forward_model.output, feed_dict = feed_dict)))
            # 	print("{} sec elapsed for 50 gradient steps".format(time.time() - now))
            # 	now = time.time()
        return action[0][0], self.sess.run([
            self.arm_loss_list[0], self.box_loss_list[0],
            self.forward_model_list[0].output
        ], feed_dict)
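

# --- Hedged note on the cumulated objective (not part of the original source)
# In the *_cumulated_obj planner above (pos_only=True branch), objective_list[i]
# is a running, discounted sum over the first i+1 unrolled predictions rather
# than the loss of the final state alone: step k contributes
# 0.4 ** (k + 1) * ||f_k[:6] - goal[:6]||^2.  Below is a NumPy sketch of the
# same quantity, where `predicted_feats` (per-step feature vectors) and
# `goal_feat` (the encoded goal) are hypothetical names.
import numpy as np


def cumulated_objective(predicted_feats, goal_feat, discount=0.4):
    # discounted sum of squared feature distances over the planning horizon
    total = 0.0
    for k, feat in enumerate(predicted_feats):
        total += discount ** (k + 1) * np.sum(np.square(feat[:6] - goal_feat[:6]))
    return total
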
Example #7
        action='store_true',
        help='use the shooting method (prediction steps may need adjusting)')
    parser.add_argument('--visualize',
                        action='store_true',
                        help='use vis tool to visualize directly')

    args = parser.parse_args()
    env = gym.make(args.env_name)

    import tensorflow as tf
    from planner import ClippedSgdForwardModelPlanner, InverseModelPlanner, \
        ConstrainedForwardModelPlanner, SgdForwardModelPlanner,\
        FastClippedSgdForwardModelPlanner, FastClippedSgdShootingForwardModelPlanner
    ForwardModelPlanner = FastClippedSgdForwardModelPlanner
    if args.visualize:
        vis_tool = MyAnimationMulti(
            None, numPlots=4, isIm=[1, 1, 1, 0],
            axTitles=["S_init", "S_goal", "S_current",
                      "L2 distance S_goal S_current"])
    with open(args.test_set_path, 'rb') as handle:
        data = pickle.load(handle)
        S_init_state_list = data['S_init_state']
        S_init_pixel_list = data['S_init_pixel']
        S_goal_state_list = data['S_goal_state']
        S_goal_pixel_list = data['S_goal_pixel']
    with tf.Session() as sess:
        ##load dynamic model
        data = joblib.load(args.dynamic_model_path)
        encoder = data['encoder']
        inverse_model = data['inverse_model']
        forward_model = data['forward_model']
        if args.shooting:
            ForwardModelPlanner = FastClippedSgdShootingForwardModelPlanner
        controller = ForwardModelPlanner(forward_model,