Exemplo n.º 1
0
    def change_policy(self, condition, policy, new_ref_ja, new_ref_ee):
        """
        Re-initialize `policy` from a new reference and restore the leading
        timesteps of its previous gains and covariances.

        The policy is dumped (with zero noise) to 'old_policy.txt' before the
        change and to 'new_policy.txt' afterwards so the two can be compared.
        Args:
            condition: Not used by this method.
            policy: Policy object, modified in place. Its K, k, pol_covar,
                chol_pol_covar and inv_pol_covar arrays are read and rewritten.
            new_ref_ja: Forwarded to init_pd_ref.
            new_ref_ee: Forwarded to init_pd_ref.
        """
        # Keep handles on the current gains; __init__ below replaces them.
        prev_K = policy.K
        prev_k = policy.k
        # Previous number of timesteps, excluding the padding.
        unpadded_steps = prev_K.shape[0] - self.padding

        with open('old_policy.txt', 'w') as dump:
            zero_noise = np.zeros((prev_K.shape[0], self.dU))
            dump.write(str(policy_to_msg(policy, zero_noise)))

        prev_covar = policy.pol_covar
        prev_chol_covar = policy.chol_pol_covar
        prev_inv_covar = policy.inv_pol_covar

        # Never carry over more timesteps than the current horizon holds.
        keep = min(unpadded_steps, self.T)

        # Rebuild the policy around the new reference ...
        init_args = init_pd_ref(self._hyperparams['init_traj_distr'],
                                new_ref_ja, new_ref_ee)
        policy.__init__(*init_args)

        # ... then copy the previously learned values back over the first
        # `keep` timesteps.
        policy.K[:keep, :, :] = prev_K[:keep, :, :]
        policy.k[:keep, :] = prev_k[:keep, :]
        policy.pol_covar[:keep, :, :] = prev_covar[:keep, :, :]
        policy.chol_pol_covar[:keep, :, :] = prev_chol_covar[:keep, :, :]
        policy.inv_pol_covar[:keep, :, :] = prev_inv_covar[:keep, :, :]

        # Dump the resulting policy for inspection.
        with open('new_policy.txt', 'w') as dump:
            zero_noise = np.zeros((self.T, self.dU))
            dump.write(str(policy_to_msg(policy, zero_noise)))
Exemplo n.º 2
0
    def sample(self,
               policy,
               condition,
               reset=True,
               verbose=True,
               save=True,
               noisy=True):
        """
        Optionally reset, run one trial of a policy, and return the sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            reset: Whether or not to reset to the condition before the trial.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        if reset:
            self.reset(condition)

        # Either sampled noise or an all-zero block of the same shape.
        if not noisy:
            noise = np.zeros((self.T, self.dU))
        else:
            noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Fill in the trial request.
        hyperparams = self._hyperparams
        command = TrialCommand()
        command.id = self._get_next_seq_id()
        command.controller = policy_to_msg(policy, noise)
        command.T = self.T
        # A second sequence id is drawn; this later value is the one sent.
        command.id = self._get_next_seq_id()
        command.frequency = hyperparams['frequency']
        ee_offsets = hyperparams['end_effector_points']
        command.ee_points = ee_offsets.reshape(ee_offsets.size).tolist()
        command.ee_points_tgt = hyperparams['ee_points_tgt'][condition].tolist()
        # Observations use the same data types as the state.
        command.state_datatypes = hyperparams['state_include']
        command.obs_datatypes = hyperparams['state_include']

        if self.use_tf is not False:
            # Publish without waiting; run_trial_tf produces the result.
            self._trial_service.publish(command)
            reply = self.run_trial_tf(
                policy, time_to_run=hyperparams['trial_timeout'])
        else:
            reply = self._trial_service.publish_and_wait(
                command, timeout=hyperparams['trial_timeout'])

        result = msg_to_sample(reply, self)
        if save:
            self._samples[condition].append(result)
        return result
Exemplo n.º 3
0
def get_lin_gauss_test(T=50):
    dX = 14
    x0 = np.zeros(dX)
    x0[0] = 1.0
    lgpol = init_pd({'init_var': 0.01}, x0, 7, 7, dX, T)
    print 'T:', lgpol.T
    print 'dX:', lgpol.dX
    #Conver lgpol to message
    noise = np.zeros((T, 7))
    controller_params = policy_to_msg(lgpol, noise)
    return controller_params
def get_lin_gauss_test(T=50):
    dX = 14
    x0 = np.zeros(dX)
    x0[0] = 1.0
    lgpol = init_pd({'init_var': 0.01}, x0, 7, 7, dX, T)
    print 'T:', lgpol.T
    print 'dX:', lgpol.dX
    #Conver lgpol to message
    noise = np.zeros((T, 7))
    controller_params = policy_to_msg(lgpol, noise)
    return controller_params
Exemplo n.º 5
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset to a condition, run one trial of a policy, and return the sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        self.reset(condition)

        # Sampled noise, or zeros of the same shape when noise is disabled.
        noise = (generate_noise(self.T, self.dU, self._hyperparams)
                 if noisy else np.zeros((self.T, self.dU)))

        # Build the trial request.
        params = self._hyperparams
        request = TrialCommand()
        request.id = self._get_next_seq_id()
        request.controller = policy_to_msg(policy, noise)
        request.T = self.T
        # The id is drawn again here, so the later value is the one sent.
        request.id = self._get_next_seq_id()
        request.frequency = params['frequency']
        offsets = params['end_effector_points']
        request.ee_points = offsets.reshape(offsets.size).tolist()
        request.ee_points_tgt = params['ee_points_tgt'][condition].tolist()
        # State and observation share the same list of data types.
        request.state_datatypes = params['state_include']
        request.obs_datatypes = params['state_include']

        if self.use_tf is not False:
            # Publish without waiting; run_trial_tf produces the result.
            self._trial_service.publish(request)
            response = self.run_trial_tf(
                policy, time_to_run=params['trial_timeout'])
        else:
            response = self._trial_service.publish_and_wait(
                request, timeout=params['trial_timeout'])

        collected = msg_to_sample(response, self)
        if save:
            self._samples[condition].append(collected)
        return collected
Exemplo n.º 6
0
def get_lin_gauss_test(T=50):
    dX = 4
    x0 = np.zeros(dX)
    x0[0] = 1.0
    # lgpol = init_lqr({'x0' : x0, 'dU' : 2, 'dX' : dX, 'T' : T, 'dt' : 0.05})
    lgpol = init_pd({\
        'init_var' : 1.0, 'pos_gains' : 0.0,\
        'dQ' : 2, 'dt' : 0.05, 'x0' : x0,\
        'dU' : 2, 'dX' : 4, 'T' : T\
        })
    print 'T:', lgpol.T
    print 'dX:', lgpol.dX
    #Conver lgpol to message
    noise = np.zeros((T, 2))
    controller_params = policy_to_msg(lgpol, noise)
    return controller_params
Exemplo n.º 7
0
    def sample(self,
               policy,
               condition,
               reset=True,
               verbose=True,
               save=True,
               noisy=True):
        """
        Optionally reset, run one trial of a policy, and return the sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            reset: Whether or not to reset to the condition before the trial.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if reset:
            self.reset(condition)

        # Sampled noise, or zeros of the same shape when noise is disabled.
        if not noisy:
            noise = np.zeros((self.T, self.dU))
        else:
            noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial request.
        settings = self._hyperparams
        command = TrialCommand()
        command.id = self._get_next_seq_id()
        command.controller = policy_to_msg(policy, noise)
        command.T = self.T
        # The id is drawn a second time; the later value is the one sent.
        command.id = self._get_next_seq_id()
        command.frequency = settings['frequency']
        # End-effector points and targets are unnecessary for the mobile
        # robot, so both are sent empty.
        command.ee_points = []
        command.ee_points_tgt = []
        command.state_datatypes = settings['state_include']
        command.obs_datatypes = settings['obs_include']

        reply = self._trial_service.publish_and_wait(
            command, timeout=settings['trial_timeout'])

        result = msg_to_sample(reply, self)
        if save:
            self._samples[condition].append(result)
        return result
Exemplo n.º 8
0
    def execute(self, policy):
        """
        Execute a policy for one trial, without resetting, and collect a sample.

        Noise is always generated. The end-effector target is always the one
        configured for condition 0, and the sample is not appended to the
        agent's stored samples.
        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        # No reset is issued before the trial.
        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # The id is drawn a second time; the later value is the one sent.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): try taking the target from
        # self._tgt_subscribe.subscribe_and_wait() instead of the fixed
        # condition-0 target below.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][0].tolist()

        # Observations use the same data types as the state.
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            # Publish without waiting; run_trial_tf produces the result.
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        # Hand the sample back as the docstring promises; previously the
        # method fell off the end and returned None.
        return sample
Exemplo n.º 9
0
    def execute(self, policy):
        """
        Execute a policy for one trial, without resetting, and collect a sample.

        Noise is always generated. The end-effector target is always the one
        configured for condition 0, and the sample is not appended to the
        agent's stored samples.
        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        # No reset is issued before the trial.
        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # The id is drawn a second time; the later value is the one sent.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): try taking the target from
        # self._tgt_subscribe.subscribe_and_wait() instead of the fixed
        # condition-0 target below.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][0].tolist()

        # Observations use the same data types as the state.
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
        else:
            # Publish without waiting; run_trial_tf produces the result.
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        # Hand the sample back as the docstring promises; previously the
        # method fell off the end and returned None.
        return sample
Exemplo n.º 10
0
    def sample(self, policy, condition, verbose=True, save=True):
        """
        Reset to a condition, run one noisy trial of a policy, and return the
        sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
        Returns:
            sample: A Sample object.
        """
        self.reset(condition)

        # Noise is always generated for this agent.
        settings = self._hyperparams
        noise = generate_noise(self.T, self.dU, settings)

        # Build the trial request.
        request = TrialCommand()
        request.id = self._get_next_seq_id()
        request.controller = policy_to_msg(policy, noise)
        request.T = self.T
        # The id is drawn a second time; the later value is the one sent.
        request.id = self._get_next_seq_id()
        request.frequency = settings['frequency']
        offsets = settings['end_effector_points']
        request.ee_points = offsets.reshape(offsets.size).tolist()
        request.ee_points_tgt = settings['ee_points_tgt'][condition].tolist()
        # Observations use the same data types as the state.
        request.state_datatypes = settings['state_include']
        request.obs_datatypes = settings['state_include']

        response = self._trial_service.publish_and_wait(
            request, timeout=settings['trial_timeout'])

        collected = msg_to_sample(response, self)
        if save:
            self._samples[condition].append(collected)
        return collected
Exemplo n.º 11
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Before the trial this picks the trajectory table for the current mode
        (reset or normal), initializes the entry for `condition` if it is
        missing, and may call update_T_then_policy. After the trial the
        reference data is attached to the sample and the agent is reset again.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # There are different trajectories based on if reset or not
        if self.reset_time:
            trajectories = self.reset_trajectories
        else:
            trajectories = self.trajectories

        if condition not in trajectories:  # If this hasn't been initialized yet
            if self.reset_time:
                self.init_reset_traj(condition, policy)
            else:
                self.compute_reference_trajectory(condition, policy)

        self.samples_taken[condition] += 1  # Increment number of samples taken

        # Every (num_samples * iter_per_seg) samples, take more of the
        # trajectory - but not during the first iteration, and only if we have
        # not reached final_T yet or we are using varying T.
        if self.samples_taken[condition] % (self.num_samples * self.iter_per_seg) == 0 and self.iter_count != 0 \
        and (self.T != self.final_T or self.varying_T):
            self.update_T_then_policy(policy, condition)

        self.T = self.cur_T[
            condition]  # Make sure the T is correct for the condition we are on

        ref_traj_info = trajectories[condition]
        # Length of the full reference trajectory.
        # NOTE(review): traj_length is not used anywhere below in this method.
        traj_length = len(self.full_ref_ee[condition])
        # This is how long the current trajectory we're using is - self.T
        # NOTE(review): ref_traj is always read from self.trajectories, even
        # when `trajectories` above is self.reset_trajectories - confirm this
        # is intended.
        if self.T == self.final_T:  # If we have gotten to the whole trajectory
            ref_traj = self.trajectories[condition][
                'ee']  # Current reference trajectory (no copy is made here)
        else:  # Otherwise pad the reference trajectory as well
            # Take the first (T - padding) points, then repeat the last of
            # those points `padding` times.
            ref_traj = self.trajectories[condition]['ee'][:self.T -
                                                          self.padding]
            ref_traj.extend([
                self.trajectories[condition]['ee'][self.T - self.padding - 1]
            ] * self.padding)

        print('The length of the trajectory we are currently using is ' +
              str(self.T))
        print 'Sampling, condition', condition
        self.reset(condition)

        # Added from agent_ros.py of the public gps codebase.
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                print('well this got called')
                self._init_tf(policy.dU)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        #trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)

        # Once every num_samples samples: bump the iteration counter, pickle
        # the agent, and dump the zero-noise policy message to
        # 'iter<iter_count>_cond<condition>.txt'.
        if self.samples_taken[
                condition] % self.num_samples == 0 and self.samples_taken[
                    condition] != 0:
            self.iter_count += 1  # This is the full count
            self.pickle_self()  # Pickle self and send to data files
            with open(
                    'iter' + str(self.iter_count) + '_cond' + str(condition) +
                    '.txt', 'w') as f:
                the_noise = np.zeros((self.T, self.dU))
                f.write(str(policy_to_msg(policy, the_noise)))

        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # The trial target is the last point of the (possibly padded) reference.
        trial_command.ee_points_tgt = ref_traj[-1]
        # Observations use the same data types as the state.
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            # Publish without waiting; run_trial_tf produces the result.
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        # Store every reference point relative to the final reference point.
        sample.set('target_traj_ee_points',
                   [points - ref_traj[-1] for points in ref_traj])

        # First T offsets, plus the whole flattened offsets array repeated
        # once per timestep.
        sample.set(REF_OFFSETS, ref_traj_info['offsets'][:self.T])
        sample.set(REF_TRAJ,
                   np.array([ref_traj_info['offsets'].flatten()] * self.T))

        if save:
            self._samples[condition].append(sample)
        if self.varying_T:  # Only save this if you are gonna use varying T
            self.saved_samples[condition].append(
                sample)  # Save it here too just in case
        self.reset(condition)  # Reset again once the trial is done
        return sample
Exemplo n.º 12
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset to a condition, run one trial of a policy, and return the sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the tf import succeeded; a linear Gaussian
        # policy does not match and skips the tf setup.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # Reset the agent for this particular experiment condition.
        self.reset(condition)

        # Sampled noise, or zeros of the same shape when noise is disabled.
        if not noisy:
            noise = np.zeros((self.T, self.dU))
        else:
            noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial request (a ROS message).
        config = self._hyperparams
        command = TrialCommand()
        command.id = self._get_next_seq_id()
        command.controller = policy_to_msg(policy, noise)  # ControllerParams
        command.T = self.T  # Trajectory length
        # The id is drawn a second time; the later value is the one sent.
        command.id = self._get_next_seq_id()
        command.frequency = config['frequency']  # Controller frequency

        # 3*n_points array containing offsets, sent as a flat list.
        offsets = config['end_effector_points']
        command.ee_points = offsets.reshape(offsets.size).tolist()
        # 3*n_points array containing the desired ee_points for this trial.
        command.ee_points_tgt = config['ee_points_tgt'][condition].tolist()

        # Data types to include in the state and in the observation (the
        # same list is used for both).
        command.state_datatypes = config['state_include']
        command.obs_datatypes = config['state_include']

        if self.use_tf is not False:
            self._trial_service.publish(command)
            # Run an async controller from the policy. The async controller
            # receives observations from ROS subscribers and then uses them
            # to publish actions.
            reply = self.run_trial_tf(policy,
                                      time_to_run=config['trial_timeout'])
        else:
            # Local policy: publish a message and wait for the response.
            reply = self._trial_service.publish_and_wait(
                command, timeout=config['trial_timeout'])

        result = msg_to_sample(reply, self)
        if save:
            self._samples[condition].append(result)
        return result
Exemplo n.º 13
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Two execution paths exist: the trial service alone (when use_tf is
        False or the policy is not a TfPolicy), or the trial manager together
        with the trial service. Only the first path honors `save`; only the
        second attaches NOISE and TIMESTEP to the returned sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        self.reset(condition)
        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # The id is drawn a second time; the later value is the one sent.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
                self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']

        trial_command.obs_datatypes = self._hyperparams['obs_include']

        # NOTE(review): if TfPolicy is None and use_tf is not False, the
        # isinstance() call below raises TypeError - confirm that combination
        # cannot occur.
        if self.use_tf is False or not isinstance(policy, TfPolicy):
            print 'Not using TF controller'
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            # Early return: NOISE and TIMESTEP are not set on this path.
            return sample
        else:
            # The string below is the disabled earlier implementation of this
            # branch; it is a bare string expression and has no effect.
            '''
            print 'Using TF controller'
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, condition, time_to_run=self._hyperparams['trial_timeout'])
            pdb.set_trace()
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            return sample
            '''
            self.trial_manager.prep(policy, condition)
            self._trial_service.publish(trial_command, wait=True)
            self.trial_manager.run(self._hyperparams['trial_timeout'])
            # Poll once per second until the trial service clears _waiting.
            while self._trial_service._waiting:
                print 'Waiting for sample to come in'
                rospy.sleep(1.0)
            sample_msg = self._trial_service._subscriber_msg

        # Only the TF path reaches this point.
        # NOTE(review): `save` is ignored here - the sample is never appended
        # to self._samples on this path; confirm whether that is intended.
        sample = msg_to_sample(sample_msg, self)
        sample.set(NOISE, noise)
        # Timestep indices 0..T-1 as a (T, 1) column.
        sample.set(TIMESTEP, np.arange(self.T).reshape((self.T,1)))

        return sample