Example #1
def main(grid_size, discount_factor, epochs, learning_rate):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Reads the trajectory data from the provided csv files and returns the trajectories
    # The trajectories are returned as a numpy array with one entry per trajectory provided
    trajectories = robot_mdp.generate_trajectories()
    # Finds the length of the trajectories data
    n_trajectories = len(trajectories)
    # Initialize the IRL class object; the number of trajectories (currently 3) and the grid size are passed as inputs
    irl = MaxEntIRL(n_trajectories, grid_size)
    # For storing results
    filename = "/home/vignesh/PycharmProjects/dvrk_automated_suturing/data/trial4_grid11_parallel/weights_grid11.txt"
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, weights = irl.max_ent_irl(trajectories, discount_factor,
                                      n_trajectories, epochs, learning_rate,
                                      filename)

    print "r is ", reward.reshape((grid_size, grid_size), order='F')
    # print "r shape ", reward.shape
    print "weights is ", weights.reshape((grid_size, grid_size), order='F')
    # print "policy is ", policy[0][0]
    file_open = open(filename, 'a')
    file_open.write("\n \n \n \n Final result \n")
    # Write into the already-open handle so the appended header is not overwritten
    savetxt(file_open, weights, delimiter=',', fmt="%10.5f", newline=", ")
    file_open.close()
    def find_expected_svf(self, weights, discount, total_states, trajectories,
                          reward):
        # Creates object of robot markov model
        robot_mdp = RobotMarkovModel()
        # Returns the state and action values of the expert trajectories
        state_values_from_trajectory, _ = robot_mdp.return_trajectories_data()
        # Finds the terminal state value from the expert trajectory
        # Index 0 selects the first trajectory
        # Index total_states - 1 selects the last state of that trajectory, which is used as the terminal state
        terminal_state_val_from_trajectory = state_values_from_trajectory[0][
            total_states - 1]
        # Pass the required grid size
        # Run the commands below to create the state space model
        env_obj = RobotStateUtils(self.grid_size, discount,
                                  terminal_state_val_from_trajectory)
        states = env_obj.create_state_space_model_func()
        action = env_obj.create_action_set_func()
        # print "State space created is ", states
        P_a = env_obj.get_transition_mat_deterministic()
        # print "P_a is ", P_a
        # Computes the optimal policy for the current reward via value iteration
        policy = env_obj.value_iteration(reward)
        # policy = np.random.randint(27, size=1331)
        # print "policy is ", policy
        # Computes the expected state visitation frequency under the current policy
        expected_svf = env_obj.compute_state_visitation_frequency(
            trajectories, policy)

        # Returns the policy and the expected state visitation frequency
        return policy, expected_svf
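# A minimal sketch (not from the original source) of how the expected SVF returned
# above typically feeds the MaxEnt IRL weight update: the gradient is the expert
# feature expectation minus the learner's expected feature count. The function name
# maxent_gradient_step and the (n_states, n_features) layout of feat_matrix are
# illustrative assumptions.
import numpy as np

def maxent_gradient_step(weights, feat_matrix, expert_svf, expected_svf, learning_rate):
    # feat_matrix: (n_states, n_features); expert_svf / expected_svf: (n_states,)
    grad = feat_matrix.T.dot(expert_svf - expected_svf)
    return weights + learning_rate * grad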
Example #3
def main(discount, epochs, learning_rate, weights, trajectory_length,
         n_policy_iter):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Initialize the IRL class object; the trajectory length is passed as input (currently 1)
    irl = MaxEntIRL(trajectory_length)

    # trajectory_features_array, trajectory_rewards_array = robot_mdp.trajectories_features_rewards_array(weights)
    # Finds the sum of features of the expert trajectory and list of all the features of the expert trajectory
    sum_trajectory_features, complete_features_array = robot_mdp.generate_trajectories()
    # print "traj array ", trajectory_features_array.shape, trajectory_features_array
    # print "complete array ", complete_features_array.shape, complete_features_array
    # print trajectory_rewards_array
    # print model_rot_par_r
    # Returns the state and actions spaces of the expert trajectory
    state_trajectory_array, action_trajectory_array = robot_mdp.trajectories_data()
    # Finds the length of the trajectories data
    n_trajectories = len(state_trajectory_array)
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, alpha = irl.max_ent_irl(sum_trajectory_features,
                                    complete_features_array, discount,
                                    n_trajectories, epochs, learning_rate,
                                    n_policy_iter, weights)

    print "r is ", reward
    print "alpha is ", alpha
    def find_expected_svf(self, weights, discount):
        # Create the robot state-space utility object
        # Pass the required grid size
        env_obj = RobotStateUtils(11, weights)
        states = env_obj.create_state_space_model_func()
        action = env_obj.create_action_set_func()
        # print "State space created is ", states
        P_a = env_obj.get_transition_mat_deterministic()
        # print "P_a is ", P_a
        # print "shape of P_a ", P_a.shape
        # Calculates the reward and features for every state in the state space
        rewards = []
        state_features = []
        for i in range(len(states)):
            r, f = env_obj.reward_func(states[i][0], states[i][1], states[i][2], weights)
            rewards.append(r)
            state_features.append(f)
        # print "rewards is ", rewards
        value, policy = env_obj.value_iteration(P_a, rewards, gamma=discount)
        # policy = np.random.randint(27, size=1331)
        print "policy is ", policy
        robot_mdp = RobotMarkovModel()
        # Returns the state values of the expert trajectories
        expert_trajectory_states, _ = robot_mdp.return_trajectories_data()
        expected_svf = env_obj.compute_state_visitation_frequency(P_a, expert_trajectory_states, policy)
        # Formats the features array in a way that it can be multiplied with the svf values
        state_features = np.array([state_features]).transpose().reshape((len(state_features[0]), len(state_features)))
        print "svf is ", expected_svf
        print "svf shape is ", expected_svf.shape

        # Returns the policy, state features of the trajectories considered in the state space and expected svf
        return policy, state_features, expected_svf
    def find_expected_svf(self, discount, total_states, trajectories, reward):
        # Creates object of robot markov model
        robot_mdp = RobotMarkovModel()
        # Returns the state and action values of the expert trajectories
        state_values_from_trajectory, _ = robot_mdp.return_trajectories_data()
        # Finds the terminal state value from the expert trajectory
        # Index 0 selects the first trajectory
        # Index total_states - 1 selects the last state of that trajectory, which is used as the terminal state
        terminal_state_val_from_trajectory = state_values_from_trajectory[0][
            total_states - 1]
        # Pass the required grid size
        # Run the commands below to create the state space model
        env_obj = RobotStateUtils(self.grid_size, discount,
                                  terminal_state_val_from_trajectory)
        states = env_obj.create_state_space_model_func()
        action = env_obj.create_action_set_func()
        # print "State space created is ", states
        # V = {}
        # for state in env_obj.states:
        #     V[state] = 0
        # # Reinitialize policy
        # policy = {}
        # for state in env_obj.states:
        #     policy[state] = [key for key in env_obj.action_space]
        # values_of_states, policy = env_obj.value_iteration(V, policy, 0.01)
        # policy = np.random.randint(27, size=1331)
        # print "policy is ", policy
        # Computes the expected state visitation frequency of the expert trajectories
        expected_svf = env_obj.compute_state_visitation_frequency(trajectories)

        # Returns the expected state visitation frequency
        return expected_svf
Example #6
def main(grid_size, discount_factor, epochs, learning_rate):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Reads the trajectory data from the provided csv files and returns the trajectories
    # The trajectories are returned as a numpy array with one entry per trajectory provided
    trajectories = robot_mdp.generate_trajectories()
    # Finds the length of the trajectories data
    n_trajectories = len(trajectories)
    # Initialize the IRL class object; the number of trajectories (currently 3) and the grid size are passed as inputs
    irl = MaxEntIRL(n_trajectories, grid_size)
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, weights = irl.max_ent_irl(trajectories, discount_factor, n_trajectories, epochs, learning_rate)

    print "r is ", reward.reshape((grid_size, grid_size), order='F')
    # print "r shape ", reward.shape
    print "weights is ", weights.reshape((grid_size, grid_size), order='F')
Example #7
def main(discount, epochs, learning_rate):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Finds the sum of features of the expert trajectory and list of all the features of the expert trajectory
    sum_trajectory_features, feature_array_all_trajectories = robot_mdp.generate_trajectories()
    # Finds the length of the trajectories data
    n_trajectories = len(feature_array_all_trajectories)
    # Initialize the IRL class object; the number of trajectories (currently 1) is passed as input
    irl = MaxEntIRL(n_trajectories)
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, weights = irl.max_ent_irl(sum_trajectory_features, feature_array_all_trajectories, discount,
                                    n_trajectories, epochs, learning_rate)

    print "r is ", reward
    # print "r shape ", reward.shape
    print "weights is ", weights
    def find_expected_svf(self, grid_size, weights, discount, total_states):
        # Creates object of robot markov model
        robot_mdp = RobotMarkovModel()
        # Returns the state and action values of the expert trajectories
        state_values_from_trajectory, _ = robot_mdp.return_trajectories_data()
        # Finds the terminal state value from the expert trajectory
        # Index 0 selects the first trajectory
        # Index total_states - 1 selects the last state of that trajectory, which is used as the terminal state
        terminal_state_val_from_trajectory = state_values_from_trajectory[0][
            total_states - 1]
        # Pass the required grid size
        # Run the commands below to create the state space model
        env_obj = RobotStateUtils(grid_size, weights,
                                  terminal_state_val_from_trajectory)
        states = env_obj.create_state_space_model_func()
        action = env_obj.create_action_set_func()
        # print "State space created is ", states
        P_a = env_obj.get_transition_mat_deterministic()
        # print "P_a is ", P_a
        # Calculates the reward and features for every state in the state space
        rewards = []
        state_features = []
        for i in range(len(states)):
            r, f = env_obj.reward_func(states[i][0], states[i][1],
                                       states[i][2], weights)
            rewards.append(r)
            state_features.append(f)
        # print "rewards is ", rewards
        value, policy = env_obj.value_iteration(P_a, rewards, gamma=discount)
        # policy = np.random.randint(27, size=1331)
        print "policy is ", policy
        # Returns the state values of the expert trajectories
        expert_trajectory_states, _ = robot_mdp.return_trajectories_data()
        expected_svf = env_obj.compute_state_visitation_frequency(
            P_a, expert_trajectory_states, policy)
        # Formats the features array in a way that it can be multiplied with the svf values
        state_features = np.array([state_features]).transpose().reshape(
            (len(state_features[0]), len(state_features)))
        print "svf is ", expected_svf
        print "svf shape is ", expected_svf.shape

        # Returns the policy, state features of the trajectories considered in the state space and expected svf
        return policy, state_features, expected_svf
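# A minimal sketch (assumption; compute_state_visitation_frequency itself is not
# shown in these excerpts) of the standard MaxEnt SVF dynamic programme: seed the
# state distribution with the trajectories' start states, then propagate it forward
# under the deterministic policy through the transition matrix. It assumes P_a is
# indexed as [state, next_state, action] and that trajectories hold state indices.
import numpy as np

def expected_svf_sketch(P_a, trajectories, policy):
    n_states = P_a.shape[0]
    horizon = len(trajectories[0])
    mu = np.zeros((n_states, horizon))
    for traj in trajectories:
        mu[int(traj[0]), 0] += 1.0 / len(trajectories)
    for t in range(1, horizon):
        for s in range(n_states):
            # Probability mass flowing into s at time t from every predecessor state
            mu[s, t] = sum(mu[pre, t - 1] * P_a[pre, s, int(policy[pre])]
                           for pre in range(n_states))
    return mu.sum(axis=1)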
    def calculate_optimal_policy_func(self, alpha, discount):
        # Creates an object for using the RobotMarkovModel class
        robot_mdp = RobotMarkovModel()
        self.create_state_space_model_func()
        self.create_action_set_func()
        n_actions = len(self.action_set)
        self.discount = discount
        self.model_index_pos_x, self.model_index_pos_y, self.model_index_pos_z = self.get_indices(
            self.model_end_pos_x, self.model_end_pos_y, self.model_end_pos_z)

        self.rewards, self.features, n_features = self.reward_func(
            self.model_end_pos_x, self.model_end_pos_y, self.model_end_pos_z,
            alpha)
        # self.features_arr = robot_mdp.features_func(self.model_end_pos_x, self.model_end_pos_y,
        #                                                     self.model_end_pos_z)
        # print "rewards is ", self.rewards.shape
        # print "reward 0 ", self.rewards[0][3][8]
        # return 0
        return self.rewards, self.features, n_features
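# Hedged sketch (assumption): reward_func above is treated here as a linear
# reward r(s) = w . phi(s). The Gaussian distance-to-goal feature map used in
# linear_reward_sketch is purely illustrative and is not taken from the original code.
import numpy as np

def linear_reward_sketch(pos_x, pos_y, pos_z, goal, weights):
    pos = np.array([pos_x, pos_y, pos_z])
    features = np.exp(-np.square(pos - np.asarray(goal)))  # phi(s), illustrative choice
    return np.dot(weights, features), features, len(features)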
Example #10
        print(i, 'sweeps of state space for value iteration')
        return V, policy


if __name__ == '__main__':
    # goal = np.array([0.005, 0.055, -0.125])
    goal = np.array([0.5, 0.5, 0])
    # term_state = np.random.randint(0, grid_size ** 3)
    env_obj = RobotStateUtilsP(11, 0.9, goal)
    states = env_obj.create_state_space_model_func()
    action = env_obj.create_action_set_func()
    print "State space created is ", states
    print "actions is ", action
    index_val = env_obj.get_state_val_index(goal)
    print "index val is ", index_val
    mdp_obj = RobotMarkovModel()
    trajectories = mdp_obj.generate_trajectories()
    index_vals = np.zeros(len(trajectories[0]))
    rewards = np.zeros(len(states))
    for i in range(len(trajectories[0])):
        # print "traj is ", trajectories[0][i]
        index_vals[i] = env_obj.get_state_val_index(trajectories[0][i])
    for ele in index_vals:
        if ele == index_val:
            rewards[int(ele)] = 10
        else:
            rewards[int(ele)] = 1
    env_obj.rewards = rewards
    print "rewards is ", rewards
    # initialize V(s)
    V = {}
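    # Hedged continuation sketch, mirroring the commented-out pattern in Example #3:
    # fill V with zeros, seed a policy with every available action, and run the
    # dict-based value iteration until the values converge. It assumes RobotStateUtilsP
    # exposes the same states / action_space attributes used in that commented block.
    for state in env_obj.states:
        V[state] = 0
    policy = {}
    for state in env_obj.states:
        policy[state] = [key for key in env_obj.action_space]
    values_of_states, policy = env_obj.value_iteration(V, policy, 0.01)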