Example #1: simulation reset script
import rospy

# Env is the project's Gazebo environment wrapper (import path depends on
# the package layout).

def main():
    rospy.init_node("reset_all", anonymous=False)

    rospy.set_param('/stage_number', 1)

    env = Env(4)
    print("Trying to reset sim")
    env.reset()

if __name__ == '__main__':
    main()
Example #2: SAC training entry point (stage 1)
import time

import numpy as np
import rospy
from std_msgs.msg import Float32

# policy_net, max_episodes, and max_steps are defined elsewhere in the
# original script, as is the Env wrapper.

rewards     = []
batch_size  = 128

#----------------------------------------
def action_unnormalized(action, high, low):
    """Map an action from [-1, 1] to [low, high] and clip to that range."""
    action = low + (action + 1.0) * 0.5 * (high - low)
    action = np.clip(action, low, high)
    return action
#**********************************
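# A hedged usage sketch: rescale a normalized policy output to the TurtleBot3
# velocity ranges used elsewhere in these examples (0.22 m/s linear and
# 2.0 rad/s angular are assumptions, taken from the commented-out clipping in
# Example #4):
#
#   raw = np.array([0.3, -0.8])                          # policy output in [-1, 1]
#   v = action_unnormalized(raw[0], high=0.22, low=0.0)  # -> 0.143 m/s
#   w = action_unnormalized(raw[1], high=2.0, low=-2.0)  # -> -1.6 rad/s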


if __name__ == '__main__':
    rospy.init_node('sac_stage_1')
    pub_result = rospy.Publisher('result', Float32, queue_size=5)
    result = Float32()
    env = Env()

    start_time = time.time()
    past_action = np.array([0.,0.])

    for ep in range(max_episodes):
        done = False
        state = env.reset()
        print('Episode: ' + str(ep))

        rewards_current_episode = 0

        for step in range(max_steps):
            state = np.float32(state)
            
            action = policy_net.get_action(state)
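            # The snippet is truncated here. A hedged sketch of the usual
            # continuation, mirroring the SAC loop in Example #4 (not the
            # original code):
            #
            #   action = np.array([
            #       action_unnormalized(action[0], 0.22, 0.0),
            #       action_unnormalized(action[1], 2.0, -2.0)])
            #   next_state, reward, done = env.step(action, past_action)
            #   past_action = action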
Example #3: DDPG training entry point (stage 1)
import numpy as np
import rospy
from std_msgs.msg import Float32

# ACTION_V_MAX, ACTION_W_MAX, STATE_DIMENSION, ACTION_DIMENSION, MAX_BUFFER,
# MAX_EPISODES, MAX_STEPS, is_training, and the MemoryBuffer / Trainer /
# OUNoise classes are defined earlier in the original script (not shown).

print('Action Max: ' + str(ACTION_V_MAX) + ' m/s and ' + str(ACTION_W_MAX) +
      ' rad/s')
ram = MemoryBuffer(MAX_BUFFER)
trainer = Trainer(STATE_DIMENSION, ACTION_DIMENSION, ACTION_V_MAX,
                  ACTION_W_MAX, ram)
noise = OUNoise(ACTION_DIMENSION,
                max_sigma=0.1,
                min_sigma=0.1,
                decay_period=8000000)
#trainer.load_models(4880)

if __name__ == '__main__':
    rospy.init_node('ddpg_stage_1')
    pub_result = rospy.Publisher('result', Float32, queue_size=5)
    result = Float32()
    env = Env(action_dim=ACTION_DIMENSION)
    before_training = 4

    past_action = np.zeros(ACTION_DIMENSION)

    for ep in range(MAX_EPISODES):
        done = False
        state = env.reset()
        if is_training and ep % 10 != 0 and ram.len >= before_training * MAX_STEPS:
            print('---------------------------------')
            print('Episode: ' + str(ep) + ' training')
            print('---------------------------------')
        else:
            if ram.len >= before_training * MAX_STEPS:
                print('---------------------------------')
                print('Episode: ' + str(ep) + ' evaluating')
                print('---------------------------------')
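# Example #3 constructs OUNoise(ACTION_DIMENSION, max_sigma=0.1, min_sigma=0.1,
# decay_period=8000000) without showing the class. Below is a minimal sketch of
# an Ornstein-Uhlenbeck noise process matching that constructor (an assumption
# based on a widely used DDPG implementation; the project's own class may
# differ in detail).
import numpy as np


class OUNoise(object):
    def __init__(self, action_dim, mu=0.0, theta=0.15, max_sigma=0.3,
                 min_sigma=0.3, decay_period=100000):
        self.mu = mu                    # long-run mean of the process
        self.theta = theta              # mean-reversion rate
        self.sigma = max_sigma          # current noise scale
        self.max_sigma = max_sigma
        self.min_sigma = min_sigma
        self.decay_period = decay_period
        self.action_dim = action_dim
        self.reset()

    def reset(self):
        self.state = np.ones(self.action_dim) * self.mu

    def get_noise(self, t=0):
        # one Euler step of dx = theta * (mu - x) dt + sigma dW
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
        self.state = x + dx
        # anneal sigma from max_sigma to min_sigma over decay_period steps
        self.sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * min(
            1.0, float(t) / self.decay_period)
        return self.state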
Example #4: SAC training loop with automatic entropy tuning
import gc
import random
from collections import deque

import numpy as np
import rospy
import torch
import torch.optim as optim
from std_msgs.msg import Float32
from torch.utils.tensorboard import SummaryWriter

# actor, critic, target_critic, the learning rates, batch_size, tau,
# action_size, is_training, and the helpers (get_action, train_model,
# soft_target_update, hard_target_update, save_models) are defined elsewhere
# in the original script, as is the Env wrapper.


def main():
    rospy.init_node('sac_stage_1')
    pub_result = rospy.Publisher('result', Float32, queue_size=5)
    result = Float32()
    env = Env()
    torch.manual_seed(500)

    actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)

    hard_target_update(critic, target_critic)
    
    # initialize automatic entropy tuning
    target_entropy = -torch.prod(torch.Tensor([action_size])).item()  # -|A|
    
    writer = SummaryWriter('./house_sac_4')

    replay_buffer = deque(maxlen=100000)
    recent_rewards = []
    
    for episode in range(10001):
        done = False
        score = 0.
        state = env.reset()
        print('Episode: ' + str(episode))
        past_action = np.array([0.,0.])

        for step in range(1000):
            state = np.float32(state)
            #print(state)
            mu, std = actor(torch.Tensor(state))
            action = get_action(mu, std)
            # action = np.array([np.clip(action[0], 0., 0.22),
            #                    np.clip(action[1], -2., 2.)])

            next_state, reward, done = env.step(action,past_action)
            print(action, reward)
            past_action = action
            
            next_state = np.float32(next_state)
            mask = 0 if done else 1
            if step > 1:
                score += reward
                replay_buffer.append((state, action, reward, next_state, mask))

            state = next_state

            if done:
                recent_rewards.append(score)
                break
            
            if len(replay_buffer) >= 2*batch_size and is_training:

                mini_batch = random.sample(replay_buffer, batch_size)
                
                actor.train(), critic.train(), target_critic.train()
                alpha = train_model(actor, critic, target_critic, mini_batch, 
                                    actor_optimizer, critic_optimizer,
                                    target_entropy)
               
                soft_target_update(critic, target_critic, tau)
            
        result.data = score
        pub_result.publish(result)
        gc.collect()
        print('reward per ep: ' + str(score))
        
        if episode % 10 == 0:
            print('{} episode | score_avg: {:.2f}'.format(
                episode, np.mean(recent_rewards)))
            writer.add_scalar('log/score', float(np.mean(recent_rewards)),
                              episode + 260)
            # writer.add_scalar('log/alpha', float(alpha.detach().numpy()),
            #                   episode + 260)
            recent_rewards = []
            print("save")
            save_models(episode + 260)
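# Examples #4 and #5 call hard_target_update(...) and soft_target_update(...)
# without showing them. Below is a minimal sketch of the usual Polyak-averaging
# helpers (an assumption; the originals are not shown, and Example #5 passes
# four networks at once, so the real arity may differ):


def hard_target_update(net, target_net):
    # copy the online network's parameters into the target wholesale
    # (typically done once, at initialization)
    target_net.load_state_dict(net.state_dict())


def soft_target_update(net, target_net, tau):
    # target <- tau * online + (1 - tau) * target, applied after each update
    for param, target_param in zip(net.parameters(), target_net.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)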
Example #5: DDPG/TD3 training loop
import gc
import random
from collections import deque

import numpy as np
import rospy
import torch
import torch.optim as optim
from std_msgs.msg import Float32
from torch.utils.tensorboard import SummaryWriter

# actor, critic, target_actor, target_critic, the learning rates, batch_size,
# tau, the OU-noise parameters (theta, mu, sigma), action_size, is_training,
# and the helpers (OUNoise, get_action, train_model, hard_target_update,
# soft_target_update) are defined elsewhere in the original script.


def main():
    rospy.init_node('ddpg_stage_1')
    pub_result = rospy.Publisher('result', Float32, queue_size=5)
    result = Float32()
    env = Env()
    torch.manual_seed(1000)

    actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)

    hard_target_update(actor, critic, target_actor, target_critic)
    ou_noise = OUNoise(action_size, theta, mu, sigma)

    writer = SummaryWriter('./house_td3_4')

    replay_buffer = deque(maxlen=100000)
    recent_rewards = []

    for episode in range(100001):
        done = False
        score = 0.
        state = env.reset()
        print('Episode: ' + str(episode))
        past_action = np.array([0., 0.])

        for step in range(1000):

            state = np.float32(state)
            #print(state)
            policy = actor(torch.Tensor(state))
            action = get_action(policy, ou_noise, episode)

            next_state, reward, done = env.step(action, past_action)
            print(action, reward)
            past_action = action

            next_state = np.float32(next_state)
            mask = 0 if done else 1
            if step > 1:
                score += reward
                replay_buffer.append((state, action, reward, next_state, mask))

            state = next_state

            if done:
                recent_rewards.append(score)
                break

            if len(replay_buffer) >= 2 * batch_size and is_training:

                mini_batch = random.sample(replay_buffer, batch_size)

                actor.train(), critic.train()
                target_actor.train(), target_critic.train()
                train_model(actor, critic, target_actor, target_critic,
                            actor_optimizer, critic_optimizer, mini_batch,
                            step)

                soft_target_update(actor, critic, target_actor, target_critic,
                                   tau)

        result.data = score
        pub_result.publish(result)
        gc.collect()
        print('reward per ep: ' + str(score))
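# Both training loops store transitions as (state, action, reward, next_state,
# mask) tuples and pass random.sample(replay_buffer, batch_size) to
# train_model, which is not shown. A hedged sketch of how such a mini-batch is
# typically unpacked into tensors before computing the losses:
#
#   states, actions, rewards, next_states, masks = zip(*mini_batch)
#   states      = torch.FloatTensor(np.stack(states))
#   actions     = torch.FloatTensor(np.stack(actions))
#   rewards     = torch.FloatTensor(rewards).unsqueeze(1)
#   next_states = torch.FloatTensor(np.stack(next_states))
#   masks       = torch.FloatTensor(masks).unsqueeze(1)  # 0 if done else 1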
Example #6: move_base evaluation client (Move class)
import math

import actionlib
import numpy as np
import rospy
from move_base_msgs.msg import MoveBaseAction, MoveBaseGoal

# Env is the project's Gazebo environment wrapper (import path depends on the
# package layout).


class Move():
    def __init__(self, last=False):
        self.client = actionlib.SimpleActionClient('move_base', MoveBaseAction)
        self.client.wait_for_server()

        print('Hello World!')

        self.env = Env(4)

        # goal type
        self.goal = MoveBaseGoal()
        self.goalReached = False

        #self.goal_list = np.array([[0.6, 0.0], [0.9, 1.2]])

        self.stage = rospy.get_param("stage")
        self.method = str("move_base")
        rospy.set_param("method", self.method)

        if self.stage == "4":
            print(4)

            x_list = [
                0.6, 1.2, -1.2, -0.1, 0.5, -0.1, 1.2, -0.5, 0.0, -0.2, -1.2,
                0.1, 0.9
            ]
            y_list = [
                0, 0.5, 0.3, 1.1, 0.1, -1.2, 1.2, -0.1, -1.0, -0.3, -1, -1.6,
                1.2
            ]

            # x_list = [0.6, 1.9, 0.5, 0.2, -0.8, -1, -1.9, 0.5, 2, 0.5, 0, -0.1, -2]
            # y_list = [0, -0.5, -1.9, 1.5, -0.9, 1, 1.1, -1.5, 1.5, 1.8, -1, 1.6, -0.8]

            print('Stage 4')
            self.trial_index = [[3, 1, 10, 8, 6], [7, 10, 6, 0, 10],
                                [1, 2, 4, 6, 4], [0, 5, 6, 11, 4],
                                [0, 7, 6, 11, 6], [11, 8, 4, 6, 10],
                                [9, 9, 0, 7, 7], [2, 0, 9, 5, 7],
                                [2, 3, 1, 9, 6], [5, 3, 2, 10, 0]]

            row = len(self.trial_index)
            col = len(self.trial_index[0])
            print("row = ", row)
            print("col = ", col)
            self.goal_list = []
            trial_goals = []

            for i in range(row):
                for j in range(col):
                    trial_goals.append([
                        x_list[self.trial_index[i][j]],
                        y_list[self.trial_index[i][j]]
                    ])

                self.goal_list.append(trial_goals)
                trial_goals = []

            self.goal_list = np.array(self.goal_list)
            # self.goal_position.position.x = goal_x_list[self.trial_index[trial][goal_count]]
            # self.goal_position.position.y = goal_y_list[self.trial_index[trial][goal_count]]

        else:
            self.goal_list = np.array([[[0.6, 0.0], [0.9, 1.2], [-0.9, 0.0],
                                        [-0.1, -0.1], [0.5, 0.1]],
                                       [[0.9, 1.2], [-1.1, 0.9], [0.9, 1.2],
                                        [1.1, 0.3], [0.1, 1.0]],
                                       [[-0.6, -1.2], [1.2, 0.5], [1.2, -0.6],
                                        [-0.1, -1.2], [1.2, 1.2]],
                                       [[-0.5, -0.1], [-1.2, -1.1],
                                        [-0.1, -0.8], [0.8, 1.1], [-0.3, 1.2]],
                                       [[1.2, 1.0], [-0.6, 0.0], [1.2, -1.0],
                                        [-0.2, -1.2], [0.1, 0.7]],
                                       [[-1.1, -0.7], [-0.9, 1.1], [-0.8, 1.2],
                                        [-1.2, -0.3], [1.1, -0.1]],
                                       [[1.2, 0.7], [-1.2, -0.5], [1.2, -0.8],
                                        [-1.2, 0.8], [1.1, 0.7]],
                                       [[-1.1, 0.9], [0.0, -1.0], [-1.2, 0.1],
                                        [-0.5, -1.2], [0.6, 1.1]],
                                       [[-1.1, 0.5], [1.1, 1.0], [1.0, 0.0],
                                        [-0.9, 1.2], [0.0, -0.7]],
                                       [[-0.1, 1.1], [1.1, 0.3], [-0.8, 1.2],
                                        [-1.2, -0.3], [0.0, 0.9]]])

        # self.goal_list = np.array([[[0.6, 0.0], [0.9, 1.2], [-0.9, 0.0], [-0.1, -0.1]],
        #                             [[0.9, 1.2], [-1.1, 0.9], [0.9, 1.2], [1.1, 0.3]],
        #                             [[-0.6, -1.2], [1.2, 0.5], [1.2, -0.6], [-0.1, -1.2]],
        #                             [[-0.5, -0.1], [-1.2, -1.1], [-0.1, -0.8], [0.8, 1.1]],
        #                             [[1.2, 1.0], [-0.6, 0.0], [1.2, -1.0], [-0.2, -1.2]],
        #                             [[-1.1, -0.7], [-0.9, 1.1], [-0.8, 1.2], [-1.2, -0.3]],
        #                             [[1.2, 0.7], [-1.2, -0.5], [1.2, -0.8], [-1.2, 0.8]],
        #                             [[-1.1, 0.9], [0.0, -1.0], [-1.2, 0.1], [-0.5, -1.2]],
        #                             [[-1.1, 0.5], [1.1, 1.0], [1.0, 0.0], [-0.9, 1.2]],
        #                             [[-0.1, 1.1], [1.1, 0.3], [-0.8, 1.2], [-1.2, -0.3]]])

        print(self.goal_list)

        self.goal_list_RowIndex = 0  # 0-9

        self.goal_x_list = self.goal_list[self.goal_list_RowIndex, :, 0]
        self.goal_y_list = self.goal_list[self.goal_list_RowIndex, :, 1]

        self.finished = False

        self.noOfTests = self.goal_list.shape[1]  # 5 tests in a trial
        self.noOfTrials = self.goal_list.shape[0]  # 10 trials for evaluation
        self.trial_completed = 0
        self.count = 0

        if self.noOfTrials == 1:
            self.last = True
        else:
            self.last = False

        self.goalIndex = 0  # 0-4

        self.goalX = 0
        self.goalY = 0

        self.currentX = 0
        self.currentY = 0

        rospy.set_param("store_flag", False)
        rospy.set_param("save_flag", False)
        rospy.set_param("end_flag", False)
        rospy.set_param("all_finished_flag", False)
        # self.logger = DataLogger(self.stage, self.logger)
        rospy.set_param("trial", self.trial_completed + 1)

        print(self.noOfTests)
        print(self.noOfTrials)

        goal = self.setGoal()
        self.execute(goal)

    def distanceToGoal(self):
        return round(
            math.hypot(self.goalX - self.currentX, self.goalY - self.currentY),
            2)

    def setGoal(self):
        goal = MoveBaseGoal()
        self.goalX = self.goal_x_list[self.goalIndex]
        self.goalY = self.goal_y_list[self.goalIndex]

        print('Set New Goal to ', self.goalX, self.goalY)
        self.goalReached = False
        goal.target_pose.header.frame_id = "map"
        goal.target_pose.header.stamp = rospy.Time.now()

        goal.target_pose.pose.position.x = self.goalX
        goal.target_pose.pose.position.y = self.goalY
        goal.target_pose.pose.orientation.w = 1.0

        self.goalIndex += 1

        if self.goalIndex <= self.noOfTests:
            self.finished = False

        return goal

    # send the goal to move_base, then block; progress is driven entirely by
    # the callbacks registered with send_goal
    def execute(self, goal):

        print("Executing goal")
        self.client.send_goal(goal, self.done, self.active_cb,
                              self.feedback_cb)

        while not (self.last and self.finished):
            rospy.spin()

    # callback invoked when move_base finishes executing a goal;
    # status codes follow actionlib_msgs/GoalStatus
    def done(self, status, result):

        if status == 1:
            rospy.loginfo("Status 1")  # ACTIVE

        elif status == 2:
            rospy.loginfo("Status 2")  # PREEMPTED

        elif status == 3:
            rospy.loginfo("Reached Goal")  # SUCCEEDED
            rospy.set_param("trial", self.trial_completed + 1)
            if (self.goalIndex < self.noOfTests):

                self.run()

            else:
                rospy.loginfo(' ######### End of Trial ######### ')
                rospy.set_param("save_flag", True)
                self.trial_restart()

        elif status == 4:
            rospy.loginfo("Status 4: Goal pose " + str(self.count) +
                          " was aborted by the Action Server")

            rospy.set_param("save_flag", True)
            rospy.set_param("trial", self.trial_completed + 1)

            self.trial_restart()

        elif status == 5:
            rospy.loginfo("Status 5")  # REJECTED

        elif status == 6:
            rospy.loginfo("Status 6")  # PREEMPTING

        elif status == 7:
            rospy.loginfo("Status 7")  # RECALLING

        elif status == 8:
            rospy.loginfo("Status 8")  # RECALLED
        else:
            rospy.logerr("Unexpected goal status: " + str(status))

    def trial_restart(self):
        self.trial_completed += 1
        self.goal_list_RowIndex += 1
        self.finished = True
        self.goalIndex = 0

        if self.last:
            print('All Trials Completed')
            rospy.set_param("all_finished_flag", True)
            self.shutdown()
        else:
            self.populateGoalLists()

            if self.trial_completed == self.noOfTrials - 1:
                self.last = True

            # reset env
            self.env.reset()

            self.run()

    def populateGoalLists(self):
        self.goal_x_list = self.goal_list[self.goal_list_RowIndex, :, 0]
        self.goal_y_list = self.goal_list[self.goal_list_RowIndex, :, 1]

    def active_cb(self):
        # print("Active callback")
        rospy.set_param("goal_reached_flag", True)

    def feedback_cb(self, feedback):
        # print("Feedback callback")
        rospy.set_param("store_flag", True)

    def run(self):
        goal = self.setGoal()
        self.client.send_goal(goal, self.done, self.active_cb,
                              self.feedback_cb)

    def shutdown(self):
        rospy.signal_shutdown("Goal pose " + str(self.count) +
                              " aborted, shutting down!")
Example #7: move_base client entry point (continues Example #6)
if __name__ == '__main__':
    rospy.init_node('movebase_action_client')  # init the node before any ROS calls
    env = Env(4)
    print("Trying to reset sim")
    env.reset()
    try:
        result = Move()
    except rospy.ROSInterruptException:
        rospy.loginfo("Error encountered in move_base initialization")

    print("RosPy got shut down, Bye Bye!")