def policy_rollout(agent, path, t_interval=1, timesteps=200):
    """Roll out the agent's policy on a SwimmingRobot for `timesteps` steps, then plot the
    trajectory and save the per-step robot parameters to a CSV under `path`."""
    # single rollout; j is kept so the log messages can refer to the rollout index
    for j in range(1):
        robot = SwimmingRobot(a1=0, a2=0, t_interval=t_interval)
        xs = [robot.x]
        ys = [robot.y]
        thetas = [robot.theta]
        a1s = [robot.a1]
        a2s = [robot.a2]
        steps = [0]
        # robot.randomize_state(enforce_opposite_angle_signs=True)
        robot_params = []
        robot_param = [
            robot.x, robot.y, robot.theta,
            float(robot.a1),
            float(robot.a2), robot.a1dot, robot.a2dot
        ]
        robot_params.append(robot_param)
        print('Beginning policy rollout {}'.format(j + 1))
        try:
            for i in range(timesteps):
                # rollout
                state = robot.state
                print('Iteration {}: initial state is {}'.format(i + 1, state))
                old_x = robot.x
                action = agent.choose_action(state)
                print('Iteration {}: chosen action is {}'.format(i + 1, action))
                robot.move(action=action)
                new_x = robot.x
                print('Iteration {}: the robot moved {} in the x direction'.format(
                    i + 1, new_x - old_x))

                # add values to lists
                xs.append(robot.x)
                ys.append(robot.y)
                thetas.append(robot.theta)
                a1s.append(robot.a1)
                a2s.append(robot.a2)
                steps.append(i + 1)
                robot_param = [
                    robot.x, robot.y, robot.theta,
                    float(robot.a1),
                    float(robot.a2), robot.a1dot, robot.a2dot
                ]
                robot_params.append(robot_param)

        except ZeroDivisionError as e:
            print('{} occurred during policy rollout {}'.format(e, j + 1))

        # plotting
        make_rollout_graphs(xs, ys, thetas, a1s, a2s, steps, path=path)
        generate_csv(robot_params, path + "/policy_rollout.csv")
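

# Usage sketch (illustrative, not from the original source): roll out a trained agent and
# save the trajectory plots and CSV. Assumes `dqn_agent` exposes choose_action(state), as
# the DQN agent in the later examples does, and that the output directory already exists,
# since generate_csv writes to path + "/policy_rollout.csv".
#
#     policy_rollout(dqn_agent, path='trials/example_rollout', t_interval=8, timesteps=200)
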
def get_random_edge_states():
    """Return a SwimmingRobot started either at the neutral configuration (20% chance) or
    at one of four joint-angle edge cases (20% chance each)."""
    num = np.random.rand()
    if num < 0.2:
        print('Normal robot!')
        robot = SwimmingRobot(t_interval=1)
    elif num < 0.4:
        print('edge case 1!')
        robot = SwimmingRobot(a1=-pi / 2, a2=pi / 2, t_interval=0.5)
    elif num < 0.6:
        print('edge case 2!')
        robot = SwimmingRobot(a1=-pi / 2, a2=-pi / 2, t_interval=0.5)
    elif num < 0.8:
        print('edge case 3!')
        robot = SwimmingRobot(a1=pi / 2, a2=-pi / 2, t_interval=0.5)
    else:
        print('edge case 4!')
        robot = SwimmingRobot(a1=pi / 2, a2=pi / 2, t_interval=0.5)

    return robot
Example #3
def main():

    # 0.99996 for 30000 iterations
    # 0.999 for 1000 iterations
    # 0.9998 for 10000 iterations
    # 0.99995 for 20000
    # 0.999965 for 40000
    # 0.999955 for 50000
    # 0.999975 for 60000
    # 0.999977 for 100000
    # 0.999993 for 200000
    # 0.999997 for 500000
    # 0.999997 for 1000000
    # 0.999999 for 2000000
    # 0.9999994 for 3000000
    # 0.9999997 for 6000000
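    # One way to derive such a rate for a target floor (used in the argparse-driven main()
    # further down; not necessarily how the values above were chosen):
    # epsilon_decay = epsilon_min ** (1 / total_iterations)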

    robot = SwimmingRobot(t_interval=8)
    trial_name = 'DQN_swimming_w_theta_forward_20000_iters'
    trial_num = 0
    reward_function = forward_reward_function
    episodes = 20
    iterations = 1000
    total_iterations = episodes * iterations
    network_update_freq = 20
    batch_size = 8
    epsilon_decay = 0.99995
    learning_rate = 2e-4
    model_architecture = (50, 10)

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=False,
                          input_dim=5,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=20,
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=0.1,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=None)

    dqn_agent.run()
Example #4
def main():
    robot_type = args.robot_type
    if robot_type == "swimming":
        robot = SwimmingRobot(t_interval=args.t_interval,
                              a_upper=args.a_upper,
                              a_lower=args.a_lower,
                              no_joint_limit=args.no_joint_limit)
        check_singularity = False
    elif robot_type == "wheeled":
        robot = ThreeLinkRobot(t_interval=args.t_interval)
        check_singularity = True
    else:
        raise ValueError("Unknown robot type: {}".format(robot_type))

    episodes = args.episodes
    iterations = args.iterations
    total_iterations = episodes * iterations
    if args.reward_func == "forward":
        reward_function = forward_reward_function
    elif args.reward_func == "left":
        reward_function = left_reward_function
    else:
        raise ValueError("Unknown reward function: {}".format(args.reward_func))

    network_update_freq = args.network_update_freq
    batch_size = args.batch_size
    epsilon_min = args.epsilon_min
    epsilon_decay = epsilon_min ** (1/total_iterations)
    learning_rate = args.learning_rate
    model_architecture = [int(num) for num in args.model_architecture.split(' ')]

    trial_num = args.trial_num
    trial_name = 'DQN_{}_{}_{}_iters'.format(robot_type, args.reward_func, total_iterations)
    if args.trial_note:
        trial_name += "_{}".format(args.trial_note)

    params = {
        "robot_type": args.robot_type,
        "t_interval": args.t_interval,
        "a_upper": args.a_upper,
        "a_lower": args.a_lower,
        "no_joint_limit:": args.no_joint_limit,
        "trial_num": args.trial_num,
        "trial_note": args.trial_note,
        "episodes": args.episodes,
        "iterations": args.iterations,
        "reward_func": args.reward_func,
        "network_update_freq": args.network_update_freq,
        "epsilon_min": args.epsilon_min,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "model_architecture": args.model_architecture,
    }

    dqn_agent = DQN_Agent(robot=robot,
                          reward_function=reward_function,
                          trial_name=trial_name,
                          trial_num=trial_num,
                          episodes=episodes,
                          iterations=iterations,
                          network_update_freq=network_update_freq,
                          check_singularity=check_singularity,
                          input_dim=len(robot.state) + 2,
                          output_dim=1,
                          actions_params=(-pi/8, pi/8, pi/8),
                          model_architecture=model_architecture,
                          memory_size=total_iterations//50,
                          memory_buffer_coef=5,  # temporarily 5; the usual value is 20
                          randomize_theta=False,
                          batch_size=batch_size,
                          gamma=0.99,
                          epsilon=1.0,
                          epsilon_min=epsilon_min,
                          epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate,
                          params=params)

    dqn_agent.run()
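

# The main() above reads its settings from a module-level `args` object that is not shown in
# this excerpt. Below is a minimal argparse sketch covering only the attributes the code
# actually reads; every default value here is an illustrative assumption, not taken from the
# original project.
import argparse
from math import pi

parser = argparse.ArgumentParser(
    description='Train a DQN agent on a swimming or wheeled three-link robot')
parser.add_argument('--robot_type', choices=['swimming', 'wheeled'], default='swimming')
parser.add_argument('--t_interval', type=float, default=8.0)
parser.add_argument('--a_upper', type=float, default=pi / 2)
parser.add_argument('--a_lower', type=float, default=-pi / 2)
parser.add_argument('--no_joint_limit', action='store_true')
parser.add_argument('--episodes', type=int, default=20)
parser.add_argument('--iterations', type=int, default=1000)
parser.add_argument('--reward_func', choices=['forward', 'left'], default='forward')
parser.add_argument('--network_update_freq', type=int, default=20)
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--epsilon_min', type=float, default=0.1)
parser.add_argument('--learning_rate', type=float, default=2e-4)
parser.add_argument('--model_architecture', type=str, default='50 10')
parser.add_argument('--trial_num', type=int, default=0)
parser.add_argument('--trial_note', type=str, default='')
args = parser.parse_args()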
Example #5
import csv

from Robots.ContinuousDeepRobots import ThreeLinkRobot
from Robots.ContinuousSwimmingBot import SwimmingRobot
from math import pi


def generate_csv(robot_params, filename):
    """Write the collected robot parameter rows to a CSV file."""
    with open(filename, 'w', newline='') as file:
        w = csv.writer(file)
        w.writerows(robot_params)


if __name__ == "__main__":

    robot_params = []
    # robot = ThreeLinkRobot(a1=-0.01, a2=0.01, t_interval=0.02)
    robot = SwimmingRobot(t_interval=1, a1=0, a2=0)
    robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
    robot_params.append(robot_param)
    # for i in range(50):
    #     print('i: ', i)
    #     if i%2 == 0:
    #         action = (-pi/2, pi/2)
    #     else:
    #         action = (pi/2, -pi/2)
    #     for j in range(40):
    #         print('j: ', j)
    #         print('a1 a2: ', robot.a1, robot.a2)
    #         robot.move(action)
    #         robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
    #         robot_params.append(robot_param)
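
    # The collected rows can then be written out with the helper above; a minimal sketch
    # (the filename is illustrative, not taken from the original):
    # generate_csv(robot_params, 'swimming_robot_params.csv')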
def perform_DQN(agent,
                episodes,
                iterations,
                path,
                batch_size=4,
                C=30,
                t_interval=1,
                randomize_theta=False):
    """
    :param agent: the RL agent
    :param batch_size: size of minibatch sampled from replay buffer
    :param C: network update frequency
    :return: agent, and other information about DQN
    """
    avg_losses = []
    std_losses = []
    avg_rewards = []
    std_rewards = []
    avg_Qs = []
    std_Qs = []
    # gd_iterations = [] # gradient descent iterations
    # gd_iteration = 0
    num_episodes = []

    try:
        # loop through each episode
        for e in range(1, episodes + 1):

            # save model
            if e % max(1, episodes // 10) == 0:
                agent.save_model(path, e)

            theta = random.uniform(-pi / 4, pi / 4) if randomize_theta else 0
            robot = SwimmingRobot(a1=0,
                                  a2=0,
                                  theta=theta,
                                  t_interval=t_interval)
            # state = robot.randomize_state()
            state = robot.state
            rewards = []
            losses = []
            Qs = []

            # loop through each iteration
            for i in range(1, iterations + 1):
                # print('In ', e, ' th episode, ', i, ' th iteration, the initial state is: ', state)
                action = agent.choose_action(state, epsilon_greedy=True)
                print(
                    'In {}th episode {}th iteration, the chosen action is: {}'.
                    format(e, i, action))
                robot_after_transition, reward, next_state = agent.act(
                    robot=robot,
                    action=action,
                    c_x=50,
                    c_joint=0,
                    c_zero_x=50,
                    c_theta=5)
                print('The reward is: {}'.format(reward))
                rewards.append(reward)
                # print('In ', e, ' th episode, ', i, ' th iteration, the state after transition is: ', next_state)
                agent.remember(state, action, reward, next_state)
                state = next_state
                robot = robot_after_transition
                if len(agent.memory) > agent.memory_size / 20:
                    loss, Q = agent.replay(batch_size)
                    # gd_iteration += 1
                    losses.append(loss)
                    Qs.append(Q)
                    # gd_iterations.append(gd_iteration)
                    print('The average loss is: {}'.format(loss))
                    print('The average Q is: {}'.format(Q))

                if i % C == 0:
                    agent.update_model()

            num_episodes.append(e)
            avg_rewards.append(np.mean(rewards))
            std_rewards.append(np.std(rewards))
            avg_losses.append(np.mean(losses))
            std_losses.append(np.std(losses))
            avg_Qs.append(np.mean(Qs))
            std_Qs.append(np.std(Qs))

    except TypeError as err:
        print(err)

    finally:

        # save learning data
        save_learning_data(path, num_episodes, avg_rewards, std_rewards,
                           avg_losses, std_losses, avg_Qs, std_Qs)
        return agent, num_episodes, avg_rewards, std_rewards, avg_losses, std_losses, avg_Qs, std_Qs
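
# Usage sketch (illustrative, not from the original source): perform_DQN expects an agent
# object exposing choose_action, act, remember, replay, update_model and save_model; the
# DQN agent used elsewhere in these examples appears to provide these, but that is an
# assumption, and the path below is a hypothetical output directory.
#
#     results = perform_DQN(dqn_agent, episodes=20, iterations=1000,
#                           path='trials/dqn_swimming', batch_size=8, C=30)
#     agent, num_episodes, avg_rewards, std_rewards, \
#         avg_losses, std_losses, avg_Qs, std_Qs = results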
    #     print('Normal robot!')
    #     robot = SwimmingRobot(t_interval=1)
    # elif num < 0.4:
    #     print('edge case 1!')
    #     robot = SwimmingRobot(a1=-pi/2, a2=pi/2, t_interval=0.5)
    # elif num < 0.6:
    #     print('edge case 2!')
    #     robot = SwimmingRobot(a1=-pi/2, a2=-pi/2, t_interval=0.5)
    # elif num < 0.8:
    #     print('edge case 3!')
    #     robot = SwimmingRobot(a1=pi/2, a2=-pi/2, t_interval=0.5)
    # else:
    #     print('edge case 4')
    #     robot = SwimmingRobot(a1=pi/2, a2=pi/2, t_interval=0.5)

    robot = SwimmingRobot(a1=0, a2=0, t_interval=1)
    # state = robot.randomize_state()
    state = robot.state
    rewards = []
    losses = []
    for i in range(1, ITERATIONS + 1):
        # print('In ', e, ' th episode, ', i, ' th iteration, the initial state is: ', state)
        action = agent.choose_action(state, fixed_policy=True)
        print('In ', e, ' th episode, ', i,
              ' th iteration, the chosen action is: ', action)
        robot_after_transition, reward, next_state = agent.act(robot, action)
        print('In ', e, ' th episode, ', i, ' th iteration, the reward is: ',
              reward)
        rewards.append(reward)
        # print('In ', e, ' th episode, ', i, ' th iteration, the state after transition is: ', next_state)
        agent.remember(state, action, reward, next_state)