def policy_rollout(agent, path, t_interval=1, timesteps=200):
    for j in range(1):
        robot = SwimmingRobot(a1=0, a2=0, t_interval=t_interval)
        xs = [robot.x]
        ys = [robot.y]
        thetas = [robot.theta]
        a1s = [robot.a1]
        a2s = [robot.a2]
        steps = [0]
        # robot.randomize_state(enforce_opposite_angle_signs=True)
        robot_params = []
        robot_param = [
            robot.x, robot.y, robot.theta,
            float(robot.a1),
            float(robot.a2), robot.a1dot, robot.a2dot
        ]
        robot_params.append(robot_param)
        print('Beginning', j + 1, 'th Policy Rollout')
        try:
            for i in range(timesteps):
                # rollout
                state = robot.state
                print('In', i + 1, 'th iteration the initial state is: ',
                      state)
                old_x = robot.x
                action = agent.choose_action(state)
                print('In', i + 1, 'th iteration the chosen action is: ',
                      action)
                robot.move(action=action)
                new_x = robot.x
                print('In', i + 1, 'th iteration, the robot moved ',
                      new_x - old_x, ' in x direction')

                # add values to lists
                xs.append(robot.x)
                ys.append(robot.y)
                thetas.append(robot.theta)
                a1s.append(robot.a1)
                a2s.append(robot.a2)
                steps.append(i + 1)
                robot_param = [
                    robot.x, robot.y, robot.theta,
                    float(robot.a1),
                    float(robot.a2), robot.a1dot, robot.a2dot
                ]
                robot_params.append(robot_param)

        except ZeroDivisionError as e:
            print(str(e), 'occured at ', j + 1, 'th policy rollout')

        # plotting
        make_rollout_graphs(xs, ys, thetas, a1s, a2s, steps, path=path)
        generate_csv(robot_params, path + "/policy_rollout.csv")
Exemplo n.º 2
0
    robot = SwimmingRobot(t_interval=1, a1=0, a2=0)
    robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
    robot_params.append(robot_param)
    # for i in range(50):
    #     print('i: ', i)
    #     if i%2 == 0:
    #         action = (-pi/2, pi/2)
    #     else:
    #         action = (pi/2, -pi/2)
    #     for j in range(40):
    #         print('j: ', j)
    #         print('a1 a2: ', robot.a1, robot.a2)
    #         robot.move(action)
    #         robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
    #         robot_params.append(robot_param)

    for t in range(1000):
        print(t + 1, 'th iteration')
        a1dot = 0
        a2dot = 1/5 * cos(t/5)
        action = (a1dot, a2dot)
        robot.move(action)
        print('action taken(a1dot, a2dot): ', action)
        print('robot x y theta a1 a2: ', robot.x, robot.y, robot.theta, robot.a1, robot.a2)
        robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
        robot_params.append(robot_param)

    print(robot_params)
    generate_csv(robot_params, 'csv_outputs/swimming_test_a1_idle.csv')