Example #1
0
def interact(env: Env, agent: Agent,
             start_obs: Arrayable) -> Tuple[array, array, array]:
    """One step of interaction between env and agent.

    The agent picks an action for the given observation, the environment
    executes it, and the transition is fed back to the agent via
    ``observe`` before the result is returned.

    :args env: environment exposing ``step(action)`` which returns
        ``(next_obs, reward, done, info)``
    :args agent: agent exposing ``step(obs)`` (action selection) and
        ``observe(next_obs, reward, done, time_limit)``
    :args start_obs: observation the agent acts on

    :return: (next observation, reward, terminal?)
    """
    action = agent.step(start_obs)
    next_obs, reward, done, information = env.step(action)
    # dict.get replaces the `x[k] if k in x else None` double lookup and
    # yields None when the env reports no time limit.
    time_limit = information.get('time_limit')
    agent.observe(next_obs, reward, done, time_limit)
    return next_obs, reward, done
Example #2
0
# Train the agent on the hover/target task and record per-episode stats.
target_pos = np.array([0., 0., 150.])
task = Task(init_pose=init_pos, target_pos=target_pos, runtime=10.)
agent = Agent(task)

# save rewards for plotting
rewards = []            # episode scores
rotor_speeds_var = []   # variance of the final action's rotor speeds

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    step = 0
    while True:
        step += 1
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        if done:
            rewards.append(agent.score)
            rotor_speeds_var.append(np.var(action))
            print(
                "\r\nEp={:4d}, score={:7.3f} (top={:7.3f}) pos={} {} {} {} {} {} {}"
                .format(i_episode, agent.score, agent.top_score,
                        # pose[:3] is x/y/z, pose[3:6] is phi/theta/psi;
                        # index directly instead of slicing first.
                        round(task.sim.pose[0], 2),
                        round(task.sim.pose[1], 2),
                        round(task.sim.pose[2], 2),
                        round(task.sim.pose[3], 2),
                        round(task.sim.pose[4], 2),
                        round(task.sim.pose[5], 2),
                        # BUG FIX: ndigits belongs inside round(); the
                        # original passed 2 as an extra, silently ignored
                        # .format() argument, so the sum was not rounded.
                        round(abs(task.sim.v).sum(), 2)),
                end="")  # [debug]
            # BUG FIX: without this break the loop kept stepping a
            # finished episode forever (cf. the parallel loop below,
            # which does break on done).
            break
Example #3
0
# Results with the conditions of the quadcopter: run episodes and log
# one time series per labelled simulator quantity for later plotting.
labels = [
    'time', 'x', 'y', 'z', 'phi', 'theta', 'psi', 'x_velocity', 'y_velocity',
    'z_velocity', 'phi_velocity', 'theta_velocity', 'psi_velocity',
    'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4'
]
results = {x: [] for x in labels}

num_episodes = 1000

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    while True:
        action = agent.act(state)
        next_state, reward, done = takeoff.step(action)
        agent.step(reward, done)
        state = next_state
        # One row per step: time, pose (6), velocities (3), angular
        # velocities (3), rotor speeds (4) — same order as `labels`.
        to_write = [takeoff.sim.time] + list(takeoff.sim.pose) + list(
            takeoff.sim.v) + list(takeoff.sim.angular_v) + list(action)
        # zip pairs each label with its value directly — clearer than
        # indexing via range(len(labels)).
        for label, value in zip(labels, to_write):
            results[label].append(value)
        if done:
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}"
                .format(i_episode, agent.score, agent.best_score,
                        agent.noise_scale),
                end="")  # [debug]
            break
    sys.stdout.flush()
''' Shows the results of the control
plt.plot(results['time'], results['x'], label='x')