Example #1
import csv

import gym


def train(num_episodes=20000):
    """Train the DDPG agent on the task and log per-episode metrics to CSV."""

    # Create the task environment.
    env = gym.make(name)

    # Create the DDPG agent in the task environment.
    agent = DDPG(env)

    with open(name + '.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(labels)

        i_step = 0
        for i_episode in range(1, num_episodes + 1):
            # start a new episode
            state = agent.reset()
            sum_reward = 0.0
            N = 0
            while True:
                # env.render()
                # Actor commands the action
                action = agent.act(state)
                # Environment reacts with next state, reward and done for
                # end-of-episode
                next_state, reward, done, info = env.step(action)
                # Agent (actor-critic) learns
                losses = agent.step(action, reward, next_state, done)
                # S <- S'
                state = next_state
                sum_reward += reward
                N += 1
                i_step += 1
                # if i_step % 1000 == 0 and losses is not None:
                if done and losses is not None:
                    loss_critic = losses
                    # End of episode. Show metrics.
                    to_write = (i_episode, i_step, loss_critic, sum_reward / N)
                    print('\rEpisode: {:4d}, '
                          'Step: {:7d}, '
                          'Loss-crit: {:10.4f}, '
                          'Av Rwd: {:10.4f}'.format(*to_write),
                          end='', flush=True)  # '\r' re-uses the same console line.
                    # Write CSV row
                    for i, label in enumerate(labels):
                        results[label].append(to_write[i])
                    writer.writerow(to_write)
                if done:
                    break

    # Plot: unpack the per-episode metric series accumulated in `results`.
    i_episode, i_step, loss_critic, avg_reward = (results[label] for label in labels)
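
The function reads `name`, `labels`, and `results` from the enclosing scope. A minimal sketch of that surrounding setup, assuming a Gym environment id and the four per-episode metrics written above; the specific names and values are placeholders, not part of the original snippet:

name = 'MountainCarContinuous-v0'  # placeholder environment id
labels = ['episode', 'step', 'loss_critic', 'avg_reward']
results = {label: [] for label in labels}  # populated by train() per episode

train(num_episodes=20000)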
Example #2
def main(argv):
    """Evaluate a trained DDPG agent by rendering a few episodes."""
    env_name = FLAGS.env_name
    env = gym.make(env_name)
    agent = DDPG(env, load_path=FLAGS.load_path, training=False)

    for episodes in range(FLAGS.num_episodes):
        done = False
        obs = env.reset()
        episode_reward = 0
        while not done:
            env.render()
            # Act deterministically (no exploration noise) during evaluation.
            action = agent.act(obs, noise=False).flatten()
            obs, rew, done, info = env.step(action)
            obs = obs.flatten()
            episode_reward += rew
        print(f'Episode Reward: {episode_reward}')
    env.close()
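
This entry point relies on absl flags defined elsewhere in the script. A minimal sketch of that setup, assuming flag names that match the attributes read above; the defaults are placeholders, not from the original:

from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string('env_name', 'Pendulum-v0', 'Gym environment id to evaluate on.')
flags.DEFINE_string('load_path', None, 'Path to the trained DDPG checkpoint.')
flags.DEFINE_integer('num_episodes', 10, 'Number of evaluation episodes to render.')

if __name__ == '__main__':
    app.run(main)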
Example #3
import itertools

from tensorboardX import SummaryWriter  # assumed source of SummaryWriter (logdir kwarg)

writer = SummaryWriter(logdir=LOG_DIR)

total_numsteps = 0
n_updates = 0  # number of policy updates
for i_episode in itertools.count(1):

    episode_reward = 0
    episode_steps = 0
    done = False
    state = env.reset()

    while not done:
        if total_numsteps < args.start_steps:
            action = env.action_space.sample()  # Sample random action
        else:
            action = agent.act(state)  # Sample action from policy

        next_state, reward, done, _ = env.step(action)  # Step
        episode_steps += 1
        total_numsteps += 1
        episode_reward += reward
        # Ignore the "done" signal if it comes from hitting the time horizon.
        # (https://github.com/openai/spinningup/blob/master/spinup/algos/sac/sac.py)
        mask = 1 if episode_steps == env._max_episode_steps else float(not done)

        agent.step(state, action, reward, next_state, mask)
        if total_numsteps >= args.start_steps and total_numsteps % args.update_freq == 0:
            critic_loss, actor_loss = agent.update()

        state = next_state
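
    # End of episode: one way to log the episode metrics to the SummaryWriter
    # created above (a sketch; the tag names are illustrative, not from the
    # original snippet).
    writer.add_scalar('reward/train', episode_reward, i_episode)
    writer.add_scalar('steps/episode', episode_steps, i_episode)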
Example #4
             action_shape,
             batch_size=128,
             gamma=0.995,
             tau=0.001,
             actor_lr=0.0001,
             critic_lr=0.001,
             use_layer_norm=True)
print('DDPG agent configured')
agent.load_model(agent.current_path + '/model/model.ckpt')
agent.load_memory()

max_episode = 10000
tot_rewards = []
print('env reset')
observation, done = env.reset()
action = agent.act(observation)
print(action)
rospy.sleep(0.8)
observation, reward, done = env.step(action)
rospy.sleep(0.8)
noise_sigma = 0.15
save_cutoff = 1
cutoff_count = 0
save_count = 0
curr_highest_eps_reward = -1000.0
for i in range(max_episode):
    if i % 100 == 0 and noise_sigma > 0.03:
        # Periodically refresh the exploration noise with a halved sigma.
        agent.noise = OUNoise(agent.nb_actions, sigma=noise_sigma)
        noise_sigma /= 2.0
    step_num = 0
    while not done:
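
The fragment above periodically re-instantiates `OUNoise` with a decaying sigma. A minimal sketch of such an Ornstein-Uhlenbeck exploration-noise class, assuming only the constructor shape used above (action size plus a `sigma` keyword); this is illustrative, not the original implementation:

import numpy as np


class OUNoise:
    """Temporally correlated exploration noise for DDPG action selection."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Restart the process from its long-run mean.
        self.state = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) dt + sigma * dW, with dt = 1.
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
        self.state = x + dx
        return self.state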
Example #5
writer1 = csv.writer(fout1)
writer1.writerow(labels)

fout2 = open("physical_info.csv", 'w')
labels = [
    'time', 'x', 'y', 'z', 'phi', 'theta', 'psi', 'x_velocity', 'y_velocity',
    'z_velocity', 'phi_velocity', 'theta_velocity', 'psi_velocity',
    'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4'
]
writer2 = csv.writer(fout2)
writer2.writerow(labels)

for i_episode in range(1, num_episodes + 1):
    state = agent.reset()  # start a new episode
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state

        # Write time, position (x, y, z) and the four rotor commands as a CSV row.
        to_write = [task.sim.time] + list(
            task.sim.pose[:3]
        )  #+ list(task.sim.v) + list(task.sim.angular_v) + list(action)
        fout2.write(
            "{:4.2f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f}\n"
            .format(to_write[0], to_write[1], to_write[2], to_write[3],
                    action[0], action[1], action[2], action[3]))

        if done:
            print("\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f})".format(