Example #1
import csv

import gym


def train(num_episodes=20000):
    """Train the DDPG agent on the task and log per-episode metrics to CSV."""

    # Create the task environment.
    env = gym.make(name)

    # Create the DDPG agent in the task environment.
    agent = DDPG(env)

    with open(name + '.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(labels)

        i_step = 0
        for i_episode in range(1, num_episodes + 1):
            # start a new episode
            state = agent.reset()
            sum_reward = 0.0
            N = 0
            while True:
                # env.render()
                # Actor commands the action
                action = agent.act(state)
                # Environment reacts with next state, reward and done for
                # end-of-episode
                next_state, reward, done, info = env.step(action)
                # Agent (actor-critic) learns
                losses = agent.step(action, reward, next_state, done)
                # S <- S'
                state = next_state
                sum_reward += reward
                N += 1
                i_step += 1
                # if i_step % 1000 == 0 and losses is not None:
                if done and losses is not None:
                    loss_critic = losses
                    # End of episode. Show metrics.
                    to_write = (i_episode, i_step, loss_critic, sum_reward / N)
                    print('\rEpisode: {:4d}, '
                          'Step: {:7d}, '
                          'Loss-crit: {:10.4f}, '
                          'Av Rwd: {:10.4f}'.format(*to_write),
                          end='', flush=True)  # '\r' re-uses the same console line.
                    # Write CSV row
                    for i, label in enumerate(labels):
                        results[label].append(to_write[i])
                    writer.writerow(to_write)
                if done:
                    break

    # Plot: unpack the per-episode metric series accumulated in `results`.
    i_episode, i_step, loss_critic, avg_reward = (results[label] for label in labels)
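
The function reads `name`, `labels`, and `results` from the enclosing scope. A minimal sketch of that surrounding setup, assuming a Gym environment id and the four per-episode metrics written above; the specific names and values are placeholders, not part of the original snippet:

name = 'MountainCarContinuous-v0'  # placeholder environment id
labels = ['episode', 'step', 'loss_critic', 'avg_reward']
results = {label: [] for label in labels}  # populated by train() per episode

train(num_episodes=20000)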
Example #2
def main(argv):
    """Evaluate a trained DDPG agent by rendering a few episodes."""
    env_name = FLAGS.env_name
    env = gym.make(env_name)
    agent = DDPG(env, load_path=FLAGS.load_path, training=False)

    for episodes in range(FLAGS.num_episodes):
        done = False
        obs = env.reset()
        episode_reward = 0
        while not done:
            env.render()
            # Act deterministically (no exploration noise) during evaluation.
            action = agent.act(obs, noise=False).flatten()
            obs, rew, done, info = env.step(action)
            obs = obs.flatten()
            episode_reward += rew
        print(f'Episode Reward: {episode_reward}')
    env.close()
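
This entry point relies on absl flags defined elsewhere in the script. A minimal sketch of that setup, assuming flag names that match the attributes read above; the defaults are placeholders, not from the original:

from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string('env_name', 'Pendulum-v0', 'Gym environment id to evaluate on.')
flags.DEFINE_string('load_path', None, 'Path to the trained DDPG checkpoint.')
flags.DEFINE_integer('num_episodes', 10, 'Number of evaluation episodes to render.')

if __name__ == '__main__':
    app.run(main)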
Example #3
import itertools

from tensorboardX import SummaryWriter  # assumed source of SummaryWriter (logdir kwarg)

writer = SummaryWriter(logdir=LOG_DIR)

total_numsteps = 0
n_updates = 0  # number of policy updates
for i_episode in itertools.count(1):

    episode_reward = 0
    episode_steps = 0
    done = False
    state = env.reset()

    while not done:
        if total_numsteps < args.start_steps:
            action = env.action_space.sample()  # Sample random action
        else:
            action = agent.act(state)  # Sample action from policy

        next_state, reward, done, _ = env.step(action)  # Step
        episode_steps += 1
        total_numsteps += 1
        episode_reward += reward
        # Ignore the "done" signal if it comes from hitting the time horizon.
        # (https://github.com/openai/spinningup/blob/master/spinup/algos/sac/sac.py)
        mask = 1 if episode_steps == env._max_episode_steps else float(not done)

        agent.step(state, action, reward, next_state, mask)
        if total_numsteps >= args.start_steps and total_numsteps % args.update_freq == 0:
            critic_loss, actor_loss = agent.update()

        state = next_state
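
    # End of episode: one way to log the episode metrics to the SummaryWriter
    # created above (a sketch; the tag names are illustrative, not from the
    # original snippet).
    writer.add_scalar('reward/train', episode_reward, i_episode)
    writer.add_scalar('steps/episode', episode_steps, i_episode)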
Example #4
             action_shape,
             batch_size=128,
             gamma=0.995,
             tau=0.001,
             actor_lr=0.0001,
             critic_lr=0.001,
             use_layer_norm=True)
print('DDPG agent configured')
agent.load_model(agent.current_path + '/model/model.ckpt')
agent.load_memory()

max_episode = 10000
tot_rewards = []
print('env reset')
observation, done = env.reset()
action = agent.act(observation)
print(action)
rospy.sleep(0.8)
observation, reward, done = env.step(action)
rospy.sleep(0.8)
noise_sigma = 0.15
save_cutoff = 1
cutoff_count = 0
save_count = 0
curr_highest_eps_reward = -1000.0
for i in range(max_episode):
    if i % 100 == 0 and noise_sigma > 0.03:
        # Periodically refresh the exploration noise with a halved sigma.
        agent.noise = OUNoise(agent.nb_actions, sigma=noise_sigma)
        noise_sigma /= 2.0
    step_num = 0
    while not done:
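
The fragment above periodically re-instantiates `OUNoise` with a decaying sigma. A minimal sketch of such an Ornstein-Uhlenbeck exploration-noise class, assuming only the constructor shape used above (action size plus a `sigma` keyword); this is illustrative, not the original implementation:

import numpy as np


class OUNoise:
    """Temporally correlated exploration noise for DDPG action selection."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Restart the process from its long-run mean.
        self.state = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) dt + sigma * dW, with dt = 1.
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
        self.state = x + dx
        return self.state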
Example #5
writer1 = csv.writer(fout1)
writer1.writerow(labels)

fout2 = open("physical_info.csv", 'w')
labels = [
    'time', 'x', 'y', 'z', 'phi', 'theta', 'psi', 'x_velocity', 'y_velocity',
    'z_velocity', 'phi_velocity', 'theta_velocity', 'psi_velocity',
    'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4'
]
writer2 = csv.writer(fout2)
writer2.writerow(labels)

for i_episode in range(1, num_episodes + 1):
    state = agent.reset()  # start a new episode
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state

        # Write time, position (x, y, z) and the four rotor commands as a CSV row.
        to_write = [task.sim.time] + list(
            task.sim.pose[:3]
        )  #+ list(task.sim.v) + list(task.sim.angular_v) + list(action)
        fout2.write(
            "{:4.2f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f},   {:7.3f}\n"
            .format(to_write[0], to_write[1], to_write[2], to_write[3],
                    action[0], action[1], action[2], action[3]))

        if done:
            print("\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f})".format(