def main(test_episodes=20, test_steps=50):
    """Evaluate a previously trained DQN assembly policy.

    Restores the saved model (zero training budget passed to deepq.learn),
    rolls it out for `test_episodes` episodes of at most `test_steps` steps
    each, and saves per-episode rewards and observation traces to disk.

    Args:
        test_episodes: number of evaluation episodes to run.
        test_steps: maximum number of environment steps per episode.
    """
    env = env_search_control()
    print(env.observation_space)
    print(env.action_space)

    # All training budgets are zero: deepq.learn only restores the
    # checkpoint given by load_path and returns the greedy policy.
    act = deepq.learn(
        env,
        network=models.mlp(num_layers=1, num_hidden=64),
        total_timesteps=0,
        total_episodes=0,
        total_steps=0,
        load_path="assembly_model_fuzzy_final.pkl")

    episode_rewards = []
    episode_states = []
    for ep in range(test_episodes):
        obs, done = env.reset()
        ep_reward = 0
        ep_observations = []
        logger.info(
            "================== The {} episode start !!! ===================".
            format(ep))
        for step in range(test_steps):
            # act expects a batch dimension, hence obs[None].
            obs, rew, done, _ = env.step(act(obs[None])[0], step)
            ep_reward += rew
            ep_observations.append(obs)
        episode_rewards.append(cp.deepcopy(ep_reward))
        episode_states.append(cp.deepcopy(ep_observations))
        print("Episode reward", ep_reward)

    np.save('../data/test_episode_reward_fuzzy_final_new', episode_rewards)
    np.save('../data/test_episode_state_fuzzy_final_new', episode_states)
def search(arg):
    """Run one force-controlled peg-search episode and record its trajectory.

    Resets the environment, then (if the reset reports done) steps a null
    action while logging the 6-D force and remaining pose/state components of
    each snapshot; finally pulls the peg back and saves the recordings.

    Args:
        arg: dict-like configuration; only arg['steps'] (step budget) is used.
    """
    env = env_search_control()
    state, obs, done = env.reset()
    print('force', state[:6])
    print('state', state[6:])

    recorded_forces = []
    recorded_states = []
    # force control
    if done:
        for step_idx in range(arg['steps']):
            snapshot = env.get_state()
            # First six entries are the force reading, the rest the state.
            recorded_forces.append(cp.deepcopy(snapshot[:6]))
            recorded_states.append(cp.deepcopy(snapshot[6:]))
            _, _, finish = env.step(0, step_idx)
            if finish:
                break

    pull_finish = env.pull_search_peg()
    np.save('../data/search_force', recorded_forces)
    np.save('../data/search_state', recorded_states)
def __init__(self, hyperparams):
    """Initialize the agent.

    Args:
        hyperparams: Dictionary of hyperparameters; overlaid on a deep copy
            of the AGENT default configuration before being passed to the
            Agent base-class constructor.
    """
    cfg = copy.deepcopy(AGENT)
    cfg.update(hyperparams)
    Agent.__init__(self, cfg)

    # Fixed-length, non-fuzzy, noise-free search-control environment.
    self._env = env_search_control(step_max=200, fuzzy=False, add_noise=False)
    self.x0 = self._hyperparams['x0']
    self.use_tf = False
    self.observations_stale = True
def main():
    """Train a DQN policy on the assembly search-control task.

    Fine-tunes from the checkpoint in load_path with parameter-space noise
    enabled, then saves the resulting model to assembly_model_fuzzy_final.pkl.
    """
    env = env_search_control()

    policy = deepq.learn(
        env,
        network=models.mlp(num_hidden=64, num_layers=1),
        lr=1e-3,
        total_timesteps=5000,
        total_episodes=100,
        total_steps=50,
        target_network_update_freq=20,
        buffer_size=32,
        learning_starts=32,
        learning_times=10,
        exploration_fraction=0.2,
        exploration_final_eps=0.01,
        print_freq=10,
        param_noise=True,
        save_path='_fuzzy_noise_final',
        load_path='assembly_model_fuzzy_final.pkl'
    )

    # Persist the trained policy for later evaluation runs.
    policy.save("assembly_model_fuzzy_final.pkl")
np.save( data_path + 'train_states_' + algorithm_name + '_' + noise_type + file_name, epochs_states) np.save( data_path + 'train_times_' + algorithm_name + '_' + noise_type + file_name, epochs_times) # # agent save agent.store(model_path + 'train_model_' + algorithm_name + '_' + noise_type + file_name) if __name__ == '__main__': algorithm_name = 'dyna_nn_ddpg' env = env_search_control(step_max=200, fuzzy=False, add_noise=False) data_path = './prediction_data/' model_path = './prediction_model/' file_name = '_epochs_5_episodes_100_none_fuzzy' model_name = './prediction_model/' learn(network='mlp', env=env, data_path=data_path, model_based=True, memory_extend=False, dyna_learning=True, model_type='mlp', noise_type='normal_0.2', file_name=file_name, model_path=model_path, model_name=model_name,
parser.add_argument('--steps', type=int, default=200) parser.add_argument('--memory_size', type=int, default=3000) parser.add_argument('--data-file', type=str) parser.add_argument('--lambda', type=float, default=0.6) parser.add_argument('--meta_step_size', type=float, default=0.00001) parser.add_argument('--eta', type=float, default=0.01) parser.add_argument('--loop', type=float, default=1) parser.add_argument('--noplot', action='store_false', dest='plot') parser.add_argument('--record-file', type=str) parser.add_argument('--seed', type=int) return vars(parser.parse_args()) if __name__ == '__main__': args = parse_args() env = env_search_control() obs, state, _ = env.reset() epoch_force_pose = [] epoch_action = [] action = np.zeros(6) for i in range(args['steps']): next_obs, next_state, reward, done, safe_or_not, execute_action = env.step( np.array([0., 0, 0., 0., 0., 0.]), i) epoch_force_pose.append(cp.deepcopy(next_state)) epoch_action.append(cp.deepcopy(execute_action)) if done: env.pull_peg_up()