def test_current_params(actor: base_actor, log_dir: str, episode: int): steps = input("How many steps for this sample?: ") if steps == "": utils.notice("default steps {}".format( config.params["default_sample_steps"])) steps = config.params["default_sample_steps"] # Record during sampling sim_distance_file = DataCSVSaver( "{}/train_result_ep{}_distance.txt".format(log_dir, episode), ("step", "distance")) sim_phase_diffs_file = DataCSVSaver( "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_actions_file = DataCSVSaver( "{}/train_result_ep{}_actions.txt".format(log_dir, episode), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_frictions_file = DataCSVSaver( "{}/train_result_ep{}_frictions.txt".format(log_dir, episode), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) caterpillar = new_caterpillar() locomotion_distance = utils.locomotion_distance_logger(caterpillar) for step in range(int(steps)): try: (phases, frictions, _), action = caterpillar_runner.observe_and_act( actor, caterpillar, []) feedbacks, angle_ranges = action[0], action[1] caterpillar.set_oscillation_ranges(tuple(angle_ranges)) caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks)) except Exception as e: print("exception occured during sample run,", e) continue else: # Save data sim_distance_file.append_data(step, locomotion_distance(caterpillar)) sim_phase_diffs_file.append_data(step, *utils.phase_diffs(phases)) sim_actions_file.append_data(step, *action[0]) sim_frictions_file.append_data(step, *frictions) print("Moved distance:", locomotion_distance(caterpillar)) caterpillar.save_simulation("{}/train_result_ep{}.json".format( log_dir, episode))
def run_caterpillar(actor, save_dir: str, steps: int, disable_list=None, broken_value=0): if disable_list is None: disable_list = [] caterpillar = Caterpillar(config.somites, mode="default") # Record during sampling sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"), ("step", "distance")) sim_phase_diffs_file = DataCSVSaver( os.path.join(save_dir, "phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_phases_file = DataCSVSaver( os.path.join(save_dir, "phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_actions_file = DataCSVSaver( os.path.join(save_dir, "actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_frictions_file = DataCSVSaver( os.path.join(save_dir, "frictions.txt"), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) sim_tension_file = DataCSVSaver( os.path.join(save_dir, "tensions.txt"), ["step"] + ["tension_{}".format(i) for i in range(config.oscillators)]) for step in range(steps): obv, action = observe_and_act(actor, caterpillar, disable_list=disable_list, broken_value=broken_value) _, frictions, tensions = obv caterpillar.step(step, int(1 / config.params["time_dalte"] / 10)) # Save data sim_distance_file.append_data(step, caterpillar.moved_distance()) sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base()) sim_phases_file.append_data(step, *caterpillar.phis) sim_actions_file.append_data(step, *action) sim_frictions_file.append_data(step, *frictions) sim_tension_file.append_data(step, *tensions) caterpillar.save_simulation("{}/render.sim".format(save_dir)) return caterpillar.moved_distance()
def train_caterpillar(save_dir: utils.SaveDir, actor_module_name: str): actor_module = import_module(actor_module_name) actor_class = getattr(actor_module, config.COMMON_ACTOR_NAME) actor = actor_class() # Dump train parameters config.print_config() config.dump_config(save_dir.log_dir(), actor.dump_config()) pepg = PEPG(actor.params_num()) pepg.set_parameters( np.fromiter( itertools.chain.from_iterable( [p.flatten().tolist() for p in actor.current_params()]), np.float64)) distance_log = DataCSVSaver( os.path.join(save_dir.log_dir(), "distance.txt"), ("episode", "distance")) reward_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "reward.txt"), ("episode", "reward")) sigma_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "sigma.txt"), ("episode", "average sigma")) episode = 0 while episode < config.params["episodes"]: current_mus = pepg.get_parameters() epsilons = pepg.sample_epsilons(config.params["batch_size"]) params_sets = np.concatenate([ current_mus[:, np.newaxis] + epsilons, current_mus[:, np.newaxis] - epsilons ], axis=1).T print("\nEpisode: {}".format(episode)) print( "---------------------------------------------------------------------" ) rewards = [] try: with Pool(processes=config.exec_params["worker_processes"], initializer=mute) as pool: rewards = pool.map( run_simulation, [(config.params["steps"], actor_module_name, p_set, []) for p_set in params_sets]) sample_num = epsilons.shape[1] r_plus = np.array(rewards[:sample_num]) r_minus = np.array(rewards[sample_num:]) nan_samples = np.where(np.isnan(r_plus) + np.isnan(r_minus))[0] # delete nan samples epsilons = np.delete(epsilons, nan_samples, axis=1) r_plus = np.delete(r_plus, nan_samples, axis=0) r_minus = np.delete(r_minus, nan_samples, axis=0) if epsilons.shape[1] > 0: pepg.update_parameters(epsilons, r_plus, r_minus) episode += 1 # Try parameters after this episode -------------------------------------- actor.set_params(pepg.get_parameters()) caterpillar = Caterpillar(config.somites, config.oscillators_list, config.caterpillar_params) locomotion_distance = utils.locomotion_distance_logger( caterpillar) (accumulated_tension, ) = exec_steps(config.params["steps"], actor, caterpillar, [], episode=episode - 1) d = locomotion_distance(caterpillar) # reward = d - accumulated_tension / config.params["tension_divisor"] reward = d # Save parameter performance distance_log.append_data(episode, d) sigma_log.append_data(episode, np.mean(pepg.get_sigmas())) reward_log.append_data(episode, reward) announce = " --- Distance: {} Reward: {}".format(d, reward) print(announce) else: print("got nan position. update failed") except KeyboardInterrupt: command = input("\nSample? Finish? : ") if command in ["sample", "Sample"]: actor.set_params(pepg.get_parameters()) test_current_params(actor, save_dir.log_dir(), episode) continue if command in ["finish", "Finish"]: print("Ending training ...") break actor.set_params(pepg.get_parameters()) actor.save(os.path.join(save_dir.model_dir(), 'actor_model.pickle')) return actor
def run(agent: DDPG, steps: int, save_dir: str) -> float: sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"), ("step", "distance")) sim_somite_phase_diffs_file = DataCSVSaver(os.path.join(save_dir, "somite_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_gripper_phase_diffs_file = DataCSVSaver(os.path.join(save_dir, "gripper_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.grippers)]) sim_somite_phases_file = DataCSVSaver(os.path.join(save_dir, "somite_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_gripper_phases_file = DataCSVSaver(os.path.join(save_dir, "gripper_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.grippers)]) sim_somite_actions_file = DataCSVSaver(os.path.join(save_dir, "somite_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_gripper_actions_file = DataCSVSaver(os.path.join(save_dir, "gripper_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.grippers)]) sim_frictions_file = DataCSVSaver(os.path.join(save_dir, "frictions.txt"), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) sim_tension_file = DataCSVSaver(os.path.join(save_dir, "tensions.txt"), ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)]) caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params) locomotion_distance = utils.locomotion_distance_logger(caterpillar) for step in range(steps): obs, somite_phases, gripper_phases = observe(caterpillar) actions = agent.act(obs) feedbacks_somite, feedbacks_gripper = actions[:config.oscillators], actions[config.oscillators:] caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks_somite), tuple(feedbacks_gripper)) frictions, tensions, _, _ = np.split( obs, [config.somites, config.somites * 2 - 2, config.somites * 2 - 2 + config.oscillators]) sim_distance_file.append_data(step, locomotion_distance(caterpillar)) sim_somite_phase_diffs_file.append_data(step, *utils.phase_diffs(np.array(somite_phases))) sim_gripper_phase_diffs_file.append_data(step, *utils.phase_diffs(np.array(gripper_phases))) sim_somite_phases_file.append_data(step, *utils.mod2pi(np.array(somite_phases))) sim_gripper_phases_file.append_data(step, *utils.mod2pi(np.array(gripper_phases))) sim_frictions_file.append_data(step, *frictions) sim_tension_file.append_data(step, *tensions) sim_somite_actions_file.append_data(step, *feedbacks_somite) sim_gripper_actions_file.append_data(step, *feedbacks_gripper) caterpillar.save_simulation("{}/render.json".format(save_dir)) return locomotion_distance(caterpillar)
def run_earthworm(actor, save_dir: str, steps: int, disable_list=None, broken_value=0): if disable_list is None: disable_list = [] earthworm = Earthworm(config.somites, config.oscillators_list, config.earthworm_params) # Record during sampling sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"), ("step", "distance")) sim_somite_phase_diffs_file = DataCSVSaver( os.path.join(save_dir, "somite_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_somite_phases_file = DataCSVSaver( os.path.join(save_dir, "somite_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_somite_actions_file = DataCSVSaver( os.path.join(save_dir, "somite_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_frictions_file = DataCSVSaver( os.path.join(save_dir, "frictions.txt"), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) sim_tension_file = DataCSVSaver( os.path.join(save_dir, "tensions.txt"), ["step"] + ["tension_{}".format(i) for i in range(config.somites - 1)]) locomotion_distance = utils.locomotion_distance_logger( earthworm) # closure to keep locomotion distance for step in range(steps): obv, action = observe_and_act(actor, earthworm, disable_list=disable_list, broken_value=broken_value) # for (oscillator_id, target_angle) in config.fixed_angles.items(): # earthworm.set_target_angle(oscillator_id, target_angle) earthworm.step_with_feedbacks(config.params["time_delta"], tuple(action)) # Save data phases, frictions, tensions = obv sim_distance_file.append_data(step, locomotion_distance(earthworm)) sim_somite_phase_diffs_file.append_data( step, *utils.phase_diffs(phases[:config.oscillators])) sim_somite_phases_file.append_data( step, *utils.mod2pi(phases[:config.oscillators])) sim_frictions_file.append_data(step, *frictions) sim_tension_file.append_data(step, *tensions) sim_somite_actions_file.append_data(step, *action) earthworm.save_simulation("{}/render.json".format(save_dir)) return locomotion_distance(earthworm)
def run_caterpillar(actor, save_dir: str, steps: int, disable_list=None, broken_value=0): if disable_list is None: disable_list = [] caterpillar = Caterpillar(config.somites, config.oscillators_list, config.caterpillar_params) # Record during sampling sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"), ("step", "distance")) sim_phase_diffs_file = DataCSVSaver( os.path.join(save_dir, "phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_phases_file = DataCSVSaver( os.path.join(save_dir, "phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_actions_file = DataCSVSaver( os.path.join(save_dir, "actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_frictions_file = DataCSVSaver( os.path.join(save_dir, "frictions.txt"), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) sim_tension_file = DataCSVSaver( os.path.join(save_dir, "tensions.txt"), ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)]) sim_angle_range_file = DataCSVSaver( os.path.join(save_dir, "angle_ranges.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) locomotion_distance = utils.locomotion_distance_logger( caterpillar) # closure to keep locomotion distance for step in range(steps): obv, action = observe_and_act(actor, caterpillar, disable_list=disable_list, broken_value=broken_value) feedbacks, angle_ranges = action[0], action[1] caterpillar.set_oscillation_ranges(tuple(angle_ranges)) caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks)) # Save data phases, frictions, tensions = obv sim_distance_file.append_data(step, locomotion_distance(caterpillar)) sim_phase_diffs_file.append_data(step, *utils.phase_diffs(phases)) sim_phases_file.append_data(step, *phases) sim_frictions_file.append_data(step, *frictions) sim_tension_file.append_data(step, *tensions) sim_actions_file.append_data(step, *action[0]) sim_angle_range_file.append_data(step, *action[1]) caterpillar.save_simulation("{}/render.json".format(save_dir)) return locomotion_distance(caterpillar)
def train(save_dir_path: str): agent = build_agent() reset_dir(save_dir_path) train_log_dir = os.path.join(save_dir_path, "train_log") os.makedirs(train_log_dir, exist_ok=True) config.dump_config(train_log_dir, {"RL method": "DDPG"}) distance_log = DataCSVSaver(os.path.join(train_log_dir, "distance.txt"), ("episode", "distance")) ep = 0 while ep < config.params['episodes']: try: caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params) locomotion_distance = utils.locomotion_distance_logger(caterpillar) obs, _, _ = observe(caterpillar) position = 0 # current position reward = 0 R = 0 # accumulated reward t = 0 while t < STEPS: actions = agent.act_and_train(obs, reward) feedbacks_somite, feedbacks_gripper = actions[:config.oscillators], actions[config.oscillators:] caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks_somite), tuple(feedbacks_gripper)) reward = locomotion_distance(caterpillar) - position if np.isnan(reward): print("got invalid reward, {}".format(reward)) continue obs, _, _ = observe(caterpillar) R += reward position = position + reward t += 1 print("epoch: {} R: {}".format(ep+1, R)) distance_log.append_data(ep+1, R) agent.stop_episode_and_train(obs, reward) except FloatingPointError as e: print("episode {} --- got floating point error, {}. Skip".format(ep, e)) continue except KeyboardInterrupt: command = input("\nSample? Finish? : ") if command in ["sample", "Sample"]: steps = input("How many steps for this sample?: ") if steps == "": utils.notice("default steps {}".format(config.params["default_sample_steps"])) steps = config.params["default_sample_steps"] run_dir = os.path.join(train_log_dir, "train_result_ep{}".format(ep)) os.makedirs(run_dir, exist_ok=True) distance = run(agent, int(steps), run_dir) print("test run for {} steps, moved distance {}".format(int(steps), distance)) continue if command in ["finish", "Finish"]: print("Ending training ...") break else: ep += 1 print('Finished. Saving to {}...'.format(save_dir_path)) agent.save(save_dir_path)
def run_caterpillar(actor, save_dir: str, steps: int, disable_list=None, broken_value=0): if disable_list is None: disable_list = [] caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params) # Record during sampling sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"), ("step", "distance")) sim_somite_phase_diffs_file = DataCSVSaver( os.path.join(save_dir, "somite_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_gripper_phase_diffs_file = DataCSVSaver( os.path.join(save_dir, "gripper_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.grippers)]) sim_somite_phases_file = DataCSVSaver( os.path.join(save_dir, "somite_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_gripper_phases_file = DataCSVSaver( os.path.join(save_dir, "gripper_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.grippers)]) sim_somite_actions_file = DataCSVSaver( os.path.join(save_dir, "somite_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_gripper_actions_file = DataCSVSaver( os.path.join(save_dir, "gripper_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.grippers)]) sim_frictions_file = DataCSVSaver( os.path.join(save_dir, "frictions.txt"), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) sim_tension_file = DataCSVSaver( os.path.join(save_dir, "tensions.txt"), ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)]) sim_grip_thresholds_file = DataCSVSaver( os.path.join(save_dir, "grip_thresholds.txt"), ["step"] + ["gripper_{}".format(i) for i in range(config.grippers)]) locomotion_distance = utils.locomotion_distance_logger( caterpillar) # closure to keep locomotion distance for step in range(steps): obv, action = observe_and_act(actor, caterpillar, disable_list=disable_list, broken_value=broken_value) feedbacks, gripping_phase_thresholds = action[0], action[1] for (oscillator_id, target_angle) in config.fixed_angles.items(): caterpillar.set_target_angle(oscillator_id, target_angle) caterpillar.set_gripping_phase_thresholds( tuple(gripping_phase_thresholds)) caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks[:config.oscillators]), tuple(feedbacks[config.oscillators:])) # Save data phases, frictions, tensions = obv sim_distance_file.append_data(step, locomotion_distance(caterpillar)) sim_somite_phase_diffs_file.append_data( step, *utils.phase_diffs(phases[:config.oscillators])) sim_gripper_phase_diffs_file.append_data( step, *utils.phase_diffs(phases[config.oscillators:])) sim_somite_phases_file.append_data( step, *utils.mod2pi(phases[:config.oscillators])) sim_gripper_phases_file.append_data( step, *utils.mod2pi(phases[config.oscillators:])) sim_frictions_file.append_data(step, *frictions) sim_tension_file.append_data(step, *tensions) sim_somite_actions_file.append_data(step, *feedbacks[:config.oscillators]) sim_gripper_actions_file.append_data(step, *feedbacks[config.oscillators:]) sim_grip_thresholds_file.append_data(step, *gripping_phase_thresholds) caterpillar.save_simulation("{}/render.json".format(save_dir)) return locomotion_distance(caterpillar)
def test_current_params(actor: base_actor, log_dir: str, episode: int): steps = input("How many steps for this sample?: ") if steps == "": utils.notice("default steps {}".format( config.params["default_sample_steps"])) steps = config.params["default_sample_steps"] # Record during sampling sim_distance_file = DataCSVSaver( "{}/train_result_ep{}_distance.txt".format(log_dir, episode), ("step", "distance")) sim_somite_phase_diffs_file = DataCSVSaver( "{}/train_result_ep{}_somite_phase_diffs.txt".format(log_dir, episode), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_gripping_phase_diffs_file = DataCSVSaver( "{}/train_result_ep{}_gripping_phase_diffs.txt".format( log_dir, episode), ["step"] + ["grip_phi_{}".format(i) for i in range(config.grippers)]) sim_somite_phases_file = DataCSVSaver( "{}/train_result_ep{}_somite_phases.txt".format(log_dir, episode), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]) sim_gripping_phases_file = DataCSVSaver( "{}/train_result_ep{}_gripping_phases.txt".format(log_dir, episode), ["step"] + ["grip_phi_{}".format(i) for i in range(config.grippers)]) sim_somite_actions_file = DataCSVSaver( "{}/train_result_ep{}_somite_actions.txt".format(log_dir, episode), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]) sim_gripper_actions_file = DataCSVSaver( "{}/train_result_ep{}_gripper_actions.txt".format(log_dir, episode), ["step"] + ["action_{}".format(i) for i in range(config.grippers)]) sim_frictions_file = DataCSVSaver( "{}/train_result_ep{}_frictions.txt".format(log_dir, episode), ["step"] + ["friction_{}".format(i) for i in range(config.somites)]) caterpillar = new_caterpillar() locomotion_distance = utils.locomotion_distance_logger(caterpillar) for step in range(int(steps)): try: (phases, frictions, _), action = caterpillar_runner.observe_and_act( actor, caterpillar, []) feedbacks, gripping_phase_thresholds = action[0], action[1] for (oscillator_id, target_angle) in config.fixed_angles.items(): caterpillar.set_target_angle(oscillator_id, target_angle) caterpillar.set_gripping_phase_thresholds( tuple(gripping_phase_thresholds)) caterpillar.step_with_feedbacks( config.params["time_delta"], tuple(feedbacks[:config.oscillators]), tuple(feedbacks[config.oscillators:])) except KeyboardInterrupt as e: print("exception occured during sample run,", e) continue else: # Save data sim_distance_file.append_data(step, locomotion_distance(caterpillar)) sim_somite_phase_diffs_file.append_data( step, *utils.phase_diffs(phases[:config.oscillators])) sim_gripping_phase_diffs_file.append_data( step, *utils.mod2pi(phases[config.oscillators:])) sim_somite_phases_file.append_data( step, *utils.mod2pi(phases[:config.oscillators])) sim_gripping_phases_file.append_data( step, *utils.mod2pi(phases[config.oscillators:])) sim_somite_actions_file.append_data( step, *feedbacks[:config.oscillators]) sim_gripper_actions_file.append_data( step, *feedbacks[config.oscillators:]) sim_frictions_file.append_data(step, *frictions) print("Moved distance:", locomotion_distance(caterpillar)) caterpillar.save_simulation("{}/train_result_ep{}.json".format( log_dir, episode))
def train(env, save_dir: str, epochs: int, update_epochs: int, agents: int, trajectory_steps: int, render: bool) -> object: reset_dir(save_dir) policy = MLPGaussianPolicy( env.observation_space.shape[0], env.action_space.shape[0], hidden_layers=[3, 2], action_high=env.action_space.high, action_low=env.action_space.low, ) value_fn = MLPValueFn(env.observation_space.shape[0], hidden_layers=[3, 2]) rl = PPO(policy, value_fn, update_epochs=update_epochs) reward_log = DataCSVSaver( os.path.join(save_dir, "distance.txt"), ("epoch", "averaged reward")) # log epoch development of reward loss_log = DataCSVSaver( os.path.join(save_dir, "loss.txt"), ("epoch", "iter", "loss") ) # log loss transition to check whether network update is carried out properly e = 0 # training while e < epochs: try: print("Epoch {} ......".format(e)) rl.reset_sample_buffers() average_rewards = [] # sampling print(" sampling...") for n in range(agents): # buffer to save samples from trajectory observations_list = [] actions_list = [] rewards_list = [] # init obs = env.reset() observations_list.append(obs) # run a trajectory for t in range(trajectory_steps): action = rl.act(obs) obs, r, done, _ = env.step(action) # save a sample actions_list.append(action) observations_list.append(obs) rewards_list.append(r) if done: break rl.feed_trajectory_samples(observations_list, actions_list, rewards_list, done) print( " agent {}: run for {} steps, average reward {}".format( n, t, np.mean(rewards_list))) average_rewards.append(np.mean(rewards_list)) # update parameters of policy and state value function print(" updating...") update_epoch_losses = rl.update() # logging reward_log.append_data( e, np.mean(average_rewards)) # save averaged reward for i, loss in enumerate(update_epoch_losses): loss_log.append_data(e, i, loss) # save loss of each update epoch print(" average reward {}".format(np.mean(average_rewards))) e += 1 except KeyboardInterrupt: command = input("\nSample? Finish? : ") if command in ["sample", "Sample"]: # run for X steps sample_steps = input("How many steps for this sample?: ") if sample_steps == "": sample_steps = steps print("default steps {}".format(sample_steps)) else: sample_steps = int(sample_steps) obs = env.reset() acc_r = 0 for t in range(sample_steps): if render: env.render() action = rl.act(obs) obs, r, done, _ = env.step(action) acc_r += r continue if command in ["finish", "Finish"]: print("Ending training ...") break print("Finish training. Saving the policy and value_fn in {}".format( save_dir)) rl.save(save_dir) return rl.policy
def train_caterpillar(save_dir: utils.SaveDir, actor_module_name: str): # Dump train parameters config.print_config() actor_module = import_module(actor_module_name) actor_class = getattr(actor_module, config.COMMON_ACTOR_NAME) rlm = PEPGActorManager(actor_class) caterpillar = Caterpillar(config.somites, mode="default") config.dump_config(save_dir.log_dir()) distance_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "distance.txt"), ("episode", "distance")) reward_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "reward.txt"), ("episode", "reward")) sigma_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "sigma.txt"), ("episode", "average sigma")) episode = 0 while episode < config.params["episodes"]: params_sets = rlm.sample_params() print("\nEpisode: {}".format(episode)) print("---------------------------------------------------------------------") rewards = [] try: with Pool(processes=config.exec_params["worker_processes"], initializer=mute) as pool: rewards = pool.map(run_simulation, [(config.params["steps"], actor_module_name, p_set, []) for p_set in params_sets]) rlm.update_params(np.array(rewards)) episode += 1 # Try parameters after this episode -------------------------------------- rlm.set_params(rlm.parameters) caterpillar.reset() (accumulated_tension,) = exec_steps(config.params["steps"], rlm.get_actor(), caterpillar, [], episode=episode - 1) reward = caterpillar.moved_distance() - accumulated_tension / config.params["tension_divisor"] # Save parameter performance distance_log.append_data(episode, caterpillar.moved_distance()) sigma_log.append_data(episode, np.mean(rlm.sigmas)) reward_log.append_data(episode, reward) announce = " --- Distance: {} Reward: {}".format(caterpillar.moved_distance(), reward) print(announce) except KeyboardInterrupt: command = input("\nSample? Finish? : ") if command in ["sample", "Sample"]: rlm.set_params(rlm.parameters) test_current_params(rlm.get_actor(), save_dir.log_dir(), episode) continue if command in ["finish", "Finish"]: print("Ending training ...") break rlm.set_params(rlm.parameters) rlm.save(save_file_path=os.path.join(save_dir.model_dir(), 'actor_model.pickle')) return rlm.get_actor()
def test_current_params(actor: base_actor, log_dir: str, episode: int): steps = input("How many steps for this sample?: ") if steps == "": utils.notice("default steps {}".format(config.params["default_sample_steps"])) steps = config.params["default_sample_steps"] # Record during sampling sim_distance_file = DataCSVSaver( "{}/train_result_ep{}_distance.txt".format(log_dir, episode), ("step", "distance") ) sim_phase_diffs_file = DataCSVSaver( "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)] ) sim_actions_file = DataCSVSaver( "{}/train_result_ep{}_actions.txt".format(log_dir, episode), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)] ) sim_frictions_file = DataCSVSaver( "{}/train_result_ep{}_frictions.txt".format(log_dir, episode), ["step"] + ["friction_{}".format(i) for i in range(config.somites)] ) caterpillar = Caterpillar(config.somites, mode="default") for step in range(int(steps)): try: _, action = caterpillar_runner.observe_and_act(actor, caterpillar, []) caterpillar.step(step, int(1 / config.params["time_dalte"] / 10)) except Exception: continue else: # Save data sim_distance_file.append_data(step, caterpillar.moved_distance()) sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base()) sim_actions_file.append_data(step, *action) frictions = caterpillar.frictions() sim_frictions_file.append_data(step, *frictions) print("Moved distance:", caterpillar.moved_distance()) caterpillar.save_simulation("{}/train_result_ep{}.sim".format(log_dir, episode))