def run(agent: DDPG, steps: int, save_dir: str) -> float:
    """Run a trained DDPG agent on the caterpillar simulator for ``steps`` steps.

    Telemetry (distance, phases, phase differences, actions, frictions,
    tensions) is appended to CSV files under ``save_dir``, and the full
    simulation is saved as ``render.json`` for later visualization.

    Args:
        agent: Trained DDPG agent; ``agent.act(obs)`` must return a 1-D action
            vector of length ``config.oscillators + config.grippers``.
        steps: Number of simulation steps to run.
        save_dir: Directory where all log files are written (must exist).

    Returns:
        Total locomotion distance covered by the caterpillar over the run.
    """
    # One CSV logger per telemetry stream, all rooted at save_dir.
    sim_distance_file = DataCSVSaver(
        os.path.join(save_dir, "distance.txt"), ("step", "distance"))
    sim_somite_phase_diffs_file = DataCSVSaver(
        os.path.join(save_dir, "somite_phase_diffs.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_phase_diffs_file = DataCSVSaver(
        os.path.join(save_dir, "gripper_phase_diffs.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_phases_file = DataCSVSaver(
        os.path.join(save_dir, "somite_phases.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_phases_file = DataCSVSaver(
        os.path.join(save_dir, "gripper_phases.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_actions_file = DataCSVSaver(
        os.path.join(save_dir, "somite_actions.txt"),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_actions_file = DataCSVSaver(
        os.path.join(save_dir, "gripper_actions.txt"),
        ["step"] + ["action_{}".format(i) for i in range(config.grippers)])
    sim_frictions_file = DataCSVSaver(
        os.path.join(save_dir, "frictions.txt"),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = DataCSVSaver(
        os.path.join(save_dir, "tensions.txt"),
        ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)])

    caterpillar = Caterpillar(
        config.somites, config.oscillators_list, config.grippers_list,
        config.caterpillar_params)
    # Logger closure that measures distance relative to the start position.
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)

    for step in range(steps):
        obs, somite_phases, gripper_phases = observe(caterpillar)
        actions = agent.act(obs)
        # Action vector layout: first the somite-oscillator feedbacks,
        # then the gripper feedbacks.
        feedbacks_somite, feedbacks_gripper = \
            actions[:config.oscillators], actions[config.oscillators:]
        caterpillar.step_with_feedbacks(
            config.params["time_delta"],
            tuple(feedbacks_somite), tuple(feedbacks_gripper))

        # Observation layout (pre-step): somites frictions, then
        # somites-2 tensions, then oscillator-related components we ignore.
        frictions, tensions, _, _ = np.split(
            obs,
            [config.somites,
             config.somites * 2 - 2,
             config.somites * 2 - 2 + config.oscillators])

        # Note: frictions/tensions/phases logged here are from the
        # observation taken *before* this step was applied.
        sim_distance_file.append_data(step, locomotion_distance(caterpillar))
        sim_somite_phase_diffs_file.append_data(
            step, *utils.phase_diffs(np.array(somite_phases)))
        sim_gripper_phase_diffs_file.append_data(
            step, *utils.phase_diffs(np.array(gripper_phases)))
        sim_somite_phases_file.append_data(
            step, *utils.mod2pi(np.array(somite_phases)))
        sim_gripper_phases_file.append_data(
            step, *utils.mod2pi(np.array(gripper_phases)))
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)
        sim_somite_actions_file.append_data(step, *feedbacks_somite)
        sim_gripper_actions_file.append_data(step, *feedbacks_gripper)

    # Fixed: use os.path.join like every other path in this function
    # (was "{}/render.json".format(save_dir), which is not portable).
    caterpillar.save_simulation(os.path.join(save_dir, "render.json"))

    return locomotion_distance(caterpillar)
    rbuf,
    gamma=args.gamma,
    explorer=explorer,
    replay_start_size=5000,          # transitions collected before updates begin
    target_update_method='soft',     # Polyak averaging of target networks
    target_update_interval=1,
    update_interval=4,
    soft_update_tau=1e-2,
    n_times_update=1,
    phi=phi,                         # observation preprocessing function
    minibatch_size=128 #args.minibatch_size
)

# Load the saved model weights for evaluation.
agent.load("DDPG_last_model_rew8k")

# Evaluation loop: run the loaded agent greedily for args.test_episodes
# episodes, capping each episode at 200 steps, and print the total reward.
for i in range(args.test_episodes):
    obs = env.reset()
    done = False
    reward_Alan = 0   # cumulative reward for this episode
    t = 0             # step counter within the episode
    while not done and t < 200:
        #env.render()
        #agent.load("DDPG_best_model")
        action = agent.act(obs)
        obs, r, done, x = env.step(action)
        reward_Alan += r
        t += 1
    print('Test episode:', i, 'Reward obtained: ', reward_Alan)
    #agent.stop_episode()