def test_current_params(actor: base_actor, log_dir: str, episode: int):
    """Interactively sample a run of `actor` and log per-step telemetry.

    Prompts for a step count (empty input falls back to
    config.params["default_sample_steps"]), then drives a fresh caterpillar
    with the actor and records distance, oscillator phase diffs, actions and
    frictions as CSV files under `log_dir`. Finally dumps the whole
    simulation trace to train_result_ep<episode>.json.
    """
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(
            config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance"))
    sim_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])

    caterpillar = new_caterpillar()
    # Closure capturing the start position; calling it returns distance moved.
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)
    # `steps` may be a str (from input) or an int (config default); int() covers both.
    for step in range(int(steps)):
        try:
            (phases, frictions,
             _), action = caterpillar_runner.observe_and_act(
                 actor, caterpillar, [])

            # action[0]: oscillator feedbacks; action[1]: oscillation angle ranges.
            feedbacks, angle_ranges = action[0], action[1]
            caterpillar.set_oscillation_ranges(tuple(angle_ranges))
            caterpillar.step_with_feedbacks(config.params["time_delta"],
                                            tuple(feedbacks))
        except Exception as e:
            # Best-effort sampling: report the failed step and keep going.
            print("exception occured during sample run,", e)
            continue
        else:
            # Save data
            sim_distance_file.append_data(step,
                                          locomotion_distance(caterpillar))
            sim_phase_diffs_file.append_data(step, *utils.phase_diffs(phases))
            sim_actions_file.append_data(step, *action[0])
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", locomotion_distance(caterpillar))
    caterpillar.save_simulation("{}/train_result_ep{}.json".format(
        log_dir, episode))
示例#2
0
def run_caterpillar(actor,
                    save_dir: str,
                    steps: int,
                    disable_list=None,
                    broken_value=0):
    """Run `actor` on a default-mode Caterpillar for `steps` steps.

    Logs per-step distance, phase diffs, phases, actions, frictions and
    tensions as CSV files under `save_dir`, dumps the simulation trace to
    render.sim, and returns the total moved distance.

    `disable_list` / `broken_value` are forwarded to observe_and_act,
    presumably to emulate disabled/broken sensors — confirm against its
    definition.
    """
    if disable_list is None:
        disable_list = []

    caterpillar = Caterpillar(config.somites, mode="default")

    # Record during sampling
    sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"),
                                     ("step", "distance"))
    sim_phase_diffs_file = DataCSVSaver(
        os.path.join(save_dir, "phase_diffs.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_phases_file = DataCSVSaver(
        os.path.join(save_dir, "phases.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_actions_file = DataCSVSaver(
        os.path.join(save_dir, "actions.txt"),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = DataCSVSaver(
        os.path.join(save_dir, "frictions.txt"),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = DataCSVSaver(
        os.path.join(save_dir, "tensions.txt"),
        ["step"] + ["tension_{}".format(i) for i in range(config.oscillators)])

    for step in range(steps):
        obv, action = observe_and_act(actor,
                                      caterpillar,
                                      disable_list=disable_list,
                                      broken_value=broken_value)
        _, frictions, tensions = obv
        # NOTE(review): config key "time_dalte" looks like a typo of
        # "time_delta" (used by the other runners) — confirm which key the
        # config actually defines before renaming.
        caterpillar.step(step, int(1 / config.params["time_dalte"] / 10))

        # Save data
        sim_distance_file.append_data(step, caterpillar.moved_distance())
        sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base())
        sim_phases_file.append_data(step, *caterpillar.phis)
        sim_actions_file.append_data(step, *action)
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)

    caterpillar.save_simulation("{}/render.sim".format(save_dir))

    return caterpillar.moved_distance()
def train_caterpillar(save_dir: utils.SaveDir, actor_module_name: str):
    """Train an actor on the caterpillar task with symmetric-sampling PEPG.

    Loads the actor class named config.COMMON_ACTOR_NAME from
    `actor_module_name`, optimizes its flattened parameter vector with PEPG,
    and logs per-episode distance/reward/sigma under the save dir. A
    KeyboardInterrupt during an episode opens an interactive menu to sample
    the current parameters or end training. Returns the trained actor.
    """
    actor_module = import_module(actor_module_name)
    actor_class = getattr(actor_module, config.COMMON_ACTOR_NAME)
    actor = actor_class()

    # Dump train parameters
    config.print_config()
    config.dump_config(save_dir.log_dir(), actor.dump_config())

    # Seed PEPG's mean vector with the actor's current (flattened) parameters.
    pepg = PEPG(actor.params_num())
    pepg.set_parameters(
        np.fromiter(
            itertools.chain.from_iterable(
                [p.flatten().tolist() for p in actor.current_params()]),
            np.float64))

    distance_log = DataCSVSaver(
        os.path.join(save_dir.log_dir(), "distance.txt"),
        ("episode", "distance"))
    reward_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "reward.txt"),
                              ("episode", "reward"))
    sigma_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "sigma.txt"),
                             ("episode", "average sigma"))

    episode = 0
    while episode < config.params["episodes"]:
        # Symmetric sampling: evaluate mu+eps and mu-eps for each epsilon,
        # giving 2*batch_size parameter sets (rows of params_sets).
        current_mus = pepg.get_parameters()
        epsilons = pepg.sample_epsilons(config.params["batch_size"])
        params_sets = np.concatenate([
            current_mus[:, np.newaxis] + epsilons,
            current_mus[:, np.newaxis] - epsilons
        ],
                                     axis=1).T

        print("\nEpisode: {}".format(episode))
        print(
            "---------------------------------------------------------------------"
        )
        rewards = []

        try:
            # Evaluate all parameter sets in parallel worker processes
            # (workers are muted so their stdout doesn't interleave).
            with Pool(processes=config.exec_params["worker_processes"],
                      initializer=mute) as pool:
                rewards = pool.map(
                    run_simulation,
                    [(config.params["steps"], actor_module_name, p_set, [])
                     for p_set in params_sets])

            # First half of rewards corresponds to mu+eps, second to mu-eps.
            sample_num = epsilons.shape[1]
            r_plus = np.array(rewards[:sample_num])
            r_minus = np.array(rewards[sample_num:])
            nan_samples = np.where(np.isnan(r_plus) + np.isnan(r_minus))[0]
            # delete nan samples
            epsilons = np.delete(epsilons, nan_samples, axis=1)
            r_plus = np.delete(r_plus, nan_samples, axis=0)
            r_minus = np.delete(r_minus, nan_samples, axis=0)
            if epsilons.shape[1] > 0:
                # Episode only advances when at least one valid sample pair
                # survived the NaN filter and an update was applied.
                pepg.update_parameters(epsilons, r_plus, r_minus)
                episode += 1

                # Try parameters after this episode --------------------------------------
                actor.set_params(pepg.get_parameters())
                caterpillar = Caterpillar(config.somites,
                                          config.oscillators_list,
                                          config.caterpillar_params)
                locomotion_distance = utils.locomotion_distance_logger(
                    caterpillar)

                (accumulated_tension, ) = exec_steps(config.params["steps"],
                                                     actor,
                                                     caterpillar, [],
                                                     episode=episode - 1)
                d = locomotion_distance(caterpillar)
                # reward = d - accumulated_tension / config.params["tension_divisor"]
                reward = d

                # Save parameter performance
                distance_log.append_data(episode, d)
                sigma_log.append_data(episode, np.mean(pepg.get_sigmas()))
                reward_log.append_data(episode, reward)

                announce = "  --- Distance: {}   Reward: {}".format(d, reward)
                print(announce)
            else:
                print("got nan position. update failed")

        except KeyboardInterrupt:
            # Interactive menu: sample the current parameters or stop early.
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                actor.set_params(pepg.get_parameters())
                test_current_params(actor, save_dir.log_dir(), episode)
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break

    actor.set_params(pepg.get_parameters())
    actor.save(os.path.join(save_dir.model_dir(), 'actor_model.pickle'))
    return actor
def run(agent: DDPG, steps: int, save_dir: str) -> float:
    """Run a trained DDPG agent on a fresh caterpillar for `steps` steps.

    Logs per-step distance, somite/gripper phases and phase diffs, actions,
    frictions and tensions as CSV files under `save_dir`, dumps the
    simulation trace to render.json, and returns the distance moved.
    """
    def _saver(filename, headers):
        # Small factory so each log stream is declared on one line.
        return DataCSVSaver(os.path.join(save_dir, filename), headers)

    osc_phi = ["phi_{}".format(i) for i in range(config.oscillators)]
    grip_phi = ["phi_{}".format(i) for i in range(config.grippers)]

    sim_distance_file = _saver("distance.txt", ("step", "distance"))
    sim_somite_phase_diffs_file = _saver("somite_phase_diffs.txt", ["step"] + osc_phi)
    sim_gripper_phase_diffs_file = _saver("gripper_phase_diffs.txt", ["step"] + grip_phi)
    sim_somite_phases_file = _saver("somite_phases.txt", ["step"] + osc_phi)
    sim_gripper_phases_file = _saver("gripper_phases.txt", ["step"] + grip_phi)
    sim_somite_actions_file = _saver("somite_actions.txt", ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_actions_file = _saver("gripper_actions.txt", ["step"] + ["action_{}".format(i) for i in range(config.grippers)])
    sim_frictions_file = _saver("frictions.txt", ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = _saver("tensions.txt", ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)])

    caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params)
    # Closure capturing the start position; calling it yields distance moved.
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)

    for step in range(steps):
        obs, somite_phases, gripper_phases = observe(caterpillar)
        actions = agent.act(obs)
        somite_feedbacks = actions[:config.oscillators]
        gripper_feedbacks = actions[config.oscillators:]
        caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(somite_feedbacks), tuple(gripper_feedbacks))

        # obs layout: frictions | tensions | somite-phase obs | gripper-phase obs
        frictions, tensions, _, _ = np.split(
            obs, [config.somites, config.somites * 2 - 2, config.somites * 2 - 2 + config.oscillators])
        sim_distance_file.append_data(step, locomotion_distance(caterpillar))
        sim_somite_phase_diffs_file.append_data(step, *utils.phase_diffs(np.array(somite_phases)))
        sim_gripper_phase_diffs_file.append_data(step, *utils.phase_diffs(np.array(gripper_phases)))
        sim_somite_phases_file.append_data(step, *utils.mod2pi(np.array(somite_phases)))
        sim_gripper_phases_file.append_data(step, *utils.mod2pi(np.array(gripper_phases)))
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)
        sim_somite_actions_file.append_data(step, *somite_feedbacks)
        sim_gripper_actions_file.append_data(step, *gripper_feedbacks)

    caterpillar.save_simulation("{}/render.json".format(save_dir))
    return locomotion_distance(caterpillar)
def run_earthworm(actor,
                  save_dir: str,
                  steps: int,
                  disable_list=None,
                  broken_value=0):
    """Drive an Earthworm model with `actor` for `steps` steps.

    Logs per-step distance, somite phase diffs/phases, actions, frictions
    and tensions as CSV files under `save_dir`, dumps the simulation trace
    to render.json, and returns the distance moved. `disable_list` and
    `broken_value` are forwarded to observe_and_act.
    """
    disable_list = [] if disable_list is None else disable_list

    earthworm = Earthworm(config.somites, config.oscillators_list,
                          config.earthworm_params)

    def _saver(filename, headers):
        # One CSV log per telemetry stream.
        return DataCSVSaver(os.path.join(save_dir, filename), headers)

    phi_headers = ["phi_{}".format(i) for i in range(config.oscillators)]
    sim_distance_file = _saver("distance.txt", ("step", "distance"))
    sim_somite_phase_diffs_file = _saver("somite_phase_diffs.txt", ["step"] + phi_headers)
    sim_somite_phases_file = _saver("somite_phases.txt", ["step"] + phi_headers)
    sim_somite_actions_file = _saver("somite_actions.txt", ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = _saver("frictions.txt", ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = _saver("tensions.txt", ["step"] + ["tension_{}".format(i) for i in range(config.somites - 1)])

    # Closure capturing the start position, so calling it later yields distance.
    locomotion_distance = utils.locomotion_distance_logger(earthworm)

    for step in range(steps):
        obv, action = observe_and_act(actor,
                                      earthworm,
                                      disable_list=disable_list,
                                      broken_value=broken_value)
        earthworm.step_with_feedbacks(config.params["time_delta"],
                                      tuple(action))

        # Save data
        phases, frictions, tensions = obv
        somite_phases = phases[:config.oscillators]
        sim_distance_file.append_data(step, locomotion_distance(earthworm))
        sim_somite_phase_diffs_file.append_data(step, *utils.phase_diffs(somite_phases))
        sim_somite_phases_file.append_data(step, *utils.mod2pi(somite_phases))
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)
        sim_somite_actions_file.append_data(step, *action)

    earthworm.save_simulation("{}/render.json".format(save_dir))

    return locomotion_distance(earthworm)
def run_caterpillar(actor,
                    save_dir: str,
                    steps: int,
                    disable_list=None,
                    broken_value=0):
    """Run `actor` on a Caterpillar model for `steps` steps.

    Each step the actor produces (feedbacks, angle_ranges); the angle ranges
    are applied before stepping the simulation. Per-step telemetry is logged
    as CSV files under `save_dir`, the trace is dumped to render.json, and
    the total locomotion distance is returned. `disable_list` and
    `broken_value` are forwarded to observe_and_act.
    """
    disable_list = [] if disable_list is None else disable_list

    caterpillar = Caterpillar(config.somites, config.oscillators_list,
                              config.caterpillar_params)

    def _saver(filename, headers):
        # One CSV log per telemetry stream.
        return DataCSVSaver(os.path.join(save_dir, filename), headers)

    phi_headers = ["phi_{}".format(i) for i in range(config.oscillators)]
    sim_distance_file = _saver("distance.txt", ("step", "distance"))
    sim_phase_diffs_file = _saver("phase_diffs.txt", ["step"] + phi_headers)
    sim_phases_file = _saver("phases.txt", ["step"] + phi_headers)
    sim_actions_file = _saver("actions.txt", ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = _saver("frictions.txt", ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = _saver("tensions.txt", ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)])
    sim_angle_range_file = _saver("angle_ranges.txt", ["step"] + phi_headers)

    # Closure capturing the start position, so calling it later yields distance.
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)

    for step in range(steps):
        obv, action = observe_and_act(actor,
                                      caterpillar,
                                      disable_list=disable_list,
                                      broken_value=broken_value)
        feedbacks, angle_ranges = action[0], action[1]
        caterpillar.set_oscillation_ranges(tuple(angle_ranges))
        caterpillar.step_with_feedbacks(config.params["time_delta"],
                                        tuple(feedbacks))

        # Save data
        phases, frictions, tensions = obv
        sim_distance_file.append_data(step, locomotion_distance(caterpillar))
        sim_phase_diffs_file.append_data(step, *utils.phase_diffs(phases))
        sim_phases_file.append_data(step, *phases)
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)
        sim_actions_file.append_data(step, *feedbacks)
        sim_angle_range_file.append_data(step, *angle_ranges)

    caterpillar.save_simulation("{}/render.json".format(save_dir))

    return locomotion_distance(caterpillar)
def train(save_dir_path: str):
    """Train a DDPG agent on the caterpillar environment.

    Runs config.params['episodes'] episodes of STEPS steps each, using the
    per-step locomotion-distance delta as reward, and logs the accumulated
    reward per episode to <save_dir_path>/train_log/distance.txt. A
    KeyboardInterrupt opens an interactive menu to sample the current policy
    (via run()) or finish training early. The trained agent is saved under
    save_dir_path.
    """
    agent = build_agent()

    reset_dir(save_dir_path)
    train_log_dir = os.path.join(save_dir_path, "train_log")
    os.makedirs(train_log_dir, exist_ok=True)

    config.dump_config(train_log_dir, {"RL method": "DDPG"})

    distance_log = DataCSVSaver(os.path.join(train_log_dir, "distance.txt"), ("episode", "distance"))

    ep = 0
    while ep < config.params['episodes']:
        try:
            caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params)
            locomotion_distance = utils.locomotion_distance_logger(caterpillar)
            obs, _, _ = observe(caterpillar)
            position = 0  # distance covered so far in this episode
            reward = 0
            R = 0  # accumulated reward
            t = 0
            while t < STEPS:
                actions = agent.act_and_train(obs, reward)
                feedbacks_somite, feedbacks_gripper = actions[:config.oscillators], actions[config.oscillators:]
                caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks_somite), tuple(feedbacks_gripper))

                reward = locomotion_distance(caterpillar) - position
                if np.isnan(reward):
                    # Bug fix: the old code `continue`d here, which never
                    # advanced `t` (infinite loop once the sim diverges) and
                    # fed the NaN reward straight into the next
                    # act_and_train call. End the episode with a neutral
                    # reward instead.
                    print("got invalid reward, {}".format(reward))
                    reward = 0
                    break
                obs, _, _ = observe(caterpillar)
                R += reward
                position = position + reward
                t += 1
            print("epoch: {}   R: {}".format(ep+1, R))
            distance_log.append_data(ep+1, R)

            agent.stop_episode_and_train(obs, reward)
        except FloatingPointError as e:
            # Episode is discarded and retried: `ep` is not advanced.
            print("episode {} --- got floating point error, {}. Skip".format(ep, e))
            continue
        except KeyboardInterrupt:
            # Interactive menu: sample the current policy or stop early.
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                steps = input("How many steps for this sample?: ")
                if steps == "":
                    utils.notice("default steps {}".format(config.params["default_sample_steps"]))
                    steps = config.params["default_sample_steps"]

                run_dir = os.path.join(train_log_dir, "train_result_ep{}".format(ep))
                os.makedirs(run_dir, exist_ok=True)
                distance = run(agent, int(steps), run_dir)
                print("test run for {} steps, moved distance {}".format(int(steps), distance))
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break
        else:
            # Only a cleanly completed episode counts.
            ep += 1

    print('Finished. Saving to {}...'.format(save_dir_path))
    agent.save(save_dir_path)
示例#8
0
def run_caterpillar(actor,
                    save_dir: str,
                    steps: int,
                    disable_list=None,
                    broken_value=0):
    """Run `actor` on a gripper-equipped Caterpillar for `steps` steps.

    Each step the actor produces (feedbacks, gripping_phase_thresholds);
    feedbacks are split into somite-oscillator and gripper parts by index.
    Per-step telemetry is logged as CSV files under `save_dir`, the trace is
    dumped to render.json, and the total locomotion distance is returned.
    `disable_list` / `broken_value` are forwarded to observe_and_act.
    """
    if disable_list is None:
        disable_list = []

    caterpillar = Caterpillar(config.somites, config.oscillators_list,
                              config.grippers_list, config.caterpillar_params)

    # Record during sampling
    sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"),
                                     ("step", "distance"))
    sim_somite_phase_diffs_file = DataCSVSaver(
        os.path.join(save_dir, "somite_phase_diffs.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_phase_diffs_file = DataCSVSaver(
        os.path.join(save_dir, "gripper_phase_diffs.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_phases_file = DataCSVSaver(
        os.path.join(save_dir, "somite_phases.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_phases_file = DataCSVSaver(
        os.path.join(save_dir, "gripper_phases.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_actions_file = DataCSVSaver(
        os.path.join(save_dir, "somite_actions.txt"),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_actions_file = DataCSVSaver(
        os.path.join(save_dir, "gripper_actions.txt"),
        ["step"] + ["action_{}".format(i) for i in range(config.grippers)])
    sim_frictions_file = DataCSVSaver(
        os.path.join(save_dir, "frictions.txt"),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = DataCSVSaver(
        os.path.join(save_dir, "tensions.txt"),
        ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)])
    sim_grip_thresholds_file = DataCSVSaver(
        os.path.join(save_dir, "grip_thresholds.txt"),
        ["step"] + ["gripper_{}".format(i) for i in range(config.grippers)])

    locomotion_distance = utils.locomotion_distance_logger(
        caterpillar)  # closure to keep locomotion distance
    for step in range(steps):
        obv, action = observe_and_act(actor,
                                      caterpillar,
                                      disable_list=disable_list,
                                      broken_value=broken_value)
        feedbacks, gripping_phase_thresholds = action[0], action[1]
        # Pin any oscillators configured with fixed target angles before stepping.
        for (oscillator_id, target_angle) in config.fixed_angles.items():
            caterpillar.set_target_angle(oscillator_id, target_angle)
        caterpillar.set_gripping_phase_thresholds(
            tuple(gripping_phase_thresholds))
        # feedbacks[:oscillators] drive somite oscillators; the rest drive grippers.
        caterpillar.step_with_feedbacks(config.params["time_delta"],
                                        tuple(feedbacks[:config.oscillators]),
                                        tuple(feedbacks[config.oscillators:]))

        # Save data
        # phases holds somite-oscillator phases first, then gripper phases.
        phases, frictions, tensions = obv
        sim_distance_file.append_data(step, locomotion_distance(caterpillar))
        sim_somite_phase_diffs_file.append_data(
            step, *utils.phase_diffs(phases[:config.oscillators]))
        sim_gripper_phase_diffs_file.append_data(
            step, *utils.phase_diffs(phases[config.oscillators:]))
        sim_somite_phases_file.append_data(
            step, *utils.mod2pi(phases[:config.oscillators]))
        sim_gripper_phases_file.append_data(
            step, *utils.mod2pi(phases[config.oscillators:]))
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)
        sim_somite_actions_file.append_data(step,
                                            *feedbacks[:config.oscillators])
        sim_gripper_actions_file.append_data(step,
                                             *feedbacks[config.oscillators:])
        sim_grip_thresholds_file.append_data(step, *gripping_phase_thresholds)

    caterpillar.save_simulation("{}/render.json".format(save_dir))

    return locomotion_distance(caterpillar)
def test_current_params(actor: base_actor, log_dir: str, episode: int):
    """Interactively sample a run of `actor` on a gripper-equipped caterpillar.

    Prompts for a step count (empty input falls back to
    config.params["default_sample_steps"]), then simulates and logs per-step
    distance, somite/gripper phases and phase diffs, actions and frictions
    as CSV files under `log_dir`, finally dumping the whole trace to
    train_result_ep<episode>.json.
    """
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(
            config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance"))
    sim_somite_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_somite_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripping_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_gripping_phase_diffs.txt".format(
            log_dir, episode),
        ["step"] + ["grip_phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_phases_file = DataCSVSaver(
        "{}/train_result_ep{}_somite_phases.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripping_phases_file = DataCSVSaver(
        "{}/train_result_ep{}_gripping_phases.txt".format(log_dir, episode),
        ["step"] + ["grip_phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_somite_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_gripper_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.grippers)])
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])

    caterpillar = new_caterpillar()
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)
    for step in range(int(steps)):
        try:
            (phases, frictions,
             _), action = caterpillar_runner.observe_and_act(
                 actor, caterpillar, [])
            feedbacks, gripping_phase_thresholds = action[0], action[1]

            for (oscillator_id, target_angle) in config.fixed_angles.items():
                caterpillar.set_target_angle(oscillator_id, target_angle)
            caterpillar.set_gripping_phase_thresholds(
                tuple(gripping_phase_thresholds))
            caterpillar.step_with_feedbacks(
                config.params["time_delta"],
                tuple(feedbacks[:config.oscillators]),
                tuple(feedbacks[config.oscillators:]))
        except Exception as e:
            # Bug fix: this previously caught KeyboardInterrupt, which made
            # Ctrl-C skip steps forever while letting genuine simulation
            # errors crash the sample run. Catch simulation errors instead
            # (best-effort, like the sibling sampler) and keep going.
            print("exception occured during sample run,", e)
            continue
        else:
            # Save data
            sim_distance_file.append_data(step,
                                          locomotion_distance(caterpillar))
            sim_somite_phase_diffs_file.append_data(
                step, *utils.phase_diffs(phases[:config.oscillators]))
            # Bug fix: this file records phase *diffs*; it previously wrote
            # mod2pi(phases), duplicating the gripping_phases file below.
            sim_gripping_phase_diffs_file.append_data(
                step, *utils.phase_diffs(phases[config.oscillators:]))
            sim_somite_phases_file.append_data(
                step, *utils.mod2pi(phases[:config.oscillators]))
            sim_gripping_phases_file.append_data(
                step, *utils.mod2pi(phases[config.oscillators:]))
            sim_somite_actions_file.append_data(
                step, *feedbacks[:config.oscillators])
            sim_gripper_actions_file.append_data(
                step, *feedbacks[config.oscillators:])
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", locomotion_distance(caterpillar))
    caterpillar.save_simulation("{}/train_result_ep{}.json".format(
        log_dir, episode))
示例#10
0
def train(env, save_dir: str, epochs: int, update_epochs: int, agents: int,
          trajectory_steps: int, render: bool) -> object:
    """Train a PPO policy on a gym-style environment.

    For each of `epochs` epochs, samples `agents` trajectories of up to
    `trajectory_steps` steps, feeds them to PPO, runs `update_epochs`
    optimization passes, and logs average reward and losses under
    `save_dir`. A KeyboardInterrupt opens a menu to roll out the current
    policy (optionally rendering) or finish early. Returns the trained
    policy.
    """
    reset_dir(save_dir)

    policy = MLPGaussianPolicy(
        env.observation_space.shape[0],
        env.action_space.shape[0],
        hidden_layers=[3, 2],
        action_high=env.action_space.high,
        action_low=env.action_space.low,
    )
    value_fn = MLPValueFn(env.observation_space.shape[0], hidden_layers=[3, 2])
    rl = PPO(policy, value_fn, update_epochs=update_epochs)

    reward_log = DataCSVSaver(
        os.path.join(save_dir, "distance.txt"),
        ("epoch", "averaged reward"))  # log epoch development of reward
    loss_log = DataCSVSaver(
        os.path.join(save_dir, "loss.txt"), ("epoch", "iter", "loss")
    )  # log loss transition to check whether network update is carried out properly

    e = 0
    # training
    while e < epochs:
        try:
            print("Epoch {} ......".format(e))
            rl.reset_sample_buffers()

            average_rewards = []

            # sampling
            print("  sampling...")
            for n in range(agents):
                # buffer to save samples from trajectory
                observations_list = []
                actions_list = []
                rewards_list = []

                # init
                obs = env.reset()
                observations_list.append(obs)

                # run a trajectory (may end early if the env reports done)
                for t in range(trajectory_steps):
                    action = rl.act(obs)
                    obs, r, done, _ = env.step(action)
                    # save a sample
                    actions_list.append(action)
                    observations_list.append(obs)
                    rewards_list.append(r)
                    if done:
                        break

                rl.feed_trajectory_samples(observations_list, actions_list,
                                           rewards_list, done)

                print(
                    "    agent {}: run for {} steps, average reward {}".format(
                        n, t, np.mean(rewards_list)))
                average_rewards.append(np.mean(rewards_list))

            # update parameters of policy and state value function
            print("  updating...")
            update_epoch_losses = rl.update()

            # logging
            reward_log.append_data(
                e, np.mean(average_rewards))  # save averaged reward
            for i, loss in enumerate(update_epoch_losses):
                loss_log.append_data(e, i,
                                     loss)  # save loss of each update epoch

            print("  average reward {}".format(np.mean(average_rewards)))
            e += 1

        except KeyboardInterrupt:
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                # run for X steps
                sample_steps = input("How many steps for this sample?: ")
                if sample_steps == "":
                    # Bug fix: this previously read `sample_steps = steps`,
                    # but no `steps` name exists in this scope (NameError).
                    # Fall back to the trajectory length instead.
                    sample_steps = trajectory_steps
                    print("default steps {}".format(sample_steps))
                else:
                    sample_steps = int(sample_steps)

                obs = env.reset()
                acc_r = 0
                for t in range(sample_steps):
                    if render:
                        env.render()
                    action = rl.act(obs)
                    obs, r, done, _ = env.step(action)
                    acc_r += r

                # Report the rollout result (acc_r was previously accumulated
                # but never used).
                print("sample run for {} steps, accumulated reward {}".format(
                    sample_steps, acc_r))
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break

    print("Finish training. Saving the policy and value_fn in {}".format(
        save_dir))
    rl.save(save_dir)
    return rl.policy
def train_caterpillar(save_dir: utils.SaveDir, actor_module_name: str):
    """Train an actor on the caterpillar task via a PEPGActorManager.

    Loads the actor class named config.COMMON_ACTOR_NAME from
    `actor_module_name`, samples parameter sets from the manager, evaluates
    them in parallel worker processes, and updates the manager with the
    resulting rewards. Logs per-episode distance, reward and average sigma
    under the save dir. A KeyboardInterrupt opens an interactive menu to
    sample the current parameters or end training. Returns the trained
    actor.
    """
    # Dump train parameters
    config.print_config()

    actor_module = import_module(actor_module_name)
    actor_class = getattr(actor_module, config.COMMON_ACTOR_NAME)
    rlm = PEPGActorManager(actor_class)

    caterpillar = Caterpillar(config.somites, mode="default")
    config.dump_config(save_dir.log_dir())

    distance_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "distance.txt"), ("episode", "distance"))
    reward_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "reward.txt"), ("episode", "reward"))
    sigma_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "sigma.txt"), ("episode", "average sigma"))

    episode = 0
    while episode < config.params["episodes"]:
        params_sets = rlm.sample_params()

        print("\nEpisode: {}".format(episode))
        print("---------------------------------------------------------------------")
        rewards = []

        try:
            # Evaluate all sampled parameter sets in parallel worker processes
            # (workers are muted so their stdout doesn't interleave).
            with Pool(processes=config.exec_params["worker_processes"], initializer=mute) as pool:
                rewards = pool.map(run_simulation, [(config.params["steps"], actor_module_name, p_set, []) for p_set in params_sets])

            rlm.update_params(np.array(rewards))
            episode += 1

            # Try parameters after this episode --------------------------------------
            rlm.set_params(rlm.parameters)
            caterpillar.reset()

            # Reward penalizes accumulated muscle tension against distance moved.
            (accumulated_tension,) = exec_steps(config.params["steps"], rlm.get_actor(), caterpillar, [], episode=episode - 1)
            reward = caterpillar.moved_distance() - accumulated_tension / config.params["tension_divisor"]

            # Save parameter performance
            distance_log.append_data(episode, caterpillar.moved_distance())
            sigma_log.append_data(episode, np.mean(rlm.sigmas))
            reward_log.append_data(episode, reward)

            announce = "  --- Distance: {}   Reward: {}".format(caterpillar.moved_distance(), reward)
            print(announce)

        except KeyboardInterrupt:
            # Interactive menu: sample the current parameters or stop early.
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                rlm.set_params(rlm.parameters)
                test_current_params(rlm.get_actor(), save_dir.log_dir(), episode)
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break

    rlm.set_params(rlm.parameters)
    rlm.save(save_file_path=os.path.join(save_dir.model_dir(), 'actor_model.pickle'))
    return rlm.get_actor()
def test_current_params(actor: base_actor, log_dir: str, episode: int):
    """Interactively sample a run of `actor` and log per-step telemetry.

    Prompts for a step count (empty input falls back to
    config.params["default_sample_steps"]), then drives a default-mode
    caterpillar with the actor and records distance, phase diffs, actions
    and frictions as CSV files under `log_dir`, finally dumping the whole
    trace to train_result_ep<episode>.sim.
    """
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance")
    )
    sim_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]
    )
    sim_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]
    )
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)]
    )

    caterpillar = Caterpillar(config.somites, mode="default")
    for step in range(int(steps)):
        try:
            _, action = caterpillar_runner.observe_and_act(actor, caterpillar, [])
            # NOTE(review): config key "time_dalte" looks like a typo of
            # "time_delta" — kept as-is because it must match whatever key
            # the config actually defines.
            caterpillar.step(step, int(1 / config.params["time_dalte"] / 10))
        except Exception as e:
            # Bug fix: errors were previously swallowed silently
            # (`except Exception: continue`), hiding persistent failures.
            # Report them (like the sibling sampler) but keep the
            # best-effort behaviour of skipping the failed step.
            print("exception occured during sample run,", e)
            continue
        else:
            # Save data
            sim_distance_file.append_data(step, caterpillar.moved_distance())
            sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base())
            sim_actions_file.append_data(step, *action)
            frictions = caterpillar.frictions()
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", caterpillar.moved_distance())
    caterpillar.save_simulation("{}/train_result_ep{}.sim".format(log_dir, episode))