Exemplo n.º 1
0
def test_current_params(actor: base_actor, log_dir: str, episode: int):
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(
            config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance"))
    sim_somite_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_somite_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_somite_phases_file = DataCSVSaver(
        "{}/train_result_ep{}_somite_phases.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_somite_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_somite_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])

    earthworm = new_earthworm()
    locomotion_distance = utils.locomotion_distance_logger(earthworm)
    for step in range(int(steps)):
        try:
            (phases, frictions, _), action = earthworm_runner.observe_and_act(
                actor, earthworm, [])
            # for (oscillator_id, target_angle) in config.fixed_angles.items():
            #     earthworm.set_target_angle(oscillator_id, target_angle)
            earthworm.step_with_feedbacks(config.params["time_delta"],
                                          tuple(action))
        except KeyboardInterrupt as e:
            print("exception occured during sample run,", e)
            continue
        else:
            # Save data
            sim_distance_file.append_data(step, locomotion_distance(earthworm))
            sim_somite_phase_diffs_file.append_data(
                step, *utils.phase_diffs(phases[:config.oscillators]))
            sim_somite_phases_file.append_data(
                step, *utils.mod2pi(phases[:config.oscillators]))
            sim_somite_actions_file.append_data(step, *action)
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", locomotion_distance(earthworm))
    earthworm.save_simulation("{}/train_result_ep{}.json".format(
        log_dir, episode))
def test_current_params(actor: base_actor, log_dir: str, episode: int):
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(
            config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance"))
    sim_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])

    caterpillar = new_caterpillar()
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)
    for step in range(int(steps)):
        try:
            (phases, frictions,
             _), action = caterpillar_runner.observe_and_act(
                 actor, caterpillar, [])

            feedbacks, angle_ranges = action[0], action[1]
            caterpillar.set_oscillation_ranges(tuple(angle_ranges))
            caterpillar.step_with_feedbacks(config.params["time_delta"],
                                            tuple(feedbacks))
        except Exception as e:
            print("exception occured during sample run,", e)
            continue
        else:
            # Save data
            sim_distance_file.append_data(step,
                                          locomotion_distance(caterpillar))
            sim_phase_diffs_file.append_data(step, *utils.phase_diffs(phases))
            sim_actions_file.append_data(step, *action[0])
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", locomotion_distance(caterpillar))
    caterpillar.save_simulation("{}/train_result_ep{}.json".format(
        log_dir, episode))
def test_current_params(actor: base_actor, log_dir: str, episode: int):
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance")
    )
    sim_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]
    )
    sim_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]
    )
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)]
    )

    caterpillar = Caterpillar(config.somites, mode="default")
    for step in range(int(steps)):
        try:
            _, action = caterpillar_runner.observe_and_act(actor, caterpillar, [])
            caterpillar.step(step, int(1 / config.params["time_dalte"] / 10))
        except Exception:
            continue
        else:
            # Save data
            sim_distance_file.append_data(step, caterpillar.moved_distance())
            sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base())
            sim_actions_file.append_data(step, *action)
            frictions = caterpillar.frictions()
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", caterpillar.moved_distance())
    caterpillar.save_simulation("{}/train_result_ep{}.sim".format(log_dir, episode))
def  train(save_dir_path: str):
    agent = build_agent()

    reset_dir(save_dir_path)
    train_log_dir = os.path.join(save_dir_path, "train_log")
    os.makedirs(train_log_dir, exist_ok=True)

    config.dump_config(train_log_dir, {"RL method": "DDPG"})

    distance_log = DataCSVSaver(os.path.join(train_log_dir, "distance.txt"), ("episode", "distance"))

    ep = 0
    while ep < config.params['episodes']:
        try:
            caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params)
            locomotion_distance = utils.locomotion_distance_logger(caterpillar)
            obs, _, _ = observe(caterpillar)
            position = 0 # current position
            reward = 0
            R = 0  # accumulated reward
            t = 0
            while t < STEPS:
                actions = agent.act_and_train(obs, reward)
                feedbacks_somite, feedbacks_gripper = actions[:config.oscillators], actions[config.oscillators:]
                caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks_somite), tuple(feedbacks_gripper))

                reward = locomotion_distance(caterpillar) - position
                if np.isnan(reward):
                    print("got invalid reward, {}".format(reward))
                    continue
                obs, _, _ = observe(caterpillar)
                R += reward
                position = position + reward
                t += 1
            print("epoch: {}   R: {}".format(ep+1, R))
            distance_log.append_data(ep+1, R)

            agent.stop_episode_and_train(obs, reward)
        except FloatingPointError as e:
            print("episode {} --- got floating point error, {}. Skip".format(ep, e))
            continue
        except KeyboardInterrupt:
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                steps = input("How many steps for this sample?: ")
                if steps == "":
                    utils.notice("default steps {}".format(config.params["default_sample_steps"]))
                    steps = config.params["default_sample_steps"]

                run_dir = os.path.join(train_log_dir, "train_result_ep{}".format(ep))
                os.makedirs(run_dir, exist_ok=True)
                distance = run(agent, int(steps), run_dir)
                print("test run for {} steps, moved distance {}".format(int(steps), distance))
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break
        else:
            ep += 1

    print('Finished. Saving to {}...'.format(save_dir_path))
    agent.save(save_dir_path)