# Place the keyboard-controlled vehicle at a designated spawn point and disable the random start point
env_dict[keyboard_agent_name].vehicle_start_point_index = 0
env_dict[keyboard_agent_name].use_random_start_point = False

# Turn on RL learning for the imitation target (the keyboard-controlled agent)
worker_kwargs[keyboard_agent_name]["do_RL_learn"] = True

# Debug the keyboard-controlled agent to check whether the model is actually being trained!
# worker_kwargs[keyboard_agent_name]["debug"] = True

DDPG_Agent_GAL_v1(
    env_prototype_dict_for_workers=env_dict,
    save_dir="./ddpg_IAJR_ckpt/",
    model_hook_dict=hook_dict,
    kwargs_for_model_dict=model_kwargs,
    kwargs_for_worker_dict=worker_kwargs,
    kwargs_for_global_model={
        # Pre-compute accurate Q values for the dataset in advance (a hedged sketch of this idea appears at the end of this snippet)
        "use_pre_calculated_g": use_pre_calculated_g,
        "gamma": gamma
    }).start()
# Note: if the code below never runs, it is because start() above is followed by an internal join() that blocks

# Then turn on keyboard-hook control
kh.start_keyboard_control()

# Next, find the keyboard-controlled vehicle and keep adjusting throttle, brake and steering with the keyboard!
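
# The `kh` hook used above is created elsewhere and is not shown in this
# snippet. Purely as a hedged illustration (NOT the project's actual class),
# a keyboard hook that turns WASD key presses into throttle/brake/steer values
# could look like this, assuming the third-party `pynput` library:
from pynput import keyboard as _pynput_keyboard

class KeyboardControlHookSketch:
    """Hypothetical stand-in for the real keyboard hook; illustration only."""

    def __init__(self):
        self.throttle, self.brake, self.steer = 0.0, 0.0, 0.0
        self._listener = None

    def _on_press(self, key):
        try:
            char = key.char  # special keys have no .char and are ignored
        except AttributeError:
            return
        if char == "w":
            self.throttle = min(1.0, self.throttle + 0.1)
        elif char == "s":
            self.brake = min(1.0, self.brake + 0.1)
        elif char == "a":
            self.steer = max(-1.0, self.steer - 0.1)
        elif char == "d":
            self.steer = min(1.0, self.steer + 0.1)

    def start_keyboard_control(self):
        # non-blocking listener thread; the worker reads the current values
        self._listener = _pynput_keyboard.Listener(on_press=self._on_press)
        self._listener.start()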

# Keep the main thread alive so the worker threads/processes can keep training
import time
while True:
    time.sleep(1)
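
# "use_pre_calculated_g" above is not documented in this snippet. As a hedged
# sketch (an assumption, not this library's implementation), "accurate Q values
# computed in advance" usually means the Monte-Carlo discounted return
# G_t = r_t + gamma * G_{t+1}, filled in backwards once an episode has finished:
def precalculate_returns(rewards, gamma=0.99):
    """Discounted return G_t for every step of one finished episode."""
    returns = [0.0] * len(rewards)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# e.g. precalculate_returns([1.0, 0.0, 2.0], gamma=0.9) -> [2.62, 1.8, 2.0]
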
# Example #2
            }
            worker_kwargs[name] = {
                "start_variance_for_each_action": (0, 0.0),
                "variance_decay_ratio_for_each_action": (0.995, 0.995),
                "variance_decay_step": 10,
                "start_offset_for_each_action": (0.0, 0.0),
                "offset_decay_value_for_each_action": (0.00, 0.00),
                "offset_decay_step": 2000
            }
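
# Hedged sketch (an assumption, not the worker's actual code): the variance/
# offset kwargs above are the usual knobs of decayed Gaussian exploration
# noise. Every `variance_decay_step` environment steps the per-action variance
# is multiplied by its decay ratio, every `offset_decay_step` steps the offset
# shrinks by a fixed value, and the noisy action is sampled around the policy
# output:
import numpy as np

def exploration_noise(action, step,
                      start_variance=(0.0, 0.0),
                      variance_decay_ratio=(0.995, 0.995),
                      variance_decay_step=10,
                      start_offset=(0.0, 0.0),
                      offset_decay_value=(0.0, 0.0),
                      offset_decay_step=2000):
    var = (np.asarray(start_variance)
           * np.asarray(variance_decay_ratio) ** (step // variance_decay_step))
    off = (np.asarray(start_offset)
           - np.asarray(offset_decay_value) * (step // offset_decay_step))
    return np.asarray(action) + np.random.normal(off, np.sqrt(var))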

agent = DDPG_Agent_GAL_v1(
    env_prototype_dict_for_workers=env_dict,
    save_dir="./ddpg_ckpt/",
    # These two parameters are nested dicts (one sub-dict per worker)
    kwargs_for_model_dict=model_kwargs,
    kwargs_for_worker_dict=worker_kwargs,
    kwargs_for_global_model={
        # Pre-compute accurate Q values for the dataset in advance
        "use_pre_calculated_g": use_pre_calculated_g,
        "gamma": gamma
    })
model_trainer = FloatActionTrainer(
    # Input: the state placeholder
    input_placeholder=agent.global_model.S,
    # Output: the graph that produces the action from the state
    output_graph=agent.global_model.a,
    action_space_size=(agent.action_space, ),
    tf_sess=agent.sess)
# Start the agent first
agent.start()
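
# FloatActionTrainer is not documented in this snippet. Hedged sketch (an
# assumption about what a trainer wired to the state placeholder and the actor
# output typically does): supervised imitation that regresses the actor output
# toward recorded float actions, in TF1 style and with hypothetical names:
import tensorflow as tf

def build_imitation_step(sess, state_ph, actor_out, action_dim, lr=1e-3):
    target_a = tf.placeholder(tf.float32, [None, action_dim], name="target_a")
    loss = tf.reduce_mean(tf.square(actor_out - target_a))  # MSE to the demos
    # plain SGD adds no new variables, so the agent's existing initialisation
    # is enough; Adam would need its slot variables initialised separately
    train_op = tf.train.GradientDescentOptimizer(lr).minimize(loss)

    def step(states, actions):
        """One supervised update on a batch of recorded (state, action) pairs."""
        _, batch_loss = sess.run([train_op, loss],
                                 feed_dict={state_ph: states, target_a: actions})
        return batch_loss

    return step
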
                carla_UE_ip=ip,
                carla_UE_port=port,
                n_waypoint=100,
                # Without IL, DDPG is relatively hard to train, so use a smaller waypoint spacing!
                waypoint_spacing=3,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[
                    i],
                wait_time_after_apply_action=0.1,
                ratio_of_reaching=0.3,
                add_center_lane_state=True,
                # This is where DDPG differs from A3C: it uses a continuous action space (a hypothetical mapping to carla.VehicleControl is sketched at the end of this section)
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                # In practice DDPG and A3C perform very differently, so maybe try designing a dedicated reward for it?
                #reward_replace=
            )
            env_dict[name] = env

            worker_kwargs[name] = {
                "start_variance": 0.0,  # debug时方差小一些,便于观察走势
                "variance_decay": 0.99,
                "debug": True
            }
            # model_kwargs[name] = {
            # }

DDPG_Agent_GAL_v1(
    env_prototype_dict_for_workers=env_dict,
    save_dir="./a3c_gal_ckpt/",
    kwargs_for_worker_dict=worker_kwargs,
).start()
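
# ContinuousSteeringVelocityBrakeAction_v1 above is not shown in this snippet.
# Hedged sketch (illustration only; the real action class may use a different
# layout, e.g. a target velocity instead of raw throttle): this is how a
# continuous [steer, throttle, brake] vector is commonly applied through the
# CARLA Python API:
import numpy as np
import carla

def apply_continuous_action(vehicle, action):
    """Clamp a 3-float action and apply it as a carla.VehicleControl."""
    steer = float(np.clip(action[0], -1.0, 1.0))
    throttle = float(np.clip(action[1], 0.0, 1.0))
    brake = float(np.clip(action[2], 0.0, 1.0))
    vehicle.apply_control(
        carla.VehicleControl(throttle=throttle, steer=steer, brake=brake))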