Example #1
use_pre_calculated_g = False
gamma = 0.99
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i))  # worker name
            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[i],
                wait_time_after_apply_action=0.1,
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=FinalStepDriveDistanceAverageSpeedAverageDistanceToLaneCost_v1(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "use_pre_calculated_g": use_pre_calculated_g,
                "gamma": gamma
            }
            worker_kwargs[name] = {
                "start_variance_for_each_action": (0, 0.0),
            }

Example #2
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i))  # worker name
            env = LaneFollowEnv_v1(
                # Some road sections have large curvature (90-degree turns) where the
                # lane-line fit is poor and the car is wrongly judged to have left the
                # lane, so the lane condition is relaxed here.
                # Note that with the relaxed condition the vehicle may switch lanes at will!
                # Once the model is pre-trained, however, lane changes no longer occur.
                maximum_distance_to_lane=0.5,
                # Recommended to plot only during the debug stage
                plot_lane_on_UE=False,
                use_random_start_point=True,
                carla_egg_path=carla_egg_path,
                carla_pythonAPI_path=carla_pythonAPI_path,
                carla_UE_ip=ip,
                carla_UE_port=port,
                wait_time_after_apply_action=0.1,
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=SafeDriveDistanceCost(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "use_pre_calculated_g": use_pre_calculated_g,
                "gamma": gamma
            }

Example #3
use_pre_calculated_g = False
gamma = 0.99

for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i))  # worker name
            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                use_random_start_point=True,
                wait_time_after_apply_action=0.1,
                # This is where DDPG differs from A3C: a continuous action space is used
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=SafeDriveDistanceCost(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "gamma": gamma,
                "use_pre_calculated_g": use_pre_calculated_g,
            }
            worker_kwargs[name] = {
                # Imitation is the main driver here, so RL does no exploration
                "start_variance_for_each_action": (0., 0.),

Example #4
            env = LaneFollowEnv_v1(
                # Random start point, useful for checking overfitting
                use_random_start_point=True,
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[i],
                wait_time_after_apply_action=0.1,
                # This is where DDPG differs from A3C: a continuous action space is used
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                # For DDPG do not use a reward with positive values; use a cost instead

                # Works, but very slow: use the safe driving distance as the cost;
                # only the final step has a cost, which is then propagated back through
                # the earlier steps with gamma
                # reward_replace=SafeDriveDistanceCost(),

                # Use the distance to the lane line as the cost
                # reward_replace=DistanceToLaneCost(),

                # Works, but very slow: combined reward from speed, lane line and total distance
                reward_replace=FinalStepDriveDistanceAverageSpeedAverageDistanceToLaneCost_v1(),
                # Lower the minimum velocity to make turning easier
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    # The car is pre-driven after reset, so dropping to low speed again ends the episode immediately
                    "minimum_action_taken_on_low_velocity": 0
                })
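
Several of these snippets toggle use_pre_calculated_g and use a cost that appears only at the final step and is then spread backward with gamma, as the comments above describe. The sketch below shows how such pre-calculated per-step returns could be computed; the function name and the list-of-costs episode format are assumptions for illustration, not the repository's API:

def pre_calculate_g(costs, gamma=0.99):
    # Discounted return g_t for each step of one finished episode.
    # With a final-step cost such as SafeDriveDistanceCost, only the last
    # entry of costs is non-zero; earlier steps receive gamma-discounted
    # copies of it.
    g = 0.0
    returns = []
    for c in reversed(costs):
        g = c + gamma * g
        returns.append(g)
    returns.reverse()
    return returns

# Only the last step carries a cost of -30 (e.g. negative safe driving distance):
print(pre_calculate_g([0.0, 0.0, 0.0, -30.0], gamma=0.99))
# approximately [-29.109, -29.403, -29.7, -30.0]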
Example #5
carla_egg_path = "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg"
carla_pythonAPI_path = "/home/wang/Desktop/carla/PythonAPI/carla"
use_pre_calculated_g = False
gamma = 0.99
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i))  # worker name
            env = LaneFollowEnv_v1(
                use_random_start_point=True,
                carla_egg_path=carla_egg_path,
                carla_pythonAPI_path=carla_pythonAPI_path,
                carla_UE_ip=ip,
                carla_UE_port=port,
                wait_time_after_apply_action=0.1,
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=SafeDriveDistanceCost(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "use_pre_calculated_g": use_pre_calculated_g,
                "gamma": gamma
            }
            worker_kwargs[name] = {
                "start_variance_for_each_action": (1, 1.0),
                "variance_decay_ratio_for_each_action": (0.995, 0.995),
                "variance_decay_step": 10,
            }

Example #6
model_kwargs = {}

for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i))  # worker name
            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[i],
                wait_time_after_apply_action=0.1,
                # This is where DDPG differs from A3C: a continuous action space is used
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                # For DDPG do not use a reward with positive values; use a cost instead
                reward_replace=SafeDriveDistanceCost(),
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    # The car is pre-driven after reset, so dropping to low speed again ends the episode immediately
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                # Pre-compute exact q values for the dataset; both the local model here and the global model later need them!
                "use_pre_calculated_g": False,
                "gamma": 0.9
            }
            worker_kwargs[name] = {

Example #7
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i))  # worker name
            # Keep the name of the last worker, used for delayed debugging
            delay_debug_worker_name = name

            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[i],
                wait_time_after_apply_action=0.1,
                # Use the high-precision control action
                action_replace=HighPrecisionControl_v1(),
                # For A3C the lane-line constraint does not need to be too strict, otherwise training also becomes very hard!
                maximum_distance_to_lane=10,
            )
            env_dict[name] = env

delay_debug_worker_name = []

# Worker names used for delayed debugging; the line below is commented out to
# disable delayed debugging on the last worker
#delay_debug_worker_name = [delay_debug_worker_name]

# Finally pass the name:env dict to the agent; the envs are used for training and
# each env is assigned its own worker
A3C_GAL_Train_Agent_v1(
Example #8

from ReinforcementLearning.Modules.Agents.DDPG_Agent import DDPG_Agent_v1
from ReinforcementLearning.Modules.Environments.Environments_laneFollow import LaneFollowEnv_v1
from ReinforcementLearning.Modules.Environments.Actions import ContinuousSteeringVelocityBrakeAction_v1
from ReinforcementLearning.Modules.Environments.Rewards import DistanceToLaneCost, SafeDriveDistanceCost

env = LaneFollowEnv_v1(
    carla_egg_path=
    "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
    carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
    carla_UE_ip="10.10.9.128",
    carla_UE_port=2000,
    vehicle_start_point_index=0,
    wait_time_after_apply_action=0.1,
    action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
    reward_replace=SafeDriveDistanceCost(),
    # Pre-drive the car for 2.0 seconds
    drive_time_after_reset=2.0,
    kwargs_for_done_condition={
        # The car is pre-driven after reset, so dropping to low speed again ends the episode immediately
        "minimum_action_taken_on_low_velocity": 0
    }
    #reward_replace=SafeDriveDistanceReward_v1(),
)
# Note that the q values used here are calculated manually beforehand
DDPG_Agent_v1(env=env,
              kwargs_for_model={
                  "gamma": 0.9
              },
              use_model_with_pre_calculated_g=True).train()
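
Across these examples, minimum_action_taken_on_low_velocity is set to 0 together with drive_time_after_reset=2.0: the environment drives the car for two seconds right after reset, and afterwards a single step below minimum_velocity ends the episode. The snippet below only restates that termination rule for clarity; it is a guess at the behaviour described by the comments, not the environment's actual implementation:

def episode_done(velocity, n_low_velocity_actions,
                 minimum_velocity=0.5,
                 minimum_action_taken_on_low_velocity=0):
    # Terminate once the number of low-velocity actions reaches the threshold;
    # with a threshold of 0, the first slow step after the pre-drive phase
    # ends the episode.
    if velocity >= minimum_velocity:
        return False
    return n_low_velocity_actions >= minimum_action_taken_on_low_velocity

print(episode_done(velocity=0.3, n_low_velocity_actions=0))  # True with threshold 0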