def create_one_client(self, i, env_name):
    # Build one (agent, environment) pair; the environment is seeded with the
    # client index so each client sees an independent random stream.
    env = gym.make(env_name)
    env.seed(i)
    # env = gym.make('LunarLander-v2')
    observation_space = env.observation_space.shape[0]
    agent = DDQN(observation_space, env.action_space.n)
    return agent, env
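# Illustrative call pattern (a minimal sketch; the surrounding trainer class,
# 'CartPole-v1', and the client count are assumed here, not taken from this code):
#
#     clients = [self.create_one_client(i, 'CartPole-v1') for i in range(4)]
#
# Seeding with the index i keeps each client's random stream distinct, so the
# resulting (agent, env) pairs can collect experience independently.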
# rendering the model
from ped_car_2 import PedestrianEnv
import numpy as np
import random
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from keras.models import load_model
from My_DDQN import DDQN

env = PedestrianEnv()
observation = env.reset()
observation_space = len(observation)  # we get the number of parameters in the state
action_space = 10  # number of discrete velocities pedestrian can take
agent = DDQN(observation_space, action_space)
agent.exploration_rate = 0
agent.model = load_model('ddqn_ped_Learner.h5')

death_toll = 0
safe_chicken = 0
done_count = 0
count = 0
Ped_Pos = []
Car_xPos = []
Car_yPos = []
d = env.d
W = env.W

env = PedestrianEnv()
episodes = 3
for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, observation_space])
import random

import gym
import numpy as np
from gym import spaces
from keras.models import load_model

from My_DDQN import DDQN


class CarEnv(gym.Env):
    """
    Description:
        A pedestrian aims to cross the road without being hit by the autonomous car.
        The goal is to make the pedestrian reach the other end safely.

    Observation:
        Parameters:
            Width of the road      W = 9
            Length of observation  D = 40
            Safe Region: square box of size 1.5 around the pedestrian
        Type: Box(4)
        Num   Observation                          Min        Max
        0     Car Position (C_position)            [0, 1.5]   [40, 7.5]
        1     Car Velocity (C_velocity)            0          15
        2     Car acceleration (C_acceleration)    -1.5       1.5
        3     Pedestrian position (P_position)     [40, 0]    [40, 9]

    Actions:
        Type: Discrete(2)
        Num   Action
        0     Move front with velocity v
        1     Move back with velocity -v
        ####### Have to edit this portion.

    Reward:
        Reward is 1 for every step taken, including the termination step.

    Starting State:
        ???? All observations are assigned a uniform random value in [-0.05..0.05]

    Episode Termination:
        Check with Desik ?????
        The pedestrian is dead. RIP
        Pedestrian reaches end of the cross road of dimension W, or the car crosses length d.

    Solved Requirements:
        Considered solved when the pedestrian reaches the other side of the road safely.
    """

    def __init__(self, W=10, d=50, C_velocity_limit=8, C_acceleration_limit=3,
                 P_velocity_limit=1.5, safe_x=4, safe_y=1.5, safer_x=6, safer_y=2,
                 prob=0.8, delta_t=0.1):
        self.d = d
        self.W = W
        self.state = None
        self.C_state = None
        self.P_state = None
        self.P_velocity_limit = P_velocity_limit
        self.C_velocity_limit = C_velocity_limit
        self.C_acceleration_limit = C_acceleration_limit
        self.action_space = spaces.Box(np.array([-1 * self.C_acceleration_limit]),
                                       np.array([self.C_acceleration_limit]),
                                       dtype=np.float32)
        self.safe_x = safe_x
        self.safe_y = safe_y
        self.safer_x = safer_x
        self.safer_y = safer_y
        self.prob = prob
        self.delta_t = delta_t
        self.time = None
        self.observation_space = 5  # we get the number of parameters in the state
        self.action_ped = 10        # number of discrete velocities pedestrian can take
        # Pre-trained pedestrian policy: the environment queries this DDQN greedily
        # (exploration_rate = 0) to move the pedestrian at every step.
        self.agent = DDQN(self.observation_space, self.action_ped)
        self.agent.exploration_rate = 0
        self.agent.model = load_model('ddqn_ped_Learner.h5')

    def reset(self):
        self.time = 0
        self.P_state = [0]
        self.C_state = [0,
                        random.uniform(0 + self.safer_y, self.W - self.safer_y),
                        0,
                        random.uniform(-1 * self.C_acceleration_limit, self.C_acceleration_limit)]
        # self.C_state = [0,
        #                 2,
        #                 random.uniform(10, self.C_velocity_limit),
        #                 3]
        self.state = self.P_state + self.C_state
        return self.state

    def step_C(self, state, action):
        # Advance the car one time step under the chosen acceleration.
        C_acceleration_temp = action
        C_position_y_temp = state[2]
        C_position_x_temp = max(0, state[1] + state[3] * self.delta_t
                                + 0.5 * C_acceleration_temp * self.delta_t ** 2)
        C_velocity_temp = min(max(0, state[3] + C_acceleration_temp * self.delta_t),
                              self.C_velocity_limit)
        C_state_temp = [C_position_x_temp, C_position_y_temp, C_velocity_temp, C_acceleration_temp]
        done = False
        if (state[0] > state[2] - self.safe_y and state[0] < state[2] + self.safe_y
                and state[1] > self.d - self.safe_x and state[1] < self.d + self.safe_x):
            # Car is inside the pedestrian's safe box: collision.
            reward = -10000
            done = True
            return C_state_temp, reward, done
        elif state[1] > self.safer_x + self.d:
            # Car has crossed the road segment safely.
            reward = 5000
            done = True
            return C_state_temp, reward, done
        else:
            reward = -1 * self.delta_t * self.time
            return C_state_temp, reward, done

    def step_P(self, state):
        # The pedestrian picks a discrete action from its pre-trained DDQN and maps
        # it to a velocity in [-P_velocity_limit, P_velocity_limit].
        state_1 = np.reshape(state, [1, self.observation_space])
        action = self.agent.act(state_1)
        action = -1 * self.P_velocity_limit + action * (2 * self.P_velocity_limit / (self.action_ped - 1))
        P_state_temp = min(self.W, max(0, state[0] + action * self.delta_t))
        return [P_state_temp]

    def step(self, action_1):
        assert self.action_space.contains(action_1), "%r (%s) invalid" % (action_1, type(action_1))
        action = action_1[0]
        self.time += 1
        C_state_temp, reward, done = self.step_C(self.state, action)
        P_state_temp = self.step_P(self.state)
        self.C_state = C_state_temp
        self.P_state = P_state_temp
        self.state = self.P_state + self.C_state
        if self.time > 100:
            # Hard time limit: end the episode with a large penalty.
            done = True
            reward = -1000
        return self.state, reward, done
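# Minimal usage sketch for CarEnv, assuming the class above is importable and that
# 'ddqn_ped_Learner.h5' is on disk for the embedded pedestrian policy: the car is
# driven with random accelerations sampled from its Box action space until the
# episode ends. The variable names below are illustrative only.
if __name__ == '__main__':
    demo_env = CarEnv()
    demo_state = demo_env.reset()
    demo_done = False
    demo_return = 0.0
    while not demo_done:
        # action_space is a Box of shape (1,), so sample() already yields a valid
        # one-element array; step() unpacks the scalar acceleration itself.
        demo_action = demo_env.action_space.sample()
        demo_state, demo_reward, demo_done = demo_env.step(demo_action)
        demo_return += demo_reward
    print('episode return:', demo_return)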