Example #1
    def __init__(self, env, is_batch_norm=False, is_grad_inverter=True):
        super().__init__(env)
        assert isinstance(env.action_space,
                          Box), "action space must be continuous"
        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.observation_space_size,
                                           self.action_space_size)
            self.actor_net = ActorNet_bn(self.observation_space_size,
                                         self.action_space_size)

        else:
            self.critic_net = CriticNet(self.observation_space_size,
                                        self.action_space_size)
            self.actor_net = ActorNet(self.observation_space_size,
                                      self.action_space_size)

        self.is_grad_inverter = is_grad_inverter
        self.replay_memory = deque()

        self.time_step = 0

        action_max = np.array(self.high).tolist()
        action_min = np.array(self.low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
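
A minimal usage sketch, not taken from the project: it assumes this constructor belongs to a DDPG-style agent class (hypothetically named DDPG here) and a Gym environment whose action space is a Box, as the assert requires.

    import gym

    env = gym.make("Pendulum-v1")  # any env with a continuous Box action space
    agent = DDPG(env, is_batch_norm=False, is_grad_inverter=True)  # class name is an assumption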
Example #2
    def __init__(self, env, is_batch_norm=False):
        self.env = env
        self.num_states = env.observation_space.shape[0]
        self.num_actions = env.action_space.shape[0]

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)
        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay buffer:
        self.replay_memory = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        action_max = np.array(env.action_space.high).tolist()
        action_min = np.array(env.action_space.low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
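
Unlike Example #1, this version reads the bounds straight off env.action_space. A quick illustration of what action_bounds ends up holding (the env choice is hypothetical):

    import gym
    import numpy as np

    env = gym.make("Pendulum-v1")
    action_max = np.array(env.action_space.high).tolist()  # [2.0]
    action_min = np.array(env.action_space.low).tolist()   # [-2.0]
    action_bounds = [action_max, action_min]               # [[2.0], [-2.0]]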
Example #3
    def __init__(self, env, is_batch_norm):
        self.env = env
        # State and action dimensions are hardcoded for this project's environment.
        self.num_states = 59
        self.num_actions = 3

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)
        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay buffer:
        self.replay_memory = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        # Action bounds hardcoded to [-1, 1] in each of the three dimensions.
        action_max = [1.0, 1.0, 1.0]
        action_min = [-1.0, -1.0, -1.0]
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
Example #4
    def __init__(self, num_states, num_actions, is_batch_norm):
        self.num_states = num_states
        self.num_actions = num_actions

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)

        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay buffer:
        self.replay_memory = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        # Action bounds are hardcoded to [0, 5] in every dimension.
        action_max = (5 * np.ones(num_actions)).tolist()
        action_min = np.zeros(num_actions).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
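
All the constructors above end by building a grad_inverter from [action_max, action_min]. Below is a minimal NumPy sketch of the inverting-gradients rule (Hausknecht & Stone, 2016) that these grad_inverter classes presumably implement; the projects themselves build this as a TensorFlow op, and the class and method names here are illustrative only.

    import numpy as np

    class GradInverterSketch:
        def __init__(self, action_bounds):
            # action_bounds = [action_max, action_min], exactly as built above.
            self.pmax = np.asarray(action_bounds[0], dtype=float)
            self.pmin = np.asarray(action_bounds[1], dtype=float)
            self.prange = self.pmax - self.pmin

        def invert(self, grads, actions):
            grads = np.asarray(grads, dtype=float)
            actions = np.asarray(actions, dtype=float)
            # Gradients pushing an action up are scaled by the remaining headroom
            # (pmax - p); gradients pushing it down are scaled by the distance to
            # the floor (p - pmin). Actions are thus steered away from the bounds
            # instead of being clipped at them.
            up = (self.pmax - actions) / self.prange
            down = (actions - self.pmin) / self.prange
            return np.where(grads >= 0, grads * up, grads * down)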
Example #5
File: agent.py  Project: yejunhong1/DRAG
    def __init__(self, hisar_size, ar_size, action_size, TAU=0.001, is_batch_norm=0, write_sum=0, net_size_scale=1, max_load=1, beta0=beta):
        self.hisar_size = hisar_size
        self.load_size = action_size + 1
        self.ar_size = ar_size
        self.state_size = action_size * 2
        self.action_size = action_size
        self.ar_action_size = ar_size + action_size

        # print("net_size_scale: " + str(net_size_scale))
        if is_batch_norm:
            if len(CN_N_HIDDENS) == 2:
                self.critic_net = CriticNet_bn(self.state_size, self.action_size, TAU, write_sum, net_size_scale)
            else:
                self.critic_net = CriticNet_bn_3(self.state_size, self.action_size, TAU, write_sum, net_size_scale)
            self.actor_net = ActorNet_bn(self.state_size, self.action_size, TAU, write_sum, net_size_scale)
            self.ar_pred_net = ARPredNet_bn(self.hisar_size, self.ar_size, write_sum, net_size_scale)  # arrival-rate prediction network
            self.load_map_net = LoadMapNet_bn(self.ar_size, self.action_size, self.load_size, write_sum, net_size_scale)  # load-mapping network
        else:
            self.critic_net = CriticNet(self.state_size, self.action_size, TAU, write_sum, net_size_scale)
            self.actor_net = ActorNet(self.state_size, self.action_size, TAU, write_sum, net_size_scale)
            self.ar_pred_net = ARPredNet(self.hisar_size, self.ar_size, write_sum, net_size_scale)  # arrival-rate prediction network
            self.load_map_net = LoadMapNet(self.ar_size, self.action_size, self.load_size, write_sum, net_size_scale)  # load-mapping network

        self.env = ENV(action_size, max_load=max_load, beta0=beta0)

        # self.k_nearest_neighbors = int(max_actions * k_ratio)
        # Initialize replay buffers (one per trainable network):
        self.replay_memory_ac = deque()
        self.replay_memory_arp = deque()
        self.replay_memory_lm = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        action_max = np.ones(self.action_size).tolist()
        action_min = np.zeros(self.action_size).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
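
One caveat shared by all five examples: the replay memory is created as a plain deque(), which grows without bound. A bounded buffer is the usual choice; a sketch, with a hypothetical capacity constant:

    from collections import deque

    REPLAY_MEMORY_SIZE = 100000  # hypothetical capacity, tune per task
    replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)  # oldest transitions are dropped automatically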