def __init__(self, env, is_batch_norm=False, is_grad_inverter=True):
    """Set up the actor/critic networks, replay memory and gradient inverter.

    Args:
        env: Environment forwarded to the parent initializer. Its
            ``action_space`` must be a ``Box`` instance.
        is_batch_norm: When truthy, the ``*_bn`` network variants are
            built instead of the plain ones.
        is_grad_inverter: Stored on the instance unchanged. The inverter
            object itself is constructed regardless of this flag.

    Raises:
        AssertionError: If ``env.action_space`` is not a ``Box``.
    """
    super().__init__(env)

    # Explicit check rather than an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently disable this
    # validation. AssertionError is kept so existing callers observe the
    # same exception type and message as before.
    if not isinstance(env.action_space, Box):
        raise AssertionError("action space must be continuous")

    # NOTE(review): observation_space_size, action_space_size, high and low
    # are read from self -- presumably populated by the parent initializer;
    # confirm against the base class.
    if is_batch_norm:
        self.critic_net = CriticNet_bn(self.observation_space_size, self.action_space_size)
        self.actor_net = ActorNet_bn(self.observation_space_size, self.action_space_size)
    else:
        self.critic_net = CriticNet(self.observation_space_size, self.action_space_size)
        self.actor_net = ActorNet(self.observation_space_size, self.action_space_size)

    self.is_grad_inverter = is_grad_inverter
    self.replay_memory = deque()
    self.time_step = 0

    # Bounds are passed as [upper, lower], each converted to a plain list.
    action_max = np.array(self.high).tolist()
    action_min = np.array(self.low).tolist()
    self.grad_inv = grad_inverter([action_max, action_min])
def __init__(self, env, is_batch_norm=False):
    """Create the networks, replay memory and gradient inverter for ``env``.

    Args:
        env: Environment object. The first entry of
            ``observation_space.shape`` and of ``action_space.shape`` give
            the state and action sizes; ``action_space.high`` / ``.low``
            give the action bounds.
        is_batch_norm: When truthy, use the ``*_bn`` network classes.
    """
    self.env = env
    self.num_states = env.observation_space.shape[0]
    self.num_actions = env.action_space.shape[0]

    # Both networks take the same (state size, action size) pair.
    dims = (self.num_states, self.num_actions)
    if not is_batch_norm:
        self.critic_net = CriticNet(*dims)
        self.actor_net = ActorNet(*dims)
    else:
        self.critic_net = CriticNet_bn(*dims)
        self.actor_net = ActorNet_bn(*dims)

    # Replay memory starts empty; both counters start at zero.
    self.replay_memory = deque()
    self.time_step = 0
    self.counter = 0

    # The inverter receives [upper bounds, lower bounds] as plain lists.
    upper = np.array(env.action_space.high).tolist()
    lower = np.array(env.action_space.low).tolist()
    self.grad_inv = grad_inverter([upper, lower])
def __init__(self, env, is_batch_norm, num_states=59, num_actions=3):
    """Create the networks, replay memory and gradient inverter.

    The state and action sizes used to be fixed at 59 and 3 inside the
    body; they are now parameters whose defaults reproduce that behaviour.

    Args:
        env: Environment object; stored on the instance and not otherwise
            used here.
        is_batch_norm: When truthy, use the ``*_bn`` network classes.
        num_states: Integer size of the state vector passed to both
            networks. Defaults to 59.
        num_actions: Integer size of the action vector passed to both
            networks; also the length of the action-bound lists.
            Defaults to 3.
    """
    self.env = env
    self.num_states = num_states
    self.num_actions = num_actions

    if is_batch_norm:
        self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
        self.actor_net = ActorNet_bn(self.num_states, self.num_actions)
    else:
        self.critic_net = CriticNet(self.num_states, self.num_actions)
        self.actor_net = ActorNet(self.num_states, self.num_actions)

    # Replay memory starts empty; both counters start at zero.
    self.replay_memory = deque()
    self.time_step = 0
    self.counter = 0

    # Every action component is bounded to [-1.0, 1.0]; the inverter
    # receives the bounds as [upper, lower].
    action_max = [1.0] * num_actions
    action_min = [-1.0] * num_actions
    self.grad_inv = grad_inverter([action_max, action_min])
def __init__(self, num_states, num_actions, is_batch_norm):
    """Create the networks, replay memory and gradient inverter.

    Args:
        num_states: State size handed to both networks.
        num_actions: Action size handed to both networks; also the length
            of the action-bound lists.
        is_batch_norm: When truthy, use the ``*_bn`` network classes.
    """
    self.num_states = num_states
    self.num_actions = num_actions

    # Both networks take the same (state size, action size) pair.
    dims = (self.num_states, self.num_actions)
    if not is_batch_norm:
        self.critic_net = CriticNet(*dims)
        self.actor_net = ActorNet(*dims)
    else:
        self.critic_net = CriticNet_bn(*dims)
        self.actor_net = ActorNet_bn(*dims)

    # Replay memory starts empty; both counters start at zero.
    self.replay_memory = deque()
    self.time_step = 0
    self.counter = 0

    # Each action component is bounded above by 5.0 and below by 0.0.
    # The (1, n) arrays are flattened into plain Python lists and passed
    # to the inverter as [upper, lower].
    upper = (5 * np.ones((1, num_actions))).flatten().tolist()
    lower = np.zeros((1, num_actions)).flatten().tolist()
    self.grad_inv = grad_inverter([upper, lower])
def __init__( self, hisar_size, ar_size, action_size, TAU = 0.001, is_batch_norm = 0, write_sum = 0, net_size_scale=1, max_load=1, beta0=beta):
    """Create the four networks, the environment, buffers and inverter.

    Args:
        hisar_size: First size argument of the arrival-rate prediction
            network.
        ar_size: Second size argument of the arrival-rate prediction
            network and first size argument of the load-mapping network.
        action_size: Action size; the state size is twice this value and
            the load size is this value plus one.
        TAU: Forwarded to the actor and critic network constructors.
        is_batch_norm: When truthy, use the ``*_bn`` network classes.
        write_sum: Forwarded to every network constructor.
        net_size_scale: Forwarded to every network constructor.
        max_load: Forwarded to ``ENV``.
        beta0: Forwarded to ``ENV``.
    """
    # Sizes derived from the arguments.
    self.hisar_size = hisar_size
    self.load_size = action_size + 1
    self.ar_size = ar_size
    self.state_size = action_size * 2
    self.action_size = action_size
    self.ar_action_size = ar_size + action_size

    # Argument lists shared between the batch-norm and plain variants.
    ac_args = (self.state_size, self.action_size, TAU, write_sum, net_size_scale)
    arp_args = (self.hisar_size, self.ar_size, write_sum, net_size_scale)
    lm_args = (self.ar_size, self.action_size, self.load_size, write_sum, net_size_scale)

    if is_batch_norm:
        # Only the batch-norm critic depends on CN_N_HIDDENS: length 2
        # selects CriticNet_bn, any other length selects CriticNet_bn_3.
        if len(CN_N_HIDDENS) == 2:
            self.critic_net = CriticNet_bn(*ac_args)
        else:
            self.critic_net = CriticNet_bn_3(*ac_args)
        self.actor_net = ActorNet_bn(*ac_args)
        # Arrival rate prediction network.
        self.ar_pred_net = ARPredNet_bn(*arp_args)
        # Load mapping network.
        self.load_map_net = LoadMapNet_bn(*lm_args)
    else:
        self.critic_net = CriticNet(*ac_args)
        self.actor_net = ActorNet(*ac_args)
        # Arrival rate prediction network.
        self.ar_pred_net = ARPredNet(*arp_args)
        # Load mapping network.
        self.load_map_net = LoadMapNet(*lm_args)

    self.env = ENV(action_size, max_load=max_load, beta0=beta0)

    # One initially empty buffer each for the "ac", "arp" and "lm" suffixes.
    self.replay_memory_ac = deque()
    self.replay_memory_arp = deque()
    self.replay_memory_lm = deque()

    # Both counters start at zero.
    self.time_step = 0
    self.counter = 0

    # Each action component is bounded to [0.0, 1.0]; the inverter receives
    # the bounds as [upper, lower].
    upper = np.ones(self.action_size).tolist()
    lower = np.zeros(self.action_size).tolist()
    self.grad_inv = grad_inverter([upper, lower])