def __init__(self, state_size, action_size):
    """Create the actor's layers: two linear layers followed by a softmax head.

    Args:
        state_size: Input width of the first linear layer.
        action_size: Output width of the second linear layer.
    """
    super(Actor, self).__init__()
    self.device = TrainerMetadata().device

    sizes = [state_size, 24, action_size]
    self.layer_sizes = sizes
    n_in, n_hidden, n_out = sizes

    self.linear1 = nn.Linear(n_in, n_hidden)
    self.linear2 = nn.Linear(n_hidden, n_out)
    # The softmax is applied over the last dimension of its input.
    self.head = nn.Softmax(dim=-1)

    # Only the weight tensors are handed to the project's fan-in initialiser.
    for layer in (self.linear1, self.linear2):
        u.fanin_init(layer.weight)
def __init__(self, state_size, action_size):
    """Create the critic's three linear layers and initialise their weights.

    The first layer takes ``state_size + action_size`` inputs; the head
    produces ``action_size`` outputs.

    Args:
        state_size: Number of state features contributing to the input width.
        action_size: Number of action features contributing to the input
            width; also used as the output width of the head.
    """
    super(Critic, self).__init__()

    sizes = [state_size + action_size, 400, 300, action_size]
    self.layer_sizes = sizes

    # Consecutive (in, out) pairs of ``sizes`` define the three layers.
    self.linear1, self.linear2, self.head = (
        nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])
    )

    # Hidden-layer weights go through the project's fan-in initialiser.
    for hidden in (self.linear1, self.linear2):
        u.fanin_init(hidden.weight)

    # NOTE(review): 10e-4 evaluates to 1e-3, so this bound is 3e-3 --
    # confirm that is the intended range for the head weights.
    head_bound = 3 * 10e-4
    nn.init.uniform_(self.head.weight, a=-head_bound, b=head_bound)
def __init__(self, state_size, value_size):
    """Create the critic's three linear layers and initialise their weights.

    Args:
        state_size: Input width of the first linear layer.
        value_size: Output width of the head layer.
    """
    super(Critic, self).__init__()
    self.device = TrainerMetadata().device

    hidden_width = 24
    sizes = [state_size, hidden_width, hidden_width, value_size]
    self.layer_sizes = sizes

    self.linear1 = nn.Linear(sizes[0], sizes[1])
    self.linear2 = nn.Linear(sizes[1], sizes[2])
    self.head = nn.Linear(sizes[2], sizes[3])

    # Hidden-layer weights go through the project's fan-in initialiser.
    for hidden in (self.linear1, self.linear2):
        u.fanin_init(hidden.weight)

    # NOTE(review): 10e-4 evaluates to 1e-3, so this bound is 3e-3 --
    # confirm that is the intended range for the head weights.
    head_bound = 3 * 10e-4
    nn.init.uniform_(self.head.weight, a=-head_bound, b=head_bound)
def __init__(self, state_size, action_size):
    """Create the meta-predictor's three linear layers and initialise weights.

    The input width is ``state_size + action_size`` and the head has a
    single output unit.

    Args:
        state_size: Number of state features contributing to the input width.
        action_size: Number of action features contributing to the input width.
    """
    super(MetaPredictor, self).__init__()

    sensorimotor_size = state_size + action_size
    sizes = [sensorimotor_size, 32, 16, 1]
    self.layer_sizes = sizes
    n_in, n_h1, n_h2, n_out = sizes

    self.linear1 = nn.Linear(n_in, n_h1)
    self.linear2 = nn.Linear(n_h1, n_h2)
    self.head = nn.Linear(n_h2, n_out)

    # Hidden-layer weights go through the project's fan-in initialiser.
    for hidden in (self.linear1, self.linear2):
        u.fanin_init(hidden.weight)

    # NOTE(review): 10e-4 evaluates to 1e-3, so this bound is 3e-3 --
    # confirm that is the intended range for the head weights.
    head_bound = 3 * 10e-4
    nn.init.uniform_(self.head.weight, a=-head_bound, b=head_bound)
def __init__(self, state_size, action_size):
    """Create the state predictor's three linear layers and initialise weights.

    The input width is ``state_size + action_size`` and the head outputs
    ``state_size`` values.

    Args:
        state_size: Number of state features; part of the input width and
            also the output width of the head.
        action_size: Number of action features contributing to the input width.
    """
    super(StatePredictor, self).__init__()

    # TODO: state prediction can probably be learned without a very large
    #       network? (try testing state prediction on its own)
    # The idea of predicting the state for intrinsic motivation follows
    # Oudeyer's 2007 paper.
    sensorimotor_size = state_size + action_size
    sizes = [sensorimotor_size, 32, 16, state_size]
    self.layer_sizes = sizes

    # Consecutive (in, out) pairs of ``sizes`` define the three layers.
    self.linear1, self.linear2, self.head = (
        nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])
    )

    # Hidden-layer weights go through the project's fan-in initialiser.
    for hidden in (self.linear1, self.linear2):
        u.fanin_init(hidden.weight)

    # NOTE(review): 10e-4 evaluates to 1e-3, so this bound is 3e-3 --
    # confirm that is the intended range for the head weights.
    head_bound = 3 * 10e-4
    nn.init.uniform_(self.head.weight, a=-head_bound, b=head_bound)
def __init__(self, state_size, action_size, action_range=(-1, 1)):
    """Create the network's three linear layers and initialise their weights.

    Args:
        state_size: Input width of the first linear layer.
        action_size: Output width of the head layer.
        action_range: Two-item iterable unpacked into ``action_low`` and
            ``action_high``. Defaults to ``(-1, 1)``.
    """
    super(DQNNetwork, self).__init__()
    self.device = TrainerMetadata().device

    hidden_width = 24
    sizes = [state_size, hidden_width, hidden_width, action_size]
    self.layer_sizes = sizes

    # TODO: add a check that the input is normalised, then drop the range.
    low, high = action_range
    self.action_low = low
    self.action_high = high

    self.linear1 = nn.Linear(sizes[0], sizes[1])
    self.linear2 = nn.Linear(sizes[1], sizes[2])
    self.head = nn.Linear(sizes[2], sizes[3])

    # Hidden-layer weights go through the project's fan-in initialiser.
    for hidden in (self.linear1, self.linear2):
        u.fanin_init(hidden.weight)

    # NOTE(review): 10e-3 evaluates to 1e-2, so this bound is 3e-2 --
    # confirm that is the intended range for the head weights.
    head_bound = 3 * 10e-3
    nn.init.uniform_(self.head.weight, a=-head_bound, b=head_bound)
def __init__(self, state_size, action_size, action_range=(-1, 1)):
    """Create the actor's three linear layers and initialise their weights.

    Args:
        state_size: Input width of the first linear layer.
        action_size: Output width of the head layer.
        action_range: Two-item iterable unpacked into ``action_low`` and
            ``action_high``. Defaults to ``(-1, 1)``.
    """
    super(Actor, self).__init__()

    # Hidden widths of 256 and 128 were noted as an alternative configuration.
    sizes = [state_size, 400, 300, action_size]
    self.layer_sizes = sizes
    n_in, n_h1, n_h2, n_out = sizes

    # TODO: add a check that the input is normalised, then drop the range.
    low, high = action_range
    self.action_low = low
    self.action_high = high

    self.linear1 = nn.Linear(n_in, n_h1)
    self.linear2 = nn.Linear(n_h1, n_h2)
    self.head = nn.Linear(n_h2, n_out)

    # Hidden-layer weights go through the project's fan-in initialiser.
    for hidden in (self.linear1, self.linear2):
        u.fanin_init(hidden.weight)

    # NOTE(review): 10e-3 evaluates to 1e-2, so this bound is 3e-2 --
    # confirm that is the intended range for the head weights.
    head_bound = 3 * 10e-3
    nn.init.uniform_(self.head.weight, a=-head_bound, b=head_bound)