def __init__(self, N_STATES, N_ACTIONS, H1Size, H2Size, done=None, reward=None):
    """Environment model: predicts the next state and, optionally, reward and done.

    Args:
        N_STATES: dimensionality of the state vector.
        N_ACTIONS: dimensionality of the action vector.
        H1Size: width of the first hidden layer.
        H2Size: width of the second hidden layer.
        done: optional externally supplied done function; when None, a learned
            linear head (N_STATES -> 1) is created instead.
        reward: optional externally supplied reward function; when None, a
            learned linear head (N_STATES -> 1) is created instead.
    """
    # Zero-arg super() — consistent with the other __init__ methods in this file.
    super().__init__()

    # Shared trunk: (state, action) concat -> hidden -> hidden -> predicted next state.
    self.fc1 = nn.Linear(N_STATES + N_ACTIONS, H1Size)
    self.fc2 = nn.Linear(H1Size, H2Size)
    self.statePrime = nn.Linear(H2Size, N_STATES)
    init_list = [self.fc1, self.fc2, self.statePrime]

    if reward is None:
        # No reward function supplied: learn one as a linear head on the state.
        self.reward = nn.Linear(N_STATES, 1)
        self.reward_layer = True
        init_list.append(self.reward)
    else:
        # Reward function has been given; use it directly (not trained).
        self.reward = reward
        self.reward_layer = False

    if done is None:
        # No done function supplied: learn one as a linear head on the state.
        self.done = nn.Linear(N_STATES, 1)
        self.done_layer = True
        init_list.append(self.done)
    else:
        # Done function has been given; use it directly (not trained).
        self.done = done
        self.done_layer = False

    # Initialize only the learned layers from N(0.0, 0.1).
    utils.weights_init_normal(init_list, 0.0, 0.1)
def __init__(self, N_STATES, N_ACTIONS, H1Size, H2Size):
    """Q-network: maps a state vector to one Q-value per action.

    Args:
        N_STATES: dimensionality of the input state vector.
        N_ACTIONS: number of discrete actions (output width).
        H1Size: width of the first hidden layer.
        H2Size: width of the second hidden layer.
    """
    # Zero-arg super() — consistent with the other __init__ methods in this file.
    super().__init__()

    # state -> hidden -> hidden -> per-action Q-values
    self.fc1 = nn.Linear(N_STATES, H1Size)
    self.fc2 = nn.Linear(H1Size, H2Size)
    self.out = nn.Linear(H2Size, N_ACTIONS)

    # Initialize all layers from N(0.0, 0.1), the file's init convention.
    utils.weights_init_normal([self.fc1, self.fc2, self.out], 0.0, 0.1)
def __init__(self, N_STATES):
    """Scalar-output head: one hidden layer of width N_STATES feeding a single unit.

    Args:
        N_STATES: dimensionality of the input state vector (also the hidden width).
    """
    super().__init__()

    hidden = nn.Linear(N_STATES, N_STATES)
    scalar_head = nn.Linear(N_STATES, 1)
    self.fc1 = hidden
    self.out = scalar_head

    # Draw all weights from N(0.0, 0.1), matching the file's init convention.
    utils.weights_init_normal([hidden, scalar_head], 0.0, 0.1)