Example #1
0
    def __init__(self,
                 N_STATES,
                 N_ACTIONS,
                 H1Size,
                 H2Size,
                 done=None,
                 reward=None):
        """Build the environment model network.

        Maps a (state, action) pair through two hidden layers to a predicted
        next state. The reward and done signals are each either a learned
        linear head (when the corresponding argument is None) or the supplied
        callable.
        """
        super(EnvModel, self).__init__()

        # Trunk: concatenated state+action -> hidden1 -> hidden2 -> next state.
        self.fc1 = nn.Linear(N_STATES + N_ACTIONS, H1Size)
        self.fc2 = nn.Linear(H1Size, H2Size)
        self.statePrime = nn.Linear(H2Size, N_STATES)

        layers_to_init = [self.fc1, self.fc2, self.statePrime]

        # Learn a reward head only when no reward function was provided.
        self.reward_layer = reward is None
        if self.reward_layer:
            self.reward = nn.Linear(N_STATES, 1)
            layers_to_init.append(self.reward)
        else:
            # Reward function has been given by the caller.
            self.reward = reward

        # Same pattern for the episode-termination ("done") signal.
        self.done_layer = done is None
        if self.done_layer:
            self.done = nn.Linear(N_STATES, 1)
            layers_to_init.append(self.done)
        else:
            # Done function has been given by the caller.
            self.done = done

        # Initialize only the layers this model actually created.
        utils.weights_init_normal(layers_to_init, 0.0, 0.1)
Example #2
0
    def __init__(self, N_STATES, N_ACTIONS, H1Size, H2Size):
        """Two-hidden-layer MLP mapping a state vector to per-action Q-values."""
        super(Q_Net, self).__init__()

        # state -> hidden1 -> hidden2 -> one Q-value per action
        self.fc1 = nn.Linear(N_STATES, H1Size)
        self.fc2 = nn.Linear(H1Size, H2Size)
        self.out = nn.Linear(H2Size, N_ACTIONS)

        # Draw initial weights from N(0.0, 0.1), same scheme as the other nets.
        all_layers = [self.fc1, self.fc2, self.out]
        utils.weights_init_normal(all_layers, 0.0, 0.1)
Example #3
0
    def __init__(self, N_STATES):
        """Scalar head: state -> state-sized hidden layer -> single output."""
        super().__init__()

        # Hidden layer preserves the state dimensionality; output is a scalar.
        self.fc1 = nn.Linear(N_STATES, N_STATES)
        self.out = nn.Linear(N_STATES, 1)

        # Initialize both layers with N(0.0, 0.1) weights.
        utils.weights_init_normal([self.fc1, self.out], 0.0, 0.1)