Пример #1
0
class Actor(object):
    """Policy wrapper bundling a network, its optimizer and a noise source.

    The network is created by ``NN(n_st, n_act)`` and optimized with
    ``optimizers.Adam()`` at its default settings.  ``NN``, ``optimizers``,
    ``serializers`` and ``ou_process`` come from outside this block; the
    optimizer/serializer calls look like Chainer's API -- TODO confirm
    against the file's imports.
    """

    def __init__(self, n_st, n_act):
        """Create the network, attach an optimizer and start the noise source.

        Args:
            n_st: first size argument given to ``NN`` (the state dimension,
                judging by the name).
            n_act: second size argument given to ``NN``; also the length of
                the float32 zero vector handed to ``ou_process``.
        """
        super(Actor, self).__init__()
        self.n_st, self.n_act = n_st, n_act

        policy_net = NN(n_st, n_act)
        adam = optimizers.Adam()
        adam.setup(policy_net)
        self.model = policy_net
        self.optimizer = adam

        # Consumed with next() in action(); presumably an
        # Ornstein-Uhlenbeck generator -- verify against ou_process.
        noise_seed = np.zeros(n_act, dtype=np.float32)
        self.noise = ou_process(noise_seed)

    def action(self, st, noise=False):
        """Evaluate the network on ``st``, optionally adding exploration noise.

        Args:
            st: input forwarded to the model as-is (with ``norm=True``).
            noise: when truthy, the next sample of ``self.noise`` is added
                and the sum is clipped to ``[-1, 1]``.  The noise source is
                not advanced otherwise.

        Returns:
            The ``.data`` of the model output, or the clipped noisy array.
        """
        raw = self.model(st, norm=True).data
        if not noise:
            return raw
        return np.clip(raw + next(self.noise), -1, 1)

    def update(self, st, dqda):
        """Run one optimizer step from an externally supplied gradient.

        Args:
            st: input forwarded to the model (with ``norm=True``).
            dqda: array whose negation is installed as the gradient of the
                model output before back-propagating.  Presumably dQ/da
                from the critic, negated so a minimizing optimizer raises
                Q -- confirm with the caller.
        """
        policy_out = self.model(st, norm=True)
        self.model.cleargrads()
        policy_out.grad = -dqda
        policy_out.backward()
        self.optimizer.update()

    def update_target(self, tau, current_NN):
        """Delegate to ``self.model.weight_update(tau, current_NN)``.

        ``weight_update`` is defined on ``NN`` outside this block; by its
        arguments it is presumably a soft (tau-blended) copy of
        ``current_NN``'s weights -- TODO confirm.
        """
        target_net = self.model
        target_net.weight_update(tau, current_NN)

    def save_model(self, outputfile):
        """Write the model to ``outputfile`` via ``serializers.save_npz``."""
        serializers.save_npz(outputfile, self.model)

    def load_model(self, inputfile):
        """Load the model from ``inputfile`` via ``serializers.load_npz``."""
        serializers.load_npz(inputfile, self.model)
Пример #2
0
class Critic(object):
    """Q-function wrapper bundling a network, its optimizer and a text log.

    The network is created by ``NN(n_st + n_act, 1)``: it takes the state
    and action concatenated along axis 1 and has a single output.  ``NN``,
    ``optimizers``, ``serializers``, ``Variable`` and ``F`` come from
    outside this block; they look like Chainer's API -- TODO confirm
    against the file's imports.
    """

    def __init__(self, n_st, n_act):
        """Create the network, attach an optimizer and start an empty log.

        Args:
            n_st: number of leading input columns holding the state; used
                later to slice the action part out of the input gradient.
            n_act: number of action columns appended after the state.
        """
        super(Critic, self).__init__()
        self.n_st, self.n_act = n_st, n_act

        q_net = NN(n_st + n_act, 1)
        adam = optimizers.Adam()
        adam.setup(q_net)
        self.model = q_net
        self.optimizer = adam

        # One formatted string is appended per update() call.
        self.log = []

    def Q_value(self, st, act):
        """Return the ``.data`` of the model output for ``(st, act)``.

        ``st`` and ``act`` are joined along axis 1, so both must be 2-D
        with the same number of rows.
        """
        joint = np.concatenate((st, act), axis=1)
        return self.model(joint).data

    def return_dqda(self, st, act):
        """Return the gradient of the model output w.r.t. the action columns.

        The output gradient is seeded with float32 ones of shape
        ``(rows, 1)`` and back-propagated to the concatenated input.  The
        columns from ``self.n_st`` onward of that input gradient (the
        action part, since ``st`` is concatenated first) are returned.
        """
        joint = Variable(np.concatenate((st, act), axis=1))
        self.model.cleargrads()
        q_out = self.model(joint)
        rows = joint.shape[0]
        q_out.grad = np.ones((rows, 1), dtype=np.float32)
        q_out.backward()
        return joint.grad[:, self.n_st:]

    def update(self, y, st, act):
        """Fit the model output for ``(st, act)`` to ``y`` with one step.

        Args:
            y: target array, wrapped in ``Variable`` for the loss.
            st: state rows.
            act: action rows, concatenated after ``st`` along axis 1.

        Returns:
            ``.data`` of the mean-squared-error loss, computed before the
            optimizer step.
        """
        self.model.cleargrads()

        q_pred = self.model(np.concatenate((st, act), axis=1))
        loss = F.mean_squared_error(q_pred, Variable(y))
        loss.backward()
        self.optimizer.update()

        # Record the predictions next to their targets.
        entry = 'Q:{0},y:{1}\n'.format(q_pred.data.T, y.T)
        self.log.append(entry)

        return loss.data

    def update_target(self, tau, current_NN):
        """Delegate to ``self.model.weight_update(tau, current_NN)``.

        ``weight_update`` is defined on ``NN`` outside this block; by its
        arguments it is presumably a soft (tau-blended) copy of
        ``current_NN``'s weights -- TODO confirm.
        """
        target_net = self.model
        target_net.weight_update(tau, current_NN)

    def save_model(self, outputfile):
        """Write the model to ``outputfile`` via ``serializers.save_npz``."""
        serializers.save_npz(outputfile, self.model)

    def load_model(self, inputfile):
        """Load the model from ``inputfile`` via ``serializers.load_npz``."""
        serializers.load_npz(inputfile, self.model)