示例#1
0
    def grad(self, params, x, y):
        """Return the gradient of the regularised objective at ``params``.

        The method walks the series once, carrying forward the derivatives
        of the current decision ``d_t`` and of the running average ``A_t``
        of the per-step quantity ``r_t`` with respect to ``w`` and ``alpha``.

        Args:
            params: flat parameter vector; ``self.inflate`` splits it into
                ``w`` and ``alpha``.
            x: indexable series of input vectors, one per step.
            y: indexable series of per-step values ``q_t``, same length
                as ``x``.

        Returns:
            ``-append(dA/dw, dA/dalpha) + 2 * self.lmb * params``.

        Raises:
            IndexError: if ``x`` or ``y`` is an empty sequence (step 0 is
                always read).
        """
        w, alpha = self.inflate(params)

        d_prev = self.decide(0., x[0], w, alpha)

        # Step 0: the previous decision is 0, so alpha has no influence yet.
        # NOTE(review): this assumes decide() is tanh(dot(w, x) + alpha * d)
        # -- confirm against decide().
        d_prev_by_w = sech2(dot(w, x[0])) * x[0]
        A_prev_by_w = (d_prev_by_w * y[0]
                       - self.delta * sign(d_prev) * d_prev_by_w)

        d_prev_by_alpha = A_prev_by_alpha = 0.

        for i in range(1, len(y)):
            x_t = x[i]
            q_t = y[i]

            d_t = self.decide(d_prev, x_t, w, alpha)
            slope = sech2(dot(w, x_t) + alpha * d_prev)
            # Sub-gradient factor of the delta * |d_t - d_prev| term.
            change_cost = self.delta * sign(d_t - d_prev)
            # Incremental-mean weight: A_t = A_{t-1} + (r_t - A_{t-1})/(i+1).
            weight = 1. / (i + 1)

            d_t_by_w = slope * (x_t + alpha * d_prev_by_w)
            r_t_by_w = d_t_by_w * q_t - change_cost * (d_t_by_w - d_prev_by_w)
            A_prev_by_w = A_prev_by_w + weight * (r_t_by_w - A_prev_by_w)

            # d_prev_by_alpha starts at 0, so this single recurrence also
            # covers the first iteration (no special case needed).
            d_t_by_alpha = slope * (alpha * d_prev_by_alpha + d_prev)
            r_t_by_alpha = d_t_by_alpha * q_t - change_cost * (
                d_t_by_alpha - d_prev_by_alpha)
            A_prev_by_alpha = A_prev_by_alpha + weight * (
                r_t_by_alpha - A_prev_by_alpha)

            d_prev = d_t
            d_prev_by_w = d_t_by_w
            d_prev_by_alpha = d_t_by_alpha

        # Use the carried accumulators rather than loop-local names so that a
        # one-step series (loop body never runs) still returns a gradient.
        return (-1 * append(A_prev_by_w, A_prev_by_alpha)
                + 2 * self.lmb * params)
示例#2
0
文件: models.py 项目: rreas/drl
    def grad(self, params, x, y):
        """Compute the gradient of the penalised objective w.r.t. ``params``.

        A single forward pass over the series propagates, step by step, the
        derivatives of the decision ``d_t`` and of the running mean ``A_t``
        of the per-step term ``r_t`` with respect to ``w`` and ``alpha``.

        Args:
            params: flat parameter vector, unpacked into ``(w, alpha)`` by
                ``self.inflate``.
            x: indexable series of input vectors (one per step).
            y: indexable series of per-step values, aligned with ``x``.

        Returns:
            The negated derivative of the final running mean, concatenated
            as ``[w-part, alpha-part]``, plus ``2 * self.lmb * params``.

        Raises:
            IndexError: if ``x`` or ``y`` is an empty sequence.
        """
        w, alpha = self.inflate(params)

        d_prev = self.decide(0., x[0], w, alpha)

        # Initial step: no previous decision, hence no alpha dependence.
        # NOTE(review): presumes decide() applies tanh to
        # dot(w, x) + alpha * d_prev -- verify against decide().
        d_prev_by_w = sech2(dot(w, x[0])) * x[0]
        A_prev_by_w = d_prev_by_w*y[0] - self.delta*sign(d_prev)*d_prev_by_w

        d_prev_by_alpha = 0.
        A_prev_by_alpha = 0.

        for i in range(1, len(y)):
            x_t = x[i]
            q_t = y[i]

            d_t = self.decide(d_prev, x_t, w, alpha)
            # Derivative of the squashing function at this step's input.
            act_slope = sech2(dot(w, x_t) + alpha*d_prev)
            # delta * sign(.) is the sub-gradient factor of delta*|d_t - d_prev|.
            penalty = self.delta*sign(d_t - d_prev)
            # Weight of the newest sample in the incremental mean.
            step_w = 1./(i+1)

            d_t_by_w = act_slope * (x_t + alpha*d_prev_by_w)
            r_t_by_w = d_t_by_w*q_t - penalty*(d_t_by_w - d_prev_by_w)
            A_prev_by_w = A_prev_by_w + step_w*(r_t_by_w - A_prev_by_w)

            # With d_prev_by_alpha initialised to 0 the general recurrence
            # already yields act_slope * d_prev on the first iteration.
            d_t_by_alpha = act_slope * (alpha*d_prev_by_alpha + d_prev)
            r_t_by_alpha = d_t_by_alpha*q_t - (
                    penalty*(d_t_by_alpha - d_prev_by_alpha))
            A_prev_by_alpha = A_prev_by_alpha + step_w*(
                    r_t_by_alpha - A_prev_by_alpha)

            d_prev = d_t
            d_prev_by_w = d_t_by_w
            d_prev_by_alpha = d_t_by_alpha

        # The A_prev_* accumulators are defined even when len(y) == 1, where
        # the loop never executes; loop-local names would be unbound there.
        return -1 * append(A_prev_by_w, A_prev_by_alpha) + 2*self.lmb*params
示例#3
0
    def grad(self, params, x, y):
        """Return the gradient of the regularised objective at ``params``.

        One pass over the series carries forward the derivatives of the
        decision ``d_t`` and of the running average ``A_t`` of the per-step
        quantity ``r_t`` with respect to the hidden weights ``W``, the output
        weights ``w`` and the feedback weights ``alpha``.

        Args:
            params: flat parameter vector; ``self.inflate`` splits it into
                ``(W, w, alpha)``.
            x: indexable series of input vectors, one per step.
            y: indexable series of per-step values ``q_t``, same length
                as ``x``.

        Returns:
            ``-concatenate((dA/dW flattened, dA/dw, dA/dalpha))
            + 2 * self.lmb * params``.

        Raises:
            IndexError: if ``x`` or ``y`` is an empty sequence (step 0 is
                always read).
        """
        W, w, alpha = self.inflate(params)

        # ---- step 0: previous decision is 0, so alpha plays no role yet.
        x_t = x[0]
        h_t = tanh(dot(W, x_t))
        # NOTE(review): assumes decide() is tanh(dot(w, h)) with
        # h = tanh(dot(W, x) + alpha * d_prev) -- confirm against decide().
        out_slope = sech2(dot(w, h_t))

        d_prev = self.decide(0., x_t, W, w, alpha)
        change_cost = self.delta * sign(d_prev)

        d_prev_by_w = out_slope * h_t
        A_prev_by_w = d_prev_by_w * y[0] - change_cost * d_prev_by_w

        # Row i of W only reaches the decision through hidden unit i.
        d_prev_by_W = zeros(W.shape) + out_slope
        for i in range(self.hidden):
            d_prev_by_W[i, :] *= w[i] * (sech2(dot(W[i, :], x_t)) * x_t)
        A_prev_by_W = d_prev_by_W * y[0] - change_cost * d_prev_by_W

        d_prev_by_alpha = zeros(alpha.shape)
        A_prev_by_alpha = zeros(alpha.shape)

        # ---- steps 1 .. len(y) - 1
        for step in range(1, len(y)):
            x_t = x[step]
            q_t = y[step]

            hidden_in = dot(W, x_t) + alpha * d_prev
            h_t = tanh(hidden_in)
            hidden_slope = sech2(hidden_in)
            d_t = self.decide(d_prev, x_t, W, w, alpha)

            out_slope = sech2(dot(w, h_t))
            # Sub-gradient factor of the delta * |d_t - d_prev| term.
            change_cost = self.delta * sign(d_t - d_prev)
            # Incremental-mean weight: A_t = A_{t-1} + (r_t - A_{t-1})/(t+1).
            weight = 1. / (step + 1)

            # -- output weights w.  Scaling column k by hidden_slope[k] is
            # what right-multiplying by diag(hidden_slope) does.
            h_t_by_w = outer(d_prev_by_w, alpha) * hidden_slope
            d_t_by_w = out_slope * (dot(h_t_by_w, w) + h_t)
            r_t_by_w = d_t_by_w * q_t - change_cost * (d_t_by_w - d_prev_by_w)
            A_prev_by_w = A_prev_by_w + weight * (r_t_by_w - A_prev_by_w)

            # -- hidden weights W.  h_t_by_W[k, i, j] = d h_t[k] / d W[i, j]:
            # every entry feeds back through d_prev; row k of W additionally
            # feeds hidden unit k directly through x_t.
            h_t_by_W = zeros((w.size, W.shape[0], W.shape[1]))
            for k in range(w.size):
                via_feedback = alpha[k] * d_prev_by_W
                via_feedback[k, :] += x_t
                h_t_by_W[k] = hidden_slope[k] * via_feedback

            # Contract the hidden-unit axis with w in one product.
            # NOTE(review): fills all of W.shape; the earlier loops only
            # covered (self.hidden, self.exlen) -- presumably the same shape.
            d_t_by_W = out_slope * dot(
                w, h_t_by_W.reshape(w.size, -1)).reshape(W.shape)
            r_t_by_W = d_t_by_W * q_t - change_cost * (d_t_by_W - d_prev_by_W)
            A_prev_by_W = A_prev_by_W + weight * (r_t_by_W - A_prev_by_W)

            # -- feedback weights alpha.
            h_t_by_alpha = (outer(d_prev_by_alpha, alpha)
                            + eye(alpha.size) * d_prev) * hidden_slope
            d_t_by_alpha = out_slope * dot(h_t_by_alpha, w)
            r_t_by_alpha = d_t_by_alpha * q_t - change_cost * (
                d_t_by_alpha - d_prev_by_alpha)
            A_prev_by_alpha = A_prev_by_alpha + weight * (
                r_t_by_alpha - A_prev_by_alpha)

            d_prev = d_t
            d_prev_by_w = d_t_by_w
            d_prev_by_W = d_t_by_W
            d_prev_by_alpha = d_t_by_alpha

        # Use the carried accumulators rather than loop-local names so that a
        # one-step series (loop body never runs) still returns a gradient.
        return -1 * concatenate(
            (A_prev_by_W.reshape((A_prev_by_W.size, )), A_prev_by_w,
             A_prev_by_alpha)) + 2 * self.lmb * params
示例#4
0
文件: models.py 项目: rreas/drl
    def grad(self, params, x, y):
        """Compute the gradient of the penalised objective w.r.t. ``params``.

        A single forward pass propagates the derivatives of the decision
        ``d_t`` and of the running mean ``A_t`` of the per-step term ``r_t``
        with respect to the hidden weights ``W``, the output weights ``w``
        and the feedback weights ``alpha``.

        Args:
            params: flat parameter vector, unpacked into ``(W, w, alpha)``
                by ``self.inflate``.
            x: indexable series of input vectors (one per step).
            y: indexable series of per-step values, aligned with ``x``.

        Returns:
            The negated derivative of the final running mean, laid out as
            ``[W flattened, w, alpha]``, plus ``2 * self.lmb * params``.

        Raises:
            IndexError: if ``x`` or ``y`` is an empty sequence.
        """
        W, w, alpha = self.inflate(params)

        # Initial step: there is no previous decision, so alpha has no effect.
        x_t = x[0]
        h_t = tanh(dot(W, x_t))
        # NOTE(review): presumes decide() returns tanh(dot(w, h)) with
        # h = tanh(dot(W, x) + alpha*d_prev) -- verify against decide().
        s_out = sech2(dot(w, h_t))

        d_prev = self.decide(0., x_t, W, w, alpha)
        penalty = self.delta*sign(d_prev)

        d_prev_by_w = s_out * h_t
        A_prev_by_w = d_prev_by_w*y[0] - penalty*d_prev_by_w

        # W[i, :] influences the decision only via hidden unit i.
        d_prev_by_W = zeros(W.shape) + s_out
        for i in range(self.hidden):
            d_prev_by_W[i,:] *= w[i]*(sech2(dot(W[i,:], x_t)) * x_t)
        A_prev_by_W = d_prev_by_W*y[0] - penalty*d_prev_by_W

        d_prev_by_alpha = zeros(alpha.shape)
        A_prev_by_alpha = zeros(alpha.shape)

        for step in range(1, len(y)):
            x_t = x[step]
            q_t = y[step]

            pre_act = dot(W, x_t) + alpha*d_prev
            h_t = tanh(pre_act)
            s_hid = sech2(pre_act)
            d_t = self.decide(d_prev, x_t, W, w, alpha)

            s_out = sech2(dot(w, h_t))
            # delta * sign(.) is the sub-gradient factor of delta*|d_t - d_prev|.
            penalty = self.delta*sign(d_t - d_prev)
            # Weight of the newest sample in the incremental mean.
            step_w = 1./(step+1)

            # Output weights: broadcasting scales column k by s_hid[k], the
            # same result as a right-multiplication by diag(s_hid).
            h_t_by_w = outer(d_prev_by_w, alpha) * s_hid
            d_t_by_w = s_out * (dot(h_t_by_w, w) + h_t)
            r_t_by_w = d_t_by_w*q_t - penalty*(d_t_by_w - d_prev_by_w)
            A_prev_by_w = A_prev_by_w + step_w*(r_t_by_w - A_prev_by_w)

            # Hidden weights: h_t_by_W[k, i, j] is d h_t[k] / d W[i, j].
            # All entries act through the fed-back decision; row k of W also
            # drives hidden unit k directly with x_t.
            h_t_by_W = zeros((w.size, W.shape[0], W.shape[1]))
            for k in range(w.size):
                inner = alpha[k]*d_prev_by_W
                inner[k,:] += x_t
                h_t_by_W[k] = s_hid[k]*inner

            # Sum over hidden units, weighted by w, in one product.
            # NOTE(review): covers every entry of W; the earlier loops ran
            # over (self.hidden, self.exlen) -- presumably equal to W.shape.
            d_t_by_W = s_out * dot(w, h_t_by_W.reshape(w.size, -1)).reshape(W.shape)
            r_t_by_W = d_t_by_W*q_t - penalty*(d_t_by_W - d_prev_by_W)
            A_prev_by_W = A_prev_by_W + step_w*(r_t_by_W - A_prev_by_W)

            # Feedback weights.
            h_t_by_alpha = (outer(d_prev_by_alpha, alpha) + eye(alpha.size)*d_prev) * s_hid
            d_t_by_alpha = s_out * dot(h_t_by_alpha, w)
            r_t_by_alpha = d_t_by_alpha*q_t - penalty*(d_t_by_alpha - d_prev_by_alpha)
            A_prev_by_alpha = A_prev_by_alpha + step_w*(r_t_by_alpha - A_prev_by_alpha)

            d_prev = d_t
            d_prev_by_w = d_t_by_w
            d_prev_by_W = d_t_by_W
            d_prev_by_alpha = d_t_by_alpha

        # The A_prev_* accumulators are defined even when len(y) == 1, where
        # the loop never executes; loop-local names would be unbound there.
        return -1 * concatenate((A_prev_by_W.reshape((A_prev_by_W.size,)), A_prev_by_w,
            A_prev_by_alpha)) + 2*self.lmb*params