Exemplo n.º 1
0
    def sample(self, T, g, g0=None):
        """Generate a sequence of T visible frames from the model.

        At every timestep a per-step weight set ``VH_t`` is built from ``VH``
        with slot 2 replaced by ``VH[2]`` plus a dynamic term: ``b_init`` at
        t == 0 and ``HH * H[t-1]`` afterwards.  ``rbm.sample`` draws the
        visibles and hiddens for that step, and the visibles are then
        replaced by their mean-field value given the sampled hiddens.

        Parameters:
            T:  number of timesteps to generate; ``T == 0`` yields an empty
                ``(0, v)`` array.
            g:  second argument forwarded to ``rbm.sample`` for t >= 1
                (presumably the number of Gibbs steps -- confirm in rbm).
            g0: same, for the first timestep; defaults to ``g``.

        Returns:
            A ``(T, v)`` array of mean-field visible values.
        """
        # Identity test: ``== None`` would invoke a user-defined/elementwise
        # ``__eq__`` if an array-like were ever passed.
        if g0 is None:
            g0 = g

        v, h = self.v, self.h
        VH, HH, b_init = self

        V = zeros((T, v))
        H = zeros((T, h))
        B = zeros((T, h))

        # ``1 * VH`` presumably yields an independent copy, so overwriting
        # slot 2 below leaves ``VH`` itself untouched.
        VH_t = 1 * VH

        for t in range(T):
            if t == 0:
                # First frame: the dynamic term is the initial bias.
                hid_bias, n_gibbs = b_init, g0
            else:
                # Later frames: the dynamic term comes from the previous hiddens.
                B[[t]] = HH * H[[t - 1]]
                hid_bias, n_gibbs = B[t], g

            VH_t[2] = VH[2] + hid_bias
            V[[t]], H[[t]] = rbm.sample(VH_t, n_gibbs, 1, self.vis_gauss)

            ## mean-field-ize the output.
            if self.vis_gauss:
                V[[t]] = VH_t.T() * H[[t]]
            else:
                V[[t]] = sigmoid(VH_t.T() * H[[t]])
        return V
Exemplo n.º 2
0
    def sample(self, T, g, g0=None):
        """Generate a sequence of T visible frames from the model.

        For each timestep a per-step weight set ``VH_t`` is built from ``VH``
        with slot 2 replaced by ``VH[2]`` plus a dynamic term: ``b_init`` at
        t == 0 and ``HH * H[t-1]`` afterwards.  ``rbm.sample`` draws visibles
        and stochastic hiddens; the hiddens stored for the recurrence are the
        mean-field values ``sigmoid(VH_t * V[t])`` computed from the sampled
        visibles, and the returned visibles are the mean-field values given
        the stochastic hiddens.

        Parameters:
            T:  number of timesteps to generate; ``T == 0`` yields an empty
                ``(0, v)`` array.
            g:  second argument forwarded to ``rbm.sample`` for t >= 1
                (presumably the number of Gibbs steps -- confirm in rbm).
            g0: same, for the first timestep; defaults to ``g``.

        Returns:
            A ``(T, v)`` array of mean-field visible values.
        """
        # Identity test: ``== None`` would invoke a user-defined/elementwise
        # ``__eq__`` if an array-like were ever passed.
        if g0 is None:
            g0 = g

        v, h = self.v, self.h
        VH, HH, b_init = self

        V = zeros((T, v))
        H = zeros((T, h))
        B = zeros((T, h))

        # ``1 * VH`` presumably yields an independent copy, so overwriting
        # slot 2 below leaves ``VH`` itself untouched.
        VH_t = 1 * VH

        for t in range(T):
            if t == 0:
                # First frame: the dynamic term is the initial bias.
                hid_bias, n_gibbs = b_init, g0
            else:
                # Later frames: the dynamic term comes from the previous hiddens.
                B[[t]] = HH * H[[t - 1]]
                hid_bias, n_gibbs = B[t], g

            VH_t[2] = VH[2] + hid_bias
            V[[t]], H_t_stoch = rbm.sample(VH_t, n_gibbs, 1, self.vis_gauss)

            # Mean-field hiddens must be computed from the *sampled* visibles,
            # i.e. before V[t] is overwritten below.
            H[[t]] = sigmoid(VH_t * V[[t]])
            if self.vis_gauss:
                V[[t]] = VH_t.T() * H_t_stoch
            else:
                V[[t]] = sigmoid(VH_t.T() * H_t_stoch)
        return V
Exemplo n.º 3
0
    def sample(self, T, g, g0=None):
        """Draw a length-T sequence of visible frames from the model.

        Each timestep uses a per-step weight set ``VH_t``: a copy of ``VH``
        whose slot 2 is ``VH[2]`` plus either ``b_init`` (t == 0) or
        ``HH * H[t-1]`` (t >= 1).  ``rbm.sample`` returns sampled visibles
        and stochastic hiddens.  The recurrence is driven by the mean-field
        hiddens ``sigmoid(VH_t * V[t])`` of the sampled visibles, while the
        frame that is returned is the mean-field reconstruction from the
        stochastic hiddens.

        Parameters:
            T:  number of timesteps; ``T == 0`` returns an empty ``(0, v)``
                array.
            g:  second argument passed to ``rbm.sample`` for t >= 1
                (presumably a Gibbs step count -- confirm in rbm).
            g0: same, for t == 0; falls back to ``g`` when omitted.

        Returns:
            A ``(T, v)`` array of mean-field visible values.
        """
        # ``is None`` instead of ``== None``: identity is the correct test
        # and cannot be hijacked by an overloaded ``__eq__``.
        if g0 is None:
            g0 = g

        v, h = self.v, self.h
        VH, HH, b_init = self

        V = zeros((T, v))
        H = zeros((T, h))
        B = zeros((T, h))

        # ``1 * VH`` presumably produces a separate object, so assigning to
        # VH_t[2] does not disturb VH[2], which is re-read every step.
        VH_t = 1 * VH

        for t in range(T):
            if t == 0:
                hid_bias, n_gibbs = b_init, g0
            else:
                B[[t]] = HH * H[[t - 1]]
                hid_bias, n_gibbs = B[t], g

            VH_t[2] = VH[2] + hid_bias
            V[[t]], H_t_stoch = rbm.sample(VH_t, n_gibbs, 1, self.vis_gauss)

            # Uses the sampled V[t]; must run before V[t] is replaced below.
            H[[t]] = sigmoid(VH_t * V[[t]])
            if self.vis_gauss:
                V[[t]] = VH_t.T() * H_t_stoch
            else:
                V[[t]] = sigmoid(VH_t.T() * H_t_stoch)
        return V
Exemplo n.º 4
0
    def grad(self, V):
        """Compute the gradient for one sequence V, with backprop through time.

        A forward pass computes the mean-field hiddens H[t].  A backward pass
        (t = T-1 .. 0) obtains a per-step RBM gradient from ``rbm`` and
        propagates a signal ``F_t`` back through ``HH``.  A final pass
        accumulates the ``HH`` and ``VH`` contributions that depend on the
        propagated signal.

        Parameters:
            V: 2-D array unpacked as ``(T, v)``; ``v`` must equal ``self.v``.

        Returns:
            ``(G, dict(loss=...))`` where ``G = 0 * self`` has been filled in
            place through its components ``d_VH, d_HH, d_b_init``, and
            ``loss`` is the sum of the per-step ``'loss'`` values.
        """
        T, v = V.shape
        assert (v == self.v)
        h = self.h
        VH, HH, b_init = self

        # G has the same structure as self; its three components are
        # accumulated into in place below.
        G = 0 * self
        d_VH, d_HH, d_b_init = G

        H = zeros((T, h))
        B = zeros((T, h))

        # Forward pass: the first step uses b_init, later steps use
        # B[t] = HH * H[t-1] as the extra hidden input.
        H[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])

        for t in range(1, T):
            B[[t]] = HH * H[[t - 1]]
            H[[t]] = sigmoid(VH * V[[t]] + B[[t]])

        # dB[t]:  signal reaching step t from later steps, through H[t].
        # dBL[t]: slot-2 component of the per-step RBM gradient at step t.
        dB = zeros((T, h))
        dBL = zeros((T, h))

        # Signal handed from step t+1 to step t; zero at the last step.
        F_t = zeros(h)

        loss = 0

        # ``1 * VH`` presumably gives an independent copy whose slot 2 can be
        # overwritten per step without touching VH.
        VH_t = 1 * VH
        for t in reversed(range(T)):
            # H * (1 - H) is the sigmoid derivative at H[t].
            dB[t] = H[t] * (1 - H[t]) * F_t

            # B[0] is never written, so at t == 0 this is VH[2] alone.
            VH_t[2] = B[t] + VH[2]

            if self.CD_n > 0:
                dVH_t, dict_loss = rbm.rbm_grad_cd(VH_t, V[[t]], self.CD_n,
                                                   self.vis_gauss)
            else:
                dVH_t, dict_loss = rbm.rbm_grad_exact(VH_t, V[[t]],
                                                      self.vis_gauss)
            loss += dict_loss['loss']

            dBL[t] = dVH_t[2]

            d_VH += dVH_t

            # ``up`` is defined elsewhere in the file; ``direction`` is
            # switched only around the transpose product and then reset.
            HH.direction = up
            F_t[:] = HH.T() * (dB[[t]] + dBL[[t]])
            HH.direction = None

        # Only the propagated signal at t == 0 is credited to b_init.
        d_b_init += dB[0]

        # Second pass: contributions that depend on the propagated signals.
        HH.direction = up
        VH.direction = up
        for t in range(1, T):
            d_HH += HH.outp_up(H[[t - 1]], dB[[t]] + dBL[[t]])
            d_VH += VH.outp_up(V[[t]], dB[[t]])
        d_VH += VH.outp_up(V[[0]], dB[[0]])

        HH.direction = None
        VH.direction = None

        return G, dict(loss=loss)
Exemplo n.º 5
0
def rbm_grad_exact(W, x, vis_gauss=False):
    """Compute the exact RBM gradient by brute-force enumeration.

    The data-dependent term is ``W.outp(x, sigmoid(W * x))`` averaged over
    the batch.  The model term is obtained by enumerating every binary
    visible configuration (``2**v`` of them, binary visibles) or every
    binary hidden configuration (``2**h``, Gaussian visibles), weighting
    each by its probability.  ``Z`` is used as a log-normalizer: it is
    subtracted inside ``exp``.

    Parameters:
        W:         weight object exposing ``v``, ``h``, ``outp``, ``T()`` and
                   indexable so that ``W[2]`` is combined with the hiddens.
        x:         batch of visible vectors; ``len(x)`` is the batch size.
        vis_gauss: select the Gaussian-visible branch when true.

    Returns:
        ``(batch_size * G, dict(loss=batch_size * loss))``.
    """
    batch_size = float(len(x))
    v, h = W.v, W.h
    G = 0 * W
    H = sigmoid(W * x)
    G += 1. / batch_size * W.outp(x, H)

    if not vis_gauss:
        Z = brute_force_Z(W)

        def prob(V):
            return exp(free_energy(W, V)[0] - Z)

        for i in xrange(2 ** v):
            V = int_to_bin(i, v)
            H = sigmoid(W * V)
            G -= prob(V) * W.outp(V, H)

        # Depends only on x and Z, so it is computed once after the
        # enumeration rather than on every iteration.
        loss = -(free_energy(W, x).mean(0) - Z)
    else:
        Z = brute_force_Z_vis_gauss(W)

        def prob(H):
            b = W.T() * H
            z = .5 * dot(b, b.T) + dot(H, W[2])
            return float(exp(z - Z))

        for i in xrange(2 ** h):
            H = int_to_bin(i, h)
            b = W.T() * H
            G -= prob(H) * W.outp(b, H)
        loss = -(-.5 * amap(dot, x, x).mean() + free_energy(W, x).mean(0) - Z)

    return batch_size * G, dict(loss=batch_size * loss)
Exemplo n.º 6
0
    def grad(self, V):
        """Compute a per-step gradient for sequence V with no backprop through time.

        The mean-field hiddens are inferred in a forward pass.  Then, walking
        the sequence from the last step to the first, the per-step RBM
        gradient is added to ``d_VH``; its slot-2 component is credited to
        ``d_HH`` (via the previous hiddens) for t > 0 and to ``d_b_init``
        for t == 0.

        Parameters:
            V: 2-D array unpacked as ``(T, v)``; ``v`` must equal ``self.v``.

        Returns:
            ``(G, dict(loss=...))`` with ``G = 0 * self`` filled in place and
            ``loss`` the sum of the per-step ``'loss'`` entries.
        """
        n_steps, n_vis = V.shape
        assert n_vis == self.v
        n_hid = self.h
        VH, HH, b_init = self

        G = 0 * self
        d_VH, d_HH, d_b_init = G

        hid_mean = zeros((n_steps, n_hid))
        dyn_bias = zeros((n_steps, n_hid))

        # Forward pass: b_init feeds the first step, HH * H[t-1] the rest.
        hid_mean[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])
        for t in range(1, n_steps):
            dyn_bias[[t]] = HH * hid_mean[[t - 1]]
            hid_mean[[t]] = sigmoid(VH * V[[t]] + dyn_bias[[t]])

        total_loss = 0
        VH_t = 1 * VH

        # Same last-to-first visiting order as reversed(range(T)).
        for t in range(n_steps - 1, -1, -1):
            # dyn_bias[0] is never written, so at t == 0 the bias is VH[2]
            # alone (b_init only enters through hid_mean[0]).
            # NOTE(review): confirm this is intended.
            VH_t[2] = dyn_bias[t] + VH[2]

            if self.CD_n > 0:
                step_grad, step_info = rbm.rbm_grad_cd(
                    VH_t, V[[t]], self.CD_n, self.vis_gauss)
            else:
                step_grad, step_info = rbm.rbm_grad_exact(
                    VH_t, V[[t]], self.vis_gauss)
            total_loss += step_info['loss']

            d_VH += step_grad

            if t == 0:
                d_b_init += step_grad[2]
                continue

            # ``up`` comes from elsewhere in the file; direction is set only
            # for the duration of the outer product.
            HH.direction = up
            d_HH += HH.outp(hid_mean[[t - 1]], step_grad[2][newaxis, :])
            HH.direction = None

        return G, dict(loss=total_loss)
Exemplo n.º 7
0
def rbm_grad_cd(W, x, cd, vis_gauss=False):
    """Compute a contrastive-divergence gradient estimate using ``cd`` Gibbs steps.

    The positive statistics ``W.outp(x, sigmoid(W * x))`` are computed from
    the data.  The chain then alternates ``cd`` times between sampling the
    hiddens and resampling the visibles, and the statistics at the end of
    the chain are subtracted.

    Parameters:
        W:         weight object exposing ``v``, ``outp`` and ``T()``.
        x:         batch of visible vectors; ``len(x)`` is the batch size.
        cd:        number of Gibbs steps; with ``cd == 0`` the two statistics
                   cancel and the loss is 0.
        vis_gauss: when true, visibles are resampled as ``W.T() * H`` plus
                   unit Gaussian noise instead of via ``Rsigmoid``.

    Returns:
        ``(G, dict(loss=...))`` where ``loss`` is the summed absolute
        difference between the final visibles and ``x``.
    """
    # Kept as an int: randn() requires integer dimensions, and the float
    # batch size used previously is rejected by current NumPy.
    n_cases = len(x)
    v = W.v

    V = x
    H = sigmoid(W * x)
    G = W.outp(V, H)
    for _ in range(cd):
        H = stochastic(H)
        if vis_gauss:
            V = W.T() * H + randn(n_cases, v)
        else:
            V = Rsigmoid(W.T() * H)
        H = sigmoid(W * V)
    G -= W.outp(V, H)

    loss = abs(V - x).sum()
    return G, dict(loss=loss)
Exemplo n.º 8
0
    def grad(self, V1):
        """Compute a one-step contrastive-divergence gradient for a batch V1.

        The positive statistics come from the data and its mean-field
        hiddens.  The negative statistics come from a single reconstruction:
        sample the hiddens, resample the visibles with ``Rsigmoid``, and
        recompute the mean-field hiddens.  ``V1`` is also stored on
        ``self.V1``.

        Parameters:
            V1: 2-D batch of visible vectors; its second dimension must
                equal ``self.v``.

        Returns:
            ``(G, dict(loss=...))`` where ``G = 0 * self`` has its first
            component filled in place and ``loss`` is the total absolute
            reconstruction difference.
        """
        n_cases, n_vis = V1.shape
        assert n_vis == self.v
        self.V1 = V1

        weights = self.w[0]
        G = 0 * self
        # In-place updates on this component are what populate G.
        grad_w = G[0]

        # Positive phase.
        hid_data = sigmoid(weights * V1)
        grad_w += weights.outp(V1, hid_data)

        # One reconstruction step, then the negative phase.
        hid_sample = stochastic(hid_data)
        V2 = Rsigmoid(weights.T() * hid_sample)
        hid_recon = sigmoid(weights * V2)
        grad_w -= weights.outp(V2, hid_recon)

        recon_err = abs(V1 - V2).sum(1).sum()
        return G, dict(loss=recon_err)
Exemplo n.º 9
0
    def grad(self, V1):
        """Compute the CD-1 gradient estimate for the batch V1.

        This stores ``V1`` on ``self.V1``, accumulates the data-driven
        outer-product statistics, and then subtracts the statistics of a
        one-step reconstruction (stochastic hiddens -> ``Rsigmoid`` visibles
        -> mean-field hiddens).

        Parameters:
            V1: 2-D batch of visible vectors whose second dimension must
                match ``self.v``.

        Returns:
            ``(G, dict(loss=...))``; ``G = 0 * self`` with its first
            component updated in place, ``loss`` the summed absolute
            difference between ``V1`` and its reconstruction.
        """
        n_rows, n_cols = V1.shape
        assert n_cols == self.v
        self.V1 = V1

        W = self.w[0]
        G = 0 * self
        # grad_w aliases the first component of G; += / -= mutate it in place.
        grad_w = G[0]

        data_hid = sigmoid(W * V1)
        grad_w += W.outp(V1, data_hid)

        sampled_hid = stochastic(data_hid)
        V2 = Rsigmoid(W.T() * sampled_hid)
        model_hid = sigmoid(W * V2)
        grad_w -= W.outp(V2, model_hid)

        return G, dict(loss=abs(V1 - V2).sum(1).sum())
Exemplo n.º 10
0
def sample_last_mf(W, g, batch_size, vis_gauss=False):
    """Run g Gibbs steps, then return mean-field visibles for the last hiddens.

    The chain starts from uniform random visibles.  Each step samples the
    hiddens with ``Rsigmoid`` and resamples the visibles, adding Gaussian
    noise when ``vis_gauss`` is true and using ``Rsigmoid`` otherwise.
    After the loop the visibles are recomputed deterministically from the
    final hiddens.

    ``g`` must be at least 1: with ``g == 0`` no hiddens are ever produced
    and the final step fails on the unassigned ``H``.

    Returns:
        ``(V, H)``: the mean-field visibles and the last sampled hiddens.
    """
    n_vis = W.v
    V = rand(batch_size, n_vis)

    for _ in range(g):
        H = Rsigmoid(W * V)
        top_down = W.T() * H
        if vis_gauss:
            V = top_down + randn(batch_size, n_vis)
        else:
            V = Rsigmoid(top_down)

    # Replace the last (noisy) visible sample with its mean-field value.
    mean_input = W.T() * H
    V = mean_input if vis_gauss else sigmoid(mean_input)

    return V, H
Exemplo n.º 11
0
def empirically_evaluate(t):
    """
    Estimate next-step prediction error on the validation sequence.

    Mean-field inference is run over the whole sequence, and then each
    frame p is predicted from the hiddens inferred at p - 1.  The
    approximate prediction is done the same way for the TRBM and the
    RTRBM; the RTRBM uses mean-field for inference.

    Parameters:
        t: object providing ``valid_data_fn()`` (returns a ``(T, v)``
           array) and ``W`` (unpacks into ``VH, HH, b_init``).

    Returns:
        The summed squared prediction error divided by ``T`` and by ``v``.
    """

    V = t.valid_data_fn()
    W = t.W

    T, v = V.shape
    h = W.h
    assert (W.v == v)

    VH, HH, b_init = W

    from pylab import zeros, sigmoid, newaxis, sqrt

    ## STEP 1: INFER THE HIDDEN UNITS with mean-field inference (the
    ## hiddens are never sampled, only their mean-field values are
    ## computed).  The loop index is deliberately not named ``t`` so the
    ## parameter ``t`` is not shadowed.
    H = zeros((T, h))
    H[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])
    for step in range(1, T):
        H[[step]] = sigmoid(VH * V[[step]] + HH * H[[step - 1]])

    from pylab import Rsigmoid

    ## STEP 2: PREDICT EACH FRAME FROM THE PREVIOUS HIDDENS.
    loss = 0
    for p in range(2, T):
        # Index with the loop variable p.  A stale index left over from the
        # inference loop used to be read here, so every iteration scored
        # the last frame.
        target = V[p]

        hid_bias = HH * H[[p - 1]]

        # VH_t is the set of weights of the "RBM" at time p: the same as
        # the normal vis-hid weights, except that slot 2 carries the
        # original bias plus the extra, dynamic bias from the previous step.
        VH_t = 1 * VH
        VH_t[2] = VH[2] + hid_bias

        # The last visible values come from mean-field rather than a sample
        # (presumably to avoid adding sampling noise in the Gaussian case).
        # NOTE(review): g_gen is not defined in this function; it must be
        # provided at module level.
        pred, hid = rbm.sample_last_mf(VH_t, g_gen, 1, W.vis_gauss)

        loss += ((target - pred) ** 2).sum()

    return (float(loss) / T) / v
Exemplo n.º 12
0
def empirically_evaluate(t):
    """
    Estimate next-step prediction error on the validation sequence.

    Inference (mean-field) is run over the whole sequence, after which
    frame p is predicted from the hiddens inferred at p - 1.  The
    approximate prediction is identical for the TRBM and the RTRBM; the
    RTRBM uses mean-field for inference.

    Parameters:
        t: object providing ``valid_data_fn()`` (returns a ``(T, v)``
           array) and ``W`` (unpacks into ``VH, HH, b_init``).

    Returns:
        The summed squared prediction error divided by ``T`` and by ``v``.
    """

    V = t.valid_data_fn()
    W = t.W

    T, v = V.shape
    h = W.h
    assert W.v == v

    VH, HH, b_init = W

    from pylab import zeros, sigmoid, newaxis, sqrt

    ## STEP 1: INFER THE HIDDEN UNITS.  Only the mean-field values of the
    ## hiddens are computed; nothing is sampled here.  The index is named
    ## ``step`` so that it does not overwrite the parameter ``t``.
    H = zeros((T, h))
    H[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])
    for step in range(1, T):
        H[[step]] = sigmoid(VH * V[[step]] + HH * H[[step - 1]])

    from pylab import Rsigmoid

    ## STEP 2: PREDICT FRAME p FROM THE HIDDENS AT p - 1.
    loss = 0
    for p in range(2, T):
        # Use the loop variable p.  The leftover index of the inference loop
        # used to be read here, which pinned every iteration to the last
        # frame.
        target = V[p]

        hid_bias = HH * H[[p - 1]]

        # VH_t is the set of weights of the "RBM" at time p.  It is the
        # same as the normal vis-hid weights, except that slot 2 holds the
        # original bias plus the extra, dynamic bias from the previous step.
        VH_t = 1 * VH
        VH_t[2] = VH[2] + hid_bias

        # The final visibles are taken from mean-field rather than sampled
        # (presumably so the Gaussian case is not corrupted by noise).
        # NOTE(review): g_gen is a free name here and must exist at module
        # level.
        pred, hid = rbm.sample_last_mf(VH_t, g_gen, 1, W.vis_gauss)

        loss += ((target - pred) ** 2).sum()

    return (float(loss) / T) / v