Example No. 1
    def __init__(self, model, type_model):
        super(LatentTypeWithTuningCurve, self).__init__(model, type_model)

        # Also initialize the tuning curves
        self.mu = self.type_model['mu']
        self.sigma = self.type_model['sigma']

        # Create a basis for the stimulus response
        self.spatial_basis = create_basis(self.type_model['spatial_basis'])
        self.spatial_shape = self.type_model['spatial_shape']
        self.spatial_ndim = len(self.spatial_shape)
        (_,Bx) = self.spatial_basis.shape

        self.temporal_basis = create_basis(self.type_model['temporal_basis'])
        (_,Bt) = self.temporal_basis.shape

        # Save the filter sizes
        self.Bx = Bx
        self.Bt = Bt

        # Initialize interpolated bases
        self.initialize_basis()

        # Initialize RxBx and RxBt matrices for the per-type tuning curves
        self.w_x = T.dmatrix('w_x')
        self.w_t = T.dmatrix('w_t')

        # Create function handles for the stimulus responses
        self.stim_resp_t = T.dot(self.temporal_basis, self.w_t)
        self.stim_resp_x = T.dot(self.spatial_basis, self.w_x)

        # Add the probability of these tuning curves to the log probability
        self.log_p += -0.5/self.sigma**2 *T.sum((self.w_x-self.mu)**2) + \
                      -0.5/self.sigma**2 *T.sum((self.w_t-self.mu)**2)
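# A minimal standalone sketch of the Gaussian log-prior term used above,
# compiled on its own with plain Theano (mu and sigma values are made up).
import numpy as np
import theano
import theano.tensor as T

w = T.dmatrix('w')
mu, sigma = 0.0, 1.0                       # hypothetical hyperparameters
log_prior = -0.5 / sigma**2 * T.sum((w - mu)**2)

f = theano.function([w], log_prior)
print(f(np.ones((2, 3))))                  # -> -3.0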
Example No. 2
def test_pickle():
    """Test that a module can be pickled"""
    M = Module()
    M.x = (T.dmatrix())
    M.y = (T.dmatrix())
    a = T.dmatrix()
    M.f = Method([a], a + M.x + M.y)
    M.g = Method([a], a * M.x * M.y)

    mode = get_mode()
    m = M.make(x=numpy.zeros((4,5)), y=numpy.ones((2,3)), mode=mode)

    m_dup = cPickle.loads(cPickle.dumps(m, protocol=-1))

    assert numpy.all(m.x == m_dup.x) and numpy.all(m.y == m_dup.y)

    m_dup.x[0,0] = 3.142
    assert m_dup.f.input_storage[1].data[0,0] == 3.142
    assert m.x[0,0] == 0.0 #ensure that m is not aliased to m_dup

    #check that the unpickled version has the same argument/property aliasing
    assert m_dup.x is m_dup.f.input_storage[1].data
    assert m_dup.y is m_dup.f.input_storage[2].data
    assert m_dup.x is m_dup.g.input_storage[1].data
    assert m_dup.y is m_dup.g.input_storage[2].data
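# The old Module/Method API aside, a plain compiled theano.function can be
# round-tripped through pickle in much the same way; a minimal sketch:
import pickle
import numpy as np
import theano
import theano.tensor as T

x = T.dmatrix('x')
f = theano.function([x], 2 * x + 1)
f_dup = pickle.loads(pickle.dumps(f, protocol=-1))
data = np.arange(6.).reshape(2, 3)
assert np.allclose(f(data), f_dup(data))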
Example No. 3
def make_functions(num_features):
	W1_shape = (num_features/4, num_features)
	b1_shape = num_features/4
	W2_shape = (nb_classes, num_features/4)
	b2_shape = nb_classes
	
	W1 = shared(np.random.random(W1_shape) - 0.5, name = "W1")
	b1 = shared(np.random.random(b1_shape) - 0.5, name = "b1")
	W2 = shared(np.random.random(W2_shape) - 0.5, name = "W2")
	b2 = shared(np.random.random(b2_shape) - 0.5, name = "b2")


	x = T.dmatrix("x")
	labels = T.dmatrix("labels")

	hidden = T.nnet.sigmoid(x.dot(W1.transpose())+b1)
	output = T.nnet.softmax(hidden.dot(W2.transpose()) + b2)
	prediction = T.argmax(output, axis=1)

	reg_lambda = 0.0001
	regularization = reg_lambda * ((W1 * W1).sum() + (W2 * W2).sum() + (b1 * b1).sum() + (b2 * b2).sum())
	
	cost = T.nnet.binary_crossentropy(output, labels).mean() + regularization

	compute_prediction = function([x], prediction)

	alpha = T.dscalar("alpha")
	weights = [W1, W2, b1, b2]
	updates = [(w, w-alpha * grad(cost, w)) for w in weights]
	train_nn = function([x, labels, alpha],
	                    cost,
	                    updates = updates)
	return train_nn, compute_prediction
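# Hypothetical usage of make_functions (it relies on module-level imports such
# as numpy as np, theano's shared/function/grad and theano.tensor as T, plus a
# global nb_classes; all values below are invented).
nb_classes = 3
train_nn, compute_prediction = make_functions(num_features=8)
X = np.random.random((10, 8))
Y = np.eye(nb_classes)[np.random.randint(nb_classes, size=10)]   # one-hot labels
for _ in range(20):
    cost = train_nn(X, Y, 0.1)                                   # alpha = 0.1
print(compute_prediction(X))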
Example No. 4
    def test_free_energy(self):
        self.setUpAssociativeRBM()
        rbm = self.rbm
        w = rbm.W.get_value(borrow=True)
        u = rbm.U.get_value(borrow=True)
        v = T.dmatrix("v")
        v2 = T.dmatrix("v2")
        v_bias = rbm.v_bias.eval()
        v_bias2 = rbm.v_bias2.eval()
        h_bias = rbm.h_bias.eval()

        res = rbm.free_energy(v, v2)
        f = theano.function([v, v2], [res])
        theano_res = f(self.x, self.y)

        # numpy reference for the joint free energy with both v1 and v2 present
        n1 = - np.dot(self.x, v_bias)
        n2 = - np.dot(self.y, v_bias2)
        n3 = - np.sum(np.log(1 + np.exp(h_bias + np.dot(self.x, w) + np.dot(self.y, u))))
        np_res = n1 + n2 + n3

        print theano_res
        print np_res

        diff = theano_res == np_res
        self.assertTrue(np.all(diff))
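# Exact == comparisons between a Theano graph and a numpy re-implementation
# only hold when both sides perform the identical floating-point operations;
# np.allclose is the more robust check. A small illustration, independent of
# the RBM code above:
import numpy as np
import theano
import theano.tensor as T

v = T.dmatrix('v')
f = theano.function([v], T.sum(T.log1p(T.exp(v))))   # log(1 + exp(v)), summed
x = np.random.rand(3, 4)
assert np.allclose(f(x), np.sum(np.log(1 + np.exp(x))))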
Example No. 5
    def test_prop_up(self):
        self.setUpSimpleRBM()

        rbm = self.rbm
        v1 = T.dmatrix("v1")
        v2 = T.dmatrix("v2")
        # Test Single
        out = rbm.prop_up(v1)
        out_fn = theano.function([], [out[0], out[1]], givens={v1: self.x1})
        out_sum, out_sum_mapped = out_fn()

        h_sum = np.dot(self.x1, rbm.W.get_value(borrow=True)) + rbm.h_bias.eval()
        h_sum_mapped = 1 / (1 + np.exp(-h_sum))
        self.assertTrue(np.all(out_sum == h_sum))
        self.assertTrue((np.all(out_sum_mapped == h_sum_mapped)))


        # Test Double
        out = rbm.prop_up(v1, v2)
        out_fn = theano.function([], [out[0], out[1]], givens={v1: self.x1, v2: self.x12})
        out_sum, out_sum_mapped = out_fn()
        h_sum = np.dot(self.x1, rbm.W.get_value(borrow=True)) + np.dot(self.x12, rbm.U.get_value(
            borrow=True)) + rbm.h_bias.eval()
        h_sum_mapped = 1 / (1 + np.exp(-h_sum))
        # h_sum_mapped = theano.function([], [log_sig(h_sum)])()

        self.assertTrue(np.all(out_sum == h_sum))
        self.assertTrue((np.all(out_sum_mapped == h_sum_mapped)))
Example No. 6
    def test_prop_down(self):
        self.setUpRBM()
        self.assertTrue(self.rbm.h_n == 10)
        rbm = self.rbm
        W = rbm.W.get_value(borrow=True)
        U = rbm.U.get_value(borrow=True)
        v1 = T.dmatrix("v1")
        v2 = T.dmatrix("v2")
        h = np.array([[1, 2, 3, 4, 5, -1, -2, -3, -4, -5]])

        # Single
        x = T.dmatrix("x")
        out = rbm.prop_down(x)
        f = theano.function([x], out)
        out_sum, out_sum_mapped = f(h)
        h_sum = np.dot(h, W.T) + rbm.v_bias.eval()
        h_sum_mapped = theano.function([], [log_sig(h_sum)])()
        self.assertTrue(np.all(out_sum == h_sum))
        self.assertTrue(np.all(out_sum_mapped == h_sum_mapped))

        # Assoc
        out = rbm.prop_down_assoc(x)
        f = theano.function([x], out)
        out_sum, out_sum_mapped = f(h)
        h_sum2 = np.dot(h, U.T) + rbm.v_bias2.eval()
        h_sum_mapped2 = theano.function([], [log_sig(h_sum2)])()
        self.assertTrue(np.all(out_sum == h_sum2))
        self.assertTrue(np.all(out_sum_mapped == h_sum_mapped2))
Example No. 7
    def test_validity2(self):
        theano.config.on_unused_input = 'warn'
        a0_var = T.dmatrix('a0')
        r0_var = T.dmatrix('r0')
        fri_var = T.dmatrix("fri")
        out = T.dmatrix("out")
        out_stale = T.dmatrix("out_stale")

        f = theano.function([a0_var, r0_var, fri_var, out, out_stale],
                            dqn.build_loss(out, out_stale, a0_var, r0_var, fri_var, gamma=0.5))

        sqr_mean, mean, y, q = f(np.array([[1, 0, 0, 0, 0, 0],
                                           [0, 1, 0, 0, 0, 0],
                                           [0, 0, 0, 0, 0, 1]]),
                                 np.array([[1],
                                           [0],
                                           [5]]),
                                 np.array([[1],
                                           [1],
                                           [0]]),
                                 np.array([[-5, 1, 2, 3, 4, 7],
                                           [1, 4, 3, 4, 5, 9],
                                           [0, 9, 0, 3, 2, 1]]),
                                 np.array([[-5, 1, 2, 3, 4, 5],
                                           [1, 2, 3, 4, 5, 6],
                                           [8, 0, -1, -1, 2, 3]]))

        print(y, q)
Example No. 8
def createMLP(layers, s):
    l_in = lasagne.layers.InputLayer(shape=(None, s))
    prev_layer = l_in
    Ws = []
    for layer in layers:
        enc = lasagne.layers.DenseLayer(prev_layer, num_units=layer, nonlinearity=rectify, W=init.Uniform(0.01))
        Ws += [enc.W]
        drop = lasagne.layers.DropoutLayer(enc, p=0.5)
        prev_layer = drop
    idx = 1
    # creating mask
    mask = lasagne.layers.InputLayer(shape=(None, layers[-1]))
    prev_layer = lasagne.layers.ElemwiseMergeLayer([prev_layer, mask], merge_function=T.mul)
    for layer in layers[-2::-1]:
        print layer
        dec = lasagne.layers.DenseLayer(prev_layer, num_units=layer, nonlinearity=rectify, W=Ws[-idx].T)
        idx += 1
        drop = lasagne.layers.DropoutLayer(dec, p=0.0)
        prev_layer = drop
    model = lasagne.layers.DenseLayer(prev_layer, num_units=s, nonlinearity=identity, W=Ws[0].T)

    x_sym = T.dmatrix()
    mask_sym = T.dmatrix()
    all_params = lasagne.layers.get_all_params(model)
    output = lasagne.layers.get_output(model, inputs={l_in: x_sym, mask: mask_sym})
    loss_eval = lasagne.objectives.squared_error(output, x_sym).sum()
    loss_eval /= (2.*batch_size)
    updates = lasagne.updates.adam(loss_eval, all_params)

    return l_in, mask, model, theano.function([x_sym, mask_sym], loss_eval, updates=updates)
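# Hypothetical call to createMLP (it relies on module-level lasagne/theano/numpy
# imports and a global batch_size; the shapes below are invented).
batch_size = 32
l_in, mask, model, train_fn = createMLP(layers=[128, 64], s=300)
X = np.random.rand(batch_size, 300)
M = np.ones((batch_size, 64))        # mask over the innermost (code) layer
print(train_fn(X, M))                # one Adam step; returns the reconstruction loss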
Example No. 9
def NNet(x=None, y=None, n_hid_layers=2):
    # our points, one point per row
    if x is None:
        x = T.dmatrix()
    # targets , one per row
    if y is None:
        y = T.dmatrix()
    layers = []
    _x = x
    for i in xrange(n_hid_layers):
        layers.append(Layer(x=_x))
        _x = layers[-1].y
    classif = LR(x=_x)

    @symbolicmethod
    def params():
        rval = classif.params()
        for l in layers:
            rval.extend(l.params())
        print([id(r) for r in rval])
        return rval

    if 0:
        @symbolicmethod
        def update(x, y):
            pp = params()
            gp = T.grad(classif.loss, pp)
            return dict((p, p - 0.01*g) for p, g in zip(pp, gp))

    return locals()
Example No. 10
def LR(x=None, y=None, v=None, c=None, l2_coef=None):
    # our points, one point per row
    if x is None:
        x = T.dmatrix()
    # targets , one per row
    if y is None:
        y = T.dmatrix()
    # first layer weights
    if v is None:
        v = T.dmatrix()
    # first layer biases
    if c is None:
        c = T.dvector()

    if l2_coef is None:
        l2_coef = T.dscalar()

    pred = T.dot(x, v) + c
    sse = T.sum((pred - y) * (pred - y))
    mse = sse / T.shape(y)[0]
    v_l2 = T.sum(T.sum(v*v))
    loss = mse + l2_coef * v_l2

    @symbolicmethod
    def params():
        return [v, c]

    return locals()
Example No. 11
    def __init__(self,N,Nsub,NRGC,prior=1):
        self.N     = N
        self.Nsub  = Nsub
        self.NRGC  = NRGC
        U   = Th.dmatrix()                   # SYMBOLIC variables       #
        V1  = Th.dvector()                                              #
        V2  = Th.dvector()                                              #
        STA = Th.dvector()                                              #
        STC = Th.dmatrix()                                              #
        theta = Th.dot( U.T , V1 )                                      #
        UV1U  = Th.dot( U , theta )                                     #
        UV1V2U= Th.dot( V1 * U.T , (V2 * U.T).T )                       #
        posterior  = -0.5 * Th.sum( V1 * V2 * U.T*U.T ) \
                     -0.25* Th.sum( UV1V2U.T * UV1V2U ) \
                     -0.5 * Th.sum( UV1U * UV1U * UV1U *V2 *V2 * V1 ) \
                     -0.5 * Th.sum( UV1U * UV1U * V2 * V1 ) \
                     -0.5 * Th.sum( theta * theta ) \
                     + Th.dot( theta.T , STA ) \
                     + Th.sum( Th.dot( V1* V2*U.T , U ) \
                     * (STC + STA.T*STA) )
        dpost_dU  = Th.grad( cost           = posterior ,               #
                             wrt            = U         )               #
        dpost_dV1 = Th.grad( cost           = posterior ,               #
                             wrt            = V1        )               #
        dpost_dV2 = Th.grad( cost           = posterior ,               #
                             wrt            = V2        )               #
#        self.posterior  = function( [U,V2,V1,STA,STC],  UV1V2U)      #
        self.posterior  = function( [U,V2,V1,STA,STC],  posterior)      #
        self.dpost_dU   = function( [U,V2,V1,STA,STC], dpost_dU  )      #
        self.dpost_dV1  = function( [U,V2,V1,STA,STC], dpost_dV1 )      #
        self.dpost_dV2  = function( [U,V2,V1,STA,STC], dpost_dV2 )      #
Example No. 12
    def make_theano_functions(self) :
        x  = T.dmatrix('x')
        h1 = T.dot(x, self.w1.T) + self.b1
        a1 = 1. / (1. + T.exp(-h1))
        h2 = T.dot(a1,self.w2.T) + self.b2
        a2 = T.nnet.softmax(h2)
        
        f = theano.function([x], a2)

        y  = T.dmatrix('y')
        loss = T.mean(T.sum(y*-T.log(a2), axis=1))

        gradw1 = T.grad(loss, self.w1)
        gradw2 = T.grad(loss, self.w2)
        gradb1 = T.grad(loss, self.b1)
        gradb2 = T.grad(loss, self.b2)

        gradf = theano.function(
                [x, y],
                [loss, a2],
                updates = [
                    (self.w1, self.w1-self.lr*gradw1),
                    (self.w2, self.w2-self.lr*gradw2),
                    (self.b1, self.b1-self.lr*gradb1),
                    (self.b2, self.b2-self.lr*gradb2)
                    ]
                )

        return f, gradf
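# A self-contained analogue of the graph above, with the weights created inline
# as shared variables (a sketch under assumed layer sizes, not the class's
# actual setup).
import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(0)
w1 = theano.shared(0.1 * rng.randn(4, 3), name='w1')   # (hidden, n_in)
b1 = theano.shared(np.zeros(4), name='b1')
w2 = theano.shared(0.1 * rng.randn(2, 4), name='w2')   # (n_out, hidden)
b2 = theano.shared(np.zeros(2), name='b2')

x = T.dmatrix('x')
y = T.dmatrix('y')                                      # one-hot targets
a1 = T.nnet.sigmoid(T.dot(x, w1.T) + b1)
a2 = T.nnet.softmax(T.dot(a1, w2.T) + b2)
loss = T.mean(T.sum(y * -T.log(a2), axis=1))

step = theano.function(
    [x, y], loss,
    updates=[(p, p - 0.1 * T.grad(loss, p)) for p in (w1, b1, w2, b2)])
print(step(rng.rand(5, 3), np.eye(2)[rng.randint(2, size=5)]))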
Example No. 13
def UV12_input(V1=Th.dmatrix(),
               STAs=Th.dmatrix(),
               STCs=Th.dtensor3(),
               N_spikes=Th.dvector(),
               **other):
    other.update(locals())
    return named(**other)
Example No. 14
def theano_sed():
    """
    Create a theano function that computes pairwise Euclidean distances efficiently.
    Returns:
        theano.compile.function_module.Function: Compiled function

    """

    theano.config.compute_test_value = "ignore"

    # Set symbolic variable as matrix (with the XYZ coords)
    coord_T_x1 = T.dmatrix()
    coord_T_x2 = T.dmatrix()

    # Euclidian distances function
    def squared_euclidean_distances(x_1, x_2):
        sqd = T.sqrt(T.maximum(
            (x_1 ** 2).sum(1).reshape((x_1.shape[0], 1)) +
            (x_2 ** 2).sum(1).reshape((1, x_2.shape[0])) -
            2 * x_1.dot(x_2.T), 0
        ))
        return sqd

    # Compiling function
    f = theano.function([coord_T_x1, coord_T_x2],
                        squared_euclidean_distances(coord_T_x1, coord_T_x2),
                        allow_input_downcast=False)
    return f
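# Note that the T.sqrt makes the compiled function return plain (non-squared)
# Euclidean distances despite the inner helper's name. Hypothetical usage,
# assuming numpy as np is imported at module level:
sed = theano_sed()
a = np.random.rand(5, 3)             # 5 points with XYZ coords
b = np.random.rand(4, 3)             # 4 points with XYZ coords
print(sed(a, b).shape)               # -> (5, 4) pairwise distance matrix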
Example No. 15
    def test_infer_shape(self):
        admat = dmatrix()
        bdmat = dmatrix()
        admat_val = numpy.random.rand(3, 4)
        bdmat_val = numpy.random.rand(3, 4)
        self._compile_and_check([admat, bdmat], [SoftmaxGrad()(admat, bdmat)],
                                [admat_val, bdmat_val], SoftmaxGrad)
Example No. 16
def LQLEP_wBarrier( LQLEP    = Th.dscalar(), ldet = Th.dscalar(), v1 = Th.dvector(), 
                    N_spike  = Th.dscalar(), ImM  = Th.dmatrix(),  U = Th.dmatrix(),
                    V2       = Th.dvector(),    u = Th.dvector(),  C = Th.dmatrix(),
                    **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood, 
    as a function of theta and M, 
    with a barrier on the log-det term and a prior.
    '''
    sq_nonlinearity = V2**2.*Th.sum( Th.dot(U,C)*U, axis=[1])  #Th.sum(U**2,axis=[1])
    nonlinearity = V2 * Th.sqrt( Th.sum( Th.dot(U,C)*U, axis=[1])) #Th.sum(U**2,axis=[1]) )
    if other.has_key('uc'):
        LQLEP_wPrior = LQLEP + 0.5 * N_spike * ( 1./(ldet+250.)**2. \
                     - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity))) \
                     + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. ) \
                     + 10. * Th.sum( (other['uc'][2:]+other['uc'][:-2]-2*other['uc'][1:-1])**2. ) \
                     + 0.000000001 * Th.sum( v1**2. )
#                     + 100. * Th.sum( v1 )
    #                 + 0.0001*Th.sum( V2**2 )
    else:
        LQLEP_wPrior = LQLEP + 0.5 * N_spike * ( 1./(ldet+250.)**2. \
                     - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity))) \
                     + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. ) \
                     + 0.000000001 * Th.sum( v1**2. )
#                     + 100. * Th.sum( v1 )
    #                 + 0.0001*Th.sum( V2**2 )
    eigsImM,barrier = eig( ImM )
    barrier   = 1-(Th.sum(Th.log(eigsImM))>-250) * \
                  (Th.min(eigsImM)>0) * (Th.max(4*sq_nonlinearity)<1)
    other.update(locals())
    return named( **other )
Example No. 17
def get_hidden_layers(dbn, layers):
    print "... getting hidden layers"
    test_data, test_label = get_test_set()
    index = T.lscalar()
    hidden_features = []
    total_layers = len(layers)

    w = T.dmatrix("w")
    t = T.dmatrix("t")
    b = T.vector("b")
    z = T.dot(w,t)
    # function for testing model
    test_f = theano.function([w,t], z)

    #loop through each layer
    for i in xrange(total_layers):
        weights = layers[i][0]
        bias = layers[i][1]

        if i == 0:
            hidden_features.append( test_f(test_data,weights) )
        else:
            #use previous layer
            prev_layer = hidden_features[i-1]
            hidden_features.append( test_f(prev_layer,weights) )

    # dump the hidden-layer activations (note: no sigmoid is actually applied here)
    with open('hidden.pkl', 'w') as f:
        cPickle.dump(hidden_features, f)
Example No. 18
    def test_validity(self):
        theano.config.on_unused_input = 'warn'
        a0_var = T.dmatrix('a0')
        r0_var = T.dmatrix('r0')
        fri_var = T.dmatrix("fri")
        out = T.dmatrix("out")
        out_stale = T.dmatrix("out_stale")

        f = theano.function([a0_var, r0_var, fri_var, out, out_stale],
                            dqn.build_loss(out, out_stale, a0_var, r0_var, fri_var, gamma=0.5))

        loss, not_loss, y, q = f(np.array([[1, 0, 0, 0, 0, 0],
                                           [0, 1, 0, 0, 0, 0]]),
                                 np.array([[1],
                                           [0]]),
                                 np.array([[1],
                                           [1]]),
                                 np.array([[-5, 1, 2, 3, 4, 7],
                                           [1, 4, 3, 4, 5, 9]]),
                                 np.array([[-5, 1, 2, 3, 4, 5],
                                           [1, 2, 3, 4, 5, 6]]))

        self.assertTrue(np.all(y == [[3.5], [3]]))
        self.assertTrue(np.all(q == [[-5], [4]]))
        print(loss)
        print(not_loss)
        self.assertTrue(loss == 8.5)
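# The asserted numbers are consistent with the usual DQN target
# y = r + gamma * fri * max(out_stale) and q = out[a0], with 8.5 matching a
# Huber-style loss summed over the batch. This is a numpy reading of the
# asserts above, not of dqn.build_loss itself:
import numpy as np

gamma = 0.5
r = np.array([1., 0.])
fri = np.array([1., 1.])
a0 = np.array([0, 1])                                     # indices of the chosen actions
out = np.array([[-5., 1, 2, 3, 4, 7], [1, 4, 3, 4, 5, 9]])
out_stale = np.array([[-5., 1, 2, 3, 4, 5], [1, 2, 3, 4, 5, 6]])

y = r + gamma * fri * out_stale.max(axis=1)               # -> [3.5, 3.0]
q = out[np.arange(2), a0]                                  # -> [-5.0, 4.0]
delta = np.abs(y - q)
huber = np.where(delta <= 1, 0.5 * delta ** 2, delta - 0.5)
print(y, q, huber.sum())                                   # -> ... 8.5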
Example No. 19
    def __init__(self, beta=0.1, n_in=1, n_out=1):
        self.__beta = beta
        self.__x = T.dmatrix('x')
        self.__y = T.dmatrix('y')
        self.__n_in = n_in
        self.__n_out = n_out

        self.__clf_model = _LogisticRegressionModel(d_input=self.__x, 
            n_in=self.__n_in,
            n_out=self.__n_out)
        self.__cost = self.__clf_model.negative_log_likelihood(self.__y)

        # compute the gradient of cost with respect to theta = (W,b)
        self.__g_W = T.grad(cost=self.__cost, wrt=self.__clf_model.W)
        self.__g_b = T.grad(cost=self.__cost, wrt=self.__clf_model.b)

        # start-snippet-3
        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs.
        self.__updates = [(self.__clf_model.W, 
            self.__clf_model.W - self.__beta * self.__g_W),
               (self.__clf_model.b, self.__clf_model.b 
                - self.__beta * self.__g_b)]

        self.__train_model = theano.function(
            inputs=[self.__x, self.__y],
            outputs=[self.__cost, self.__clf_model.y_pred, self.__g_W, self.__g_b],
            updates=self.__updates,
        )

        self.__prediction_model = theano.function(
            inputs=[self.__clf_model.input],
            outputs=self.__clf_model.y_pred
        )
Example No. 20
def test_mixin_composition():
    # Check composed expressions as parameters
    a = theano.shared(0.0)
    b = theano.shared(-1.0)
    mu = a + b - 1.0
    sigma = T.abs_(a * b)
    p = Normal(mu=mu, sigma=sigma)
    assert a in p.parameters_
    assert b in p.parameters_

    # Compose parameters with observed variables
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix(name="y")
    p = Normal(mu=a * y + b)
    assert len(p.parameters_) == 3
    assert a in p.parameters_
    assert b in p.parameters_
    assert p.sigma in p.parameters_
    assert p.mu not in p.parameters_
    assert len(p.observeds_) == 1
    assert y in p.observeds_

    # Check signatures
    data_X = np.random.rand(10, 1)
    data_y = np.random.rand(10, 1)
    p.pdf(X=data_X, y=data_y)
    p.cdf(X=data_X, y=data_y)
    p.rvs(10, y=data_y)

    # Check error
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix()  # y must be named
    assert_raises(ValueError, Normal, mu=a * y + b)
Example No. 21
def asho_test():
	import theano.tensor as T
	from theano import function
	x = T.dmatrix('x')
	w = T.dmatrix('w')
	y = T.dot(x,w)

	f = function([x,w],y)	
Example No. 22
    def theano_setup(self):
    
        W = T.dmatrix('W')
        b = T.dvector('b')
        c = T.dvector('c')
        x = T.dmatrix('x')
    
        s = T.dot(x, W) + c
        # h = 1 / (1 + T.exp(-s))
        # h = T.nnet.sigmoid(s)
        h = T.tanh(s)
        # r = T.dot(h,W.T) + b
        # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b))
        ract = T.dot(h,W.T) + b
        r = self.output_scaling_factor * T.tanh(ract)
    
        #g  = function([W,b,c,x], h)
        #f  = function([W,b,c,h], r)
        #fg = function([W,b,c,x], r)
    
        # Another variable to be able to call a function
        # with a noisy x and compare it to a reference x.
        y = T.dmatrix('y')

        all_losses = ((r - y)**2)
        loss = T.sum(all_losses)
        #loss = ((r - y)**2).sum()
        
        self.theano_encode_decode = function([W,b,c,x], r)
        self.theano_all_losses = function([W,b,c,x,y], [all_losses, T.abs_(s), T.abs_(ract)])
        self.theano_gradients = function([W,b,c,x,y], [T.grad(loss, W), T.grad(loss, b), T.grad(loss, c)])
Example No. 23
  def __init__(self,
      np_rng             = np.random.RandomState(1234),
      theano_rng         = None,
      n_in               = 424 * 424 * 3,
      n_out              = 37, # galaxy classes
      hidden_layer_sizes = [500, 500],
      corruption_levels  = [0.1, 0.2]):

    self.np_rng = np_rng
    if not theano_rng: theano_rng = RandomStreams(np_rng.randint(2 ** 30))
    self.n_in = n_in
    self.n_out = n_out
    self.hidden_layer_sizes = hidden_layer_sizes
    self.corruption_levels = corruption_levels

    self.sigmoid_layers = []
    self.da_layers = []
    self.params = []
    self.n_layers = len(hidden_layer_sizes)

    assert self.n_layers > 0, 'must have some hidden layers'

    self.x = T.dmatrix('x')
    self.y = T.dmatrix('y')

    self.build_layers()
Example No. 24
    def train( self, train_set, batch_size = 100 ):
        for i in xrange(len(self.layers) - 1):
            train_data = T.dmatrix('train_data')
            x = T.dmatrix('x')
            rng = numpy.random.RandomState(123)
            theano_rng = RandomStreams(rng.randint(2 ** 10))
            da = dA(
                numpy_rng=rng,
                theano_rng=theano_rng,
                input=x,
                n_visible=self.layers[i],
                n_hidden=self.layers[i+1]
            )
            cost, updates = da.get_cost_updates(
                corruption_level=0.,
                learning_rate=0.4
            )
            train_da = theano.function(
                [train_data],
                cost,
                updates=updates,
                givens={
                    x: train_data
                }
            )

            for epoch in xrange(200):
                train_cost = []
                for index in xrange(len(train_set)/batch_size):
                    train_cost.append(train_da(numpy.asarray(train_set[index * batch_size: (index + 1) * batch_size])))
                print 'Training 1st ae epoch %d, cost ' % epoch, numpy.mean(train_cost)
            train_set = da.get_hidden_values(train_set).eval()
            self.dAs.append(da)
Example No. 25
    def neural_net(
            x=T.dmatrix(),    #our points, one point per row
            y=T.dmatrix(),    #our targets
            w=T.dmatrix(),    #first layer weights
            b=T.dvector(),    #first layer bias
            v=T.dmatrix(),    #second layer weights
            c=T.dvector(),    #second layer bias
            step=T.dscalar(), #step size for gradient descent
            l2_coef=T.dscalar() #l2 regularization amount
            ):
        """Idea A:
        """
        hid = T.tanh(T.dot(x, w) + b)
        pred = T.dot(hid, v) + c
        sse = T.sum((pred - y) * (pred - y))
        w_l2 = T.sum(T.sum(w*w))
        v_l2 = T.sum(T.sum(v*v))
        loss = sse + l2_coef * (w_l2 + v_l2)

        def symbolic_params(cls):
            return [cls.w, cls.b, cls.v, cls.c]

        def update(cls, x, y, **kwargs):
            params = cls.symbolic_params()
            gp = T.grad(cls.loss, params)
            return [], [In(p, update=p - cls.step * g) for p,g in zip(params, gp)]

        def predict(cls, x, **kwargs):
            return cls.pred, []

        return locals()
Example No. 26
    def theano_setup(self):
    
        # The matrices Wb and Wc were originally tied.
        # Because of that, I decided to keep Wb and Wc with
        # the same shape (instead of being transposed) to
        # avoid disturbing the code as much as possible.

        Wb = T.dmatrix('Wb')
        Wc = T.dmatrix('Wc')
        b = T.dvector('b')
        c = T.dvector('c')
        s = T.dscalar('s')
        x = T.dmatrix('x')
    
        h_act = T.dot(x, Wc) + c
        if self.act_func[0] == 'tanh':
            h = T.tanh(h_act)
        elif self.act_func[0] == 'sigmoid':
            h = T.nnet.sigmoid(h_act)
        elif self.act_func[0] == 'id':
            # bad idea
            h = h_act
        else:
            raise ValueError("Invalid act_func[0]")

        r_act = T.dot(h, Wb.T) + b
        if self.act_func[1] == 'tanh':
            r = s * T.tanh(r_act)
        elif self.act_func[1] == 'sigmoid':
            r = s * T.nnet.sigmoid(r_act)
        elif self.act_func[1] == 'id':
            r = s * r_act
        else:
            raise("Invalid act_func[1]")


        # Another variable to be able to call a function
        # with a noisy x and compare it to a reference x.
        y = T.dmatrix('y')

        loss = ((r - y)**2)
        sum_loss = T.sum(loss)
        
        # theano_encode_decode : vectorial function in argument X.
        # theano_loss : vectorial function in argument X.
        # theano_gradients : returns triplet of gradients, each of
        #                    which involves the all data X summed
        #                    so it's not a "vectorial" function.

        self.theano_encode_decode = function([Wb,Wc,b,c,s,x], r)
        self.theano_loss = function([Wb,Wc,b,c,s,x,y], loss)

        self.theano_gradients = function([Wb,Wc,b,c,s,x,y],
                                         [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
                                          T.grad(sum_loss, b),  T.grad(sum_loss, c),
                                          T.grad(sum_loss, s)])
        # other useful theano functions for the experiments that involve
        # adding noise to the hidden states
        self.theano_encode = function([Wc,c,x], h)
        self.theano_decode = function([Wb,b,s,h], r)
Example No. 27
def test_argsort():
    # Set up
    rng = np.random.RandomState(seed=utt.fetch_seed())
    m_val = rng.rand(3, 2)
    v_val = rng.rand(4)

    # Example 1
    a = tensor.dmatrix()
    w = argsort(a)
    f = theano.function([a], w)
    gv = f(m_val)
    gt = np.argsort(m_val)
    assert np.allclose(gv, gt)

    # Example 2
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    w = argsort(a, axis)
    f = theano.function([a, axis], w)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 3
    a = tensor.dvector()
    w2 = argsort(a)
    f = theano.function([a], w2)
    gv = f(v_val)
    gt = np.argsort(v_val)
    assert np.allclose(gv, gt)

    # Example 4
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    l = argsort(a, axis, "mergesort")
    f = theano.function([a, axis], l)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 5
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    a1 = ArgSortOp("mergesort", [])
    a2 = ArgSortOp("quicksort", [])
    # All the below should give true
    assert a1 != a2
    assert a1 == ArgSortOp("mergesort", [])
    assert a2 == ArgSortOp("quicksort", [])

    # Example 6: Testing axis=None
    a = tensor.dmatrix()
    w2 = argsort(a, None)
    f = theano.function([a], w2)
    gv = f(m_val)
    gt = np.argsort(m_val, None)
    assert np.allclose(gv, gt)
Example No. 28
    def test_infer_shape(self):
        x = tensor.dmatrix()
        y = tensor.dmatrix()

        self._compile_and_check([x, y], [self.op_class()(x, y)],
                                [numpy.random.rand(5, 6),
                                 numpy.random.rand(5, 6)],
                                self.op_class)
Example No. 29
def eigs( theta = Th.dvector('theta'), M    = Th.dmatrix('M') ,
          STA   = Th.dvector('STA')  , STC   = Th.dmatrix('STC'), **other):
    '''
    Return eigenvalues of I-sym(M), for display/debugging purposes.
    '''
    ImM = Th.identity_like(M)-(M+M.T)/2
    w,v = eig( ImM )
    return w
Example No. 30
def ldet( theta = Th.dvector('theta'), M    = Th.dmatrix('M') ,
          STA   = Th.dvector('STA'), STC  = Th.dmatrix('STC'), **other):
    '''
    Return log-det of I-sym(M), for display/debugging purposes.
    '''
    ImM = Th.identity_like(M)-(M+M.T)/2
    w, v = eig(ImM)
    return Th.sum(Th.log(w))
Example No. 31
def fit_rkl(data, log_p, max_epochs=20):
    """
    Fit isotropic Gaussian by minimizing reverse Kullback-Leibler divergence.
    """

    # data dimensionality
    D = data.shape[0]

    # data and hidden states
    X = tt.dmatrix('X')
    Z = tt.dmatrix('Z')

    nr.seed(int(time() * 1000.) % 4294967295)
    idx = nr.permutation(data.shape[1])[:100]

    # initialize parameters
    b = th.shared(np.mean(data[:, idx], 1)[:, None],
                  broadcastable=(False, True))
    a = th.shared(np.std(data[:, idx] - b.get_value(), 1)[:, None],
                  broadcastable=[False, True])

    # model density
    q = lambda X: normal(X, b, a)
    log_q = lambda X: -0.5 * tt.sum(tt.square(
        (X - b) / a), 0) - D * tt.log(tt.abs_(a)) - D / 2. * np.log(np.pi)

    G = lambda Z: a * Z + b

    # reverse KL divergence (Monte Carlo estimate)
    RKL = tt.mean(tt.exp(log_p(X)) * (log_p(X) - log_q(X))) + tt.mean(0.0 * Z)

    # function computing the reverse KL and its gradients w.r.t. a and b
    f_rkl = th.function([Z, X], [RKL, th.grad(RKL, a), th.grad(RKL, b)])

    # SGD hyperparameters
    B = 200
    mm = 0.8
    lr = .5

    da = 0.
    db = 0.

    try:
        # display initial divergence
        print('{0:>4} {1:.4f}'.format(
            0, float(f_rkl(nr.randn(*data.shape), data)[0])))

        for epoch in range(max_epochs):
            values = []

            # stochastic gradient descent
            for t in range(0, data.shape[1], B):
                Z = nr.randn(D, B)
                Y = data[:, t:t + B]

                v, ga, gb = f_rkl(Z, Y)
                da = mm * da - lr * ga
                db = mm * db - lr * gb

                values.append(v)

                a.set_value(a.get_value() + da)
                b.set_value(b.get_value() + db)

            # reduce learning rate
            lr /= 2.

            # display estimated divergence
            print('{0:>4} {1:.4f}'.format(epoch + 1, np.mean(values)))

    except KeyboardInterrupt:
        pass

    return a.get_value() * np.eye(D), b.get_value()
Example No. 32
def evaluate_lenet5(learning_rate=0.05,
                    n_epochs=2000,
                    nkerns=[50],
                    batch_size=1,
                    window_width=4,
                    maxSentLength=64,
                    emb_size=300,
                    hidden_size=200,
                    margin=0.5,
                    L2_weight=0.0003,
                    update_freq=1,
                    norm_threshold=5.0,
                    max_truncate=40):
    maxSentLength = max_truncate + 2 * (window_width - 1)
    model_options = locals().copy()
    print "model options", model_options
    rootPath = '/mounts/data/proj/wenpeng/Dataset/WikiQACorpus/'
    rng = numpy.random.RandomState(23455)
    datasets, vocab_size = load_wikiQA_corpus(
        rootPath + 'vocab.txt', rootPath + 'WikiQA-train.txt',
        rootPath + 'test_filtered.txt', max_truncate,
        maxSentLength)  #vocab_size contain train, dev and test
    #datasets, vocab_size=load_wikiQA_corpus(rootPath+'vocab_lower_in_word2vec.txt', rootPath+'WikiQA-train.txt', rootPath+'test_filtered.txt', maxSentLength)#vocab_size contain train, dev and test
    mtPath = '/mounts/data/proj/wenpeng/Dataset/WikiQACorpus/MT/BLEU_NIST/'
    mt_train, mt_test = load_mts_wikiQA(
        mtPath + 'result_train/concate_2mt_train.txt',
        mtPath + 'result_test/concate_2mt_test.txt')
    wm_train, wm_test = load_wmf_wikiQA(
        rootPath + 'train_word_matching_scores.txt',
        rootPath + 'test_word_matching_scores.txt')
    #wm_train, wm_test=load_wmf_wikiQA(rootPath+'train_word_matching_scores_normalized.txt', rootPath+'test_word_matching_scores_normalized.txt')
    indices_train, trainY, trainLengths, normalized_train_length, trainLeftPad, trainRightPad = datasets[
        0]
    indices_train_l = indices_train[::2, :]
    indices_train_r = indices_train[1::2, :]
    trainLengths_l = trainLengths[::2]
    trainLengths_r = trainLengths[1::2]
    normalized_train_length_l = normalized_train_length[::2]
    normalized_train_length_r = normalized_train_length[1::2]

    trainLeftPad_l = trainLeftPad[::2]
    trainLeftPad_r = trainLeftPad[1::2]
    trainRightPad_l = trainRightPad[::2]
    trainRightPad_r = trainRightPad[1::2]
    indices_test, testY, testLengths, normalized_test_length, testLeftPad, testRightPad = datasets[
        1]
    indices_test_l = indices_test[::2, :]
    indices_test_r = indices_test[1::2, :]
    testLengths_l = testLengths[::2]
    testLengths_r = testLengths[1::2]
    normalized_test_length_l = normalized_test_length[::2]
    normalized_test_length_r = normalized_test_length[1::2]

    testLeftPad_l = testLeftPad[::2]
    testLeftPad_r = testLeftPad[1::2]
    testRightPad_l = testRightPad[::2]
    testRightPad_r = testRightPad[1::2]

    n_train_batches = indices_train_l.shape[0] / batch_size
    n_test_batches = indices_test_l.shape[0] / batch_size

    train_batch_start = list(numpy.arange(n_train_batches) * batch_size)
    test_batch_start = list(numpy.arange(n_test_batches) * batch_size)

    indices_train_l = theano.shared(numpy.asarray(indices_train_l,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    indices_train_r = theano.shared(numpy.asarray(indices_train_r,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    indices_test_l = theano.shared(numpy.asarray(indices_test_l,
                                                 dtype=theano.config.floatX),
                                   borrow=True)
    indices_test_r = theano.shared(numpy.asarray(indices_test_r,
                                                 dtype=theano.config.floatX),
                                   borrow=True)
    indices_train_l = T.cast(indices_train_l, 'int64')
    indices_train_r = T.cast(indices_train_r, 'int64')
    indices_test_l = T.cast(indices_test_l, 'int64')
    indices_test_r = T.cast(indices_test_r, 'int64')

    rand_values = random_value_normal((vocab_size + 1, emb_size),
                                      theano.config.floatX,
                                      numpy.random.RandomState(1234))
    rand_values[0] = numpy.array(numpy.zeros(emb_size),
                                 dtype=theano.config.floatX)
    #rand_values[0]=numpy.array([1e-50]*emb_size)
    rand_values = load_word2vec_to_init(rand_values,
                                        rootPath + 'vocab_embs_300d.txt')
    #rand_values=load_word2vec_to_init(rand_values, rootPath+'vocab_lower_in_word2vec_embs_300d.txt')
    embeddings = theano.shared(value=rand_values, borrow=True)

    #cost_tmp=0
    error_sum = 0

    # allocate symbolic variables for the data
    index = T.lscalar()
    x_index_l = T.lmatrix(
        'x_index_l')  # now, x is the index matrix, must be integer
    x_index_r = T.lmatrix('x_index_r')
    y = T.lvector('y')
    left_l = T.lscalar()
    right_l = T.lscalar()
    left_r = T.lscalar()
    right_r = T.lscalar()
    length_l = T.lscalar()
    length_r = T.lscalar()
    norm_length_l = T.dscalar()
    norm_length_r = T.dscalar()
    mts = T.dmatrix()
    wmf = T.dmatrix()
    cost_tmp = T.dscalar()
    #x=embeddings[x_index.flatten()].reshape(((batch_size*4),maxSentLength, emb_size)).transpose(0, 2, 1).flatten()
    ishape = (emb_size, maxSentLength)  # input 'image' shape seen by the conv layer
    filter_size = (emb_size, window_width)
    #poolsize1=(1, ishape[1]-filter_size[1]+1) #?????????????????????????????
    length_after_wideConv = ishape[1] + filter_size[1] - 1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    #layer0_input = x.reshape(((batch_size*4), 1, ishape[0], ishape[1]))
    layer0_l_input = embeddings[x_index_l.flatten()].reshape(
        (batch_size, maxSentLength,
         emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
    layer0_r_input = embeddings[x_index_r.flatten()].reshape(
        (batch_size, maxSentLength,
         emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

    conv_W, conv_b = create_conv_para(rng,
                                      filter_shape=(nkerns[0], 1,
                                                    filter_size[0],
                                                    filter_size[1]))

    #layer0_output = debug_print(layer0.output, 'layer0.output')
    layer0_l = Conv_with_input_para(rng,
                                    input=layer0_l_input,
                                    image_shape=(batch_size, 1, ishape[0],
                                                 ishape[1]),
                                    filter_shape=(nkerns[0], 1, filter_size[0],
                                                  filter_size[1]),
                                    W=conv_W,
                                    b=conv_b)
    layer0_r = Conv_with_input_para(rng,
                                    input=layer0_r_input,
                                    image_shape=(batch_size, 1, ishape[0],
                                                 ishape[1]),
                                    filter_shape=(nkerns[0], 1, filter_size[0],
                                                  filter_size[1]),
                                    W=conv_W,
                                    b=conv_b)
    layer0_l_output = debug_print(layer0_l.output, 'layer0_l.output')
    layer0_r_output = debug_print(layer0_r.output, 'layer0_r.output')

    layer1 = Average_Pooling_for_Top(rng,
                                     input_l=layer0_l_output,
                                     input_r=layer0_r_output,
                                     kern=nkerns[0],
                                     left_l=left_l,
                                     right_l=right_l,
                                     left_r=left_r,
                                     right_r=right_r,
                                     length_l=length_l + filter_size[1] - 1,
                                     length_r=length_r + filter_size[1] - 1,
                                     dim=maxSentLength + filter_size[1] - 1)

    #layer2=HiddenLayer(rng, input=layer1_out, n_in=nkerns[0]*2, n_out=hidden_size, activation=T.tanh)

    sum_uni_l = T.sum(layer0_l_input, axis=3).reshape((1, emb_size))
    aver_uni_l = sum_uni_l / layer0_l_input.shape[3]
    norm_uni_l = sum_uni_l / T.sqrt((sum_uni_l**2).sum())
    sum_uni_r = T.sum(layer0_r_input, axis=3).reshape((1, emb_size))
    aver_uni_r = sum_uni_r / layer0_r_input.shape[3]
    norm_uni_r = sum_uni_r / T.sqrt((sum_uni_r**2).sum())

    uni_cosine = cosine(sum_uni_l, sum_uni_r)
    aver_uni_cosine = cosine(aver_uni_l, aver_uni_r)
    uni_sigmoid_simi = debug_print(
        T.nnet.sigmoid(T.dot(norm_uni_l, norm_uni_r.T)).reshape((1, 1)),
        'uni_sigmoid_simi')
    '''
    linear=Linear(sum_uni_l, sum_uni_r)
    poly=Poly(sum_uni_l, sum_uni_r)
    sigmoid=Sigmoid(sum_uni_l, sum_uni_r)
    rbf=RBF(sum_uni_l, sum_uni_r)
    gesd=GESD(sum_uni_l, sum_uni_r)
    '''
    eucli_1 = 1.0 / (1.0 + EUCLID(sum_uni_l, sum_uni_r))  #25.2%
    #eucli_1_exp=1.0/T.exp(EUCLID(sum_uni_l, sum_uni_r))

    len_l = norm_length_l.reshape((1, 1))
    len_r = norm_length_r.reshape((1, 1))
    '''
    len_l=length_l.reshape((1,1))
    len_r=length_r.reshape((1,1))  
    '''
    #length_gap=T.log(1+(T.sqrt((len_l-len_r)**2))).reshape((1,1))
    #length_gap=T.sqrt((len_l-len_r)**2)
    #layer3_input=mts
    layer3_input = T.concatenate(
        [  #mts,
            uni_cosine,  #eucli_1_exp,#uni_sigmoid_simi,  #norm_uni_l-(norm_uni_l+norm_uni_r)/2,#uni_cosine, #
            layer1.
            output_cosine,  #layer1.output_eucli_to_simi_exp,#layer1.output_sigmoid_simi,#layer1.output_vector_l-(layer1.output_vector_l+layer1.output_vector_r)/2,#layer1.output_cosine, #
            len_l,
            len_r,
            wmf
        ],
        axis=1)  #, layer2.output, layer1.output_cosine], axis=1)
    #layer3_input=T.concatenate([mts,eucli, uni_cosine, len_l, len_r, norm_uni_l-(norm_uni_l+norm_uni_r)/2], axis=1)
    #layer3=LogisticRegression(rng, input=layer3_input, n_in=11, n_out=2)
    layer3 = LogisticRegression(rng,
                                input=layer3_input,
                                n_in=(1) + (1) + 2 + 2,
                                n_out=2)

    #L2_reg =(layer3.W** 2).sum()+(layer2.W** 2).sum()+(layer1.W** 2).sum()+(conv_W** 2).sum()
    L2_reg = debug_print(
        (layer3.W**2).sum() + (conv_W**2).sum(),
        'L2_reg')  #+(layer1.W** 2).sum()++(embeddings**2).sum()
    cost_this = debug_print(layer3.negative_log_likelihood(y),
                            'cost_this')  #+L2_weight*L2_reg
    cost = debug_print(
        (cost_this + cost_tmp) / update_freq + L2_weight * L2_reg, 'cost')
    #cost=debug_print((cost_this+cost_tmp)/update_freq, 'cost')

    test_model = theano.function(
        [index], [layer3.prop_for_posi, layer3_input, y],
        givens={
            x_index_l: indices_test_l[index:index + batch_size],
            x_index_r: indices_test_r[index:index + batch_size],
            y: testY[index:index + batch_size],
            left_l: testLeftPad_l[index],
            right_l: testRightPad_l[index],
            left_r: testLeftPad_r[index],
            right_r: testRightPad_r[index],
            length_l: testLengths_l[index],
            length_r: testLengths_r[index],
            norm_length_l: normalized_test_length_l[index],
            norm_length_r: normalized_test_length_r[index],
            mts: mt_test[index:index + batch_size],
            wmf: wm_test[index:index + batch_size]
        },
        on_unused_input='ignore')

    #params = layer3.params + layer2.params + layer1.params+ [conv_W, conv_b]
    params = layer3.params + [conv_W, conv_b]  #+[embeddings]# + layer1.params
    params_conv = [conv_W, conv_b]

    accumulator = []
    for para_i in params:
        eps_p = numpy.zeros_like(para_i.get_value(borrow=True),
                                 dtype=theano.config.floatX)
        accumulator.append(theano.shared(eps_p, borrow=True))

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i, acc_i in zip(params, grads, accumulator):
        grad_i = debug_print(grad_i, 'grad_i')
        acc = acc_i + T.sqr(grad_i)
        updates.append(
            (param_i,
             param_i - learning_rate * grad_i / T.sqrt(acc)))  #AdaGrad
        updates.append((acc_i, acc))

    train_model = theano.function(
        [index, cost_tmp],
        cost,
        updates=updates,
        givens={
            x_index_l: indices_train_l[index:index + batch_size],
            x_index_r: indices_train_r[index:index + batch_size],
            y: trainY[index:index + batch_size],
            left_l: trainLeftPad_l[index],
            right_l: trainRightPad_l[index],
            left_r: trainLeftPad_r[index],
            right_r: trainRightPad_r[index],
            length_l: trainLengths_l[index],
            length_r: trainLengths_r[index],
            norm_length_l: normalized_train_length_l[index],
            norm_length_r: normalized_train_length_r[index],
            mts: mt_train[index:index + batch_size],
            wmf: wm_train[index:index + batch_size]
        },
        on_unused_input='ignore')

    train_model_predict = theano.function(
        [index], [cost_this, layer3.errors(y), layer3_input, y],
        givens={
            x_index_l: indices_train_l[index:index + batch_size],
            x_index_r: indices_train_r[index:index + batch_size],
            y: trainY[index:index + batch_size],
            left_l: trainLeftPad_l[index],
            right_l: trainRightPad_l[index],
            left_r: trainLeftPad_r[index],
            right_r: trainRightPad_r[index],
            length_l: trainLengths_l[index],
            length_r: trainLengths_r[index],
            norm_length_l: normalized_train_length_l[index],
            norm_length_r: normalized_train_length_r[index],
            mts: mt_train[index:index + batch_size],
            wmf: wm_train[index:index + batch_size]
        },
        on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 500000000000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    svm_max = 0.0
    best_epoch = 0

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        #for minibatch_index in xrange(n_train_batches): # each batch
        minibatch_index = 0
        #shuffle(train_batch_start)#shuffle training data
        cost_tmp = 0.0
        for batch_start in train_batch_start:
            # iter means how many batches have been runed, taking into loop
            iter = (epoch - 1) * n_train_batches + minibatch_index + 1

            minibatch_index = minibatch_index + 1
            #if epoch %2 ==0:
            #    batch_start=batch_start+remain_train
            #time.sleep(0.5)
            #print batch_start
            if iter % update_freq != 0:
                cost_ij, error_ij, layer3_input, y = train_model_predict(
                    batch_start)
                #print 'layer3_input', layer3_input
                cost_tmp += cost_ij
                error_sum += error_ij
                #print 'cost_acc ',cost_acc
                #print 'cost_ij ', cost_ij
                #print 'cost_tmp before update',cost_tmp
            else:
                cost_average = train_model(batch_start, cost_tmp)
                #print 'layer3_input', layer3_input
                error_sum = 0
                cost_tmp = 0.0  #reset for the next batch
                #print 'cost_average ', cost_average
                #print 'cost_this ',cost_this
                #exit(0)
            #exit(0)
            if iter % n_train_batches == 0:
                print 'training @ iter = ' + str(
                    iter) + ' average cost: ' + str(
                        cost_average) + ' error: ' + str(
                            error_sum) + '/' + str(
                                update_freq) + ' error rate: ' + str(
                                    error_sum * 1.0 / update_freq)
            #if iter ==1:
            #    exit(0)

            if iter % validation_frequency == 0:
                #write_file=open('log.txt', 'w')
                test_probs = []
                test_y = []
                test_features = []
                for i in test_batch_start:
                    prob_i, layer3_input, y = test_model(i)
                    #test_losses = [test_model(i) for i in test_batch_start]
                    test_probs.append(prob_i[0][0])
                    test_y.append(y[0])
                    test_features.append(layer3_input[0])

                MAP, MRR = compute_map_mrr(rootPath + 'test_filtered.txt',
                                           test_probs)
                #now, check MAP and MRR
                print(
                    ('\t\t\t\t\t\tepoch %i, minibatch %i/%i, test MAP of best '
                     'model %f, MRR  %f') %
                    (epoch, minibatch_index, n_train_batches, MAP, MRR))
                #now, see the results of LR
                #write_feature=open(rootPath+'feature_check.txt', 'w')
                train_y = []
                train_features = []
                count = 0
                for batch_start in train_batch_start:
                    cost_ij, error_ij, layer3_input, y = train_model_predict(
                        batch_start)
                    train_y.append(y[0])
                    train_features.append(layer3_input[0])
                    #write_feature.write(str(batch_start)+' '+' '.join(map(str,layer3_input[0]))+'\n')
                    #count+=1

                #write_feature.close()

                clf = svm.SVC(C=1.0, kernel='linear')
                clf.fit(train_features, train_y)
                results_svm = clf.decision_function(test_features)
                MAP_svm, MRR_svm = compute_map_mrr(
                    rootPath + 'test_filtered.txt', results_svm)

                lr = LinearRegression().fit(train_features, train_y)
                results_lr = lr.predict(test_features)
                MAP_lr, MRR_lr = compute_map_mrr(
                    rootPath + 'test_filtered.txt', results_lr)
                print '\t\t\t\t\t\t\tSVM, MAP: ', MAP_svm, ' MRR: ', MRR_svm, ' LR: ', MAP_lr, ' MRR: ', MRR_lr

            if patience <= iter:
                done_looping = True
                break
        #after each epoch, increase the batch_size
        if epoch % 2 == 1:
            update_freq = update_freq * 1
        else:
            update_freq = update_freq / 1

        #store the paras after epoch 15
        if epoch == 15:
            store_model_to_file(params_conv)
            print 'Finished storing best conv params'
            exit(0)

        #print 'Batch_size: ', update_freq
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example No. 33
    def evaluate_lenet5(self):
    #def evaluate_lenet5(learning_rate=0.1, n_epochs=2000, nkerns=[6, 12], batch_size=70, useAllSamples=0, kmax=30, ktop=5, filter_size=[10,7],
    #                    L2_weight=0.000005, dropout_p=0.5, useEmb=0, task=5, corpus=1):
        rng = numpy.random.RandomState(23455)


        n_train_batches=self.raw_data[0].shape[0]/self.batch_size
        n_valid_batches=self.raw_data[1].shape[0]/self.batch_size
        n_test_batches=self.raw_data[2].shape[0]/self.batch_size

        train_batch_start=[]
        dev_batch_start=[]
        test_batch_start=[]
        if self.useAllSamples:
            train_batch_start=list(numpy.arange(n_train_batches)*self.batch_size)+[self.raw_data[0].shape[0]-self.batch_size]
            dev_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size)+[self.raw_data[1].shape[0]-self.batch_size]
            test_batch_start=list(numpy.arange(n_test_batches)*self.batch_size)+[self.raw_data[2].shape[0]-self.batch_size]
            n_train_batches=n_train_batches+1
            n_valid_batches=n_valid_batches+1
            n_test_batches=n_test_batches+1
        else:
            train_batch_start=list(numpy.arange(n_train_batches)*self.batch_size)
            dev_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size)
            test_batch_start=list(numpy.arange(n_test_batches)*self.batch_size)
        '''
        indices_train_theano=theano.shared(numpy.asarray(indices_train, dtype=theano.config.floatX), borrow=True)
        indices_dev_theano=theano.shared(numpy.asarray(indices_dev, dtype=theano.config.floatX), borrow=True)
        indices_train_theano=T.cast(indices_train_theano, 'int32')
        indices_dev_theano=T.cast(indices_dev_theano, 'int32')
        '''
        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.dmatrix('x')   # now, x is the index matrix, must be integer
        y = T.dmatrix('y') 

        
        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'
    
        inputs=debug_print(x, 'inputs')
        labels=debug_print(y, 'labels')
        layer2 = HiddenLayer(rng, input=inputs, n_in=self.source_embedding_size, n_out=self.target_embedding_size, activation=None)
        layer2_output=debug_print(layer2.output, 'layer2_output')
        #J= debug_print(- T.sum(labels * T.log(layer2_output) + (1 - labels) * T.log(1 - layer2_output), axis=1), 'J') # a vector of cross-entropy
        J=T.sum((layer2_output - labels)**2, axis=1)
        L2_reg = (layer2.W** 2).sum()
        self.cost = T.mean(J) + self.L2_weight*L2_reg
        
        validate_model = theano.function([index], self.cost,
                givens={
                    x: self.dev_source[index: index + self.batch_size],
                    y: self.dev_target[index: index + self.batch_size]})

        test_model = theano.function([index], layer2_output,
                givens={
                    x: self.test_source[index: index + self.batch_size],
                    y: self.test_source[index: index + self.batch_size]})   
        # create a list of all model parameters to be fit by gradient descent
        self.params = layer2.params
        #params = layer3.params + layer2.params + layer0.params+[embeddings]
        
        accumulator=[]
        for para_i in self.params:
            eps_p=numpy.zeros_like(para_i.get_value(borrow=True),dtype=theano.config.floatX)
            accumulator.append(theano.shared(eps_p, borrow=True))
          
        # create a list of gradients for all model parameters
        grads = T.grad(self.cost, self.params)
        updates = []
        for param_i, grad_i, acc_i in zip(self.params, grads, accumulator):
            acc = acc_i + T.sqr(grad_i)
            updates.append((param_i, param_i - self.ini_learning_rate * grad_i / T.sqrt(acc)))   #AdaGrad
            updates.append((acc_i, acc))    
           
        train_model = theano.function([index], self.cost, updates=updates,
              givens={
                x: self.train_source[index: index + self.batch_size],
                y: self.train_target[index: index + self.batch_size]})
    
        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        # early-stopping parameters
        patience = 500000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                      # go through this many
                                      # minibatches before checking the network
                                      # on the validation set; in this case we
                                      # check every epoch
    
        best_params = None
        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
    
        epoch = 0
        done_looping = False
        vali_loss_list=[]
        lowest_vali_loss=0
        OOV_embs=numpy.zeros((len(self.OOV),self.target_embedding_size), dtype=theano.config.floatX)
        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            #for minibatch_index in xrange(n_train_batches): # each batch
            minibatch_index=0
            for batch_start in train_batch_start: 
                # iter counts how many minibatches have been run so far, across epochs
                iter = (epoch - 1) * n_train_batches + minibatch_index +1
    
                minibatch_index=minibatch_index+1
                
                cost_of_each_iteration= train_model(batch_start)
                #exit(0)
                #print 'sentence embeddings:'
                #print sentences_embs[:6,:]
                #if iter ==1:
                #    exit(0)
                if iter % validation_frequency == 0:
                    print 'training @ iter = '+str(iter)+' cost: '+str(cost_of_each_iteration)# +' error: '+str(error_ij)
                    #print '\t iter: '+str(iter)
                    # compute zero-one loss on validation set
                    #validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                    validation_losses=[]
                    for batch_start in dev_batch_start:
                        vali_loss_i=validate_model(batch_start)
                        validation_losses.append(vali_loss_i)
                    this_validation_loss = numpy.mean(validation_losses)
                    print('\t\tepoch %i, minibatch %i/%i, validation cost %f ' % \
                      (epoch, minibatch_index , n_train_batches, \
                       this_validation_loss))
                    
                    if this_validation_loss < (minimal_of_list(vali_loss_list)-1.0): #is very small
                        #print str(minimal_of_list(vali_loss_list))+'-'+str(this_validation_loss)+'='+str(minimal_of_list(vali_loss_list)-this_validation_loss)
                        del vali_loss_list[:]
                        vali_loss_list.append(this_validation_loss)
                        lowest_vali_loss=this_validation_loss
                        #store params (note: this keeps references to the shared variables, not copies)
                        self.best_params=self.params
                        for batch_start in test_batch_start:
                            predicted_embeddings=test_model(batch_start)
                            for row in range(batch_start, batch_start + self.batch_size):
                                OOV_embs[row]=predicted_embeddings[row-batch_start]
                        if len(vali_loss_list)==self.vali_cost_list_length: # only happen when self.vali_cost_list_length==1
                            print 'Training over, best model got at vali_cost:'+str(lowest_vali_loss)
                            return OOV_embs, self.OOV
                    elif len(vali_loss_list)<self.vali_cost_list_length:                        
                        if this_validation_loss < minimal_of_list(vali_loss_list): #if it's small, but not small enough
                            self.best_params=self.params
                            lowest_vali_loss=this_validation_loss
                            for batch_start in test_batch_start:
                                predicted_embeddings=test_model(batch_start)
                                for row in range(batch_start, batch_start + self.batch_size):
                                    OOV_embs[row]=predicted_embeddings[row-batch_start]   
                        vali_loss_list.append(this_validation_loss)                         
                        if len(vali_loss_list)==self.vali_cost_list_length:
                            print 'Training over, best model got at vali_cost:'+str(lowest_vali_loss)
                            return OOV_embs, self.OOV
                    #print vali_loss_list
    
    
                if patience <= iter:
                    done_looping = True
                    break
    
        end_time = time.clock()
        '''
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i,'\
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        '''
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
        return OOV_embs, self.OOV
Exemplo n.º 34
0








###############################################
# EXAMPLE 2
###############################################

print('Building the model...')

x = T.dmatrix('x')  # Symbolic input matrix



# Initializing the weight matrix and bias vector
W = theano.shared(value=np.zeros((28*28, 10),dtype=theano.config.floatX), name='W')
b = theano.shared(value=np.zeros((10,),dtype=theano.config.floatX), name='b')


p_y_given_x = T.exp(T.dot(x, W) + b)
p_y_given_x = p_y_given_x / T.sum(p_y_given_x,axis=1)[:,None]



# Symbolic description of how to compute prediction as class whose
# probability is maximal
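# A minimal sketch of the prediction step the comment above refers to
# (assuming the usual argmax over the class probabilities; the compiled
# `predict` function below is an illustrative name, not from the original):
y_pred = T.argmax(p_y_given_x, axis=1)
predict = theano.function([x], y_pred)

# With the all-zero W and b above, every class gets probability 0.1,
# so argmax returns class 0 for any input:
print(predict(np.zeros((2, 28 * 28))))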
Exemplo n.º 35
0
    def __init__(self, state_length, action_length, state_bounds, action_bounds, settings_):

        super(NeuralNetwork,self).__init__(state_length, action_length, state_bounds, action_bounds, 0, settings_)
        
        batch_size=32
        # data types for model
        State = T.dmatrix("State")
        State.tag.test_value = np.random.rand(batch_size,self._state_length)
        # ResultState = T.dmatrix("ResultState")
        # ResultState.tag.test_value = np.random.rand(batch_size,self._state_length)
        Action = T.dmatrix("Action")
        Action.tag.test_value = np.random.rand(batch_size, self._action_length)
        # create a small convolutional neural network
        inputLayerState = lasagne.layers.InputLayer((None, self._state_length), input_var=State)
        # inputLayerAction = lasagne.layers.InputLayer((None, self._action_length), Action)
        # concatLayer = lasagne.layers.ConcatLayer([inputLayerState, inputLayerAction])
        l_hid2ActA = lasagne.layers.DenseLayer(
                inputLayerState, num_units=128,
                nonlinearity=lasagne.nonlinearities.leaky_rectify,
                W=lasagne.init.Uniform())
        num_layers=1
        """
        l_hid2ActA = lasagne.layers.DenseLayer(
                inputLayerState, num_units=128,
                nonlinearity=lasagne.nonlinearities.leaky_rectify)
        
        l_hid2ActA = lasagne.layers.DenseLayer(
                l_hid2ActA, num_units=64,
                nonlinearity=lasagne.nonlinearities.leaky_rectify)
        
        l_hid2ActA = lasagne.layers.DenseLayer(
                l_hid2ActA, num_units=32,
                nonlinearity=lasagne.nonlinearities.leaky_rectify)
        """
        for i in range(num_layers):
            l_hid2ActA = lasagne.layers.DenseLayer(
                l_hid2ActA, num_units=64,
                nonlinearity=lasagne.nonlinearities.leaky_rectify
                # ,W=lasagne.init.Uniform()
                )
            
        self._l_out = lasagne.layers.DenseLayer(
                l_hid2ActA, num_units=self._action_length,
                nonlinearity=lasagne.nonlinearities.linear
                # ,W=lasagne.init.Uniform()
                )
                # print "Initial W " + str(self._w_o.get_value()) 
        
        self._learning_rate = 0.01
        self._rho = 0.95
        self._rms_epsilon = 0.001
        
        self._updates=0
        
        self._states_shared = theano.shared(
            np.zeros((batch_size, self._state_length),
                     dtype=theano.config.floatX))

        """self._next_states_shared = theano.shared(
            np.zeros((batch_size, self._state_length),
                     dtype=theano.config.floatX))
        """
        self._actions_shared = theano.shared(
            np.zeros((batch_size, self._action_length), dtype=theano.config.floatX),
            )
        
        inputs_ = {
            State: State,
            # Action: Action,
        }
        self._forward = lasagne.layers.get_output(self._l_out, inputs_, deterministic=True)
        
        # self._target = (Reward + self._discount_factor * self._q_valsB)
        self._diff = Action - self._forward
        self._loss = 0.5 * self._diff ** 2 
        self._loss = T.mean(self._loss) + (1e-5 * lasagne.regularization.regularize_network_params(self._l_out, lasagne.regularization.l2))
        self._loss2 = T.mean(self._loss)
        
        self._params = lasagne.layers.helper.get_all_params(self._l_out)
        self._givens_ = {
            State: self._states_shared,
            # ResultState: self._next_states_shared,
            Action: self._actions_shared,
        }
        
        # SGD update
        # self._updates_ = lasagne.updates.rmsprop(self._loss, self._params, self._learning_rate, self._rho,
        #                                   self._rms_epsilon)
        self._all_grads = T.grad(self._loss, self._params)
        # gself._all_grads = lasagne.updates.total_norm_constraint(self._all_grads, 0.5)
        # self._params = lasagne.updates.norm_constraint(self._params, max_norm=0.4)
        self._updates_ = lasagne.updates.momentum(self._all_grads, self._params, self._learning_rate, 0.9)
        # self._updates_ = lasagne.updates.norm_constraint(self._updates_, self._params, max_norm=0.4)
        # updates = lasagne.updates.nesterov(loss, params)
        # updates = norm_constraint(updates, someparam, abs_max=15)
        # TD update
        # minimize Value function error
        #self._updates_ = lasagne.updates.rmsprop(T.mean(self._q_func) + (1e-4 * lasagne.regularization.regularize_network_params(
        #self._l_outA, lasagne.regularization.l2)), self._params, 
        #            self._learning_rate * -T.mean(self._diff), self._rho, self._rms_epsilon)
        
        
        # actDiff1 = (Action - self._q_valsActB) #TODO is this correct?
        # actDiff = (actDiff1 - (Action - self._q_valsActA))
        # actDiff = ((Action - self._q_valsActB2)) # Target network does not work well here?
        #self._actDiff = ((Action - self._q_valsActA)) # Target network does not work well here?
        #self._actLoss = 0.5 * self._actDiff ** 2 + (1e-4 * lasagne.regularization.regularize_network_params( self._l_outActA, lasagne.regularization.l2))
        #self._actLoss = T.mean(self._actLoss)
        
        
        
        
        self._train = theano.function([], [self._loss], updates=self._updates_, givens=self._givens_)
        self._forwardDynamics = theano.function([], self._forward,
                                       givens={State: self._states_shared,
                                                # Action: self._actions_shared
                                                })
        
        inputs_ = [State, 
                   # ResultState,
                   Action]
        self._bellman_error = theano.function(inputs=inputs_, outputs=self._diff, allow_input_downcast=True)
        # self._diffs = theano.function(input=[State])
        
        # grad_params_ = [self._states_shared]
        # grad_params_.extend(self._params)
        self._get_grad = theano.function([], outputs=lasagne.updates.get_or_compute_grads(self._loss, [lasagne.layers.get_all_layers(self._l_out)[0].input_var] + self._params), allow_input_downcast=True, givens=self._givens_)
Exemplo n.º 36
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 input=None,
                 n_visible=300,
                 n_hidden=150,
                 W=None,
                 bhid=None,
                 bvis=None):
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if W is None:
            # W is initialized with `initial_W`, which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) to
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX
            # so that the code is runnable on GPU
            print('W is None here')
            initial_W = numpy.asarray(numpy_rng.uniform(
                low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                size=(n_visible, n_hidden)),
                                      dtype=theano.config.floatX)
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if bvis is None:
            print('bvis is None here')
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                                   dtype=theano.config.floatX),
                                 name='bvis',
                                 borrow=True)

        if bhid is None:
            print('bhid is None here')
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='bhid',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]
Exemplo n.º 37
0
import gzip
import cPickle

f = gzip.open('C:/nnets/mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()

n_train, n_test = map(lambda x: len(x[0]), [train_set, test_set])
dims = train_set[0].shape[1]
n_classes = len(set(train_set[1]))

import numpy
import theano
import theano.tensor as T

X = T.dmatrix()
y = T.ivector()

prepare_data = lambda x: (theano.shared(x[0].astype('float64')),
                          theano.shared(x[1].astype('int32')))
(training_x, training_y), (test_x, test_y), (validation_x, validation_y) = map(
    prepare_data, [train_set, test_set, valid_set])

W = theano.shared(numpy.zeros([dims, n_classes]))
b = theano.shared(numpy.zeros(n_classes))

y_hat = T.nnet.softmax(T.dot(X, W) + b)
y_pred = T.argmax(y_hat, axis=1)
test_error = T.mean(T.neq(y_pred, y))
training_error = -T.mean(T.log(y_hat)[T.arange(y.shape[0]), y])
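# A minimal sketch of how this classifier could be trained with mini-batch SGD
# (the 0.1 learning rate, the batch size and the `train_model`/`test_model`
# names are illustrative assumptions, not part of the original snippet):
batch_size = 128
index = T.lscalar()
g_W, g_b = T.grad(training_error, [W, b])
train_model = theano.function(
    [index], training_error,
    updates=[(W, W - 0.1 * g_W), (b, b - 0.1 * g_b)],
    givens={X: training_x[index * batch_size:(index + 1) * batch_size],
            y: training_y[index * batch_size:(index + 1) * batch_size]})
test_model = theano.function([], test_error, givens={X: test_x, y: test_y})

# one pass over the training set, then report the test error
for i in range(n_train // batch_size):
    train_model(i)
print(test_model())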
Exemplo n.º 38
0
class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(numpy.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(numpy.zeros(out_size) + 0.1)
        self.Wx_plus_b = T.dot(inputs,self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.out_puts = self.Wx_plus_b
        else:
            self.out_puts = self.activation_function(self.Wx_plus_b)

x_data = numpy.linspace(-1, 1, 300)[:, numpy.newaxis]
noise = numpy.random.normal(0, 0.05, x_data.shape)
y_data = numpy.square(x_data) - 0.5 + noise

x = T.dmatrix('x')
y = T.dmatrix('y')
l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.out_puts, 10, 1, None)
cost = T.mean(T.square(l2.out_puts - y))

gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])
learning_rate = 0.05
train = theano.function(inputs=[x, y], outputs=cost, updates=[(l1.W, l1.W - learning_rate*gW1), \
                                                                 (l1.b, l1.b - learning_rate*gb1), \
                                                                 (l2.W, l2.W - learning_rate*gW2), \
                                                                 (l2.b, l2.b - learning_rate*gb2)])

predict = theano.function(inputs=[x],outputs=l2.out_puts)

flg = plt.figure()
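# A minimal sketch of how the training loop and the final plot could look
# (the 1000-step count and the reporting interval are illustrative choices,
# not from the original snippet):
for step in range(1000):
    err = train(x_data, y_data)
    if step % 100 == 0:
        print(err)

plt.scatter(x_data, y_data)
plt.plot(x_data, predict(x_data), 'r-', lw=3)
plt.show()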
Exemplo n.º 39
0
import numpy
import theano
import theano.tensor as T
from math import sqrt
rng = numpy.random

N = 400                                   # training sample size
feats = 784                               # number of input variables
hidden_layer = 100                        # number of hidden units


# generate a dataset: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare Theano symbolic variables
x = T.dmatrix("x")
y = T.dvector("y")

# initialize the weight vector w randomly
#
# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)
w0 = theano.shared(rng.randn(feats, hidden_layer), name="w0")
w1 = theano.shared(rng.randn(hidden_layer) * sqrt(2.0/hidden_layer), name="w1")

# initialize the bias term
b0 = theano.shared(0., name="b0")
b1 = theano.shared(0., name="b1")

print("Initial model:")
Exemplo n.º 40
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 input=None,
                 n_visible=784,
                 n_hidden=500,
                 W=None,
                 bhid=None,
                 bvis=None):
        self.n_hidden = n_hidden
        self.n_visible = n_visible

        #create a symbolic random variable:
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(23))

        if not W:
            W_values = numpy.asarray(numpy_rng.uniform(
                low=12 * numpy.sqrt(6. / (n_hidden + n_visible)),
                high=16 * numpy.sqrt(6. / (n_hidden + n_visible)),
                size=(n_visible, n_hidden)),
                                     dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W', borrow=True)

        if not bvis:
            bvis = theano.shared(
                value=4 * numpy.zeros(n_visible, dtype=theano.config.floatX),
                borrow=True)

        if not bhid:
            bhid = theano.shared(
                value=4 * numpy.zeros(n_hidden, dtype=theano.config.floatX),
                borrow=True)
        # we are using tied weights, in which the output weights are just the
        # transpose of the input ones.
        self.W = W
        self.W_prime = W.T
        self.b = bhid
        self.b_prime = bvis

        #        deltas = numpy.zeros(shape=(n_visible, n_hidden), dtype=theano.config.floatX)
        self.deltaW = theano.shared(value=numpy.zeros(
            shape=(n_visible, n_hidden), dtype=theano.config.floatX),
                                    borrow=True)

        self.deltaBvis = theano.shared(value=numpy.zeros(
            n_visible, dtype=theano.config.floatX),
                                       borrow=True)

        self.deltaBhid = theano.shared(value=numpy.zeros(
            n_hidden, dtype=theano.config.floatX),
                                       borrow=True)

        self.theano_rng = theano_rng

        if input is None:
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        # bundle up all the params. No W_prime as is updated whenever W is updated.
        self.params = [self.W, self.b, self.b_prime]
        self.gparams = []
        self.deltaParams = [self.deltaW, self.deltaBhid, self.deltaBvis]
Exemplo n.º 41
0
def fit_mmd(data):
	"""
	Fit isotropic Gaussian by minimizing maximum mean discrepancy.

	B{References:}
		- A. Gretton et al., I{A Kernel Method for the Two-Sample-Problem}, NIPS, 2007
		- Y. Li et al., I{Generative Moment Matching Networks}, ICML, 2015
	"""

	def gaussian_kernel(x, y, sigma=1.):
		return tt.exp(-tt.sum(tt.square(x - y)) / sigma**2)

	def mixed_kernel(x, y, sigma=[.5, 1., 2., 4., 8.]):
		return tt.sum([gaussian_kernel(x, y, s) for s in sigma])
		
	def gram_matrix(X, Y, kernel):
		M = X.shape[0]
		N = Y.shape[0]

		G, _ = th.scan(
			fn=lambda k: kernel(X[k // N], Y[k % N]),
			sequences=[tt.arange(M * N)])

		return G.reshape([M, N])

	# hiddens
	Z = tt.dmatrix('Z')

	# parameters
	b = th.shared(np.mean(data, 1)[None], broadcastable=[True, False])
	A = th.shared(np.std(data - b.get_value().T))

	# model samples
	X = Z * A + b

	# data
	Y = tt.dmatrix('Y')
	M = X.shape[0]
	N = Y.shape[0]

	Kyy = gram_matrix(Y, Y, mixed_kernel)
	Kxy = gram_matrix(X, Y, mixed_kernel)
	Kxx = gram_matrix(X, X, mixed_kernel)
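	# biased estimate of the squared MMD: mean(Kxx) - 2*mean(Kxy) + mean(Kyy)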

	MMDsq = tt.sum(Kxx) / M**2 - 2. / (N * M) * tt.sum(Kxy) + tt.sum(Kyy) / N**2
	MMD = tt.sqrt(MMDsq)

	f = th.function([Z, Y], [MMD, tt.grad(MMD, A), tt.grad(MMD, b)])

	# batch size, momentum, learning rate schedule
	B = 100
	mm = 0.8
	kappa = .7
	tau = 1.

	values = []

	try:
		for t in range(0, data.shape[1], B):
			if t % 10000 == 0:
				# reset momentum
				dA = 0.
				db = 0.

			Z = nr.randn(B, data.shape[0])
			Y = data.T[t:t + B]

			lr = np.power(tau + (t + B) / B, -kappa)

			v, gA, gb = f(Z, Y)
			dA = mm * dA - lr * gA
			db = mm * db - lr * gb

			values.append(v)

			A.set_value(A.get_value() + dA)
			b.set_value(b.get_value() + db)

			print('{0:>6} {1:.4f}'.format(t, np.mean(values[-100:])))

	except KeyboardInterrupt:
		pass

	return A.get_value() * np.eye(data.shape[0]), b.get_value().T
Exemplo n.º 42
0
import theano.tensor as T
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
import numpy

random = RandomStreams(seed=42)

a = random.normal((1, 3))
b = T.dmatrix('a')

f1 = a * b

g1 = function([b], f1)
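# note: each call to g1 draws a fresh sample for `a`, because theano.function
# applies the RandomStreams state updates by default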

print('numpy.ones((1,3)=', numpy.ones((1, 3)))
print('numpy.ones((1,3)=', numpy.ones((1, 3)))
print('numpy.ones((1,3)=', numpy.ones((1, 3)))

for i in range(50):
    print("Invocation 1:", g1(numpy.ones((1, 3))))
Exemplo n.º 43
0
    def test_sparse():

        print '\n\n*************************************************'
        print '           TEST SPARSE'
        print '*************************************************'

        # fixed parameters
        bsize = 10  # batch size
        imshp = (28, 28)
        kshp = (5, 5)
        nkern = 1  # per output pixel
        ssizes = ((1, 1), (2, 2))
        convmodes = (
            'full',
            'valid',
        )

        # symbolic stuff
        bias = T.dvector()
        kerns = T.dvector()
        input = T.dmatrix()
        rng = N.random.RandomState(3423489)

        import theano.gof as gof
        #Mode(optimizer='fast_run', linker=gof.OpWiseCLinker(allow_gc=False)),):
        ntot, ttot = 0, 0
        for conv_mode in convmodes:
            for ss in ssizes:

                output, outshp = sp.applySparseFilter(kerns, kshp,\
                        nkern, input, imshp, ss, bias=bias, mode=conv_mode)
                f = function([kerns, bias, input], output)

                # build actual input images
                img2d = N.arange(bsize * N.prod(imshp)).reshape((bsize, ) +
                                                                imshp)
                img1d = img2d.reshape(bsize, -1)
                zeropad_img = N.zeros((bsize,\
                                       img2d.shape[1]+2*(kshp[0]-1),\
                                       img2d.shape[2]+2*(kshp[1]-1)))
                zeropad_img[:, kshp[0] - 1:kshp[0] - 1 + img2d.shape[1],
                            kshp[1] - 1:kshp[1] - 1 + img2d.shape[2]] = img2d

                # build kernel matrix -- flatten it for theano stuff
                filters = N.arange(N.prod(outshp)*N.prod(kshp)).\
                            reshape(nkern,N.prod(outshp[1:]),N.prod(kshp))
                spfilt = filters.flatten()
                biasvals = N.arange(N.prod(outshp))

                # compute output by hand
                ntime1 = time.time()
                refout = N.zeros((bsize, nkern, outshp[1], outshp[2]))
                patch = N.zeros((kshp[0], kshp[1]))
                for b in xrange(bsize):
                    for k in xrange(nkern):
                        pixi = 0  # pixel index in raster order
                        for j in xrange(outshp[1]):
                            for i in xrange(outshp[2]):
                                n = j * ss[0]
                                m = i * ss[1]
                                patch = zeropad_img[b, n:n + kshp[0],
                                                    m:m + kshp[1]]
                                refout[b,k,j,i] = N.dot(filters[k,pixi,:],\
                                                        patch.flatten())
                                pixi += 1
                refout = refout.reshape(bsize, -1) + biasvals
                ntot += time.time() - ntime1
                # need to flatten images
                ttime1 = time.time()
                out1 = f(spfilt, biasvals, img1d)
                ttot += time.time() - ttime1
                temp = refout - out1
                assert (temp < 1e-10).all()
                # test downward propagation
                vis = T.grad(output, input, output)
                downprop = function([kerns, output], vis)
                temp1 = time.time()
                for zz in range(100):
                    visval = downprop(spfilt, out1)
                indices, indptr, spmat_shape, sptype, outshp, kmap = \
                        sp.convolution_indices.sparse_eval(imshp,kshp,nkern,ss,conv_mode)
                spmat = sparse.csc_matrix((spfilt[kmap], indices, indptr),
                                          spmat_shape)
                visref = N.dot(out1, spmat.todense())
                assert N.all(visref == visval)

            print '**** Sparse Profiling Results ****'
            print 'Numpy processing time: ', ntot
            print 'Theano processing time: ', ttot
Exemplo n.º 44
0
def mogaussian(D=2, K=10, N=100000, seed=2, D_max=100):
    """
    Creates a random mixture of Gaussians and corresponding samples.

    @rtype: C{tuple}
    @return: a function representing the density and samples
    """

    nr.seed(seed)

    # mixture weights
    p = nr.dirichlet([.5] * K)

    # variances
    v = 1. / np.square(nr.rand(K) + 1.)

    # means; D_max makes sure that data only depends on seed and not on D
    m = nr.randn(D_max, K) * 1.5
    m = m[:D]

    # density function
    X = tt.dmatrix('X')
    C = [np.eye(D) * _ for _ in v]

    def log_p(X):
        """
        @type  X: C{ndarray}/C{TensorVariable}
        @param X: data points stored column-wise

        @rtype: C{ndarray}/C{TensorVariable}
        """

        if isinstance(X, tt.TensorVariable):
            return tt.log(
                tt.sum(
                    [p[i] * normal(X, m[:, [i]], C[i]) for i in range(len(p))],
                    0))
        else:
            if log_p.f is None:
                Y = tt.dmatrix('Y')
                log_p.f = th.function([Y], log_p(Y))
            return log_p.f(X)

    log_p.f = None

    def nonlog_p(X):
        """
        @type  X: C{ndarray}/C{TensorVariable}
        @param X: data points stored column-wise

        @rtype: C{ndarray}/C{TensorVariable}
        """

        if isinstance(X, tt.TensorVariable):
            return tt.sum(
                [p[i] * normal(X, m[:, [i]], C[i]) for i in range(len(p))], 0)
        else:
            if nonlog_p.f is None:
                Y = tt.dmatrix('Y')
                nonlog_p.f = th.function([Y], nonlog_p(Y))
            return nonlog_p.f(X)

    nonlog_p.f = None

    # sample data
    M = nr.multinomial(N, p)
    data = np.hstack(
        nr.randn(D, M[i]) * np.sqrt(v[i]) + m[:, [i]] for i in range(len(p)))
    data = data[:, nr.permutation(N)]

    return nonlog_p, log_p, data
Exemplo n.º 45
0
    def test_searchsortedOp_on_no_1d_inp(self):
        no_1d = tt.dmatrix("no_1d")
        with pytest.raises(ValueError):
            searchsorted(no_1d, self.v)
        with pytest.raises(ValueError):
            searchsorted(self.x, self.v, sorter=no_1d)
Exemplo n.º 46
0
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)


# Make up some fake data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise  # y = x^2 - 0.5

# show the fake data
plt.scatter(x_data, y_data)
plt.show()

# determine the inputs dtype
x = T.dmatrix("x")
y = T.dmatrix("y")

# add layers
l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)

# compute the cost
cost = T.mean(T.square(l2.outputs - y))

# compute the gradients
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

# apply gradient descent
learning_rate = 0.05
train = theano.function(inputs=[x, y],
                        outputs=cost,
                        updates=[(l1.W, l1.W - learning_rate * gW1),
                                 (l1.b, l1.b - learning_rate * gb1),
                                 (l2.W, l2.W - learning_rate * gW2),
                                 (l2.b, l2.b - learning_rate * gb2)])
Exemplo n.º 47
0
def evaluate_lenet5(learning_rate=0.085, n_epochs=2000, nkerns=[50], batch_size=1, window_width=3,
                    maxSentLength=60, emb_size=300, hidden_size=200,
                    margin=0.5, L2_weight=0.0001, update_freq=1, norm_threshold=5.0):

    model_options = locals().copy()
    print "model options", model_options
    rootPath='/mounts/data/proj/wenpeng/Dataset/MicrosoftParaphrase/tokenized_msr/';
    rng = numpy.random.RandomState(23455)
    datasets, vocab_size=load_msr_corpus(rootPath+'vocab.txt', rootPath+'tokenized_train.txt', rootPath+'tokenized_test.txt', maxSentLength)
    mtPath='/mounts/data/proj/wenpeng/Dataset/paraphraseMT/'
    mt_train, mt_test=load_mts(mtPath+'concate_15mt_train.txt', mtPath+'concate_15mt_test.txt')
    wm_train, wm_test=load_wmf_wikiQA(rootPath+'train_word_matching_scores_normalized.txt', rootPath+'test_word_matching_scores_normalized.txt')
    indices_train, trainY, trainLengths, normalized_train_length, trainLeftPad, trainRightPad= datasets[0]
    indices_train_l=indices_train[::2,:]
    indices_train_r=indices_train[1::2,:]
    trainLengths_l=trainLengths[::2]
    trainLengths_r=trainLengths[1::2]
    normalized_train_length_l=normalized_train_length[::2]
    normalized_train_length_r=normalized_train_length[1::2]

    trainLeftPad_l=trainLeftPad[::2]
    trainLeftPad_r=trainLeftPad[1::2]
    trainRightPad_l=trainRightPad[::2]
    trainRightPad_r=trainRightPad[1::2]    
    indices_test, testY, testLengths,normalized_test_length, testLeftPad, testRightPad= datasets[1]
    indices_test_l=indices_test[::2,:]
    indices_test_r=indices_test[1::2,:]
    testLengths_l=testLengths[::2]
    testLengths_r=testLengths[1::2]
    normalized_test_length_l=normalized_test_length[::2]
    normalized_test_length_r=normalized_test_length[1::2]
    
    testLeftPad_l=testLeftPad[::2]
    testLeftPad_r=testLeftPad[1::2]
    testRightPad_l=testRightPad[::2]
    testRightPad_r=testRightPad[1::2]  

    n_train_batches=indices_train_l.shape[0]/batch_size
    n_test_batches=indices_test_l.shape[0]/batch_size
    
    train_batch_start=list(numpy.arange(n_train_batches)*batch_size)
    test_batch_start=list(numpy.arange(n_test_batches)*batch_size)

    
    indices_train_l=theano.shared(numpy.asarray(indices_train_l, dtype=theano.config.floatX), borrow=True)
    indices_train_r=theano.shared(numpy.asarray(indices_train_r, dtype=theano.config.floatX), borrow=True)
    indices_test_l=theano.shared(numpy.asarray(indices_test_l, dtype=theano.config.floatX), borrow=True)
    indices_test_r=theano.shared(numpy.asarray(indices_test_r, dtype=theano.config.floatX), borrow=True)
    indices_train_l=T.cast(indices_train_l, 'int64')
    indices_train_r=T.cast(indices_train_r, 'int64')
    indices_test_l=T.cast(indices_test_l, 'int64')
    indices_test_r=T.cast(indices_test_r, 'int64')
    


    rand_values=random_value_normal((vocab_size+1, emb_size), theano.config.floatX, numpy.random.RandomState(1234))
    rand_values[0]=numpy.array(numpy.zeros(emb_size))
    #rand_values[0]=numpy.array([1e-50]*emb_size)
    rand_values=load_word2vec_to_init(rand_values, rootPath+'vocab_embs_300d.txt')
    embeddings=theano.shared(value=rand_values, borrow=True)      
    
    cost_tmp=0
    error_sum=0
    
    # allocate symbolic variables for the data
    index = T.lscalar()
    x_index_l = T.lmatrix('x_index_l')   # now, x is the index matrix, must be integer
    x_index_r = T.lmatrix('x_index_r')
    y = T.lvector('y')  
    left_l=T.lscalar()
    right_l=T.lscalar()
    left_r=T.lscalar()
    right_r=T.lscalar()
    length_l=T.lscalar()
    length_r=T.lscalar()
    norm_length_l=T.dscalar()
    norm_length_r=T.dscalar()
    mts=T.dmatrix()
    wmf=T.dmatrix()
    cost_tmp=T.dscalar()
    #x=embeddings[x_index.flatten()].reshape(((batch_size*4),maxSentLength, emb_size)).transpose(0, 2, 1).flatten()
    ishape = (emb_size, maxSentLength)  # each sentence is treated as an emb_size-by-maxSentLength "image"
    filter_size=(emb_size,window_width)
    #poolsize1=(1, ishape[1]-filter_size[1]+1) #?????????????????????????????
    length_after_wideConv=ishape[1]+filter_size[1]-1
    
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape the matrices of word indices into 4D tensors of embeddings,
    # compatible with our LeNetConvPoolLayer: (batch_size, 1, emb_size, maxSentLength)
    #layer0_input = x.reshape(((batch_size*4), 1, ishape[0], ishape[1]))
    layer0_l_input = embeddings[x_index_l.flatten()].reshape((batch_size,maxSentLength, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
    layer0_r_input = embeddings[x_index_r.flatten()].reshape((batch_size,maxSentLength, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
    
    
    conv_W, conv_b=create_conv_para(rng, filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]))

    #layer0_output = debug_print(layer0.output, 'layer0.output')
    layer0_l = Conv_with_input_para(rng, input=layer0_l_input,
            image_shape=(batch_size, 1, ishape[0], ishape[1]),
            filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]), W=conv_W, b=conv_b)
    layer0_r = Conv_with_input_para(rng, input=layer0_r_input,
            image_shape=(batch_size, 1, ishape[0], ishape[1]),
            filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]), W=conv_W, b=conv_b)
    layer0_l_output=debug_print(layer0_l.output, 'layer0_l.output')
    layer0_r_output=debug_print(layer0_r.output, 'layer0_r.output')
    
    layer1=Average_Pooling_for_Top(rng, input_l=layer0_l_output, input_r=layer0_r_output, kern=nkerns[0],
                                       left_l=left_l, right_l=right_l, left_r=left_r, right_r=right_r, 
                                       length_l=length_l+filter_size[1]-1, length_r=length_r+filter_size[1]-1,
                                       dim=maxSentLength+filter_size[1]-1)
    

    
    
    #layer2=HiddenLayer(rng, input=layer1_out, n_in=nkerns[0]*2, n_out=hidden_size, activation=T.tanh)
    
    
    sum_uni_l=T.sum(layer0_l_input, axis=3).reshape((1, emb_size))
    norm_uni_l=sum_uni_l/T.sqrt((sum_uni_l**2).sum())
    sum_uni_r=T.sum(layer0_r_input, axis=3).reshape((1, emb_size))
    norm_uni_r=sum_uni_r/T.sqrt((sum_uni_r**2).sum())
    
    uni_cosine=cosine(sum_uni_l, sum_uni_r)
    '''
    linear=Linear(sum_uni_l, sum_uni_r)
    poly=Poly(sum_uni_l, sum_uni_r)
    sigmoid=Sigmoid(sum_uni_l, sum_uni_r)
    rbf=RBF(sum_uni_l, sum_uni_r)
    gesd=GESD(sum_uni_l, sum_uni_r)
    '''
    eucli_1=1.0/(1.0+EUCLID(sum_uni_l, sum_uni_r))#25.2%
    #eucli_1=EUCLID(sum_uni_l, sum_uni_r)
    
    len_l=norm_length_l.reshape((1,1))
    len_r=norm_length_r.reshape((1,1))  
    
    '''
    len_l=length_l.reshape((1,1))
    len_r=length_r.reshape((1,1))  
    '''
    #length_gap=T.log(1+(T.sqrt((len_l-len_r)**2))).reshape((1,1))
    #length_gap=T.sqrt((len_l-len_r)**2)
    #layer3_input=mts
    layer3_input=T.concatenate([mts, 
                                eucli_1, #uni_cosine,#norm_uni_l-(norm_uni_l+norm_uni_r)/2,#uni_cosine, #
                                layer1.output_eucli_to_simi, #layer1.output_cosine,#layer1.output_vector_l-(layer1.output_vector_l+layer1.output_vector_r)/2,#layer1.output_cosine, #
                                len_l, len_r,
                                #layer1.output_attentions,
                                #wmf,
                                ], axis=1)#, layer2.output, layer1.output_cosine], axis=1)
    #layer3_input=T.concatenate([mts,eucli, uni_cosine, len_l, len_r, norm_uni_l-(norm_uni_l+norm_uni_r)/2], axis=1)
    #layer3=LogisticRegression(rng, input=layer3_input, n_in=11, n_out=2)
    layer3=LogisticRegression(rng, input=layer3_input, n_in=15+(2)+(2)+2, n_out=2)
    
    #L2_reg =(layer3.W** 2).sum()+(layer2.W** 2).sum()+(layer1.W** 2).sum()+(conv_W** 2).sum()
    L2_reg =debug_print((layer3.W** 2).sum()+(conv_W** 2).sum(), 'L2_reg')#+(layer1.W** 2).sum()
    cost_this =debug_print(layer3.negative_log_likelihood(y), 'cost_this')#+L2_weight*L2_reg
    cost=debug_print((cost_this+cost_tmp)/update_freq+L2_weight*L2_reg, 'cost')
    

    
    test_model = theano.function([index], [layer3.errors(y), layer3.y_pred, layer3_input, y],
          givens={
            x_index_l: indices_test_l[index: index + batch_size],
            x_index_r: indices_test_r[index: index + batch_size],
            y: testY[index: index + batch_size],
            left_l: testLeftPad_l[index],
            right_l: testRightPad_l[index],
            left_r: testLeftPad_r[index],
            right_r: testRightPad_r[index],
            length_l: testLengths_l[index],
            length_r: testLengths_r[index],
            norm_length_l: normalized_test_length_l[index],
            norm_length_r: normalized_test_length_r[index],
            mts: mt_test[index: index + batch_size],
            wmf: wm_test[index: index + batch_size]}, on_unused_input='ignore')


    #params = layer3.params + layer2.params + layer1.params+ [conv_W, conv_b]
    params = layer3.params+ [conv_W, conv_b]# + layer1.params 
    
    accumulator=[]
    for para_i in params:
        eps_p=numpy.zeros_like(para_i.get_value(borrow=True),dtype=theano.config.floatX)
        accumulator.append(theano.shared(eps_p, borrow=True))
      
    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i, acc_i in zip(params, grads, accumulator):
        #grad_i=debug_print(grad_i,'grad_i')
        #norm=T.sqrt((grad_i**2).sum())
        #if T.lt(norm_threshold, norm):
        #    print 'big norm'
        #    grad_i=grad_i*(norm_threshold/norm)
        acc = acc_i + T.sqr(grad_i)
        updates.append((param_i, param_i - learning_rate * grad_i / T.sqrt(acc)))   #AdaGrad
        updates.append((acc_i, acc))    
  
    train_model = theano.function([index,cost_tmp], [cost,layer3.errors(y), layer3_input], updates=updates,
          givens={
            x_index_l: indices_train_l[index: index + batch_size],
            x_index_r: indices_train_r[index: index + batch_size],
            y: trainY[index: index + batch_size],
            left_l: trainLeftPad_l[index],
            right_l: trainRightPad_l[index],
            left_r: trainLeftPad_r[index],
            right_r: trainRightPad_r[index],
            length_l: trainLengths_l[index],
            length_r: trainLengths_r[index],
            norm_length_l: normalized_train_length_l[index],
            norm_length_r: normalized_train_length_r[index],
            mts: mt_train[index: index + batch_size],
            wmf: wm_train[index: index + batch_size]}, on_unused_input='ignore')

    train_model_predict = theano.function([index], [cost_this,layer3.errors(y), layer3_input, y , sum_uni_l, sum_uni_r, uni_cosine],
          givens={
            x_index_l: indices_train_l[index: index + batch_size],
            x_index_r: indices_train_r[index: index + batch_size],
            y: trainY[index: index + batch_size],
            left_l: trainLeftPad_l[index],
            right_l: trainRightPad_l[index],
            left_r: trainLeftPad_r[index],
            right_r: trainRightPad_r[index],
            length_l: trainLengths_l[index],
            length_r: trainLengths_r[index],
            norm_length_l: normalized_train_length_l[index],
            norm_length_r: normalized_train_length_r[index],
            mts: mt_train[index: index + batch_size],
            wmf: wm_train[index: index + batch_size]}, on_unused_input='ignore')



    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 500000000000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches/5, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    
    max_acc=0.0
    best_epoch=0

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        #for minibatch_index in xrange(n_train_batches): # each batch
        minibatch_index=0
        #shuffle(train_batch_start)#shuffle training data
        cost_tmp=0.0
        for batch_start in train_batch_start: 
            # iter counts how many minibatches have been run so far, across epochs
            iter = (epoch - 1) * n_train_batches + minibatch_index +1

            minibatch_index=minibatch_index+1
            #if epoch %2 ==0:
            #    batch_start=batch_start+remain_train
            #time.sleep(0.5)
            if iter%update_freq != 0:
                cost_ij, error_ij, layer3_input, y, sum_uni_l, sum_uni_r, uni_cosine=train_model_predict(batch_start)
                #print 'cost_ij: ', cost_ij
                cost_tmp+=cost_ij
                error_sum+=error_ij
            else:
                cost_average, error_ij, layer3_input= train_model(batch_start,cost_tmp)
                #print 'training @ iter = '+str(iter)+' average cost: '+str(cost_average)+' sum error: '+str(error_sum)+'/'+str(update_freq)
                error_sum=0
                cost_tmp=0.0#reset for the next batch
                #print layer3_input
                #exit(0)
            #exit(0)
            if iter % n_train_batches == 0:
                print 'training @ iter = '+str(iter)+' average cost: '+str(cost_average)+' error: '+str(error_sum)+'/'+str(update_freq)+' error rate: '+str(error_sum*1.0/update_freq)
            #if iter ==1:
            #    exit(0)
            
            if iter % validation_frequency == 0:
                #write_file=open('log.txt', 'w')
                test_losses=[]
                for i in test_batch_start:
                    test_loss, pred_y, layer3_input, y=test_model(i)
                    #test_losses = [test_model(i) for i in test_batch_start]
                    test_losses.append(test_loss)
                    #write_file.write(str(pred_y[0])+'\n')#+'\t'+str(testY[i].eval())+

                #write_file.close()
                test_score = numpy.mean(test_losses)
                print(('\t\t\t\t\t\tepoch %i, minibatch %i/%i, test acc of best '
                           'model %f %%') %
                          (epoch, minibatch_index, n_train_batches,
                           (1-test_score) * 100.))
                #now, see the results of svm
                write_feature=open('feature_check.txt', 'w')
                train_y=[]
                train_features=[]
                for batch_start in train_batch_start: 
                    cost_ij, error_ij, layer3_input, y, sum_uni_l, sum_uni_r, uni_cosine=train_model_predict(batch_start)
                    train_y.append(y[0])
                    train_features.append(layer3_input[0])
                    write_feature.write(' '.join(map(str,layer3_input[0]))+'\n')
                write_feature.close()
                test_y=[]
                test_features=[]
                for i in test_batch_start:
                    test_loss, pred_y, layer3_input, y=test_model(i)
                    test_y.append(y[0])
                    test_features.append(layer3_input[0])
                clf = svm.SVC(kernel='linear')#OneVsRestClassifier(LinearSVC()) #linear 76.11%, poly 75.19, sigmoid 66.50, rbf 73.33
                clf.fit(train_features, train_y)
                results=clf.predict(test_features)
                lr=LinearRegression().fit(train_features, train_y)
                results_lr=lr.predict(test_features)
                corr_count=0
                corr_lr=0
                test_size=len(test_y)
                for i in range(test_size):
                    if results[i]==test_y[i]:
                        corr_count+=1
                    if numpy.absolute(results_lr[i]-test_y[i])<0.5:
                        corr_lr+=1
                acc=corr_count*1.0/test_size
                acc_lr=corr_lr*1.0/test_size
                if acc > max_acc:
                    max_acc=acc
                    best_epoch=epoch
                if acc_lr> max_acc:
                    max_acc=acc_lr
                    best_epoch=epoch
                print '\t\t\t\t\t\t\t\t\t\t\tsvm acc: ', acc, 'LR acc: ', acc_lr, ' max acc: ',    max_acc , ' at epoch: ', best_epoch     
                #exit(0)
            if patience <= iter:
                done_looping = True
                break
        #after each epoch, increase the batch_size
        if epoch%2==1:
            update_freq=update_freq*1
        else:
            update_freq=update_freq/1
        #print 'Batch_size: ', update_freq
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Exemplo n.º 48
0
    def train(self, input_train, target_train=None, input_test=None,
              target_test=None, epochs=100, epsilon=None,
              summary_type='table'):
        """
        Train the neural network.

        Parameters
        ----------
        input_train : array-like
        target_train : array-like or None
        input_test : array-like or None
        target_test : array-like or None
        epochs : int
            Defaults to `100`.
        epsilon : float or None
            Defaults to ``None``.
        """
        show_epoch = self.show_epoch
        logs = self.logs
        training = self.training = AttributeKeyDict()

        if epochs <= 0:
            raise ValueError("Number of epochs needs to be greater than 0.")

        if epsilon is not None and epochs <= 2:
            raise ValueError("Network should train at teast 3 epochs before "
                             "check the difference between errors")

        if summary_type == 'table':
            logging_info_about_the_data(self, input_train, input_test)
            logging_info_about_training(self, epochs, epsilon)
            logs.newline()

            summary = SummaryTable(
                table_builder=table.TableBuilder(
                    table.Column(name="Epoch #"),
                    table.NumberColumn(name="Train err"),
                    table.NumberColumn(name="Valid err"),
                    table.TimeColumn(name="Time", width=10),
                    stdout=logs.write
                ),
                network=self,
                delay_limit=1.,
                delay_history_length=10,
            )

        elif summary_type == 'inline':
            summary = InlineSummary(network=self)

        else:
            raise ValueError("`{}` is unknown summary type"
                             "".format(summary_type))

        iterepochs = create_training_epochs_iterator(self, epochs, epsilon)
        show_epoch = parse_show_epoch_property(self, epochs, epsilon)
        training.show_epoch = show_epoch

        # Storing attributes and methods in local variables prevents
        # a lot of useless __getattr__ calls in each loop iteration.
        # These local variables speed up the loop when the number of
        # iterations is huge.
        training_errors = self.errors
        validation_errors = self.validation_errors
        shuffle_data = self.shuffle_data

        train_epoch = self.train_epoch
        epoch_end_signal = self.epoch_end_signal
        train_end_signal = self.train_end_signal
        on_epoch_start_update = self.on_epoch_start_update

        is_first_iteration = True
        can_compute_validation_error = (input_test is not None)
        last_epoch_shown = 0


        symMatrix = tt.dmatrix("symMatrix")
        symEigenvalues, eigenvectors = tt.nlinalg.eig(symMatrix)
        get_Eigen = theano.function([symMatrix], [symEigenvalues, eigenvectors] )

        epsilon = []
        alpha = []
        alpha0 = []
        with logs.disable_user_input():
            for epoch in iterepochs:
                validation_error = None
                epoch_start_time = time.time()
                on_epoch_start_update(epoch)

                if shuffle_data:
                    input_train, target_train = shuffle(input_train,
                                                        target_train)
                try:
                    train_error = train_epoch(input_train, target_train)
                    H = self.variables.hessian.get_value()
                    ev, _ = get_Eigen(H)
                    if can_compute_validation_error:
                        validation_error = self.prediction_error(input_test,
                                                                 target_test)
                    epsilon.append(train_error)
                    alpha.append(numpy.sum(ev < 0))
                    alpha0.append(numpy.sum(ev == 0))
                    
                    training_errors.append(train_error)
                    validation_errors.append(validation_error)

                    epoch_finish_time = time.time()
                    training.epoch_time = epoch_finish_time - epoch_start_time

                    if epoch % training.show_epoch == 0 or is_first_iteration:
                        summary.show_last()
                        last_epoch_shown = epoch

                    if epoch_end_signal is not None:
                        epoch_end_signal(self)

                    is_first_iteration = False

                except StopNetworkTraining as err:
                    # TODO: This notification breaks table view in terminal.
                    # I need to show it in a different way.
                    logs.message("TRAIN", "Epoch #{} stopped. {}"
                                          "".format(epoch, str(err)))
                    break

            if epoch != last_epoch_shown:
                summary.show_last()

            if train_end_signal is not None:
                train_end_signal(self)

            summary.finish()
            logs.newline()
            plt.plot(alpha,epsilon,'r')
            plt.plot(alpha0,epsilon,'b')
            plt.xlabel('alpha')
            plt.ylabel('epsilon')
            
            # want to collect the output of stdout in a variable
            capture = StringIO()
            capture.truncate(0)
            save_stdout = sys.stdout
            sys.stdout = capture
            print self.connection
            sys.stdout=save_stdout
            s =  capture.getvalue()
            s = s.split('\n')[0]
            class_name = self.class_name()

            str1 = s + '---' + class_name + '-alpha-epsilon' + '.eps'
            plt.savefig(str1, format='eps', dpi=1000)
            plt.plot(iterepochs, epsilon)
            plt.xlabel('iterepochs')
            plt.ylabel('epsilon')
            str2 = s + '---' + class_name + '-epsilon-iterepochs' + '.eps'
            plt.savefig(str2, format='eps', dpi=1000)
def train(num_epochs, batch_size, X_train, y_train, X_val, y_val, input_dim, output_dim, depth, num_units,
          drop_input=None, drop_hidden=None, report=50):
    """
    Train neural network.
    :param num_epochs: training epochs count
    :param batch_size: integer
    :param X_train: numpy array with train data
    :param y_train: numpy array with train targets
    :param X_val: numpy array with validation data
    :param y_val: numpy arrays with validation targets
    :param input_dim: count of input units
    :param output_dim: count of output units
    :param depth: hidden layers count
    :param num_units: count of units in hidden layers
    :param drop_input: input dropout value
    :param drop_hidden: hidden dropout value
    :param report: report output frequency
    :return: lasagne network
    """
    input_var = T.dmatrix('inputs')
    target_var = T.imatrix('targets')

    network = build_mlp(input_dim, output_dim, depth, num_units, drop_input, drop_hidden, input_var)

    # create a loss expression for training
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # create update expressions for training
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)

    # create a loss expression for validation with deterministic forward pass (disable dropout layers)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()

    # create an expression for the classification accuracy
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), T.argmax(target_var, axis=1)),
                      dtype=theano.config.floatX)

    # compile a function performing a training step on a mini-batch
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # compile a function computing the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    for epoch in range(num_epochs):
        # full pass over the training data
        train_err = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # full pass over the validation data
        val_err = 0
        val_acc = 0
        val_batches = 0

        for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        if (epoch + 1) % report == 0:
            print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
            print("\ttraining loss:\t\t\t{:.6f}".format(train_err / train_batches))
            print("\tvalidation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("\tvalidation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

    return network
Exemplo n.º 50
0
    def test_convolution(self):
        #        print '\n\n*************************************************'
        #        print '           TEST CONVOLUTION'
        #        print '*************************************************'

        # fixed parameters
        bsize = 10  # batch size
        imshp = (28, 28)
        kshp = (5, 5)
        nkern = 5
        ssizes = ((1, 1), (2, 2), (3, 3), (4, 4))
        convmodes = ('full', 'valid')

        # symbolic stuff
        bias = tensor.dvector()
        kerns = tensor.dmatrix()
        input = tensor.dmatrix()
        rng = numpy.random.RandomState(3423489)
        filters = rng.randn(nkern, numpy.prod(kshp))
        biasvals = rng.randn(nkern)

        for mode in ('FAST_COMPILE', 'FAST_RUN'):  # , profmode):
            ttot, ntot = 0, 0
            for conv_mode in convmodes:
                for ss in ssizes:

                    output, outshp = sp.convolve(kerns, kshp, nkern, input,\
                            imshp, ss, bias=bias, mode=conv_mode)
                    f = function([kerns, bias, input], output, mode=mode)

                    # now test with real values
                    img2d = numpy.arange(bsize * numpy.prod(imshp)).reshape(( \
                                                            bsize,) + imshp)
                    img1d = img2d.reshape(bsize, -1)

                    # create filters (need to be flipped to use convolve2d)
                    filtersflipped = numpy.zeros((nkern, ) + kshp)
                    for k in range(nkern):
                        it = reversed(filters[k, :])
                        for i in range(kshp[0]):
                            for j in range(kshp[1]):
                                filtersflipped[k, i, j] = next(it)

                    # compute output with convolve2d
                    if conv_mode == 'valid':
                        fulloutshp = numpy.array(imshp) - numpy.array(kshp) + 1
                    else:
                        fulloutshp = numpy.array(imshp) + numpy.array(kshp) - 1
                    ntime1 = time.time()
                    refout = numpy.zeros((bsize, ) + tuple(fulloutshp) +
                                         (nkern, ))
                    for b in range(bsize):
                        for n in range(nkern):
                            refout[b, ...,
                                   n] = convolve2d(img2d[b, :, :],
                                                   filtersflipped[n, ...],
                                                   conv_mode)
                    ntot += time.time() - ntime1

                    # need to flatten images
                    bench1 = refout[:, 0::ss[0],
                                    0::ss[1], :].reshape(bsize, -1, nkern)
                    bench1 += biasvals.reshape(1, 1, nkern)

                    # swap the last two dimensions (output needs to be nkern x outshp)
                    bench1 = numpy.swapaxes(bench1, 1, 2)
                    ttime1 = time.time()
                    out1 = f(filters, biasvals, img1d)
                    ttot += time.time() - ttime1
                    temp = numpy.abs(bench1.flatten() - out1.flatten())

                    assert (temp < 1e-5).all()
Exemplo n.º 51
0
    def __init__(self,
                 theano_rng=None,
                 input=None,
                 n_visible=None,
                 n_hidden=None,
                 W=None,
                 bhid=None,
                 bvis=None,
                 activation=None,
                 firstlayer=1,
                 variance=None):

        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if not W:
            # Gaussian init scaled by the inverse square root of the fan-in (n_visible)
            initial_W = numpy.asarray(theano_rng.normal(0.0,
                                                        1.0 / numpy.sqrt(n_visible),
                                                        size=(n_visible,
                                                              n_hidden)),
                                      dtype=theano.config.floatX)
            W = theano.shared(value=initial_W, name='W')
            #initial_W = numpy.asarray( numpy_rng.uniform(
            #          low  = -4*numpy.sqrt(6./(n_hidden+n_visible)),
            #          high =  4*numpy.sqrt(6./(n_hidden+n_visible)),
            #          size = (n_visible, n_hidden)),
            #                           dtype = theano.config.floatX)

        if not bvis:
            bvis = theano.shared(
                value=numpy.zeros(n_visible, dtype=theano.config.floatX))

        if not bhid:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b')

        self.W = W
        self.b = bhid
        self.b_prime = bvis
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        self.activation = activation

        if input is None:
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]

        # first layer, use Gaussian noise
        self.firstlayer = firstlayer

        if self.firstlayer == 1:
            if variance is None:
                self.var = T.vector(name='input')
            else:
                self.var = variance
        else:
            self.var = None
Exemplo n.º 52
0
"eval()" takes a dictionary with names of variables and values to be assigned to them

eval() utimately imports a "function()", so we end up in the same situation, so it is
slower the first time we invoke this, subsequent invocations are faster since it saves
"function()" imported already

that way we don't need to import "function()", but importing and using it is more 
flexible than relyin on "eval()" itself
'''
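
'''
A minimal sketch of the eval() route described above (added here for illustration,
not part of the original snippet): the keys of the dictionary are the symbolic
variables themselves, and the first call triggers the compilation.
'''
import theano.tensor as T

ex = T.dscalar('ex')   # illustrative names, distinct from the x/y defined below
ey = T.dscalar('ey')
print((ex + ey).eval({ex: 2.0, ey: 3.0}))   # compiles on first use, prints 5.0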

'''
Addition of two matrices is also very simple; the only difference is using
T.dmatrix instead of T.dscalar.
'''

x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
f2 = function([x, y], z)

'''
Now we do not assign the matrices to x and y directly; instead we pass the matrices
to the compiled function as arguments, invoking it like "f2(matrix_nr_1, matrix_nr_2)".

Here we add two 2-dimensional matrices elementwise.
'''

a1 = [[1, 2],
      [3, 4]]
a2 = [[10, 20],
      [30, 40]]
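
'''
A hedged completion of this snippet (the actual call is cut off in this excerpt):
invoke the compiled function on the two lists defined above; Theano coerces them
to arrays and returns the elementwise sums.
'''
print(f2(a1, a2))   # elementwise sums: [[11, 22], [33, 44]]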
Exemplo n.º 53
0
def exec_multilayer_conv_nnet_old(conv_mode,
                                  ss,
                                  bsize,
                                  imshp,
                                  kshps,
                                  nkerns,
                                  unroll_batch=0,
                                  unroll_kern=0,
                                  img=T.dmatrix(),
                                  validate=True,
                                  conv_op_py=False,
                                  do_print=True,
                                  repeat=1,
                                  unroll_patch=False,
                                  unroll_patch_size=False,
                                  verbose=0):

    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a = T.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()

    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0

    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
                                          range(len(nkerns))):
        if do_print:
            print '************* layer %i ***************' % n_layer

            print conv_mode, ss, n_layer, kshp, nkern

        # actual values
        w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)

        # manual implementation
        # check first stage
        padimg = imgval
        if conv_mode == 'full':
            padimg_shp = N.array(
                imshp[1:]) + 2 * (N.array(kshp) - N.array([1, 1]))
            padimg = N.zeros(N.r_[bsize, imshp[0], padimg_shp])
            padimg[:, :, kshp[0] - 1:-kshp[0] + 1,
                   kshp[1] - 1:-kshp[1] + 1] = imgval

        outshp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))

        time1 = time.time()
        outval = N.zeros(N.r_[bsize, outshp])
        if validate:
            # causes an atexit problem
            from scipy.signal.sigtools import _convolve2d
            from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary('fill')
            for b in range(bsize):  # loop over batches
                for n in range(nkern):  # loop over filters
                    for i in range(imshp[0]):  # loop over input feature maps
                        outval[b, n, ...] +=  _convolve2d(\
                            imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0)[0::ss[0], 0::ss[1]]
            ntot += time.time() - time1

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            conv_op = ConvOp(dx=ss[0],
                             dy=ss[1],
                             output_mode=conv_mode,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        else:
            conv_op = ConvOp(imshp,
                             kshp,
                             nkern,
                             bsize,
                             ss[0],
                             ss[1],
                             conv_mode,
                             unroll_batch=unroll_batch,
                             unroll_kern=unroll_kern,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        l1shp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
        hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        if conv_op_py:
            time1 = time.time()
            for i in range(repeat):
                hidval3_ = propup3(imgval, w_flip)
            hidval3 = hidval3_  # [:,:,0::ss[0],0::ss[1]]
            tpytot += time.time() - time1
            assert (N.abs(hidval2 - hidval3) < 1e-5).all()
        else:
            tpytot += 0

        if validate:
            temp = N.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = N.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])

    return tctot, tpytot, ntot
Exemplo n.º 54
0
# View more python tutorials on my Youtube and Youku channel!!!

# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial

# 5 - theano.function
"""
Please note, this code is only for Python 3+. If you are using Python 2, please modify the code accordingly.
"""
from __future__ import print_function
import numpy as np
import theano
import theano.tensor as T

# activation function example
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))    # logistic or soft step
logistic = theano.function([x], s)
print(logistic([[0, 1],[-1, -2]]))

# multiply outputs for a function
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
print( f(np.ones((2, 2)), np.arange(4).reshape((2, 2))) )

# default value and name for a function
x, y, w = T.dscalars('x', 'y', 'w')
z = (x+y)*w
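# A hedged completion (the original code is cut off at this point in the excerpt):
# theano.In attaches a default value and an optional call-name to an input, so y
# and w can be omitted or passed by keyword when calling the compiled function.
# The variable f_default and the call-name 'weight' are my own illustrative choices.
f_default = theano.function([x,
                             theano.In(y, value=1),
                             theano.In(w, value=2, name='weight')],
                            z)
print(f_default(23))                 # (23 + 1) * 2 = 48
print(f_default(23, 2, weight=4))    # (23 + 2) * 4 = 100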
Exemplo n.º 55
0
def exec_multilayer_conv_nnet(conv_mode,
                              ss,
                              bsize,
                              imshp,
                              kshps,
                              nkerns,
                              unroll_batch=0,
                              unroll_kern=0,
                              img=T.dmatrix(),
                              do_print=True,
                              repeat=1,
                              unroll_patch=False,
                              unroll_patch_size=False,
                              verbose=0):

    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a = T.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()

    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0

    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
                                          range(len(nkerns))):
        if do_print:
            print '************* layer %i ***************' % n_layer

            print conv_mode, ss, n_layer, kshp, nkern

        # actual values
        w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)

        outshp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))

        time1 = time.time()
        outval = N.zeros(N.r_[bsize, outshp])

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            conv_op = ConvOp(dx=ss[0],
                             dy=ss[1],
                             output_mode=conv_mode,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        else:
            conv_op = ConvOp(imshp,
                             kshp,
                             nkern,
                             bsize,
                             ss[0],
                             ss[1],
                             conv_mode,
                             unroll_batch=unroll_batch,
                             unroll_kern=unroll_kern,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        l1shp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)

        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
        hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])

    return tctot, tpytot, ntot
Exemplo n.º 56
0
 def test1(self):
     a = tensor.dmatrix()
     w = sort(a)
     f = theano.function([a], w)
     utt.assert_allclose(f(self.m_val), np.sort(self.m_val))
Exemplo n.º 57
0
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        input=None,
        n_visible=784,
        n_hidden=500,
        W=None,
        bhid=None,
        bvis=None
    ):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input ), the number of hidden units ( the dimension
        d' of the latent or hidden space ) and the corruption level. The
        constructor also receives symbolic variables for the input, weights and
        bias. Such symbolic variables are useful when, for example, the input
        is the result of some computation, or when weights are shared between
        the dA and an MLP layer. When dealing with SdAs this always happens:
        the dA on layer 2 gets as input the output of the dA on layer 1,
        and the weights of the dA are used in the second stage of training
        to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if the dA
                  should be standalone, set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of bias values (for
                     hidden units) that should be shared between the dA and
                     another architecture; if the dA should be standalone,
                     set this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of bias values (for
                     visible units) that should be shared between the dA and
                     another architecture; if the dA should be standalone,
                     set this to None


        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W`, which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) to
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype
            # theano.config.floatX so that the code is runnable on GPU
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if not bvis:
            bvis = theano.shared(
                value=numpy.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )

        if not bhid:
            bhid = theano.shared(
                value=numpy.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]
Exemplo n.º 58
0
def speed_multilayer_conv():
    # calculate the speedup of different combinations of unroll
    # set the parameters to the values you want to try.

    validate = False  # we don't validate the result, to make it much faster!
    repeat = 3
    verbose = 1
    unroll_batch = [1, 2, 3, 4, 5, 6, 10]  # 15, 30, 60 always much slower
    unroll_kern = [1, 2, 3, 4, 5, 6, 10]  # 15, 30, 60 always much slower
    #unroll_batch = [1,4,5]
    #unroll_kern = [1,4,5]
    #unroll_batch = [1,4]
    #unroll_kern = [1,4]
    unroll_patch = [True, False]

    bsize = 60  # batch size
    imshp_start = (1, 48, 48)  # shape chosen to test more corner cases.
    kshps = ([11, 12], )  # non-square shape to test more corner cases.
    nkerns = [60]  # per output pixel
    ssizes = [
        (1, 1),
    ]  # (1,1)]#(2,2) bugged
    convmodes = ['valid', 'full']
    do_convolve2 = False
    a = T.dmatrix()
    kerns = [a for i in nkerns]

    assert len(kshps) == len(nkerns) == len(kerns)

    timing = N.zeros(
        (len(unroll_batch), len(unroll_kern), 3, len(convmodes) * len(ssizes)))
    t_b_k = []
    # calculate the timing with unrolling

    print 'time unroll batch kern'
    best = []
    worst = []
    t_ = []
    for unroll_b, n_b in zip(unroll_batch, range(len(unroll_batch))):
        for unroll_k, n_k in zip(unroll_kern, range(len(unroll_kern))):
            t_b_k.append(str(unroll_b) + "/" + str(unroll_k))
            if not t_:
                tctot, tpytot, ntot = [], [], []
                for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
                    for ss, n_ss in zip(ssizes, range(len(ssizes))):
                        #                            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate, verbose=verbose,do_print=False)
                        tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                            conv_mode,
                            ss,
                            bsize,
                            imshp_start,
                            kshps,
                            nkerns,
                            unroll_batch=unroll_b,
                            unroll_kern=unroll_k,
                            verbose=verbose,
                            do_print=False,
                            repeat=repeat)
                        tctot += [tctot_]
                        tpytot += [tpytot_]
                        ntot += [ntot_]
                if unroll_b == 4 and unroll_k == 4:
                    # print "unroll 4/4",tctot
                    best = tctot
                if unroll_b == 1 and unroll_k == 1:
                    # print "unroll 1/1",tctot
                    worst = tctot
                timing[n_b,
                       n_k] = [tctot, tpytot,
                               ntot]  # [sum(tctot), sum(tpytot), sum(ntot)]
    if not t_:
        t = timing[:, :, 0, :]  # We select only the c timing.
    else:
        t = t_
    t = N.asarray(t)
    # calculate the old timing
    print 'time old version'
    tctot, tpytot, ntot = [], [], []
    tctot_ = []
    if not tctot_:
        for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
            for ss, n_ss in zip(ssizes, range(len(ssizes))):
                #                    tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate, verbose=verbose,do_print=False)
                tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                    conv_mode,
                    ss,
                    bsize,
                    imshp_start,
                    kshps,
                    nkerns,
                    unroll_batch=0,
                    unroll_kern=0,
                    verbose=verbose,
                    do_print=False,
                    repeat=repeat)
                tctot += [tctot_]
                tpytot += [tpytot_]
                ntot += [ntot_]
    else:
        tctot = N.asarray(tctot_)
    print "old code timing %.3fs" % sum(tctot), tctot
    best = N.asarray(best)
    worst = N.asarray(worst)
    print "timing for unrolled version"
    print "unroll_batch/unroll_kern valid_mode full_mode"
    for n_b in range(len(unroll_batch)):
        for n_k in range(len(unroll_kern)):
            print(unroll_batch[n_b], unroll_kern[n_k]) + tuple(t[n_b,
                                                                 n_k]), ','
    t_detail = t
    t = t.sum(axis=2)
    print "max %.3fs" % t.max(
    ), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()]
    print "min %.3fs" % t.min(
    ), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()]
    print "speedup vs (1/1)%.3fx, vs old %.3fx" % (t.max() / t.min(),
                                                   sum(tctot) / t.min())
    print worst / best, tctot / best

    # calculate the timing of unroll_patch
    print 'time unroll_patch'
    tctot_patch = []
    tctot_patch_size = []
    for conv_mode, n_mode in zip(convmodes, range(len(convmodes))):
        for ss, n_ss in zip(ssizes, range(len(ssizes))):
            #tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False)
            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                conv_mode,
                ss,
                bsize,
                imshp_start,
                kshps,
                nkerns,
                unroll_batch=0,
                unroll_kern=0,
                unroll_patch=True,
                verbose=verbose,
                do_print=False,
                repeat=repeat)
            tctot_patch += [tctot_]
            #tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False,unroll_patch_size=True)
            tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
                conv_mode,
                ss,
                bsize,
                imshp_start,
                kshps,
                nkerns,
                unroll_batch=0,
                unroll_kern=0,
                unroll_patch=True,
                verbose=verbose,
                do_print=False,
                unroll_patch_size=True,
                repeat=repeat)
            tctot_patch_size += [tctot_]

    t_patch = sum(tctot_patch)
    print "unroll_patch without shape time", tctot_patch
    print "speedup vs (1/1)%.3fx, vs old %.3fx" % (t.max() / t_patch,
                                                   sum(tctot) / t_patch)
    print best / tctot_patch, worst / tctot_patch
    t_patch_size = sum(tctot_patch_size)
    print "unroll_patch with shape time", tctot_patch_size
    print "speedup vs (1/1)%.3fx, vs old %.3fx" % (t.max() / t_patch_size,
                                                   sum(tctot) / t_patch_size)
    print best / tctot_patch_size, worst / tctot_patch_size

    return
Exemplo n.º 59
0
class GaussianLikelihoodModel(LikelihoodModel):
    def __init__(self, **parameters):
        super(GaussianLikelihoodModel, self).__init__(**parameters)
        self.sigma0inv = np.linalg.inv(self.sigma0)
        self.D = self.sigma.shape[0]
        self.compile()

    def transition_probability(self, parent, child):
        child_latent, child_time = child.get_state(
            'latent_value'), child.get_state('time')
        if parent is None:
            return self.calculate_transition(child_latent, self.mu0,
                                             child_time, -1)
        parent_latent, parent_time = parent.get_state(
            'latent_value'), parent.get_state('time')
        assert parent_time < child_time, (parent_time, child_time)
        return self.calculate_transition(child_latent, parent_latent,
                                         child_time, parent_time)

    @theanify(T.dvector('state'), T.dvector('parent'), T.dscalar('time'),
              T.dscalar('parent_time'))
    def calculate_transition(self, state, parent, time, parent_time):
        sigma = (time - parent_time) * self.sigma
        mu = parent

        logdet = T.log(T.nlinalg.det(sigma))
        delta = state - mu
        pre = -(self.D / 2.0 * np.log(2 * np.pi) + 1 / 2.0 * logdet)
        return pre + -0.5 * (T.dot(
            delta, T.dot(T.nlinalg.matrix_inverse(sigma), delta)))

    @theanify(T.dvector('mean'), T.dmatrix('cov'))
    def sample(self, mean, cov):
        e, v = T.nlinalg.eigh(cov)
        x = RandomStreams().normal(size=(self.D, ))
        x = T.dot(x, T.sqrt(e)[:, None] * v)
        return x + mean

    def sample_transition(self, node, parent):
        children = node.children
        time = node.get_state('time')
        if parent is None:
            mu0 = self.mu0
            sigma0 = self.sigma0
            sigma0inv = self.sigma0inv
        else:
            mu0 = parent.get_state('latent_value')
            sigma0 = self.sigma * (time - parent.get_state('time'))
            sigma0inv = np.linalg.inv(sigma0)

        mus = [c.get_state('latent_value') for c in children]

        sigmas = [self.sigma * (c.get_state('time') - time) for c in children]

        sigmas_inv = [np.linalg.inv(s) for s in sigmas]

        sigman = np.linalg.inv(sigma0inv + sum(sigmas_inv))
        mun = np.dot(
            sigman,
            np.dot(sigma0inv, mu0) +
            sum([np.dot(a, b) for a, b in zip(sigmas_inv, mus)]))
        return self.sample(mun, sigman)

    def get_parameters(self):
        return {"sigma", "sigma0", "mu0"}
Exemplo n.º 60
0
from blocks.serialization import load
from theano import tensor, function

# theano variables
features_car_cat = tensor.dmatrix('features_car_cat')
features_car_int = tensor.dmatrix('features_car_int')
features_nocar_cat = tensor.dmatrix('features_nocar_cat')
features_nocar_int = tensor.dmatrix('features_nocar_int')
features_cp = tensor.imatrix('codepostal')
features_hascar = tensor.imatrix('features_hascar')

main_loop = load(open("./model", "rb"))
model = main_loop.model

f = model.get_theano_function()

from fuel.datasets.hdf5 import H5PYDataset

submit_set = H5PYDataset('./data/data.hdf5',
                         which_sets=('submit', ),
                         load_in_memory=True)

print model.inputs
print submit_set.provides_sources
m = []
for i in model.inputs:
    m.append(submit_set.provides_sources.index(i.name))

from fuel.schemes import SequentialScheme
from fuel.streams import DataStream