Example #1
	def __init__(self, layers, cost, x, shared_input, x_mask=None, shared_mask=None, batch_size=64, 
								learning_rate=0.05, momentum=0.9, weight_decay=0.0002):
		self.i, self.bs = T.iscalars('i', 'bs')

		if type(layers) == list:
			self.layers = layers
		else:
			self.layers = [layers]
		self.cost = cost
		self.x = x
		self.x_mask = x_mask
		self.shared_input = shared_input
		self.shared_mask = shared_mask
		self.batch_size = batch_size
		self.momentum = momentum
		self.weight_decay = weight_decay
		self.learning_rate = learning_rate

		self.params = [param for layer in self.layers for param in layer.parameters]
		self.gradients = T.grad(self.cost, self.params)
		self.lr = T.dscalar('lr')

		if momentum:
			self.momentums = {param : theano.shared(np.zeros(param.get_value().shape)) 
									for param in self.params}

		self.steps = 0
		self.costs = [(0, 999999)]
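		# A minimal sketch (not in the source) of how these momentum buffers would
		# typically drive an SGD-with-momentum update, with weight decay folded
		# into the gradient:
		#   for param, grad in zip(self.params, self.gradients):
		#       m = self.momentums[param]
		#       v = self.momentum * m - self.lr * (grad + self.weight_decay * param)
		#       updates += [(m, v), (param, param + v)]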
    def get_testing_function(self, test_data, test_mask, pct_blackout=0.5):
        raise NotImplementedError("fix me!")

        i, batch_size = T.iscalars('i', 'batch_size')
        self.test_noise = T.shared_randomstreams.RandomStreams(1234).binomial(
                            (self.inputs.shape), n=1, p=1-pct_blackout, 
                            dtype=theano.config.floatX)
        self.test_noisy = self.test_noise * self.inputs
        self.test_active_hidden = T.nnet.sigmoid(T.dot(self.test_noisy, self.W) + self.b_in)
        self.test_output = T.nnet.sigmoid(T.dot(self.test_active_hidden, self.W.T) + self.b_out)

        # Root mean squared error over the originally-unknown entries only.
        #
        # Use the original input vector's mask (which beers had no rating) to
        # zero out predicted ratings in dimensions that carried no information,
        # so those dimensions do not affect the error. Then apply the test-noise
        # mask (1 - test_noise) so that only items withheld from the network's
        # input are scored, i.e. we only test accuracy on non-inputted answers:
        #   only_originally_unknown = (1 - test_noise) . (inputs_mask . test_output)
        self.only_originally_unknown = T.dot(1-self.test_noise, T.dot(self.inputs_mask, self.test_output))
        self.test_error = T.pow(T.mean(T.pow(T.dot(self.inputs_mask, self.test_output) - self.inputs, 2)), 0.5)

        self.testing_function = theano.function([i, batch_size], self.test_error, 
                                                givens={self.inputs:        test_data[i:i+batch_size],
                                                        self.inputs_mask:   test_mask[i:i+batch_size]})

        return self.testing_function
Example #3
def test_equal_computations():

    a, b = tensor.iscalars(2)

    with pytest.raises(ValueError):
        equal_computations([a], [a, b])

    assert equal_computations([a], [a])
    assert equal_computations([tensor.as_tensor(1)], [tensor.as_tensor(1)])
    assert not equal_computations([b], [a])
    assert not equal_computations([tensor.as_tensor(1)], [tensor.as_tensor(2)])

    assert equal_computations([2], [2])
    assert equal_computations([np.r_[2, 1]], [np.r_[2, 1]])
    assert equal_computations([np.r_[2, 1]], [tensor.as_tensor(np.r_[2, 1])])
    assert equal_computations([tensor.as_tensor(np.r_[2, 1])], [np.r_[2, 1]])

    assert not equal_computations([2], [a])
    assert not equal_computations([np.r_[2, 1]], [a])
    assert not equal_computations([a], [2])
    assert not equal_computations([a], [np.r_[2, 1]])

    c = tensor.type_other.NoneConst
    assert equal_computations([c], [c])

    m = tensor.matrix()
    max_argmax1 = tensor.max_and_argmax(m)
    max_argmax2 = tensor.max_and_argmax(m)
    assert equal_computations(max_argmax1, max_argmax2)
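# For context, equal_computations(xs, ys) compares the graphs that compute xs
# and ys structurally. A minimal sketch of the basic behavior (an illustration,
# not part of the test above):
#
#     x = tensor.iscalar('x')
#     assert equal_computations([x + 1], [x + 1])      # same structure
#     assert not equal_computations([x + 1], [x + 2])  # constants differ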
Example #4
 def extract_feature_from_data_provider(self, data_provider, feature_layer_name, 
                                        train_mean=None, batch_mean_subtraction=False, 
                                        niter=1, noiseless=False):
     assert isinstance(data_provider, LabeledDataProvider), (
            'data_provider needs to be a subclass of LabeledDataProvider'
            ' so that it provides labeled data for supervised models')
     
     assert feature_layer_name in self.name_index_dic, ('feature_layer_name must name a '
                                                        'layer in the current network structure')
     
     layer_outputs = self.network_fprop(isTest=True, noiseless=noiseless)
     
     # assumes the output is always the last layer of the network for now
     final_output = layer_outputs[feature_layer_name]
     
     self.shared_train, _, _, _ = data_provider.get_train_labeled_data_and_idx(0)
     start_index, end_index = T.iscalars('s_i', 'e_i')
     xgiven = {}
     for i in xrange(self.ninputs):
         xgiven[self.xs[i]] = self.shared_train[i][start_index:end_index]
         
     if self.shared_train[0].dtype=='uint8':
         for i in xrange(self.ninputs):
             xgiven[self.xs[i]] = T.cast(xgiven[self.xs[i]], dtype='float32')
     
     if train_mean is not None and batch_mean_subtraction:
         tm = [None] * self.ninputs
         for i in xrange(self.ninputs):
             tm[i] = theano.shared(numpy.asarray(train_mean[i], dtype='float32'))
             xgiven[self.xs[i]] -= tm[i]
         
     self.__predict = theano.function([start_index, end_index], 
                                      final_output,
                                      givens=xgiven)
     
     ndata = data_provider.get_number_of_train_data()
     
     prediction = numpy.zeros((ndata,)+self.layers[self.name_index_dic[feature_layer_name]].getOutputShape()[1:], 
                              dtype='float32')
     
     for minibatch_idx in xrange(data_provider.get_number_of_train_batches()):
         self.shared_train, _, s_i, e_i = data_provider.get_train_labeled_data_and_idx(minibatch_idx)
         pred_start = minibatch_idx*self.batch_size
         pred_end = (minibatch_idx+1)*self.batch_size
         if pred_end > ndata:
             pred_start = ndata-self.batch_size
             pred_end = ndata
         
         for j in xrange(niter):
             if j == 0:
                 p = self.__predict(s_i, e_i)
             else:
                 p += self.__predict(s_i, e_i)
                 
         prediction[pred_start:pred_end] = p/float(niter)
         
     return prediction
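     # A hedged usage sketch (the provider and layer name are assumptions, not
     # from the source):
     #   feats = net.extract_feature_from_data_provider(dp, 'fc7', niter=10)
     # With niter > 1 and noiseless=False, the j-loop above averages the
     # predictions of several noisy forward passes.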
    def reconstruct_from_data_provider(self, data_provider, train_mean=None, 
                                       batch_mean_subtraction=False, steps=1, noiseless=True):
        assert isinstance(data_provider, UnlabeledDataProvider), (
               'data_provider needs to be a subclass of UnlabeledDataProvider'
               ' so that it provides appropriate data for unsupervised models')
        
        assert ((steps==1) == noiseless), (
                'need noise to simulate a generalized denoising autoencoder; '
                'in the noiseless case there is no need to take multiple steps '
                'in reconstruction')
        
        enc_outputs = self.network_fprop(self.enc_layers, self.x, isTest=True, noiseless=noiseless)
        dec_outputs = self.network_fprop(self.dec_layers, enc_outputs[self.encoder_ns[-1]['name']], isTest=True, noiseless=noiseless)
         
        # the reconstruction is always the last layer of the network
        x_hat_given_x = dec_outputs[self.decoder_ns[-1]['name']]

        self.shared_train, _, _ = data_provider.get_train_data_and_idx(0)
        start_index, end_index = T.iscalars('s_i', 'e_i')
        xgiven = self.shared_train[start_index:end_index]
        if self.shared_train.dtype=='uint8':
            xgiven = T.cast(xgiven, dtype='float32')
        
        if train_mean is not None and batch_mean_subtraction:
            tm = theano.shared(numpy.asarray(train_mean, dtype='float32'))
            xgiven -= tm
            
        reconstruct_dp = theano.function([start_index, end_index], 
                                         x_hat_given_x,
                                         givens={self.x:xgiven})
        
        reconstruct_mem = theano.function([self.x], x_hat_given_x)
        
        ndata = data_provider.get_number_of_train_data()
        
        recs = numpy.zeros((ndata, self.shared_train.get_value().shape[1]), 
                                 dtype='float32')
        
        for minibatch_idx in xrange(data_provider.get_number_of_train_batches()):
            self.shared_train, s_i, e_i = data_provider.get_train_data_and_idx(minibatch_idx)
            pred_start = minibatch_idx*self.batch_size
            pred_end = (minibatch_idx+1)*self.batch_size
            if pred_end > ndata:
                pred_start = ndata-self.batch_size
                pred_end = ndata
            
            for j in xrange(steps):
                if j == 0:
                    p = reconstruct_dp(s_i, e_i)
                else:
                    p = reconstruct_mem(p)
                    
            recs[pred_start:pred_end] = p
            
        return recs
Example #6
    def __init__(self, input_tensor, n_in, n_hidden, learning_rate, pct_blackout=0.2, 
                    W=None, b_in=None, b_out=None):
        if W is None:
            # initialization of weights as suggested in theano tutorials
            W = np.asarray(np.random.uniform(
                                        low=-4 * np.sqrt(6. / (n_hidden + n_in)),
                                        high=4 * np.sqrt(6. / (n_hidden + n_in)),
                                        size=(n_in, n_hidden)), 
                                        dtype=theano.config.floatX)

        self.W = theano.shared(W, 'W')

        if b_in is None:
            self.b_in = theano.shared(np.zeros(n_hidden, dtype=theano.config.floatX), 'b_in')
        else:
            self.b_in = theano.shared(b_in, 'b_in')

        if b_out is None:
            self.b_out = theano.shared(np.zeros(n_in, dtype=theano.config.floatX), 'b_out')
        else:
            self.b_out = theano.shared(b_out, 'b_out')

        matrixType = T.TensorType(theano.config.floatX, (False,)*2)


        self.n_in = n_in
        self.n_hidden = n_hidden
        self.inputs = input_tensor
        self.x = matrixType('x')
        self.pct_blackout = pct_blackout
        self.noise = T.shared_randomstreams.RandomStreams(1234).binomial(
                            (self.x.shape), n=1, p=1-(self.pct_blackout), 
                            dtype=theano.config.floatX)
        self.noisy = self.noise * self.x
        self.active_hidden = T.nnet.sigmoid(T.dot(self.noisy, self.W) + self.b_in)
        self.output = T.nnet.sigmoid(T.dot(self.active_hidden, self.W.T) + self.b_out)

        self.entropy = -T.sum(self.x * T.log(self.output) + 
                                (1 - self.x) * T.log(1 - self.output), axis=1)

        self.cost = T.mean(self.entropy)

        self.params = [self.W, self.b_in, self.b_out]
        self.gradients = T.grad(self.cost, self.params)

        self.learning_rate = learning_rate

        self.updates = []
        for param, grad in zip(self.params, self.gradients):
            self.updates.append((param, param - self.learning_rate * grad))

        i, batch_size = T.iscalars('i', 'batch_size')
        self.train_step = theano.function([i, batch_size], self.cost, 
                                            updates=self.updates, 
                                            givens={self.x:self.inputs[i:i+batch_size]})
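        # A hedged usage sketch (the loop is an assumption, not in the source):
        #   for start in xrange(0, n_train, 64):
        #       cost = dae.train_step(start, 64)
        # Each call denoises one minibatch of self.inputs and applies the plain
        # SGD updates built above.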
Example #7
    def __init__(self, model):
        self.model = model

        a = T.scalars('a')
        b = T.scalars('b')
        c = T.iscalars('c')
        d = T.iscalars('d')
        state = T.vector('state')
        action = T.vector('action')
        o = T.vector('o')
        z_switch = T.switch(T.lt(a, b), c, d)
        _predict_obs = self.model.tf_predict_guide(state.reshape((1, -1)),
                                                   action.reshape((1, -1)))
        self._update_state = self.model.tf_update_state(state, o, action)
        self.f_switch = theano.function([a, b, c, d],
                                        z_switch,
                                        mode=theano.Mode(linker='vm'))
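        # For reference (derived from z_switch above): f_switch(0.1, 0.2, 3, 4)
        # returns 3, since a < b selects c; otherwise d is returned.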
        self.predict_obs = theano.function(inputs=[state, action],
                                           outputs=_predict_obs,
                                           on_unused_input='ignore')
        self.update_state = theano.function(inputs=[state, o, action],
                                            outputs=self._update_state)
    def extract_feature_from_data_provider(self, data_provider, feature_layer_name, 
                                           train_mean=None, batch_mean_subtraction=False, 
                                           niter=1, noiseless=False):
        assert isinstance(data_provider, UnlabeledDataProvider), (
               'data_provider needs to be a subclass of UnlabeledDataProvider'
               ' so that it provides appropriate data for unsupervised models')
        
        assert feature_layer_name in self.ae_name_index_dic, ('feature_layer_name must name a '
                                                           'layer in the current network structure')
        
        layer_outputs = self.network_fprop(self.enc_layers, self.x, isTest=True, noiseless=noiseless)
        h_given_x = layer_outputs[feature_layer_name]

        
        self.shared_train, _, _ = data_provider.get_train_data_and_idx(0)
        start_index, end_index = T.iscalars('s_i', 'e_i')
        xgiven = self.shared_train[start_index:end_index]
        if self.shared_train.dtype=='uint8':
            xgiven = T.cast(xgiven, dtype='float32')
        
        if train_mean is not None and batch_mean_subtraction:
            tm = theano.shared(numpy.asarray(train_mean, dtype='float32'))
            xgiven -= tm
            
        extract_feature = theano.function([start_index, end_index], 
                                         h_given_x,
                                         givens={self.x:xgiven})
        
        ndata = data_provider.get_number_of_train_data()
        
        features = numpy.zeros((ndata,)+self.layers[self.ae_name_index_dic[feature_layer_name]].getOutputShape()[1:], 
                                 dtype='float32')
        
        for minibatch_idx in xrange(data_provider.get_number_of_train_batches()):
            self.shared_train, s_i, e_i = data_provider.get_train_data_and_idx(minibatch_idx)
            pred_start = minibatch_idx*self.batch_size
            pred_end = (minibatch_idx+1)*self.batch_size
            if pred_end > ndata:
                pred_start = ndata-self.batch_size
                pred_end = ndata
            
            for j in xrange(niter):
                if j == 0:
                    p = extract_feature(s_i, e_i)
                else:
                    p += extract_feature(s_i, e_i)
                    
            features[pred_start:pred_end] = p/float(niter)
            
        return features
Example #9
 def compile_functions(self, opt, noiseless_validation=True, **args):
     print '... compiling training functions'
     
     # propagate for training with batch normalization, using updated std and mean for each batch
     layer_outputs = self.network_fprop(self.layers, self.x, isTest=False, noiseless=False)
     cost, show_cost = self.get_cost(layer_outputs, self.layers)
     self.opt = opt
     updates = self.opt.get_updates(cost, self.params)
     
     start_index, end_index = T.iscalars('s_i', 'e_i')
     if self.uint8_data:
         given_train_x = T.cast(self.shared_train[start_index:end_index], dtype='float32')
     else:
         given_train_x = self.shared_train[start_index:end_index]
         
     if self.batch_mean_subtraction:
         assert self.train_mean is not None, 'train_mean cannot be None for batch mean subtraction'
         given_train_x -= self.train_mean
         
     self.train_model = theano.function( inputs=[start_index, end_index], 
                                         outputs=show_cost, updates = updates,
                                         givens = {
                                                   self.x: given_train_x,
                                                   }
                                        )
     
     
     if self.nvalid_batches>0:
         layer_outputs = self.network_fprop(self.layers, self.x, isTest=True, noiseless=noiseless_validation)
         final_output = layer_outputs[self.ae_structure[-1]['name']]
         _, show_cost = self.get_cost(layer_outputs, self.layers)
 
         if self.uint8_data:
             given_valid_x = T.cast(self.shared_valid[start_index:end_index], dtype='float32')
         else:
             given_valid_x = self.shared_valid[start_index:end_index]
             
         if self.batch_mean_subtraction:
             assert self.train_mean is not None, 'train_mean cannot be None for batch mean subtraction'
             given_valid_x -= self.train_mean
             
         self.validate_model = theano.function(inputs=[start_index, end_index], 
                                            outputs=show_cost,
                                             givens = {
                                                       self.x: given_valid_x,
                                                       }
                                           )
Example #10
    def test_infer_shape(self):
        x = T.dvector('x')
        m = T.iscalars('m')
        a = np.random.random(50)

        self._compile_and_check([x, m], [repeat(x, m)], [a, 2], self.op_class)

        x = T.dmatrix('x')
        a = np.random.random((40, 50))
        for axis in range(len(a.shape)):
            self._compile_and_check([x, m], [repeat(x, m, axis=axis)], [a, 2],
                                    self.op_class)

        m = T.lvector('m')
        repeats = np.random.random_integers(5, size=(40, ))
        self._compile_and_check([x, m], [repeat(x, m, axis=0)], [a, repeats],
                                self.op_class)
Example #13
def build_model(prepared_data, clamp_L0=0.4, eeg_column_i=None, **kwargs):
    # ##########
    # STEP1: order the data properly so that we can read from it sequentially
    # when training the model

    subject_x, skill_x, correct_y, start_x, eeg_x, eeg_table, stim_pairs, train_idx, valid_idx = prepared_data
    N = len(correct_y)
    train_mask = idx_to_mask(train_idx, N)
    valid_mask = idx_to_mask(valid_idx, N)

    # sort data by subject and skill
    sorted_i = sorted(xrange(N),
                      key=lambda i: (subject_x[i], skill_x[i], start_x[i]))
    skill_x = skill_x[sorted_i]
    subject_x = subject_x[sorted_i]
    correct_y = correct_y[sorted_i]
    start_x = start_x[sorted_i]
    train_mask = train_mask[sorted_i]
    valid_mask = valid_mask[sorted_i]
    train_idx = np.nonzero(train_mask)[0]
    valid_idx = np.nonzero(valid_mask)[0]

    n_skills = np.max(skill_x) + 1
    n_subjects = np.max(subject_x) + 1

    # binarize eeg
    eeg_single_x = np.zeros(N)
    if eeg_column_i is not None:
        eeg_column = eeg_table[eeg_x, eeg_column_i]
        above_median = np.greater(eeg_column, np.median(eeg_column))
        eeg_single_x[above_median] = 1

    # prepare parameters
    p_T = 0.5
    p_G = 0.1
    p_S = 0.2
    if clamp_L0 is None:
        p_L0 = 0.7
    else:
        p_L0 = clamp_L0
    # eeg_single_x = np.zeros(N)
    parameter_base = np.ones(n_skills)
    tp_L0, t_L0 = make_probability(parameter_base * p_L0, name='L0')
    tp_T, t_T = make_probability(np.ones((n_skills, 2)) * p_T, name='p(T)')
    tp_G, t_G = make_probability(p_G, name='p(G)')
    tp_S, t_S = make_probability(p_S, name='p(S)')

    # declare and prepare variables for theano
    i = T.ivector('i')
    dummy_float = make_shared(0, name='dummy')
    skill_i, subject_i = T.iscalars('skill_i', 'subject_i')
    correct_y = make_shared(correct_y, to_int=True)
    eeg_single_x = make_shared(eeg_single_x, to_int=True)

    def step(correct_i, eeg, prev_L, prev_p_C, P_T, P_S, P_G):
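        # Standard BKT-style recursion: the learned-state probability is updated
        # with an EEG-conditioned transition P_T, and the correctness probability
        # mixes the slip (P_S) and guess (P_G) rates.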
        Ln = prev_L + (1 - prev_L) * P_T[eeg]
        p_C = prev_L * (1 - P_S) + (1 - prev_L) * P_G
        return Ln, p_C

    # set up theano functions
    ((results, p_C),
     updates) = theano.scan(fn=step,
                            sequences=[correct_y[i], eeg_single_x[i]],
                            outputs_info=[tp_L0[skill_i], dummy_float],
                            non_sequences=[tp_T[skill_i], tp_S, tp_G])

    p_y = T.stack(1 - p_C, p_C)
    loss = neg_log_loss(p_y, correct_y[i])

    learning_rate = T.fscalar('learning_rate')
    if clamp_L0 is None:
        params = [t_T, t_L0]
    else:
        params = [t_T]
    update_parameters = [(param, param - learning_rate * T.grad(loss, param))
                         for param in params]

    tf_train = theano.function(inputs=[i, skill_i, learning_rate],
                               updates=update_parameters,
                               outputs=[loss, results, i],
                               allow_input_downcast=True)
    tf_valid = theano.function(inputs=[i, skill_i],
                               outputs=[loss, results, i],
                               allow_input_downcast=True)

    def f_train((i, (subject_i, skill_i)), learning_rate):
        return tf_train(i, skill_i, learning_rate)
Example #14
 def train_batch():
     current, prediction = T.iscalars('current', 'prediction')
     learn_step = self.learn_batch(current, prediction)
     train_one_epoch = theano.function([current, prediction], learn_step, updates=[train_cost, ])
Example #15
import numpy as np
import theano
import theano.tensor as T
'''
Learn one more theano function: scan().
Sample code from the tutorial is in the hmm_class file.

Fibonacci
'''

N = T.iscalars('N')


def recurrence(n, fn_1, fn_2):
    return fn_1 + fn_2, fn_1


outputs, updates = theano.scan(
    # The outputs here are two scalars per step, but the iteration turns them
    # into two lists: because recurrence returns two values, scan produces a
    # list containing two arrays.
    fn=recurrence,
    sequences=T.arange(N),
    n_steps=N,
    outputs_info=[1., 1.])

fibonacci = theano.function(
    inputs=[N],
    outputs=outputs,  # refers to the scan outputs above; the brackets around it are optional
)

o_val = fibonacci(8)
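# Sanity check, derived from the recurrence above: with outputs_info=[1., 1.],
# o_val[0] is [2., 3., 5., 8., 13., 21., 34., 55.] and o_val[1] is the same
# sequence delayed by one step.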
    def compile_functions(self, opt, noiseless_validation=True, **args):
        print '... compiling training functions'
        
        (prior_gen_cost, prior_gen_show_cost, 
         prior_dis_cost, prior_dis_show_cost, 
         data_gen_cost, data_gen_show_cost,
         data_dis_cost, data_dis_show_cost,
         rec_cost, rec_show_cost) = self.get_cost(isTest=False)
           
        self.opt = opt
        prior_gen_updates = self.opt.get_updates(prior_gen_cost, self.enc_params)
        prior_dis_updates = self.opt.get_updates(prior_dis_cost, self.prior_dis_params)
        data_gen_updates = self.opt.get_updates(data_gen_cost, self.dec_params)
        data_dis_updates = self.opt.get_updates(data_dis_cost, self.data_dis_params)
        ae_updates = self.opt.get_updates(rec_cost, self.enc_params) #+rec_cost +self.dec_params
        
        start_index, end_index = T.iscalars('s_i', 'e_i')
        if self.uint8_data:
            print 'converting uint8 data to float32 for each batch'
            given_train_x = T.cast(self.shared_train[start_index:end_index], dtype='float32')
        else:
            given_train_x = self.shared_train[start_index:end_index]
            
        if self.batch_mean_subtraction:
            assert self.train_mean is not None, 'train_mean cannot be None for batch mean subtraction'
            given_train_x -= self.train_mean
        
        if self.batch_data_process_func is not None:
            given_train_x = self.batch_data_process_func(given_train_x)
        
#         self.get_data_dis_cost = theano.function(
#                               [start_index, end_index, self.z], #
#                               data_dis_show_cost,
#                               givens={self.x:given_train_x}
#                               )
         
        self.train_ae_model = theano.function(
                              [start_index, end_index],
                              rec_show_cost,
                              updates=ae_updates,
                              givens={self.x:given_train_x}
                                )
        
        self.train_data_gen_model = theano.function(
                [start_index, end_index],#[self.z], #
                data_gen_show_cost,
                updates=data_gen_updates,
                givens={self.x:given_train_x}
                )
        
        self.train_data_dis_model = theano.function(
                [start_index, end_index], #, self.z
                data_dis_show_cost,
                updates=data_dis_updates,
                givens={self.x:given_train_x}
                )
        
        self.train_prior_gen_model = theano.function(
            [start_index, end_index],
            prior_gen_show_cost,
            updates=prior_gen_updates,
            givens={self.x:given_train_x}
            )
    
        self.train_prior_dis_model = theano.function(
            [start_index, end_index, self.z],
            prior_dis_show_cost,
            updates=prior_dis_updates,
            givens={self.x:given_train_x}
            )
Example #17
    def __init__(self, numargs, embed_size, pred_vocab_size, arg_vocab_size, initial_pred_rep=None, initial_arg_rep = None, margin = 5, lr=0.01, activation=T.nnet.sigmoid):
        numpy_rng = numpy.random.RandomState(12345)
        theano_rng = RandomStreams(54321)
        self.lr = lr
        #margin = 5
        # Initializing predicate representations
        if initial_pred_rep is not None:
            num_preds, pred_dim = initial_pred_rep.shape
            assert pred_vocab_size == num_preds, "Initial predicate representation is not the same size as pred_vocab_size"
            assert embed_size == pred_dim, "Initial predicate representation does not have the same dimensionality as embed_size"
        else:
            initial_pred_rep_range = 4 * numpy.sqrt(6. / (pred_vocab_size + embed_size))
            initial_pred_rep = numpy.asarray(numpy_rng.uniform(low = -initial_pred_rep_range, high = initial_pred_rep_range, size = (pred_vocab_size, embed_size)))
            
        self.pred_rep = theano.shared(value=initial_pred_rep, name='P')
        
        # Initializing argument representations
        if initial_arg_rep is not None:
            arg_rep_len, arg_dim = initial_arg_rep.shape
            assert arg_vocab_size == arg_rep_len, "Initial argument representation is not the same size as arg_vocab_size"
            assert embed_size == arg_dim, "Initial argument representation does not have the same dimensionality as embed_size"
        else:
            initial_arg_rep_range = 4 * numpy.sqrt(6. / (arg_vocab_size + embed_size))
            initial_arg_rep = numpy.asarray(numpy_rng.uniform(low = -initial_arg_rep_range, high = initial_arg_rep_range, size = (arg_vocab_size, embed_size)))
            
        self.arg_rep = theano.shared(value=initial_arg_rep, name='A')
        
        # Initialize scorer
        scorer_dim = embed_size * (numargs + 1) # Predicate is +1
        initial_scorer_range = 4 * numpy.sqrt(6. / scorer_dim)
        initial_scorer = numpy.asarray(numpy_rng.uniform(low = -initial_scorer_range, high = initial_scorer_range, size = scorer_dim))
        self.scorer = theano.shared(value=initial_scorer, name='s')
        
        # Initialize indicator
        indicator_dim = embed_size * (numargs + 1) # Predicate is +1
        initial_indicator_range = 4 * numpy.sqrt(6. / (indicator_dim + numargs))
        initial_indicator = numpy.asarray(numpy_rng.uniform(low = -initial_indicator_range, high = initial_indicator_range, size = (indicator_dim, numargs)))
        self.indicator = theano.shared(value=initial_indicator, name='I')
        
        # Define symbolic pred-arg
        self.pred_ind = T.iscalar('p')
        self.arg_inds = T.iscalars(numargs)
        pred = self.pred_rep[self.pred_ind].reshape((1, embed_size))
        args = self.arg_rep[self.arg_inds].reshape((1, embed_size * numargs))
        pred_arg = activation(T.concatenate([pred, args], axis=1))
        
        # Define symbolic rand pred-arg for training scorer
        rand_pred_ind = theano_rng.random_integers(low=0, high=pred_vocab_size-1)
        rand_arg_inds = theano_rng.random_integers([1, numargs], low=0, high=arg_vocab_size-1)
        rand_pred = self.pred_rep[rand_pred_ind].reshape((1, embed_size))
        rand_args = self.arg_rep[rand_arg_inds].reshape((1, embed_size * numargs))
        rand_pred_arg = activation(T.concatenate([rand_pred, rand_args], axis=1))

        # Define symbolic pred_rand-arg for training indicator
        pred_rand_arg = activation(T.concatenate([pred, rand_args], axis=1))

        # Define scores and loss
        self.corr_score = T.sum(T.dot(pred_arg, self.scorer))
        rand_score = T.sum(T.dot(rand_pred_arg, self.scorer))
        self.margin_loss = T.maximum(0, margin - self.corr_score + rand_score)
        
        # Define indicator values and loss
        orig_ind_labels = T.constant(numpy.zeros(numargs))
        self.indicator_pred = T.nnet.sigmoid(T.dot(pred_arg, self.indicator))
        rand_ind_labels = T.constant(numpy.ones(numargs))
        rand_indicator_pred = T.nnet.sigmoid(T.dot(pred_rand_arg, self.indicator))
        self.indicator_loss = T.mean((self.indicator_pred - orig_ind_labels) ** 2) + T.mean((rand_indicator_pred - rand_ind_labels) ** 2)
        
        # Define params and inputs
        self.score_params = [self.pred_rep, self.arg_rep, self.scorer]
        self.indicator_params = [self.pred_rep, self.arg_rep, self.indicator]
        self.score_ind_inputs = [self.pred_ind] + list(self.arg_inds)
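        # A hedged sketch of how these pieces would typically be wired up (not
        # shown in this snippet): compile a theano.function over
        # self.score_ind_inputs whose updates follow
        # T.grad(self.margin_loss, self.score_params), and likewise for
        # self.indicator_loss and self.indicator_params.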
Example #18
def build_model(prepared_data, clamp_L0=None, **kwargs):
    # ##########
    # STEP1: order the data properly so that we can read from it sequentially
    # when training the model

    subject_x, skill_x, correct_y, start_x, eeg_x, eeg_table, stim_pairs, train_idx, valid_idx = prepared_data
    N = len(correct_y)
    train_mask = idx_to_mask(train_idx, N)
    valid_mask = idx_to_mask(valid_idx, N)

    # sort data by subject and skill
    sorted_i = sorted(xrange(N), key=lambda i: (subject_x[i], skill_x[i], start_x[i]))
    skill_x = skill_x[sorted_i]
    subject_x = subject_x[sorted_i]
    correct_y = correct_y[sorted_i]
    start_x = start_x[sorted_i]
    train_mask = train_mask[sorted_i]
    valid_mask = valid_mask[sorted_i]
    train_idx = np.nonzero(train_mask)[0]
    valid_idx = np.nonzero(valid_mask)[0]

    n_skills = np.max(skill_x) + 1

    # ####
    # STEP 2: initialize parameters
    p_G = 0.1
    p_S = 0.2
    feat_x = eeg_x
    feat_table = eeg_table
    feat_columns = range(feat_table.shape[1])  # [0, 1, 2, 3, 4, 5, 6]
    feat_width = len(feat_columns)
    if clamp_L0 is None:
        Beta0 = make_shared(np.random.rand(n_skills))
    Beta = make_shared(np.random.rand(n_skills, feat_width))
    b = make_shared(np.random.rand(n_skills))
    Gamma = make_shared(np.random.rand(n_skills, feat_width))
    g = make_shared(np.random.rand(n_skills))
    tp_G, t_G = make_probability(p_G, name='p(G)')
    tp_S, t_S = make_probability(p_S, name='p(S)')

    # declare and prepare variables for theano
    i = T.ivector('i')
    dummy_float = make_shared(0, name='dummy')
    skill_i, subject_i = T.iscalars('skill_i', 'subject_i')
    correct_y = make_shared(correct_y, to_int=True)
    feat_x = make_shared(feat_x, to_int=True)
    feat_table = make_shared(feat_table)

    # set up theano functions
    def step(correct_i, feat, prev_L, prev_p_C, skill_i, P_S, P_G):
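        # Feature-conditioned transition: the probability of being in the learned
        # state is a logistic function of the features, with separate weights for
        # the previously-learned (Beta, b) and previously-unlearned (Gamma, g)
        # cases; the correctness probability mixes slip (P_S) and guess (P_G).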
        L_true_given_true = sigmoid(T.dot(Beta[skill_i].T, feat[feat_columns]) + b[skill_i])
        L_true_given_false = sigmoid(T.dot(Gamma[skill_i].T, feat[feat_columns]) + g[skill_i])
        Ln = prev_L * L_true_given_true + (1 - prev_L) * L_true_given_false
        p_C = prev_L * (1 - P_S) + (1 - prev_L) * P_G
        return Ln, p_C
    if clamp_L0 is None:
        L0 = sigmoid(Beta0[skill_i])
    else:
        L0 = make_shared(clamp_L0)
    ((results, p_C), updates) = theano.scan(fn=step,
                                            sequences=[correct_y[i],
                                                       feat_table[feat_x[i]]],
                                            outputs_info=[L0,
                                                          dummy_float],
                                            non_sequences=[skill_i,
                                                           tp_S,
                                                           tp_G])
    p_y = T.stack(1 - p_C, p_C)
    loss = neg_log_loss(p_y, correct_y[i])

    learning_rate = T.fscalar('learning_rate')
    if clamp_L0 is None:
        params = [Beta0, Beta, Gamma, g, b]
    else:
        params = [Beta, Gamma, g, b]
    update_parameters = [(param, param - learning_rate * T.grad(loss, param))
                         for param in params]

    tf_train = theano.function(inputs=[i, skill_i, learning_rate],
                               updates=update_parameters,
                               outputs=[loss, results, i],
                               allow_input_downcast=True)
    tf_valid = theano.function(inputs=[i, skill_i],
                               outputs=[loss, results, i],
                               allow_input_downcast=True)

    def f_train((i, (subject_i, skill_i)), learning_rate):
        everything = tf_train(i, skill_i, learning_rate)
        return everything[:3]
Example #19
def model_choice(models, obs):
    k = [i for i in xrange(2, 9)]
    
    Statistics = []
    
    for ki in k:        
        print 'K = ', ki
                
        num_M = models[ki-2].shape[0]
        print 'Num Models: ', num_M
        numNbins = len(obs[ki-2])
        numHbins = len(obs[ki-2][0])
                
        M = theano.shared(np.asarray(models[ki-2], dtype = theano.config.floatX))
           
        ObSym = T.matrix() # Symbolic tensor for observation batches - indexed elements of Obs shared variable are passed through this

        Pred = theano.function([], predictiveness_profiles(M, ki, len(models[ki-2])))() # This should be dealt with better too...
        Pred_n = Pred

        Pred = theano.shared(np.asarray(Pred, dtype = theano.config.floatX))
        
        
        # setup inference schemas and theano symbolic tensors
        if INFERENCE == 'underfit':
            profiles = make_agression_profiles(num_profiles, num_alpha)
            
            #alpha = theano.shared(np.asmatrix(np.linspace(0.0,1.0, num = num_alpha, endpoint = False), dtype=theano.config.floatX))
            Alpha = T.arange(0., 1.0, 1./num_alpha)
            Agression_profiles = T.matrix('Agr')
            nAlpha, nM, nO = T.iscalars('','','')

            Choice_Maker = Underfit_Choice(M, ObSym, nM, nO, ki, nAlpha, Alpha, Agression_profiles, Pred, pValue_alg) #only works for 0...
            
        elif INFERENCE == 'bayes':
            profiles = make_priors_profiles(num_priors, num_M)
            
            Priors_profiles = T.matrix('Priors')
            Loss_funcs = T.arange(1,5)  # Loss functions are choices in bayesian_choice numbered [1,4]
            nM, nO = T.iscalars('','')

            Choice_Maker = Bayesian_Choice(M, ObSym, nM, nO, ki, Priors_profiles, Loss_funcs)
            
        else:
            print 'unknown inference algorithm...'
            quit()
        

        # all data for this K
        k_Data = kData(numNbins, numHbins, num_profiles)
        

        for i in xrange(numNbins):
            for j in xrange(numHbins):
                print 'bin ', i, j
                
                t0 = time.time()
                
                if obs[ki-2][i][j] == [] or obs[ki-2][i][j][0].shape[1] == 0:
                    # there are no observations in this N*H bin...
                    continue
                else:
                    num_obs = obs[ki-2][i][j][0].shape[0]

                # allocate for predictiveness of model choice vs universe for each obs for each profile
                k_pred = kPred(num_obs, num_profiles)

                num_batches = int(np.ceil(num_obs/np.float(BATCH_SIZE)))


                for batch_index in xrange(num_batches):
                    top = BATCH_SIZE*(batch_index+1) if batch_index < (num_batches-1) else num_obs
                    n_obs = top - BATCH_SIZE*(batch_index)
                    print 'batch index ', batch_index, '\t num obs: ', top - BATCH_SIZE*batch_index
                    
                    if INFERENCE == 'underfit':                        
                        batch_choice = Choice_Maker.Choice_Profile_F(profiles, num_alpha, num_M, n_obs, obs[ki-2][i][j][0][BATCH_SIZE*batch_index:top])
                        print batch_choice
                        for prof in xrange(num_profiles):
                            k_pred[prof][BATCH_SIZE*(batch_index):top] = get_predictiveness_array(batch_choice[prof],  obs[ki-2][i][j][1], Pred_n, n_obs)

                    elif INFERENCE == 'bayes':   
                        batch_choice = Choice_Maker.Choice_Profile_F(profiles, num_M, n_obs, obs[ki-2][i][j][0][BATCH_SIZE*batch_index:top])
                        print batch_choice
                     
                        for pr in xrange(num_priors):
                            for lf in xrange(num_loss_funcs):
                                k_pred[pr*num_loss_funcs + lf][BATCH_SIZE*(batch_index):top] = get_predictiveness_array(batch_choice[pr][lf],  obs[ki-2][i][j][1], Pred_n, n_obs)
                        
                for prof in xrange(num_profiles):
                    pred_moments = get_moments(k_pred[prof], num_obs)
                    for m in xrange(len(pred_moments)):
                        k_Data[prof][m][i,j] = pred_moments[m]

                t1 = time.time()
                print 'single bin takes: ',(t1-t0)/60., ' minutes' 
        Statistics.append(k_Data)
        f = open('%s_k%d.pkl'%(name, ki), 'wb')
        pickle.dump(k_Data, f)
        f.close()
    
    return Statistics
Example #22
def build_model(prepared_data, clamp_L0=0.4, **kwargs):
    # ##########
    # STEP1: order the data properly so that we can read from it sequentially
    # when training the model

    subject_x, skill_x, correct_y, start_x, eeg_x, eeg_table, stim_pairs, train_idx, valid_idx = prepared_data
    N = len(correct_y)
    train_mask = idx_to_mask(train_idx, N)
    valid_mask = idx_to_mask(valid_idx, N)

    # sort data by subject and skill
    sorted_i = sorted(xrange(N), key=lambda i: (subject_x[i], skill_x[i], start_x[i]))
    skill_x = skill_x[sorted_i]
    subject_x = subject_x[sorted_i]
    correct_y = correct_y[sorted_i]
    start_x = start_x[sorted_i]
    train_mask = train_mask[sorted_i]
    valid_mask = valid_mask[sorted_i]
    train_idx = np.nonzero(train_mask)[0]
    valid_idx = np.nonzero(valid_mask)[0]

    n_skills = np.max(skill_x) + 1
    n_subjects = np.max(subject_x) + 1

    # prepare parameters
    p_T = 0.5
    p_G = 0.1
    p_S = 0.2
    if clamp_L0 is None:
        p_L0 = 0.7
    else:
        p_L0 = clamp_L0
    parameter_base = np.ones(n_skills)
    tp_L0, t_L0 = make_probability(parameter_base * p_L0, name='L0')
    tp_T, t_T = make_probability(parameter_base * p_T, name='p(T)')
    tp_G, t_G = make_probability(parameter_base * p_G, name='p(G)')
    tp_S, t_S = make_probability(parameter_base * p_S, name='p(S)')

    # declare and prepare variables for theano
    i = T.ivector('i')
    dummy_float = make_shared(0, name='dummy')
    skill_i, subject_i = T.iscalars('skill_i', 'subject_i')
    correct_y = make_shared(correct_y, to_int=True)

    def step(correct_i, prev_L, prev_p_C, P_T, P_S, P_G):
        Ln = prev_L + (1 - prev_L) * P_T
        p_C = prev_L * (1 - P_S) + (1 - prev_L) * P_G
        return Ln, p_C

    # set up theano functions
    ((results, p_C), updates) = theano.scan(fn=step,
                                            sequences=correct_y[i],
                                            outputs_info=[tp_L0[skill_i],
                                                          dummy_float],
                                            non_sequences=[tp_T[skill_i],
                                                           tp_S[skill_i],
                                                           tp_G[skill_i]])

    p_y = T.stack(1 - p_C, p_C)
    loss = neg_log_loss(p_y, correct_y[i])

    learning_rate = T.fscalar('learning_rate')
    if clamp_L0 is None:
        params = [t_T, t_L0]
    else:
        params = [t_T]
    update_parameters = [(param, param - learning_rate * T.grad(loss, param))
                         for param in params]

    tf_train = theano.function(inputs=[i, skill_i, learning_rate],
                               updates=update_parameters,
                               outputs=[loss, results, i],
                               allow_input_downcast=True)
    tf_valid = theano.function(inputs=[i, skill_i],
                               outputs=[loss, results, i],
                               allow_input_downcast=True)

    def f_train((i, (subject_i, skill_i)), learning_rate):
        return tf_train(i, skill_i, learning_rate)
Example #23
    def fit(self, train_X, optimizer, param_init=None, sample_every=None):
		self.opt = optimizer
		n_train, n_vis = train_X.shape
		batch_size = self.batch_size

		if sample_every is None:
			sample_every = 10000000

		#theano.config.profile = True
		#theano.config.exception_verbosity='high'

		assert n_vis == self.nv

		train_X = self.shared_dataset(train_X)
		n_batches = np.ceil(n_train / float(batch_size)).astype('int')

		# theano variables for managing data (index minibatches, n examples in batch)
		index, n_ex = T.iscalars('batch_index', 'n_ex')
		batch_start = index*batch_size
		batch_stop = T.minimum(n_ex, (index + 1)*batch_size)
		effective_batch_size = batch_stop - batch_start
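		# The last minibatch may be smaller than batch_size; clamping the stop
		# index to n_ex keeps the slice in range and gives the true batch size.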

		# theano variables for learning
		lr = T.scalar('lr', dtype=theano.config.floatX)
		mom = T.scalar('mom', dtype=theano.config.floatX)

		if self.k == 1:
			# this one is for scaning over a batch and getting connectivity for each example
			# return grads too because T.grads through scan is awful
			# takes ~3x longer, but can experiment connectivity
			#K, grads = self.mpf.rbm_K2G(self.X, effective_batch_size)

			# this tiles out the minibatch matrix into a 3D tensor to compute connectivity
			#K, offs, y, y1, z= self.mpf.rbm_K(self.X, effective_batch_size)
			K = self.mpf.rbm_K(self.X, effective_batch_size)

		elif self.k == 2:
			if DEBUG:
				return_values = self.mpf.debug_rbm_K_2wise(self.X, effective_batch_size)	
				K = return_values[-1]
			else:
				K = self.mpf.rbm_K_2wise(self.X, effective_batch_size)
		else:
			raise NotImplementedError

		reg = self.L1_reg * self.mpf.L1 + self.L2_reg * self.mpf.L2
		reg_grad = T.grad(reg, self.mpf.theta)

		# if not scan (tile out matrix into tensor)
		cost = K + reg
		grads = T.grad(cost, self.mpf.theta)

		# otherwise
		#grads = grads + reg_grad

		if param_init is None:
			self.mpf.theta.set_value(random_theta(D, DH, k=self.k))
		else:
			self.mpf.theta.set_value(np.asarray(np.concatenate(param_init), dtype=theano.config.floatX))

		if optimizer == 'sgd':
			updates = []
			theta = self.mpf.theta
			theta_update = self.mpf.theta_update

			upd = mom * theta_update - lr * grads
			updates.append((theta_update, upd))
			updates.append((theta, theta + upd))

			print 'compiling theano function'
			if DEBUG:
				return_values = list(return_values)
				return_values.append(cost)
				return_values.append(grads)
				train_model = theano.function(inputs=[index, n_ex, lr, mom], outputs=return_values, updates=updates, givens={self.X: train_X[batch_start:batch_stop]})
			else:
				train_model = theano.function(inputs=[index, n_ex, lr, mom], outputs=cost, updates=updates, givens={self.X: train_X[batch_start:batch_stop]})

			self.current_epoch = 0
			start = time.time()
			learning_rate_init = self.learning_rate
			while self.current_epoch < self.n_epochs:
				print 'epoch:', self.current_epoch
				self.current_epoch += 1
				effective_mom = self.final_momentum if self.current_epoch > self.momentum_switchover else self.initial_momentum

				avg_epoch_cost = 0
				last_debug = None
				for minibatch_idx in xrange(n_batches):
					avg_cost = train_model(minibatch_idx, n_train, self.learning_rate, effective_mom)
					#print '\t\t', np.isnan(gr).sum(), np.isnan(yy).sum(), np.isnan(yy1).sum(), np.isnan(zz).sum()
					if DEBUG:
						return_values, avg_cost, gradients = avg_cost[:-2], avg_cost[-2], avg_cost[-1]
						print_debug(return_values, last_debug)
						last_debug = return_values
					avg_epoch_cost += avg_cost
					#print '\t', minibatch_idx, avg_cost
				print '\t avg epoch cost:', avg_epoch_cost/n_batches
				self.learning_rate *= self.learning_rate_decay

				theta_fit = split_theta(self.mpf.theta.get_value(), self.mpf.n_visible, self.mpf.n_hidden, k=self.mpf.k)
				if (self.current_epoch % sample_every == 0):
					sample_and_save(theta_fit, self.mpf.n_hidden, self.current_epoch, learning_rate_init, self.mpf.k, self.opt)

			theta_opt = self.mpf.theta.get_value()
			end = time.time()

		elif optimizer == 'cg' or optimizer == 'bfgs':
			print "compiling theano functions"
			get_batch_size = theano.function([index, n_ex], effective_batch_size, name='get_batch_size')
			batch_cost_grads = theano.function([index, n_ex], [cost, grads], givens={self.X: train_X[batch_start:batch_stop, :]}, name='batch_cost_grads')
			batch_cost = theano.function([index, n_ex], cost, givens={self.X: train_X[batch_start:batch_stop, :]}, name='batch_cost')
			batch_grads = theano.function([index, n_ex], grads, givens={self.X: train_X[batch_start:batch_stop, :]}, name='batch_grads')


			def train_fn_cost_grads(theta_value):
				print 'nbatches', n_batches

				self.mpf.theta.set_value(np.asarray(theta_value, dtype=theano.config.floatX), borrow=True)
				train_losses_grads = [batch_cost_grads(i, n_train) for i in xrange(n_batches)]

				train_losses = [i[0] for i in train_losses_grads]
				train_grads = [i[1] for i in train_losses_grads]

				train_batch_sizes = [get_batch_size(i, n_train) for i in xrange(n_batches)]

				print len(train_losses), len(train_grads)
				print train_losses[0].shape, train_grads[0].shape
				returns = np.average(train_losses, weights=train_batch_sizes), np.average(train_grads, weights=train_batch_sizes, axis=0)
				return returns


			def train_fn_cost(theta_value):
				print 'nbatches', n_batches

				self.mpf.theta.set_value(np.asarray(theta_value, dtype=theano.config.floatX), borrow=True)
				train_costs = [batch_cost(i, n_train) for i in xrange(n_batches)]
				train_batch_sizes = [get_batch_size(i, n_train) for i in xrange(n_batches)]

				return np.average(train_costs, weights=train_batch_sizes)

			def train_fn_grads(theta_value):
				print 'nbatches', n_batches

				self.mpf.theta.set_value(np.asarray(theta_value, dtype=theano.config.floatX), borrow=True)
				train_grads = [batch_grads(i, n_train) for i in xrange(n_batches)]
				train_batch_sizes = [get_batch_size(i, n_train) for i in xrange(n_batches)]

				return np.average(train_grads, weights=train_batch_sizes, axis=0)


			###############
			# TRAIN MODEL #
			###############
			def my_callback(xk=None):
				print 'optimizer callback invoked'

			from scipy.optimize import minimize
			from scipy.optimize import fmin_bfgs, fmin_l_bfgs_b
			if optimizer == 'cg':
				pass
			elif optimizer == 'bfgs':
				print 'using bfgs'
				#theta_opt, f_theta_opt, info = fmin_l_bfgs_b(train_fn, self.mpf.theta.get_value(), iprint=1, maxfun=self.n_epochs)
				start = time.time()
				disp = True
				print 'ready to minimize'
				#result_obj = minimize(train_fn, self.mpf.theta.get_value(), jac=True, method='BFGS', options={'maxiter':self.n_epochs, 'disp':disp}, callback=my_callback())
				#theta_opt = fmin_bfgs(f=train_fn_cost, x0=self.mpf.theta.get_value(), fprime=train_fn_grads, disp=1, maxiter=self.n_epochs)
				theta_opt, f_opt, opt_info = fmin_l_bfgs_b(func=train_fn_cost, x0=self.mpf.theta.get_value(), fprime=train_fn_grads, disp=1, maxiter=self.n_epochs)
				print 'minimization complete'
				end = time.time()

		elif optimizer == 'sof':
			print "compiling theano functions"
			batch_cost_grads = theano.function([index, n_ex], [cost, grads], givens={self.X: train_X[batch_start:batch_stop, :]}, name='batch_cost_grads')
			batch_cost = theano.function([index, n_ex], cost, givens={self.X: train_X[batch_start:batch_stop, :]}, name='batch_cost')
			batch_grads = theano.function([index, n_ex], grads, givens={self.X: train_X[batch_start:batch_stop, :]}, name='batch_grads')


			def train_fn(theta_value, i):
				self.mpf.theta.set_value(np.asarray(theta_value, dtype=theano.config.floatX), borrow=True)

				train_losses, train_grads = batch_cost_grads(i, n_train)
				
				return train_losses, train_grads

			###############
			# TRAIN MODEL #
			###############
			if param_init is None:
				self.mpf.theta.set_value(random_theta(self.mpf.n_visible, self.mpf.n_hidden))
			else:
				w0, bh0, bv0 = param_init
				self.mpf.theta.set_value(np.asarray(np.concatenate((w0, bh0, bv0)), dtype=theano.config.floatX))


			print 'using sof'
			sys.path.append('/export/mlrg/ebuchman/Programming/Sum-of-Functions-Optimizer')
			from sfo import SFO
			print 'n batches', n_batches
			print 'n epochs' , self.n_epochs
			sfo_optimizer = SFO(train_fn, self.mpf.theta.get_value(), np.arange(n_batches))
			start = time.time()
			theta_opt = sfo_optimizer.optimize(num_passes=self.n_epochs)
			end = time.time()

		
		self.mpf.theta.set_value(theta_opt.astype(theano.config.floatX), borrow=True)
		return end-start
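
A hedged usage sketch (the enclosing class and helpers such as `random_theta` and `sample_and_save` are defined elsewhere in this project; `model` is illustrative):

model = ...   # an instance of the class that owns this fit() method
train_X = np.random.binomial(1, 0.5, size=(10000, model.nv)).astype(theano.config.floatX)
seconds = model.fit(train_X, optimizer='sgd')
print 'sgd training took', seconds, 'seconds'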
Example #24
0
    def __init__(self, inf=1e37):

        pos, vel = T.fmatrices(['pos', 'vel'])
        nc, N, n_steps = T.iscalars(['nc', 'N', 'n_steps'])
        ra, rb, re, r0 = T.fscalars(['ra', 'rb', 're', 'r0'])
        v0, j, b = T.fscalars(['v0', 'J', 'b'])

        # trng is assumed to be a module-level RandomStreams instance seeded elsewhere
        nu = trng.uniform(size=(N, 2), low=0.0, high=3.14159, dtype='floatX')

        def distance_tensor(X):
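            # pairwise displacements: E[i, j] = X[i] - X[j]; D[i, j] = ||X[i] - X[j]||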
            E = X.reshape((X.shape[0], 1, -1)) - X.reshape((1, X.shape[0], -1))
            D = T.sqrt(T.sum(T.square(E), axis=2))
            return D

        def direction_tensor(X):
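            # unit direction vectors (X[i] - X[j]) / ||X[i] - X[j]||; adding the identity to L avoids dividing by zero on the diagonal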
            E = X.reshape((X.shape[0], 1, -1)) - X.reshape((1, X.shape[0], -1))
            L = T.sqrt(T.sum(T.square(E), axis=2))
            L = T.pow(L + T.identity_like(L), -1)
            L = T.stack([L, L, L], axis=2)
            return L * E

        def neighbourhood(X):
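            # argsort along axis 0 ranks the neighbours of each point by distance; row 0 is the point itself, so rows 1..nc are its nc nearest neighbours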
            D = distance_tensor(X)
            N = T.argsort(D, axis=0)
            mask = T.cast(T.lt(N, nc), 'float32')
            return N[1:nc + 1], mask

        def alignment(X, Y):
            n, d = neighbourhood(X)
            return T.sum(Y[n], axis=0)

        def cohesion(X, inf=100.0):
            D = distance_tensor(X)
            E = direction_tensor(X)
            n, d = neighbourhood(X)

            F = T.zeros_like(E)
            D = T.stack([D, D, D], axis=2)
            d = T.stack([d, d, d], axis=2)

            c1 = T.lt(D, rb)
            c2 = T.and_(T.gt(D, rb), T.lt(D, ra))
            c3 = T.and_(T.gt(D, ra), T.lt(D, r0))

            F = T.set_subtensor(F[c1], -E[c1])
            F = T.set_subtensor(F[c2], 0.25 * (D[c2] - re) / (ra - re) * E[c2])
            F = T.set_subtensor(F[c3], E[c3])

            return T.sum(d * F, axis=0)

        def perturbation(nu=nu):
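            # one random unit vector per particle, built from the pre-sampled spherical angles phi = nu[:, 0], theta = 2 * nu[:, 1]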
            phi = nu[:, 0]
            theta = 2.0 * nu[:, 1]

            return T.stack([
                T.sin(theta) * T.sin(phi),
                T.cos(theta) * T.sin(phi),
                T.cos(phi),
            ], axis=1)

        def step(X, dX):
            X_ = X + dX
            V_ = j * nc / v0 * alignment(X, dX) + b * cohesion(X) + nc * perturbation()
            # normalise every velocity to the common speed v0
            dV = T.sqrt(T.sum(T.square(V_), axis=1)).dimshuffle(0, 'x')
            V = v0 * V_ / dV

            return T.cast(X_, 'float32'), T.cast(V, 'float32')

        def probability(X, Y):
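            # Boltzmann-style weight for each particle from its alignment with its nc nearest neighbours, normalised to a distribution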
            n, d = neighbourhood(X)
            vDv = T.batched_dot(Y[n].swapaxes(0, 1), Y)
            p = T.exp((j / 2.0) * T.sum(vDv, axis=1))

            return p / T.sum(p)

        sim, update = theano.scan(step,
                                  outputs_info=[pos, vel],
                                  n_steps=n_steps)

        pos_, vel_ = sim

        mean_final_velocity = 1 / (N * v0) * T.sqrt(
            T.sum(T.square(T.sum(vel_[-1], axis=0))))

        particle_probability = probability(pos_[-1], vel_[-1])

        self.f = theano.function(
            [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps], [pos_, vel_],
            allow_input_downcast=True)

        self.g = theano.function(
            [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps],
            mean_final_velocity,
            allow_input_downcast=True)

        self.h = theano.function(
            [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps],
            particle_probability,
            allow_input_downcast=True)
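
A hedged usage sketch (the class name `Swarm` is illustrative, and `trng` must be a seeded RandomStreams instance at module level); the compiled functions take their inputs in the order [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps]:

import numpy as np

n_particles, n_steps = 128, 200
pos0 = np.random.randn(n_particles, 3).astype('float32')
vel0 = np.random.randn(n_particles, 3).astype('float32')

swarm = Swarm()
positions, velocities = swarm.f(pos0, vel0, 7, 0.8, 0.2, 1.0, 0.5, 0.1, 0.05, 1.0, n_particles, n_steps)
mean_v = swarm.g(pos0, vel0, 7, 0.8, 0.2, 1.0, 0.5, 0.1, 0.05, 1.0, n_particles, n_steps)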
Example #25
0
File: relbox.py  Project: pdasigi/relbox
    def __init__(self, num_words, num_rels, vocab_embed_size, lr=0.01, tensor_activation=T.tanh, num_noise_samples=1, init_dense_vocab=None):
        numpy_rng = numpy.random.RandomState(89677)
        theano_rng = RandomStreams(12783)
        rng_box_limit = 4 * numpy.sqrt(6. / (vocab_embed_size + vocab_embed_size + num_rels))
        rng_box_low = 0
        rng_box_high = rng_box_limit
        init_box = numpy.asarray(numpy_rng.uniform(low=rng_box_low, high=rng_box_high, size=(vocab_embed_size, vocab_embed_size, num_rels)))
        rng_proj_low = -4 * numpy.sqrt(6. / (num_words + vocab_embed_size))
        rng_proj_high = 4 * numpy.sqrt(6. / (num_words + vocab_embed_size))
        if init_dense_vocab is None:
            init_dense_vocab = numpy.asarray(numpy_rng.uniform(low=rng_proj_low, high=rng_proj_high, size=(num_words, vocab_embed_size)))
        init_rev_dense_vocab = numpy.asarray(numpy_rng.uniform(low=rng_proj_low, high=rng_proj_high, size=(vocab_embed_size, num_words)))
        self.B = theano.shared(value=init_box, name='B')
        self.P = theano.shared(value=init_dense_vocab, name='P')
        self.P_hat = theano.shared(value=init_rev_dense_vocab, name='P_hat')
        self.vocab = T.eye(num_words)
        word_activation = T.nnet.softmax
        self.rel = T.eye(num_rels)
        rel_activation = T.nnet.softmax

        self.lr = lr

        self.x_ind, self.y_ind, self.r_ind = T.iscalars('x_ind', 'y_ind', 'r_ind')
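        # one-hot rows of the identity matrices select a word / relation; P projects words to dense embeddings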
        x = self.vocab[self.x_ind]
        self.x_rep = T.dot(x, self.P)
        y = self.vocab[self.y_ind]
        self.y_rep = T.dot(y, self.P)
        r = self.rel[self.r_ind]
        # Assumption: Corresponding dimensions: 0 -> x, 1 -> y, 2 -> r
        # TODO: Where do we apply activations? Do we have to, at all?
        pred_xy = tensor_activation(T.tensordot(r, self.B, axes=(0,2)))
        pred_y = T.dot(T.tensordot(self.x_rep, pred_xy, axes=(0,0)), self.P_hat)
        self.prob_y = word_activation(pred_y)
        pred_x = T.dot(T.tensordot(self.y_rep, pred_xy, axes=(0,1)), self.P_hat)
        self.prob_x = word_activation(pred_x)
        pred_yr = tensor_activation(T.tensordot(self.x_rep, self.B, axes=(0,0)))
        self.prob_r = rel_activation(T.tensordot(self.y_rep, pred_yr, axes=(0,0)))

        self.score = T.dot(y, T.dot(T.tensordot(self.x_rep, T.tensordot(r, self.B, axes=(0,2)), axes=(0,0)), self.P_hat).T)
        # y \times (((x \times P) \times (r \otimes B)) \times P_hat)
        rand_margin_score = T.constant(0)
        noise_log_likelihood = T.constant(0)
        # The noise distribution treats words and the relation as independent of each other, so the
        # right tuple and a corrupted tuple are equally probable under it.
        noise_prob = num_noise_samples/float(num_words * num_words * num_rels)
        rand_x_ind = theano_rng.random_integers(low=0, high=num_words-1)
        rand_y_ind = theano_rng.random_integers(low=0, high=num_words-1)
        rand_r_ind = theano_rng.random_integers(low=0, high=num_rels-1)
        rand_x = self.vocab[rand_x_ind]
        rand_x_rep = T.dot(rand_x, self.P)
        rand_y = self.vocab[rand_y_ind]
        rand_y_rep = T.dot(rand_y, self.P)
        rand_r = self.rel[rand_r_ind]
        rand_score = T.dot(rand_y, T.dot(T.tensordot(rand_x_rep, T.tensordot(rand_r, self.B, axes=(0,2)), axes=(0,0)), self.P_hat).T)
        for _ in range(num_noise_samples):
            rand_margin_score += rand_score
            noise_log_likelihood += T.log(noise_prob/(T.abs_(rand_score) + noise_prob))
        self.nce_margin_loss = T.maximum(0, 1 - self.score + rand_margin_score)
        
        # NCE negative log-likelihood:
        #   -[ log(score / (score + num_noise_samples * noise_prob)) + sum_{i=1}^{k} log(noise_prob / (rand_score + noise_prob)) ]
        self.nce_prob_loss = -(T.log(T.abs_(self.score)/(T.abs_(self.score) + noise_prob)) + noise_log_likelihood)
        self.cost_inputs = [self.x_ind, self.y_ind, self.r_ind]
        self.params = [self.B, self.P, self.P_hat]

        self.x_loss = self.ce(x, self.prob_x)
        self.y_loss = self.ce(y, self.prob_y)
        self.r_loss = self.ce(r, self.prob_r)
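
A hedged sketch of one SGD step on the NCE loss (the class name `RelBox` is illustrative, and `self.ce` is assumed to be a cross-entropy helper defined elsewhere in relbox.py):

box = RelBox(num_words=5000, num_rels=20, vocab_embed_size=50)
updates = [(p, p - box.lr * T.grad(box.nce_prob_loss, p)) for p in box.params]
train_step = theano.function(inputs=box.cost_inputs, outputs=box.nce_prob_loss, updates=updates)
loss = train_step(12, 345, 3)    # x_ind, y_ind, r_ind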