Exemplo n.º 1
0
    def test_ones(self):
        """Stress-test large-array allocation on an MPI device.

        Repeatedly allocates a 10000x10000 zero matrix, waiting for all
        pending operations after each allocation, then dumps the profiler
        counters.  (The original body mixed tabs and spaces, which Python 3
        rejects with TabError, and carried an unused local.)
        """
        owl.set_device(owl.create_mpi_device(1, 1))
        for _ in range(1000):
            owl.zeros([10000, 10000])
            owl.wait_for_all()
        owl.print_profiler_result()
Exemplo n.º 2
0
    def weight_update(self, base_lr, base_weight_decay, momentum, batch_size):
        ''' Update the weight & bias

        Using following formula:

        ``$_delta = momentum * $_delta - (base_lr * $_lr / batch_size) * $_grad - (base_lr * $_lr * base_wd * $_wd) * $``

        , where ``$`` could be either ``weight`` or ``bias``.
        '''
        # Lazily allocate the momentum buffer on first use.  PEP 8: compare
        # to None with ``is``, not ``==`` (the original used ``== None``).
        if self.weightdelta is None:
            self.weightdelta = owl.zeros(self.weightgrad.shape)

        self.weightdelta = momentum * self.weightdelta \
                        - (base_lr * self.blobs_lr[0] / batch_size) * self.weightgrad \
                        - (base_lr * self.blobs_lr[0] * base_weight_decay * self.weight_decay[0]) * self.weight
        self.weight = self.weight + self.weightdelta
        # Clear the consumed gradient so a stale value cannot be reused.
        self.weightgrad = None

        if self.biasdelta is None:
            self.biasdelta = owl.zeros(self.biasgrad.shape)

        self.biasdelta = momentum * self.biasdelta \
                        - (base_lr * self.blobs_lr[1] / batch_size) * self.biasgrad \
                        - (base_lr * self.blobs_lr[1] * base_weight_decay * self.weight_decay[1]) * self.bias
        self.bias = self.bias + self.biasdelta
        self.biasgrad = None
Exemplo n.º 3
0
    def weight_update(self, base_lr, base_weight_decay, momentum, batch_size):
        ''' Update the weight & bias

        Using following formula:

        ``$_delta = momentum * $_delta - (base_lr * $_lr / batch_size) * $_grad - (base_lr * $_lr * base_wd * $_wd) * $``

        , where ``$`` could be either ``weight`` or ``bias``.
        '''
        # Lazily allocate the momentum buffer on first use.  PEP 8: compare
        # to None with ``is``, not ``==`` (the original used ``== None``).
        if self.weightdelta is None:
            self.weightdelta = owl.zeros(self.weightgrad.shape)

        self.weightdelta = momentum * self.weightdelta \
                        - (base_lr * self.lr_mult_w / batch_size) * self.weightgrad \
                        - (base_lr * self.lr_mult_w * base_weight_decay * self.decay_mult_w) * self.weight

        self.weight = self.weight + self.weightdelta
        # Clear the consumed gradient so a stale value cannot be reused.
        self.weightgrad = None

        if self.biasdelta is None:
            self.biasdelta = owl.zeros(self.biasgrad.shape)

        self.biasdelta = momentum * self.biasdelta \
                        - (base_lr * self.lr_mult_b / batch_size) * self.biasgrad \
                        - (base_lr * self.lr_mult_b * base_weight_decay * self.decay_mult_b) * self.bias
        self.bias = self.bias + self.biasdelta
        self.biasgrad = None
Exemplo n.º 4
0
    def weight_update(self, base_lr, base_weight_decay, momentum, batch_size):
        """ Update the weight & bias

        Using following formula:

        ``$_delta = momentum * $_delta - (base_lr * $_lr / batch_size) * $_grad - (base_lr * $_lr * base_wd * $_wd) * $``

        , where ``$`` could be either ``weight`` or ``bias``.
        """
        # Lazily allocate the momentum buffer on first use.  PEP 8: compare
        # to None with ``is``, not ``==`` (the original used ``== None``).
        if self.weightdelta is None:
            self.weightdelta = owl.zeros(self.weightgrad.shape)

        self.weightdelta = (
            momentum * self.weightdelta
            - (base_lr * self.lr_mult_w / batch_size) * self.weightgrad
            - (base_lr * self.lr_mult_w * base_weight_decay * self.decay_mult_w) * self.weight
        )

        self.weight = self.weight + self.weightdelta
        # Clear the consumed gradient so a stale value cannot be reused.
        self.weightgrad = None

        if self.biasdelta is None:
            self.biasdelta = owl.zeros(self.biasgrad.shape)

        self.biasdelta = (
            momentum * self.biasdelta
            - (base_lr * self.lr_mult_b / batch_size) * self.biasgrad
            - (base_lr * self.lr_mult_b * base_weight_decay * self.decay_mult_b) * self.bias
        )
        self.bias = self.bias + self.biasdelta
        self.biasgrad = None
Exemplo n.º 5
0
    def __init__(self, input_size, hidden_size, output_size):
        """Allocate LSTM parameters: per-gate input weights, recurrent
        weights, gate biases, and the decoder layer.

        Weight matrices are drawn from N(0, 0.1); biases start at zero.
        """
        self.Layers = [input_size, hidden_size, output_size]
        in_dim, hid_dim, out_dim = self.Layers
        # Recurrent weights: take x_t, h_{t-1}, and bias unit
        # and produce the 3 gates and the input to cell signal

        # Input-to-hidden weights for the input/forget/output gates and the
        # candidate cell signal.
        self.ig_weight_data = owl.randn([in_dim, hid_dim], 0.0, 0.1)
        self.fg_weight_data = owl.randn([in_dim, hid_dim], 0.0, 0.1)
        self.og_weight_data = owl.randn([in_dim, hid_dim], 0.0, 0.1)
        self.ff_weight_data = owl.randn([in_dim, hid_dim], 0.0, 0.1)

        # Hidden-to-hidden (recurrent) weights, one matrix per gate.
        self.ig_weight_prev = owl.randn([hid_dim, hid_dim], 0.0, 0.1)
        self.fg_weight_prev = owl.randn([hid_dim, hid_dim], 0.0, 0.1)
        self.og_weight_prev = owl.randn([hid_dim, hid_dim], 0.0, 0.1)
        self.ff_weight_prev = owl.randn([hid_dim, hid_dim], 0.0, 0.1)

        # Gate biases start at zero.
        self.ig_weight_bias = owl.zeros([hid_dim, 1])
        self.fg_weight_bias = owl.zeros([hid_dim, 1])
        self.og_weight_bias = owl.zeros([hid_dim, 1])
        self.ff_weight_bias = owl.zeros([hid_dim, 1])

        # Decoder weights (e.g. mapping to vocabulary)
        self.decoder_weights = owl.randn([hid_dim, out_dim], 0.0, 0.1)
        self.decoder_bias = owl.zeros([output_size, 1])
Exemplo n.º 6
0
    def weight_update(self, base_lr, base_weight_decay, momentum, batch_size):
        """Apply one momentum-SGD step to this layer's weight and bias.

        ``delta = momentum * delta - (base_lr * lr / batch_size) * grad
        - (base_lr * lr * base_weight_decay * wd) * param``

        Gradients are cleared (set to None) once consumed.
        """
        # Lazily allocate the momentum buffer on first use.  PEP 8: compare
        # to None with ``is``, not ``==`` (the original used ``== None``).
        if self.weightdelta is None:
            self.weightdelta = owl.zeros(self.weightgrad.shape)

        self.weightdelta = momentum * self.weightdelta - (base_lr * self.blobs_lr[0] / batch_size) * self.weightgrad  - (base_lr * self.blobs_lr[0] * base_weight_decay * self.weight_decay[0]) * self.weight
        self.weight = self.weight + self.weightdelta
        self.weightgrad = None

        if self.biasdelta is None:
            self.biasdelta = owl.zeros(self.biasgrad.shape)

        self.biasdelta = momentum * self.biasdelta - (base_lr * self.blobs_lr[1] / batch_size) * self.biasgrad - (base_lr * self.blobs_lr[1] * base_weight_decay * self.weight_decay[1]) * self.bias
        self.bias = self.bias + self.biasdelta
        self.biasgrad = None
Exemplo n.º 7
0
    def __init__(self, vocab_size, input_size, hidden_size):
        output_size = vocab_size
        self.Layers = [input_size, hidden_size, output_size]
        print 'Model size:', self.Layers
        # Recurrent weights: take x_t, h_{t-1}, and bias unit
        # and produce the 3 gates and the input to cell signal

        # self.WIFOG = owl.randn([self.Layers[0] + self.Layers[1], self.Layers[1] * 4], 0.0, 0.1)
        # self.BIFOG = owl.zeros([self.Layers[1] * 4, 1])

        self.ig_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0,
                                        0.1)
        self.fg_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0,
                                        0.1)
        self.og_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0,
                                        0.1)
        self.ff_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0,
                                        0.1)

        self.ig_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)
        self.fg_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)
        self.og_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)
        self.ff_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)

        self.ig_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)
        self.fg_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)
        self.og_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)
        self.ff_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0,
                                        0.1)

        self.ig_weight_bias = owl.zeros([self.Layers[1], 1])
        self.fg_weight_bias = owl.zeros([self.Layers[1], 1])
        self.og_weight_bias = owl.zeros([self.Layers[1], 1])
        self.ff_weight_bias = owl.zeros([self.Layers[1], 1])

        # Decoder weights (e.g. mapping to vocabulary)
        self.decoder_weights = owl.randn([self.Layers[2], self.Layers[1]], 0.0,
                                         0.1)  # decoder
        self.decoder_bias = owl.zeros([output_size, 1])

        self.emb_weight = [None] * vocab_size
        for i in range(vocab_size):
            self.emb_weight[i] = owl.randn([input_size, 1], 0.0, 0.1)
Exemplo n.º 8
0
 def __init__(self, data_file='mnist_all.mat', num_epochs=100, mb_size=256, eps_w=0.01, eps_b=0.01):
     """Set up devices, hyper-parameters, and randomly initialized
     weights/biases for a 784-256-10 MLP.

     Weights use N(0, sqrt(4 / (fan_in + fan_out))); biases start at zero.
     """
     self.cpu = owl.create_cpu_device()
     self.gpu = owl.create_gpu_device(0)
     self.data_file = data_file
     self.num_epochs = num_epochs
     self.mb_size = mb_size
     self.eps_w = eps_w
     self.eps_b = eps_b
     # Layer sizes: input (28*28 pixels), hidden, output.
     l1 = 784
     l2 = 256
     l3 = 10
     self.l1 = l1
     self.l2 = l2
     self.l3 = l3
     self.w1 = owl.randn([l2, l1], 0.0, math.sqrt(4.0 / (l1 + l2)))
     self.w2 = owl.randn([l3, l2], 0.0, math.sqrt(4.0 / (l2 + l3)))
     self.b1 = owl.zeros([l2, 1])
     self.b2 = owl.zeros([l3, 1])
Exemplo n.º 9
0
    def weight_update(self, base_lr, base_weight_decay, momentum, batch_size):
        """Apply one momentum-SGD step to this layer's weight and bias.

        ``delta = momentum * delta - (base_lr * lr / batch_size) * grad
        - (base_lr * lr * base_weight_decay * wd) * param``

        Consumed gradients are reset to None afterwards.
        """
        #TODO: need recheck with caffe with what's the multiplier for weight decay
        # Lazily allocate the momentum buffer on first use.  PEP 8: compare
        # to None with ``is``, not ``==`` (the original used ``== None``).
        if self.weightdelta is None:
            self.weightdelta = owl.zeros(self.weightgrad.shape)

        self.weightdelta = momentum * self.weightdelta - (
            base_lr * self.blobs_lr[0] / batch_size) * self.weightgrad - (
                base_lr * self.blobs_lr[0] * base_weight_decay *
                self.weight_decay[0]) * self.weight
        self.weight = self.weight + self.weightdelta
        self.weightgrad = None

        if self.biasdelta is None:
            self.biasdelta = owl.zeros(self.biasgrad.shape)

        self.biasdelta = momentum * self.biasdelta - (
            base_lr * self.blobs_lr[1] / batch_size) * self.biasgrad - (
                base_lr * self.blobs_lr[1] * base_weight_decay *
                self.weight_decay[1]) * self.bias
        self.bias = self.bias + self.biasdelta
        self.biasgrad = None
Exemplo n.º 10
0
 def init_random(self):
     """Randomly initialize the two convolution layers and the
     fully-connected layer, plus zeroed delta (momentum) buffers of
     matching shapes."""
     weight_shapes = ([5, 5, 1, 16], [5, 5, 16, 32], [10, 512])
     bias_shapes = ([16], [32], [10, 1])
     self.weights = [owl.randn(shape, 0.0, 0.1) for shape in weight_shapes]
     self.weightdelta = [owl.zeros(shape) for shape in weight_shapes]
     self.bias = [owl.zeros(shape) for shape in bias_shapes]
     self.biasdelta = [owl.zeros(shape) for shape in bias_shapes]
Exemplo n.º 11
0
	def __init__(self, vocab_size, input_size, hidden_size):
		output_size = vocab_size
		self.Layers = [input_size, hidden_size, output_size]
                print 'Model size:', self.Layers
		# Recurrent weights: take x_t, h_{t-1}, and bias unit
		# and produce the 3 gates and the input to cell signal

		# self.WIFOG = owl.randn([self.Layers[0] + self.Layers[1], self.Layers[1] * 4], 0.0, 0.1)
		# self.BIFOG = owl.zeros([self.Layers[1] * 4, 1])

		self.ig_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0, 0.1)
		self.fg_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0, 0.1)
		self.og_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0, 0.1)
		self.ff_weight_data = owl.randn([self.Layers[1], self.Layers[0]], 0.0, 0.1)

		self.ig_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)
		self.fg_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)
		self.og_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)
		self.ff_weight_prev = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)

		self.ig_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)
		self.fg_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)
		self.og_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)
		self.ff_weight_cell = owl.randn([self.Layers[1], self.Layers[1]], 0.0, 0.1)

		self.ig_weight_bias = owl.zeros([self.Layers[1], 1])
		self.fg_weight_bias = owl.zeros([self.Layers[1], 1])
		self.og_weight_bias = owl.zeros([self.Layers[1], 1])
		self.ff_weight_bias = owl.zeros([self.Layers[1], 1])

		# Decoder weights (e.g. mapping to vocabulary)
		self.decoder_weights = owl.randn([self.Layers[2], self.Layers[1]], 0.0, 0.1) # decoder
		self.decoder_bias = owl.zeros([output_size, 1])

		self.emb_weight = [None] * vocab_size
		for i in range(vocab_size):
			self.emb_weight[i] = owl.randn([input_size, 1], 0.0, 0.1)
Exemplo n.º 12
0
 def init_random(self):
     """Randomly initialize convolution/FC weights (N(0, 0.1)) and zero
     the corresponding delta buffers and biases."""
     w_shapes = [[5, 5, 1, 16], [5, 5, 16, 32], [10, 512]]
     b_shapes = [[16], [32], [10, 1]]
     self.weights = [owl.randn(s, 0.0, 0.1) for s in w_shapes]
     self.weightdelta = [owl.zeros(s) for s in w_shapes]
     self.bias = [owl.zeros(s) for s in b_shapes]
     self.biasdelta = [owl.zeros(s) for s in b_shapes]
Exemplo n.º 13
0
    def init_random(self):
        """Randomly initialize VGG-style weights, biases, and their delta
        (momentum) buffers.

        Convolution kernels use N(0, 0.01); the first two fully-connected
        layers use N(0, 0.005) and the classifier N(0, 0.01).  The
        [1, 1, 1, 1] entries look like placeholders between stages --
        presumably at pooling positions; unverified.
        """
        conv_shapes = [
            [3, 3, 3, 64], [3, 3, 64, 64], [1, 1, 1, 1],
            [3, 3, 64, 128], [3, 3, 128, 128], [1, 1, 1, 1],
            [3, 3, 128, 256], [3, 3, 256, 256], [3, 3, 256, 256], [1, 1, 1, 1],
            [3, 3, 256, 512], [3, 3, 512, 512], [3, 3, 512, 512], [1, 1, 1, 1],
            [3, 3, 512, 512], [3, 3, 512, 512], [3, 3, 512, 512], [1, 1, 1, 1],
        ]
        fc_specs = [
            ([4096, 25088], 0.005),
            ([4096, 4096], 0.005),
            ([1000, 4096], 0.01),
        ]
        all_shapes = conv_shapes + [shape for shape, _ in fc_specs]

        self.weights = (
            [owl.randn(shape, 0.0, 0.01) for shape in conv_shapes]
            + [owl.randn(shape, 0.0, std) for shape, std in fc_specs]
        )
        self.weightsdelta = [owl.zeros(shape) for shape in all_shapes]

        bias_shapes = (
            [[64]] * 3 + [[128]] * 3 + [[256]] * 4 + [[512]] * 8
            + [[4096, 1], [4096, 1], [1000, 1]]
        )
        self.bias = [owl.zeros(shape) for shape in bias_shapes]
        self.biasdelta = [owl.zeros(shape) for shape in bias_shapes]
Exemplo n.º 14
0
def LSTM_train(model,
               sents,
               vocab_size,
               words,
               NUM_EPOCHS=100,
               tanhC_version=1):

    # Constants
    ALPHA = 1  # Learning rate
    N = 10  # Number of units
    learning_rate = 1

    K = vocab_size  # Vocabulary size

    # For each epoch
    last_ll = 1e99
    last_time = time.time()
    for epoch_id in range(1, NUM_EPOCHS + 1):
        epoch_ll = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print "sent_id",sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####
            Tau = len(sent)
            sent_ll = 0  # Sentence log likelihood
            batch_size = Tau

            data = [None] * Tau
            prev = [None] * Tau
            embed = np.zeros((K, 1))
            embed[sent[0]] = 1
            data[0] = owl.from_numpy(embed).trans()

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            Ym = [None] * Tau
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[1],
                             model.Layers[2]])  #Hout.transpose().dot(dY)
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())

            ##### Forward pass #####
            # For each time step
            for t in range(1, Tau):
                prev[t] = Hout[t - 1]
                embed = np.zeros((K, 1))
                embed[sent[t]] = 1
                data[t] = owl.from_numpy(embed).trans()

                act_ig[t] = model.ig_weight_data.trans() * data[
                    t - 1] + model.ig_weight_prev.trans(
                    ) * prev[t] + model.ig_weight_bias
                act_fg[t] = model.fg_weight_data.trans() * data[
                    t - 1] + model.fg_weight_prev.trans(
                    ) * prev[t] + model.fg_weight_bias
                act_og[t] = model.og_weight_data.trans() * data[
                    t - 1] + model.og_weight_prev.trans(
                    ) * prev[t] + model.og_weight_bias
                act_ff[t] = model.ff_weight_data.trans() * data[
                    t - 1] + model.ff_weight_prev.trans(
                    ) * prev[t] + model.ff_weight_bias

                act_ig[t] = ele.sigm(act_ig[t])
                act_fg[t] = ele.sigm(act_fg[t])
                act_og[t] = ele.sigm(act_og[t])
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])
                Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                                model.decoder_bias)

                dY[t] = data[t] - Ym[t]
                dBd += dY[t] / batch_size
                dWd += Hout[t] * dY[t].trans() / batch_size
                dHout[t] = model.decoder_weights * dY[t]

                #print "Y_0[t]",Y_o[t]
                #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
                #print np.sum(output.to_numpy())
                # output = Ym[t].trans() * data[t]
                # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Initialize gradient vectors #####
            for t in range(1, Tau):
                output = Ym[t].trans() * data[t]
                sent_ll += math.log10(max(np.sum(output.to_numpy()), 1e-20))

            sen_ig = [None] * Tau
            sen_fg = [None] * Tau
            sen_og = [None] * Tau
            sen_ff = [None] * Tau

            weight_update_ig_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dHin = owl.zeros([model.Layers[1], model.Layers[1]])
            dC = [None] * Tau
            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, len(sent))):
                #print "sent",sent
                #print "t",t
                if tanhC_version:
                    tanhCt = ele.tanh(C[t])
                    sen_og[t] = ele.mult(tanhCt, dHout[t])
                    dC[t] += ele.mult((1 - ele.mult(tanhCt, tanhCt)),
                                      ele.mult(act_og[t], dHout[t]))
                else:
                    sen_og[t] = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                sen_fg[t] = owl.zeros([model.Layers[1], 1])
                if t > 0:
                    sen_fg[t] = ele.mult(C[t - 1], dC[t])
                    dC[t - 1] += ele.mult(act_og[t], dC[t])
                sen_ig[t] = ele.mult(act_ff[t], dC[t])
                sen_ff[t] = ele.mult(act_ig[t], dC[t])

                # backprop activation functions
                sen_ff[t] = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])),
                                     sen_ff[t])
                sen_ig[t] = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                     sen_ig[t])
                sen_fg[t] = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                     sen_fg[t])
                sen_og[t] = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                     sen_og[t])

                # backprop matrix multiply
                weight_update_ig_data += data[t] * sen_ig[t].trans()
                weight_update_ig_prev += prev[t] * sen_ig[t].trans()
                weight_update_fg_bias += sen_ig[t]  # sen_ig[t].sum(0 or 1)

                weight_update_fg_data += data[t] * sen_fg[t].trans()
                weight_update_fg_prev += prev[t] * sen_fg[t].trans()
                weight_update_fg_bias += sen_fg[t]

                weight_update_og_data += data[t] * sen_og[t].trans()
                weight_update_og_prev += prev[t] * sen_og[t].trans()
                weight_update_og_bias += sen_og[t]

                weight_update_ff_data += data[t] * sen_ff[t].trans()
                weight_update_ff_prev += prev[t] * sen_ff[t].trans()
                weight_update_ff_bias += sen_ff[t]

                if t > 1:
                    dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig[t]
                    dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg[t]
                    dHout[t - 1] += model.og_weight_prev.trans() * sen_og[t]
                    dHout[t - 1] += model.ff_weight_prev.trans() * sen_ff[t]

            # normalize the gradients
            # dWLSTM /= batch_size
            weight_update_ig_prev /= batch_size
            weight_update_ig_data /= batch_size
            weight_update_ig_bias /= batch_size

            weight_update_fg_prev /= batch_size
            weight_update_fg_data /= batch_size
            weight_update_fg_bias /= batch_size

            weight_update_og_prev /= batch_size
            weight_update_og_data /= batch_size
            weight_update_og_bias /= batch_size

            weight_update_ff_prev /= batch_size
            weight_update_ff_data /= batch_size
            weight_update_ff_bias /= batch_size

            # weight update
            model.ig_weight_prev += learning_rate * weight_update_ig_prev
            model.ig_weight_data += learning_rate * weight_update_ig_data
            model.ig_weight_bias += learning_rate * weight_update_ig_bias

            model.fg_weight_prev += learning_rate * weight_update_fg_prev
            model.fg_weight_data += learning_rate * weight_update_fg_data
            model.fg_weight_bias += learning_rate * weight_update_fg_bias

            model.og_weight_prev += learning_rate * weight_update_og_prev
            model.og_weight_data += learning_rate * weight_update_og_data
            model.og_weight_bias += learning_rate * weight_update_og_bias

            model.ff_weight_prev += learning_rate * weight_update_ff_prev
            model.ff_weight_data += learning_rate * weight_update_ff_data
            model.ff_weight_bias += learning_rate * weight_update_ff_bias

            model.decoder_weights += learning_rate * dWd
            model.decoder_bias += learning_rate * dBd

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))
        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 10**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        if last_ll > epoch_ll:
            learning_rate /= 2.0
        last_ll = epoch_ll
        last_time = cur_time
Exemplo n.º 15
0
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version = 1):
	"""Train an embedding-input LSTM language model with SGD, in place.

	For each position t the model predicts word t of a sentence from word
	t-1's embedding.  The per-word log-likelihood is taken in base 2, so
	the reported perplexity is ``2 ** (-LL / words)``.  Gradients are
	normalized by sentence length (``rate = learning_rate / Tau``) before
	the descent update.  Returns ``(model, learning_rate)``.

	NOTE(review): the gates include ``*_weight_cell * C`` terms, i.e. this
	looks like a peephole LSTM -- confirm against the model class.
	"""

	# Constants
	N = model.Layers[1]       # Number of units
	K = model.Layers[2]       # Vocabulary size

	last_time = time.time()
	# For each epoch
	for epoch_id in range(1, EPOCH + 1):
		epoch_ll = 0
		# For each sentence
		for sent_id, sent in enumerate(sents):
			#print sent_id
			#print "sent", sent
			#print "sents", sents
			##### Initialize activations #####

			Tau = len(sent)
			sent_ll = 0 # Sentence log likelihood

			data = [None] * Tau

			Hout = [None] * Tau
			Hout[0] = owl.zeros([N, 1])

			act_ig = [None] * Tau
			act_fg = [None] * Tau
			act_og = [None] * Tau
			act_ff = [None] * Tau

			C = [None] * Tau
			C[0] = owl.zeros([N, 1])
			dY = [None] * Tau

			# Decoder gradients, accumulated over all time steps.
			dBd = owl.zeros([model.Layers[2], 1]) #dY.sum(0)
			dWd = owl.zeros([model.Layers[2], model.Layers[1]])
			dHout = [None] * Tau #dY.dot(model.decoder_weights.transpose())
			dEmb = [None] * Tau

			##### Forward pass #####
			# For each time step

			for t in range(1, Tau):
				# predict the (t+1)'th word from the t'th word
				data[t] = model.emb_weight[sent[t - 1]]
				NVector = np.zeros((K, 1))
				NVector[sent[t]] = 1
				target = owl.from_numpy(NVector).trans()

				act_ig[t] = model.ig_weight_data * data[t] + model.ig_weight_prev * Hout[t - 1] + model.ig_weight_cell * C[t - 1] + model.ig_weight_bias
				act_ig[t] = ele.sigm(act_ig[t])

				act_fg[t] = model.fg_weight_data * data[t] + model.fg_weight_prev * Hout[t - 1] + model.fg_weight_cell * C[t - 1] + model.fg_weight_bias
				act_fg[t] = ele.sigm(act_fg[t])

				act_ff[t] = model.ff_weight_data * data[t] + model.ff_weight_prev * Hout[t - 1] + model.ff_weight_bias
				act_ff[t] = ele.tanh(act_ff[t])

				C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(act_fg[t], C[t - 1])

				# The output gate sees the *new* cell state C[t].
				act_og[t] = model.og_weight_data * data[t] + model.og_weight_prev * Hout[t - 1] + model.og_weight_cell * C[t] + model.og_weight_bias
				act_og[t] = ele.sigm(act_og[t])

				if tanhC_version:
					Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
				else:
					Hout[t] = ele.mult(act_og[t], C[t])

				Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

				# BP to Hout
				dY[t] = Y - target
				dBd += dY[t]
				dWd += dY[t] * Hout[t].trans()
				dHout[t] = model.decoder_weights.trans() * dY[t]

				# evaluation
				output = Y.to_numpy()			# Can directly get a single element from Y
				# print output[0, sent[t]]
				sent_ll += math.log(max(output[0, sent[t]],1e-20), 2)

				#print "Y_0[t]",Y_o[t]
				#print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
				#print np.sum(output.to_numpy())
				# output = Ym[t].trans() * data[t]
				# sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
			##### Initialize gradient vectors #####

			weight_update_ig_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_ig_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ig_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

			weight_update_fg_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_fg_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_fg_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

			weight_update_og_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_og_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_og_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_og_bias = owl.zeros([model.Layers[1], 1])

			weight_update_ff_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_ff_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

			dC = [None] * Tau

			for t in xrange(Tau):
				dC[t] = owl.zeros(C[t].shape)

			# Calculate the error and add it
			for t in reversed(range(1, Tau)):
				#print "sent",sent
				#print "t",t

				# BP from og controled gate and og
				if tanhC_version:
					tanhC = ele.tanh(C[t])
					dTanhC = ele.mult(dHout[t], act_og[t])
					sen_og = ele.mult(dHout[t], tanhC)
					dC[t] += ele.mult((1 - ele.mult(tanhC, tanhC)), dTanhC)
				else:
					sen_og = ele.mult(C[t], dHout[t])
					dC[t] += ele.mult(act_og[t], dHout[t])

				# BP from og
				sen_og = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])), sen_og)
				dHout[t - 1] = model.og_weight_prev.trans() * sen_og
				dC[t] += model.og_weight_cell.trans() * sen_og
				dEmb[t] = model.og_weight_data.trans() * sen_og

				# BP from fg controled gate
				sen_fg = ele.mult(C[t - 1], dC[t])
				dC[t - 1] += ele.mult(act_fg[t], dC[t])

				# BP from ig controled gate
				sen_ig = ele.mult(act_ff[t], dC[t])
				sen_ff = ele.mult(act_ig[t], dC[t])
				sen_ff = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])), sen_ff)
				dEmb[t] += model.ff_weight_data.trans() * sen_ff

				# BP from fg
				sen_fg = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])), sen_fg)
				dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg
				dC[t - 1] += model.fg_weight_cell.trans() * sen_fg
				dEmb[t] += model.fg_weight_data.trans() * sen_fg

				# BP from ig
				sen_ig = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])), sen_ig)
				dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig
				dC[t - 1] += model.ig_weight_cell.trans() * sen_ig
				dEmb[t] += model.ig_weight_data.trans() * sen_ig

				# derivatives on weight matrix and bias
				weight_update_ig_data += sen_ig * data[t].trans()
				weight_update_ig_prev += sen_ig * Hout[t - 1].trans()
				weight_update_ig_cell += sen_ig * C[t - 1].trans()
				weight_update_ig_bias += sen_ig

				weight_update_fg_data += sen_fg * data[t].trans()
				weight_update_fg_prev += sen_fg * Hout[t - 1].trans()
				weight_update_fg_cell += sen_fg * C[t - 1].trans()
				weight_update_fg_bias += sen_fg

				weight_update_og_data += sen_og * data[t].trans()
				weight_update_og_prev += sen_og * Hout[t - 1].trans()
				weight_update_og_cell += sen_og * C[t].trans()
				weight_update_og_bias += sen_og

				weight_update_ff_data += sen_ff * data[t].trans()
				weight_update_ff_prev += sen_ff * Hout[t - 1].trans()
				weight_update_ff_bias += sen_ff


			# normalize the gradients
			rate = learning_rate / Tau

			# weight update
			model.ig_weight_prev -= rate * weight_update_ig_prev
			model.ig_weight_data -= rate * weight_update_ig_data
			model.ig_weight_cell -= rate * weight_update_ig_cell
			model.ig_weight_bias -= rate * weight_update_ig_bias

			model.fg_weight_prev -= rate * weight_update_fg_prev
			model.fg_weight_data -= rate * weight_update_fg_data
			model.fg_weight_cell -= rate * weight_update_fg_cell
			model.fg_weight_bias -= rate * weight_update_fg_bias

			model.og_weight_prev -= rate * weight_update_og_prev
			model.og_weight_data -= rate * weight_update_og_data
			model.og_weight_cell -= rate * weight_update_og_cell
			model.og_weight_bias -= rate * weight_update_og_bias

			model.ff_weight_prev -= rate * weight_update_ff_prev
			model.ff_weight_data -= rate * weight_update_ff_data
			model.ff_weight_bias -= rate * weight_update_ff_bias

			model.decoder_weights -= rate * dWd
			model.decoder_bias -= rate * dBd

			# Update the embedding of every word that appeared as an input.
			for t in range(1, Tau):
				model.emb_weight[sent[t - 1]] -= rate * dEmb[t]

			# Print results
			epoch_ll += sent_ll
			# print(" Sentence %d LL: %f" % (sent_id, sent_ll))


		epoch_ent = epoch_ll * (-1) / words
		epoch_ppl = 2 ** epoch_ent
		cur_time = time.time()
		print("Epoch %d (alpha=%f) PPL=%f" % (epoch_id, learning_rate, epoch_ppl))
		print "  time consumed:", cur_time - last_time
		last_time = cur_time

	return model, learning_rate
Exemplo n.º 16
0
def LSTM_test(model, sents, words, tanhC_version = 1):
	'''Evaluate a trained LSTM language model on a test corpus.

	Forward pass only (no weight updates): for each sentence, runs the
	LSTM over the word embeddings, accumulates the base-2 log-likelihood
	of each target word, and prints the test perplexity ``2 ** (-LL/words)``.

	Args:
		model: trained model; supplies ``Layers`` (sizes), the gate weights
			``ig/fg/og/ff_weight_{data,prev,cell,bias}``, ``emb_weight``,
			``decoder_weights`` and ``decoder_bias``.
		sents: iterable of sentences, each a sequence of word ids.
		words: total word count used to normalize the log-likelihood.
		tanhC_version: if truthy, squash the cell state with tanh before
			the output gate (standard LSTM); otherwise use the raw cell.

	Returns:
		None; the perplexity is printed.
	'''
	N = model.Layers[1]  # hidden-layer size
	K = model.Layers[2]  # vocabulary size (assigned but unused below)

	test_ll = 0
	# For each sentence
	for sent_id, sent in enumerate(sents):
		#print sent_id
		#print "sent", sent
		#print "sents", sents
		##### Initialize activations #####

		Tau = len(sent)
		sent_ll = 0 # Sentence log likelihood

		data = [None] * Tau

		Hout = [None] * Tau
		Hout[0] = owl.zeros([N, 1])

		act_ig = [None] * Tau
		act_fg = [None] * Tau
		act_og = [None] * Tau
		act_ff = [None] * Tau

		C = [None] * Tau
		C[0] = owl.zeros([N, 1])

		##### Forward pass #####
		# For each time step

		for t in range(1, Tau):
			# predict the (t+1)'th word from the t'th word
			data[t] = model.emb_weight[sent[t - 1]]

			# input / forget gates use peephole connections to C[t-1]
			act_ig[t] = model.ig_weight_data * data[t] + model.ig_weight_prev * Hout[t - 1] + model.ig_weight_cell * C[t - 1] + model.ig_weight_bias
			act_ig[t] = ele.sigm(act_ig[t])

			act_fg[t] = model.fg_weight_data * data[t] + model.fg_weight_prev * Hout[t - 1] + model.fg_weight_cell * C[t - 1] + model.fg_weight_bias
			act_fg[t] = ele.sigm(act_fg[t])

			act_ff[t] = model.ff_weight_data * data[t] + model.ff_weight_prev * Hout[t - 1] + model.ff_weight_bias
			act_ff[t] = ele.tanh(act_ff[t])

			# new cell state: input-gated candidate + forget-gated old cell
			C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(act_fg[t], C[t - 1])

			# output gate peeps at the *updated* cell C[t]
			act_og[t] = model.og_weight_data * data[t] + model.og_weight_prev * Hout[t - 1] + model.og_weight_cell * C[t] + model.og_weight_bias
			act_og[t] = ele.sigm(act_og[t])

			if tanhC_version:
				Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
			else:
				Hout[t] = ele.mult(act_og[t], C[t])

			Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

			# evaluation
			output = Y.to_numpy()			# Can directly get a single element from Y
			# print output[0, sent[t]]
			# base-2 log prob of the target word, floored at 1e-20 to avoid log(0)
			sent_ll += math.log(max(output[0, sent[t]],1e-20), 2)

		test_ll += sent_ll

	test_ent = test_ll * (-1) / words
	test_ppl = 2 ** test_ent

	print "Test PPL =", test_ppl
Exemplo n.º 17
0
    def init_random(self):
        """Randomly initialize AlexNet-style parameters.

        Builds ``self.weights`` (Gaussian init), ``self.weightsdelta``
        (zeroed momentum buffers), ``self.bias`` (zeros, some layers
        offset by +1) and ``self.biasdelta`` (zeroed), one entry per
        layer: five convolutional layers followed by three fully
        connected layers.
        """
        conv_shapes = [
            [11, 11, 3, 96],
            [5, 5, 96, 256],
            [3, 3, 256, 384],
            [3, 3, 384, 384],
            [3, 3, 384, 256],
        ]
        fc_specs = [
            ([4096, 9216], 0.005),
            ([4096, 4096], 0.005),
            ([1000, 4096], 0.01),
        ]

        # conv weights share std 0.01; fc layers carry their own std
        self.weights = [owl.randn(shape, 0.0, 0.01) for shape in conv_shapes]
        self.weights += [owl.randn(shape, 0.0, std) for shape, std in fc_specs]

        all_shapes = conv_shapes + [shape for shape, _ in fc_specs]
        self.weightsdelta = [owl.zeros(shape) for shape in all_shapes]

        # (shape, constant offset) per layer; a 1-offset mirrors the common
        # AlexNet practice of biasing some layers' ReLU inputs positive
        bias_specs = [
            ([96], 0),
            ([256], 1),
            ([384], 0),
            ([384], 1),
            ([256], 1),
            ([4096, 1], 1),
            ([4096, 1], 1),
            ([1000, 1], 0),
        ]
        self.bias = []
        for shape, offset in bias_specs:
            buf = owl.zeros(shape)
            if offset:
                buf = buf + offset
            self.bias.append(buf)

        self.biasdelta = [owl.zeros(shape) for shape, _ in bias_specs]
Exemplo n.º 18
0
import owl
import numpy as np
import demo_common as dc

# Demo: one fully-connected layer applied to two 128-sample minibatches of
# 784-dim inputs, then a weight-gradient-style accumulation.
x1 = owl.randn([784, 128], 0.0, 0.1)   # minibatch 1 (Gaussian data)
x2 = owl.randn([784, 128], 0.0, 0.1)   # minibatch 2
w = owl.randn([512, 784], 0.0, 0.1)    # layer weights: 784 -> 512
b = owl.zeros([512, 1])                # layer bias (broadcast over the batch)

y1 = w * x1 + b                        # forward pass, batch 1 -> [512, 128]
y2 = w * x2 + b                        # forward pass, batch 2
# accumulate y * x^T per batch -> [512, 784], same shape as w
gw = y1 * x1.trans() + y2 * x2.trans()
print gw.to_numpy()                    # forces lazy evaluation and fetches to host
Exemplo n.º 19
0
 def ff(self, x, phase):
     """Forward pass: cache the input, allocate a zeroed scale buffer of the
     same shape, and delegate the actual computation to ``self.lrner``.

     The input, scale buffer and output are retained on ``self`` (``ff_x``,
     ``scale``, ``ff_y``) for later use.  ``phase`` is accepted for interface
     compatibility but not consulted here.
     """
     self.ff_x = x
     scale_buf = owl.zeros(x.shape)
     self.scale = scale_buf
     result = self.lrner.ff(x, scale_buf)
     self.ff_y = result
     return result
Exemplo n.º 20
0
import owl
import owl.conv as co
import numpy as np
import demo_common

# Demo: one convolution forward/backward pass with owl.conv.
x = owl.randn([227, 227, 3, 256], 0.0, 1)   # batch of 256 RGB 227x227 images
w = owl.randn([11, 11, 3, 96], 0.0, 0.1)    # 96 filters of 11x11x3
b = owl.zeros([96])                          # one bias per output channel
# no padding, stride 4 in both directions (AlexNet conv1 geometry)
conv = co.Convolver(pad_h=0, pad_w=0, stride_v=4, stride_h=4)

y = conv.ff(x, w, b)        # forward convolution
print y.to_numpy()          # forces evaluation
print y.shape

ex = conv.bp(y, w)          # backward pass: gradient w.r.t. the input
print ex.to_numpy()
print ex.shape
Exemplo n.º 21
0
    def init_random(self):
        last_channel = self.input_channel
        last_scale = self.input_size
        last_dim = last_scale * last_scale * last_channel

        for i in range(self.num_weights):
            if self.ff_infos[i]['ff_type'] == 'conv':
                kernelsize = self.ff_infos[i]['convolution_param'].kernel_size
                out_channel = self.ff_infos[i]['convolution_param'].num_output
                stride = self.ff_infos[i]['convolution_param'].stride
                pad = self.ff_infos[i]['convolution_param'].pad

                print 'conv %d %d %d %d %d %d %d %d' % (i, kernelsize, out_channel, stride, pad, last_channel, last_scale, last_dim)
                owl.randn([kernelsize, kernelsize, last_channel, out_channel], 0.0, self.ff_infos[i]['convolution_param'].weight_filler.std)
                #weight
                if self.ff_infos[i]['convolution_param'].weight_filler.type == "gaussian":
                    self.weights.append(owl.randn([kernelsize, kernelsize, last_channel, out_channel], 0.0, self.ff_infos[i]['convolution_param'].weight_filler.std))
                elif self.ff_infos[i]['convolution_param'].weight_filler.type == "constant":
                    self.weights.append(owl.zeros([kernelsize, kernelsize, last_channel, out_channel]) + self.ff_infos[i]['convolution_param'].weight_filler.value)
                else:
                    assert False
                self.weightsdelta.append(owl.zeros([kernelsize, kernelsize, last_channel, out_channel]))
                
                #bias
                if self.ff_infos[i]['convolution_param'].bias_filler.type == "gaussian":
                    self.bias.append(owl.randn([out_channel], 0.0, self.ff_infos[i]['convolution_param'].bias_filler.std))
                elif self.ff_infos[i]['convolution_param'].bias_filler.type == "constant":
                    self.bias.append(owl.zeros([out_channel]) + self.ff_infos[i]['convolution_param'].bias_filler.value)
                else:
                    assert False
                self.biasdelta.append(owl.zeros([out_channel]))

                last_channel = out_channel
                last_scale = (last_scale + pad * 2 - kernelsize) / stride + 1
                last_dim = last_scale * last_scale * last_channel
            
            elif self.ff_infos[i]['ff_type'] == 'pooling':
                kernelsize = self.ff_infos[i]['pooling_param'].kernel_size
                stride = self.ff_infos[i]['pooling_param'].stride
                pad = self.ff_infos[i]['pooling_param'].pad
                print 'pool %d %d %d %d %d %d %d' % (i, kernelsize, stride, pad, last_channel, last_scale, last_dim)
                
                self.weights.append(owl.zeros([1]))
                self.weightsdelta.append(owl.zeros([1]))
                self.bias.append(owl.zeros([1]))
                self.biasdelta.append(owl.zeros([1]))
                last_channel = out_channel
                last_scale = (last_scale + pad * 2 - kernelsize) / stride + 1
                last_dim = last_scale * last_scale * last_channel
            elif self.ff_infos[i]['ff_type'] == 'fully':
                out_channel = self.ff_infos[i]['fully_param'].num_output
                
                print 'fully %d %d %d' % (i, last_dim, out_channel)
                
                #weight
                if self.ff_infos[i]['fully_param'].weight_filler.type == "gaussian":
                    self.weights.append(owl.randn([out_channel, last_dim], 0.0, self.ff_infos[i]['fully_param'].weight_filler.std))
                elif self.ff_infos[i]['fully_param'].weight_filler.type == "constant":
                    self.weights.append(owl.zeros([out_channel, last_dim]) + self.ff_infos[i]['fully_param'].weight_filler.value)
                else:
                    assert False
                self.weightsdelta.append(owl.zeros([out_channel, last_dim]))
                
                #bias
                if self.ff_infos[i]['fully_param'].bias_filler.type == "gaussian":
                    self.bias.append(owl.randn([out_channel, 1], 0.0, self.ff_infos[i]['fully_param'].weight_filler.std))
                elif self.ff_infos[i]['fully_param'].bias_filler.type == "constant":
                    self.bias.append(owl.zeros([out_channel, 1]) + self.ff_infos[i]['fully_param'].weight_filler.value)
                else:
                    assert False
                self.biasdelta.append(owl.zeros([out_channel, 1]))                 
                last_dim = out_channel
                last_channel = out_channel
Exemplo n.º 22
0
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version=1):
    '''Run the LSTM language-model training loop (benchmark variant).

    NOTE(review): the ``continue`` after the timing printout makes
    everything below it (sensitivity/backprop and the weight updates)
    unreachable — as written, this function only benchmarks the forward
    pass.  The dead code is kept intact below.

    Args:
        model: model object supplying ``Layers`` and the gate weight
            matrices (``ig/fg/og/ff_weight_{data,prev,bias}``) plus the
            decoder parameters.
        sents: iterable of sentences (sequences of word ids).
        words: total word count (used only by the dead perplexity code).
        learning_rate: SGD step size (only used by the dead update code).
        EPOCH: starting epoch index; runs 10 epochs from there.
        tanhC_version: if truthy, tanh-squash the cell before the output
            gate.

    Returns:
        ``(model, learning_rate)``.
    '''
    # Constants
    N = model.Layers[1]  # Number of units
    K = model.Layers[0]  # Vocabulary size

    # For each epoch
    last_ll = 1e99
    for epoch_id in range(EPOCH, EPOCH + 10):
        print 'Start epoch #', epoch_id
        last_time = time.time()
        epoch_ll = 0
        tau_sum = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print "sent_id",sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####
            Tau = len(sent)
            tau_sum += Tau
            sent_ll = 0  # Sentence log likelihood
            batch_size = Tau

            data = [None] * Tau
            prev = [None] * Tau
            data[0] = owl.zeros([K, 1])
            # embed = np.zeros((K, 1))
            # embed[sent[0]] = 1
            # data[0] = owl.from_numpy(embed).trans()

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            Ym = [None] * Tau
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[1],
                             model.Layers[2]])  #Hout.transpose().dot(dY)
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())

            ##### Forward pass #####
            # For each time step
            for t in range(1, Tau):
                #prev[t] = Hout[t - 1]
                # NOTE(review): benchmark shortcut — real word embeddings and
                # the recurrent connection are replaced by zero vectors (see
                # the commented-out lines), so the math exercises the same
                # operator graph without data dependence.
                prev[t] = owl.zeros([N, 1])
                data[t] = owl.zeros([K, 1])
                #embed = np.zeros((K, 1))
                #embed[sent[t]] = 1
                #data[t] = owl.from_numpy(embed).trans()

                act_ig[t] = model.ig_weight_data.trans() * data[
                    t - 1] + model.ig_weight_prev.trans(
                    ) * prev[t] + model.ig_weight_bias
                act_fg[t] = model.fg_weight_data.trans() * data[
                    t - 1] + model.fg_weight_prev.trans(
                    ) * prev[t] + model.fg_weight_bias
                act_og[t] = model.og_weight_data.trans() * data[
                    t - 1] + model.og_weight_prev.trans(
                    ) * prev[t] + model.og_weight_bias
                act_ff[t] = model.ff_weight_data.trans() * data[
                    t - 1] + model.ff_weight_prev.trans(
                    ) * prev[t] + model.ff_weight_bias

                act_ig[t] = ele.sigm(act_ig[t])
                act_fg[t] = ele.sigm(act_fg[t])
                act_og[t] = ele.sigm(act_og[t])
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])
                Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                                model.decoder_bias)

                dY[t] = data[t] - Ym[t]
                dBd += dY[t] / batch_size
                dWd += Hout[t] * dY[t].trans() / batch_size
                dHout[t] = model.decoder_weights * dY[t]

            #print "Y_0[t]",Y_o[t]
            #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
            #print np.sum(output.to_numpy())
            # output = Ym[t].trans() * data[t]
            # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Initialize gradient vectors #####
            #Ym[-1].wait_for_eval()
            # force evaluation of the lazy graph so the timing below is real
            for t in range(1, Tau):
                Ym[t].wait_for_eval()
            #output = Ym[t].trans() * data[t]
            #sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            if sent_id % 100 == 0:
                cur_time = time.time()
                print 'Finished', sent_id, 'sentences. Time used:', cur_time - last_time, 's. sent/s:', float(
                    sent_id) / (cur_time - last_time), 'tau_sum=', tau_sum
                #print owl.print_profiler_result()
                tau_sum = 0
            # NOTE(review): everything after this `continue` is DEAD CODE —
            # the backward pass and weight updates below never execute.
            continue

            sen_ig = [None] * Tau
            sen_fg = [None] * Tau
            sen_og = [None] * Tau
            sen_ff = [None] * Tau

            weight_update_ig_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dHin = owl.zeros([model.Layers[1], model.Layers[1]])
            dC = [None] * Tau
            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, len(sent))):
                #print "sent",sent
                #print "t",t
                if tanhC_version:
                    tanhCt = ele.tanh(C[t])
                    sen_og[t] = ele.mult(tanhCt, dHout[t])
                    dC[t] += ele.mult((1 - ele.mult(tanhCt, tanhCt)),
                                      ele.mult(act_og[t], dHout[t]))
                else:
                    sen_og[t] = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                sen_fg[t] = owl.zeros([model.Layers[1], 1])
                if t > 0:
                    sen_fg[t] = ele.mult(C[t - 1], dC[t])
                    dC[t - 1] += ele.mult(act_fg[t], dC[t])
                sen_ig[t] = ele.mult(act_ff[t], dC[t])
                sen_ff[t] = ele.mult(act_ig[t], dC[t])

                # backprop activation functions
                sen_ff[t] = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])),
                                     sen_ff[t])
                sen_ig[t] = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                     sen_ig[t])
                sen_fg[t] = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                     sen_fg[t])
                sen_og[t] = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                     sen_og[t])

                # backprop matrix multiply
                weight_update_ig_data += data[t] * sen_ig[t].trans()
                weight_update_ig_prev += prev[t] * sen_ig[t].trans()
                # NOTE(review): looks like a copy-paste bug — adds the input
                # gate's sensitivity to the FORGET gate's bias update (and
                # weight_update_ig_bias is never touched).  Dead code anyway.
                weight_update_fg_bias += sen_ig[t]  # sen_ig[t].sum(0 or 1)

                weight_update_fg_data += data[t] * sen_fg[t].trans()
                weight_update_fg_prev += prev[t] * sen_fg[t].trans()
                weight_update_fg_bias += sen_fg[t]

                weight_update_og_data += data[t] * sen_og[t].trans()
                weight_update_og_prev += prev[t] * sen_og[t].trans()
                weight_update_og_bias += sen_og[t]

                weight_update_ff_data += data[t] * sen_ff[t].trans()
                weight_update_ff_prev += prev[t] * sen_ff[t].trans()
                weight_update_ff_bias += sen_ff[t]

                if t > 1:
                    dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig[t]
                    dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg[t]
                    dHout[t - 1] += model.og_weight_prev.trans() * sen_og[t]
                    dHout[t - 1] += model.ff_weight_prev.trans() * sen_ff[t]

            # normalize the gradients
            # weight update
            model.ig_weight_prev += learning_rate / batch_size * weight_update_ig_prev
            model.ig_weight_data += learning_rate / batch_size * weight_update_ig_data
            model.ig_weight_bias += learning_rate / batch_size * weight_update_ig_bias

            model.fg_weight_prev += learning_rate / batch_size * weight_update_fg_prev
            model.fg_weight_data += learning_rate / batch_size * weight_update_fg_data
            model.fg_weight_bias += learning_rate / batch_size * weight_update_fg_bias

            model.og_weight_prev += learning_rate / batch_size * weight_update_og_prev
            model.og_weight_data += learning_rate / batch_size * weight_update_og_data
            model.og_weight_bias += learning_rate / batch_size * weight_update_og_bias

            model.ff_weight_prev += learning_rate / batch_size * weight_update_ff_prev
            model.ff_weight_data += learning_rate / batch_size * weight_update_ff_data
            model.ff_weight_bias += learning_rate / batch_size * weight_update_ff_bias

            model.decoder_weights += learning_rate * dWd
            model.decoder_bias += learning_rate * dBd

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))
        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 10**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        last_time = cur_time
        # halve the step size whenever the epoch log-likelihood degrades
        if last_ll > epoch_ll:
            learning_rate /= 2.0
        last_ll = epoch_ll

    return model, learning_rate
Exemplo n.º 23
0
 def init_random(self):
     """Randomly initialize the small conv-net's parameters.

     Two convolutional layers (sizes taken from ``self.filtersizes`` /
     ``self.filters``) followed by two fully connected layers
     (``convolution_output_size -> 128 -> 10``).  Weights are drawn from
     N(0, 0.1); biases and both momentum-buffer lists start at zero.
     """
     weight_shapes = [
         [self.filtersizes[0], self.filtersizes[0], 1, self.filters[0]],
         [self.filtersizes[1], self.filtersizes[1], self.filters[0], self.filters[1]],
         [128, self.convolution_output_size],
         [10, 128],
     ]
     bias_shapes = [
         [self.filters[0]],
         [self.filters[1]],
         [128, 1],
         [10, 1],
     ]

     self.weights = [owl.randn(shape, 0.0, 0.1) for shape in weight_shapes]
     self.weightdelta = [owl.zeros(shape) for shape in weight_shapes]
     self.bias = [owl.zeros(shape) for shape in bias_shapes]
     self.biasdelta = [owl.zeros(shape) for shape in bias_shapes]
Exemplo n.º 24
0
    def test_ones(self):
        """Allocation benchmark: create 1000 large (10000x10000) zero
        matrices, waiting for the runtime to drain after each one, then
        dump the profiler report.

        (Despite the name it allocates zeros, not ones; the name is kept
        for test-suite compatibility.)
        """
        # Fixes: indentation normalized to 4-space (the original mixed a
        # space-indented line with tab-indented ones — a SyntaxError under
        # Python 3), and the unused ``test`` local was dropped.
        for _ in range(1000):
            owl.zeros([10000, 10000])  # result intentionally discarded
            owl.wait_for_all()         # block until queued ops complete before the next alloc
        owl.print_profiler_result()
Exemplo n.º 25
0
def LSTM_test(model, sents, vocab_size, words, tanhC_version=1):
    '''Evaluate an LSTM language model using one-hot word inputs.

    Forward pass only: each word is one-hot encoded on the host, pushed to
    owl, run through the LSTM, and the base-10 log-likelihood of the
    observed words is accumulated; prints the total LL and the test
    perplexity ``10 ** (-LL/words)``.

    Args:
        model: model supplying the gate weights
            (``ig/fg/og/ff_weight_{data,prev,bias}``) and decoder params.
        sents: iterable of sentences (sequences of word ids).
        vocab_size: one-hot vector length K.
        words: total word count used to normalize the log-likelihood.
        tanhC_version: if truthy, tanh-squash the cell before the output
            gate.
    '''
    # NOTE(review): hidden size is hard-coded to 10 here rather than read
    # from the model — verify it matches the trained model's layer size.
    N = 10
    K = vocab_size

    test_ll = 0
    # For each sentence
    for sent_id, sent in enumerate(sents):
        #print "sent_id",sent_id
        #print "sent", sent
        #print "sents", sents
        ##### Initialize activations #####
        Tau = len(sent)
        sent_ll = 0  # Sentence log likelihood
        batch_size = Tau

        data = [None] * Tau
        prev = [None] * Tau
        embed = np.zeros((K, 1))
        embed[sent[0]] = 1
        data[0] = owl.from_numpy(embed).trans()

        Hout = [None] * Tau
        Hout[0] = owl.zeros([N, 1])

        act_ig = [None] * Tau
        act_fg = [None] * Tau
        act_og = [None] * Tau
        act_ff = [None] * Tau

        C = [None] * Tau
        C[0] = owl.zeros([N, 1])
        Ym = [None] * Tau

        ##### Forward pass #####
        # For each time step
        for t in range(1, Tau):
            prev[t] = Hout[t - 1]
            embed = np.zeros((K, 1))
            embed[sent[t]] = 1
            data[t] = owl.from_numpy(embed).trans()

            # NOTE(review): the gates consume data[t - 1] (the previous
            # word) while data[t] is used as the prediction target below —
            # presumably intentional next-word prediction; confirm against
            # the matching training routine.
            act_ig[t] = model.ig_weight_data.trans() * data[
                t - 1] + model.ig_weight_prev.trans(
                ) * prev[t] + model.ig_weight_bias
            act_fg[t] = model.fg_weight_data.trans() * data[
                t - 1] + model.fg_weight_prev.trans(
                ) * prev[t] + model.fg_weight_bias
            act_og[t] = model.og_weight_data.trans() * data[
                t - 1] + model.og_weight_prev.trans(
                ) * prev[t] + model.og_weight_bias
            act_ff[t] = model.ff_weight_data.trans() * data[
                t - 1] + model.ff_weight_prev.trans(
                ) * prev[t] + model.ff_weight_bias

            act_ig[t] = ele.sigm(act_ig[t])
            act_fg[t] = ele.sigm(act_fg[t])
            act_og[t] = ele.sigm(act_og[t])
            act_ff[t] = ele.tanh(act_ff[t])

            C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                act_fg[t], C[t - 1])

            if tanhC_version:
                Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
            else:
                Hout[t] = ele.mult(act_og[t], C[t])
            Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                            model.decoder_bias)

            #print "Y_0[t]",Y_o[t]
            #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
            # dot with the one-hot target extracts P(observed word)
            output = Ym[t].trans() * data[t]
            test_ll += math.log10(max(np.sum(output.to_numpy()), 1e-20))

    print test_ll
    test_ent = test_ll * (-1) / words
    test_ppl = 10**test_ent

    print("Test PPL = %f" % (test_ppl))
Exemplo n.º 26
0
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version=1):
    '''Train an LSTM language model with word embeddings by plain SGD.

    For each sentence: forward pass predicting word t from word t-1 via
    the embedding table, then full backpropagation-through-time computing
    gradients for all gate weights, the decoder, and the embeddings, and
    finally an SGD step with the per-sentence rate ``learning_rate / Tau``.
    Prints base-2 perplexity after each epoch.

    Args:
        model: model supplying ``Layers`` ([data, hidden, vocab] sizes),
            gate weights ``ig/fg/og/ff_weight_{data,prev,cell,bias}``,
            ``emb_weight``, ``decoder_weights`` and ``decoder_bias``.
        sents: iterable of sentences (sequences of word ids).
        words: total word count for perplexity normalization.
        learning_rate: SGD step size (constant across epochs here).
        EPOCH: number of epochs to run.
        tanhC_version: if truthy, tanh-squash the cell before the output
            gate.

    Returns:
        ``(model, learning_rate)``.
    '''
    # Constants
    N = model.Layers[1]  # Number of units
    K = model.Layers[2]  # Vocabulary size

    last_time = time.time()
    # For each epoch
    for epoch_id in range(1, EPOCH + 1):
        epoch_ll = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####

            Tau = len(sent)
            sent_ll = 0  # Sentence log likelihood

            data = [None] * Tau

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[2], model.Layers[1]])
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())
            dEmb = [None] * Tau

            ##### Forward pass #####
            # For each time step

            for t in range(1, Tau):
                # predict the (t+1)'th word from the t'th word
                data[t] = model.emb_weight[sent[t - 1]]
                # one-hot target for the word to predict
                NVector = np.zeros((K, 1))
                NVector[sent[t]] = 1
                target = owl.from_numpy(NVector).trans()

                # input/forget gates peep at C[t-1]; output gate at C[t]
                act_ig[t] = model.ig_weight_data * data[
                    t] + model.ig_weight_prev * Hout[
                        t - 1] + model.ig_weight_cell * C[
                            t - 1] + model.ig_weight_bias
                act_ig[t] = ele.sigm(act_ig[t])

                act_fg[t] = model.fg_weight_data * data[
                    t] + model.fg_weight_prev * Hout[
                        t - 1] + model.fg_weight_cell * C[
                            t - 1] + model.fg_weight_bias
                act_fg[t] = ele.sigm(act_fg[t])

                act_ff[t] = model.ff_weight_data * data[
                    t] + model.ff_weight_prev * Hout[t -
                                                     1] + model.ff_weight_bias
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                act_og[t] = model.og_weight_data * data[
                    t] + model.og_weight_prev * Hout[
                        t -
                        1] + model.og_weight_cell * C[t] + model.og_weight_bias
                act_og[t] = ele.sigm(act_og[t])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])

                Y = softmax(model.decoder_weights * Hout[t] +
                            model.decoder_bias)

                # BP to Hout
                dY[t] = Y - target
                dBd += dY[t]
                dWd += dY[t] * Hout[t].trans()
                dHout[t] = model.decoder_weights.trans() * dY[t]

                # evaluation
                output = Y.to_numpy(
                )  # Can directly get a single element from Y
                # print output[0, sent[t]]
                sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

                #print "Y_0[t]",Y_o[t]
                #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
                #print np.sum(output.to_numpy())
                # output = Ym[t].trans() * data[t]
                # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Initialize gradient vectors #####

            weight_update_ig_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dC = [None] * Tau

            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            # (BPTT: walk the sentence backwards accumulating sensitivities)
            for t in reversed(range(1, Tau)):
                #print "sent",sent
                #print "t",t

                # BP from og controled gate and og
                if tanhC_version:
                    tanhC = ele.tanh(C[t])
                    dTanhC = ele.mult(dHout[t], act_og[t])
                    sen_og = ele.mult(dHout[t], tanhC)
                    dC[t] += ele.mult((1 - ele.mult(tanhC, tanhC)), dTanhC)
                else:
                    sen_og = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                # BP from og
                sen_og = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                  sen_og)
                dHout[t - 1] = model.og_weight_prev.trans() * sen_og
                dC[t] += model.og_weight_cell.trans() * sen_og
                dEmb[t] = model.og_weight_data.trans() * sen_og

                # BP from fg controled gate
                sen_fg = ele.mult(C[t - 1], dC[t])
                dC[t - 1] += ele.mult(act_fg[t], dC[t])

                # BP from ig controled gate
                sen_ig = ele.mult(act_ff[t], dC[t])
                sen_ff = ele.mult(act_ig[t], dC[t])
                sen_ff = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])), sen_ff)
                dEmb[t] += model.ff_weight_data.trans() * sen_ff

                # BP from fg
                sen_fg = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                  sen_fg)
                dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg
                dC[t - 1] += model.fg_weight_cell.trans() * sen_fg
                dEmb[t] += model.fg_weight_data.trans() * sen_fg

                # BP from ig
                sen_ig = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                  sen_ig)
                dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig
                dC[t - 1] += model.ig_weight_cell.trans() * sen_ig
                dEmb[t] += model.ig_weight_data.trans() * sen_ig

                # derivatives on weight matrix and bias
                weight_update_ig_data += sen_ig * data[t].trans()
                weight_update_ig_prev += sen_ig * Hout[t - 1].trans()
                weight_update_ig_cell += sen_ig * C[t - 1].trans()
                weight_update_ig_bias += sen_ig

                weight_update_fg_data += sen_fg * data[t].trans()
                weight_update_fg_prev += sen_fg * Hout[t - 1].trans()
                weight_update_fg_cell += sen_fg * C[t - 1].trans()
                weight_update_fg_bias += sen_fg

                weight_update_og_data += sen_og * data[t].trans()
                weight_update_og_prev += sen_og * Hout[t - 1].trans()
                weight_update_og_cell += sen_og * C[t].trans()
                weight_update_og_bias += sen_og

                weight_update_ff_data += sen_ff * data[t].trans()
                weight_update_ff_prev += sen_ff * Hout[t - 1].trans()
                weight_update_ff_bias += sen_ff

            # normalize the gradients
            rate = learning_rate / Tau

            # weight update
            model.ig_weight_prev -= rate * weight_update_ig_prev
            model.ig_weight_data -= rate * weight_update_ig_data
            model.ig_weight_cell -= rate * weight_update_ig_cell
            model.ig_weight_bias -= rate * weight_update_ig_bias

            model.fg_weight_prev -= rate * weight_update_fg_prev
            model.fg_weight_data -= rate * weight_update_fg_data
            model.fg_weight_cell -= rate * weight_update_fg_cell
            model.fg_weight_bias -= rate * weight_update_fg_bias

            model.og_weight_prev -= rate * weight_update_og_prev
            model.og_weight_data -= rate * weight_update_og_data
            model.og_weight_cell -= rate * weight_update_og_cell
            model.og_weight_bias -= rate * weight_update_og_bias

            model.ff_weight_prev -= rate * weight_update_ff_prev
            model.ff_weight_data -= rate * weight_update_ff_data
            model.ff_weight_bias -= rate * weight_update_ff_bias

            model.decoder_weights -= rate * dWd
            model.decoder_bias -= rate * dBd

            # update the embedding rows actually used by this sentence
            for t in range(1, Tau):
                model.emb_weight[sent[t - 1]] -= rate * dEmb[t]

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))

        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 2**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        last_time = cur_time

    return model, learning_rate
Exemplo n.º 27
0
 # RBM training setup: hyperparameters, parameter buffers and momentum
 # buffers.  NOTE(review): this fragment is truncated by the example
 # collector — the contrastive-divergence loop body continues beyond what
 # is shown here.
 epsilon = 0.01   # learning rate
 momentum = 0.9

 num_epochs = 20
 batch_size = 64
 num_batches = data.shape[1]//batch_size

 # model parameters
 num_vis = data.shape[0]   # one visible unit per data row
 num_hid = 128

 # initialize weights
 np.random.seed(1234)
 weights = owl.from_numpy(0.1 * np.random.randn(num_vis, num_hid)).trans()
 #weights = 0.1 * owl.randn([num_vis, num_hid],0,1)
 bias_v = owl.zeros([1,num_vis])
 bias_h = owl.zeros([1,num_hid])

 # initialize weight updates
 d_weights = owl.zeros((num_vis,num_hid ))
 d_bias_v = owl.zeros([1,num_vis])
 d_bias_h = owl.zeros([1,num_hid])

 start_time = time.time()
 for epoch in range(num_epochs):
     print("Epoch %i" % (epoch + 1))
     err = []
     weights_old = weights
     for batch in range(num_batches):
         # slice one minibatch of columns and push it to the device
         np_set = data[:,batch*batch_size:(batch + 1)*batch_size]
         training_set = owl.from_numpy(np_set)
Exemplo n.º 28
0
def LSTM_test(model, sents, words, tanhC_version=1):
    ''' Evaluate an LSTM language model and print its per-word perplexity.

    For every sentence the model predicts word ``t`` from word ``t-1``,
    accumulating the base-2 log-likelihood of the observed words; the final
    perplexity is ``2 ** (-LL / words)``.

    Args:
        model: trained model exposing the gate weights (``ig_*``, ``fg_*``,
            ``og_*``, ``ff_*``), embedding table ``emb_weight``, decoder
            parameters and ``Layers`` sizes.
        sents: iterable of sentences, each a sequence of word ids.
        words: total word count used to normalise the log-likelihood.
        tanhC_version: if truthy, hidden output is ``og * tanh(C)``,
            otherwise ``og * C``.

    Returns:
        None; the test perplexity is printed to stdout.
    '''

    N = model.Layers[1]  # hidden-state size
    K = model.Layers[2]  # presumably vocabulary size — unused here

    test_ll = 0  # accumulated log2-likelihood over all sentences
    # For each sentence
    for sent_id, sent in enumerate(sents):
        #print sent_id
        #print "sent", sent
        #print "sents", sents
        ##### Initialize activations #####

        Tau = len(sent)
        sent_ll = 0  # Sentence log likelihood

        data = [None] * Tau  # embedding vector fed in at each step

        # Hidden outputs; step 0 starts from a zero state.
        Hout = [None] * Tau
        Hout[0] = owl.zeros([N, 1])

        act_ig = [None] * Tau  # input-gate activations
        act_fg = [None] * Tau  # forget-gate activations
        act_og = [None] * Tau  # output-gate activations
        act_ff = [None] * Tau  # candidate cell input

        # Cell state; step 0 starts from zero.
        C = [None] * Tau
        C[0] = owl.zeros([N, 1])

        ##### Forward pass #####
        # For each time step

        for t in range(1, Tau):
            # predict the (t+1)'th word from the t'th word
            data[t] = model.emb_weight[sent[t - 1]]

            # Input gate: sigm(W_d*x + W_h*h_{t-1} + W_c*C_{t-1} + b)
            # (peephole connection to the previous cell state).
            act_ig[t] = model.ig_weight_data * data[
                t] + model.ig_weight_prev * Hout[
                    t - 1] + model.ig_weight_cell * C[t -
                                                      1] + model.ig_weight_bias
            act_ig[t] = ele.sigm(act_ig[t])

            # Forget gate, same form as the input gate.
            act_fg[t] = model.fg_weight_data * data[
                t] + model.fg_weight_prev * Hout[
                    t - 1] + model.fg_weight_cell * C[t -
                                                      1] + model.fg_weight_bias
            act_fg[t] = ele.sigm(act_fg[t])

            # Candidate cell input (no peephole): tanh(W_d*x + W_h*h_{t-1} + b)
            act_ff[t] = model.ff_weight_data * data[
                t] + model.ff_weight_prev * Hout[t - 1] + model.ff_weight_bias
            act_ff[t] = ele.tanh(act_ff[t])

            # New cell state: gated candidate plus gated previous state.
            C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                act_fg[t], C[t - 1])

            # Output gate peeks at the *current* cell state C[t].
            act_og[t] = model.og_weight_data * data[
                t] + model.og_weight_prev * Hout[
                    t - 1] + model.og_weight_cell * C[t] + model.og_weight_bias
            act_og[t] = ele.sigm(act_og[t])

            if tanhC_version:
                Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
            else:
                Hout[t] = ele.mult(act_og[t], C[t])

            # Word distribution for the t'th position.
            Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

            # evaluation
            output = Y.to_numpy()  # Can directly get a single element from Y
            # print output[0, sent[t]]
            # Clamp at 1e-20 so log2 of a zero probability stays finite.
            sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

        test_ll += sent_ll

    # Perplexity = 2 ^ (average negative log2-likelihood per word).
    test_ent = test_ll * (-1) / words
    test_ppl = 2**test_ent

    print "Test PPL =", test_ppl
Exemplo n.º 29
0
    def init_random(self):
        ''' Randomly initialize AlexNet-style parameters.

        Weights are drawn from N(0, 0.01); biases start at zero.  The
        momentum ("delta") buffers are zero arrays mirroring the shape of
        the parameter they update.
        '''
        # Five conv kernels ([H, W, in_channels, out_channels]) followed by
        # three fully-connected layers ([out, in]).
        self.weights = [
            owl.randn([11, 11, 3, 96], 0.0, 0.01),
            owl.randn([5, 5, 96, 256], 0.0, 0.01),
            owl.randn([3, 3, 256, 384], 0.0, 0.01),
            owl.randn([3, 3, 384, 384], 0.0, 0.01),
            owl.randn([3, 3, 384, 256], 0.0, 0.01),
            owl.randn([4096, 9216], 0.0, 0.01),
            owl.randn([4096, 4096], 0.0, 0.01),
            owl.randn([1000, 4096], 0.0, 0.01)
        ]
        # Derive the momentum buffers from the parameter shapes instead of
        # repeating the shape lists (the original duplicated list was also
        # indented with a tab, which is a TabError under Python 3).
        self.weightsdelta = [owl.zeros(w.shape) for w in self.weights]
        # NOTE(review): conv biases are 1-D while FC biases are column
        # vectors — kept exactly as the original had them.
        self.bias = [
            owl.zeros([96]),
            owl.zeros([256]),
            owl.zeros([384]),
            owl.zeros([384]),
            owl.zeros([256]),
            owl.zeros([4096, 1]),
            owl.zeros([4096, 1]),
            owl.zeros([1000, 1])
        ]
        self.biasdelta = [owl.zeros(b.shape) for b in self.bias]
Exemplo n.º 30
0
 def ff(self, x, phase):
     ''' Forward pass: delegate to the wrapped learner.

     Caches the input, a zero-initialised scale buffer and the output on
     ``self`` for use by the corresponding backward pass.
     '''
     self.ff_x = x
     scale = owl.zeros(x.shape)
     self.scale = scale
     y = self.lrner.ff(x, scale)
     self.ff_y = y
     return y
Exemplo n.º 31
0
import owl
import owl.elewise as ele
import numpy as np
import demo_common

# Demo: forward and backward pass of one fully-connected ReLU layer.
# x is a batch of 256 random 784-dim inputs; w/b map 784 -> 512 units.
x = owl.randn([784, 256], 0.0, 0.01)
w = owl.randn([512, 784], 0.0, 0.01)
b = owl.zeros([512, 1])

# Forward: y = relu(w*x + b); b broadcasts across the batch dimension.
y = ele.relu(w * x + b)
print y.to_numpy()

# Backward: push a random error signal through the ReLU, then through
# the weights to get the gradient w.r.t. the input.
e = owl.randn([512, 256], 0.0, 0.01)
ey = ele.relu_back(e, y)
ex = w.trans() * ey
print ex.to_numpy()