def recurrence(input, pmem, i):
    """One LSTM step for direction i (0=forward, 1=backward).

    pmem is the previous cell state.  Returns the updated cell state
    (identity, no output squashing) and the linear layer output for
    this timestep; any output nonlinearity is applied by the caller.
    i arrives as a theano constant (scan non_sequence); .value recovers
    the plain int used to index the per-direction weight tuples.
    """
    i = i.value
    candidate = T.tanh(T.dot(input, wim[i]) + bm[i])
    input_gate = sig(T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i])
    forget_gate = sig(T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i])
    mem = forget_gate * pmem + input_gate * candidate
    output_gate = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
    layerout = T.dot(output_gate * mem, wmo[i])
    return mem, layerout
def recurrence(input, pmem, i):
    """One LSTM step for direction i (0=forward, 1=backward).

    Unlike the identity-cell variant, both the stored cell state and
    the layer output are squashed with tanh.  pmem is the previous
    cell state; i is a theano constant whose .value indexes the
    per-direction weight tuples.
    """
    i = i.value
    cell_input = T.tanh(T.dot(input, wim[i]) + bm[i])
    input_gate = sig(T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i])
    forget_gate = sig(T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i])
    # tanh instead of identity on the new cell state.
    mem = T.tanh(forget_gate * pmem + input_gate * cell_input)
    output_gate = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
    layerout = T.tanh(T.dot(output_gate * mem, wmo[i]))
    return mem, layerout
def recurrence(input, pmem, i):
    """Single LSTM timestep for direction i (0=fwd, 1=bwd).

    Returns (new cell state, layer output), both tanh-squashed.
    pmem is the previous cell state; i.value converts the theano
    constant into the int that indexes the weight pairs.
    """
    i = i.value
    gate_in = T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i]
    gate_forget = T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i]
    # Blend old state and the candidate write, then squash with tanh.
    mem = T.tanh(sig(gate_forget) * pmem
                 + sig(gate_in) * T.tanh(T.dot(input, wim[i]) + bm[i]))
    gate_out = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
    layerout = T.tanh(T.dot(gate_out * mem, wmo[i]))
    return mem, layerout
def __init__(self, n_input=3, n_memblock=100, n_output=2, lr=0.0001, m=0.9, l2rate=0.0001, dense=True):
    """Build a bidirectional single-layer LSTM graph and compile its
    theano functions (train / test / generate).

    n_input:    width of each input row (one row per timestep)
    n_memblock: number of LSTM memory cells per direction
    n_output:   width of each output row
    lr:         SGD learning rate
    m:          momentum coefficient
    l2rate:     L2 penalty weight over all parameters
    dense:      stored on the instance; not read elsewhere in this method
    """
    self.dense = dense
    # Symbolic sequences: one matrix row per timestep.
    input_sequence = T.matrix()
    gold_sequence = T.matrix()  # 1, n_output
    #input_sequence.tag.test_value = [[0,0,1],[0,1,0],[1,0,0]]
    #gold_sequence.tag.test_value = [[1,0],[0,1],[0,0]]
    ''' START WEIGHTS - 0=forward; 1=backward'''
    # Every weight/bias is a pair: index 0 = forward, 1 = backward pass.
    wiig = shared_normal(n_input, n_memblock, 0.01, "wiig0"), shared_normal(
        n_input, n_memblock, 0.01, "wiig1")  # Weights from inputs to gates
    wmig = shared_normal(
        n_memblock, n_memblock, 0.01, "wmig0"), shared_normal(
            n_memblock, n_memblock, 0.01,
            "wmig1")  # Weights from cells to gates - peepholes
    #big = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"big1")
    big = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "big0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "big1")
    wifg = shared_normal(n_input, n_memblock, 0.01,
                         "wifg0"), shared_normal(n_input, n_memblock, 0.01,
                                                 "wifg1")
    wmfg = shared_normal(n_memblock, n_memblock, 0.01,
                         "wmfg0"), shared_normal(n_memblock, n_memblock,
                                                 0.01, "wmfg1")
    #bfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"bfg1")
    bfg = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "bfg0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "bfg1")
    wiog = shared_normal(n_input, n_memblock, 0.01,
                         "wiog0"), shared_normal(n_input, n_memblock, 0.01,
                                                 "wiog1")
    wmog = shared_normal(n_memblock, n_memblock, 0.01,
                         "wmog0"), shared_normal(n_memblock, n_memblock,
                                                 0.01, "wmog1")
    #bog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"bog1")
    bog = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "bog0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "bog1")
    wim = shared_normal(n_input, n_memblock, 0.01, "wim0"), shared_normal(
        n_input, n_memblock, 0.01, "wim1")  # Weight from input to mem
    #bm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"bm1") # Bias from input to mem
    bm = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),
                       "bm0"), theano.shared(
                           numpy.zeros(n_memblock,
                                       dtype=theano.config.floatX), "bm1")
    wmo = shared_normal(n_memblock, n_output, 0.01, "wmo0"), shared_normal(
        n_memblock, n_output, 0.01, "wmo1")  # Weight from input to mem
    # Scalar mixing weights used to combine the two directions' outputs.
    slo = theano.shared(numpy.random.normal(scale=0.01),
                        name="slo0"), theano.shared(
                            numpy.random.normal(scale=0.01), name="slo1")
    bo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),
                       "bo")  # Bias from input to mem
    ''' END OF WEIGHTS '''
    self.params = wiig[0], wiig[1], big[0], big[1], wifg[0], wifg[1], bfg[
        0], bfg[1], wiog[0], wiog[1], bog[0], bog[1], wmig[0], wmig[
            1], wmfg[0], wmfg[1], wmog[0], wmog[1], wim[0], wim[1], bm[
                0], bm[1], wmo[0], wmo[1], slo[0], slo[1], bo
    ''' START DELTAS - 0=forward; 1=backward'''
    # Momentum buffers, one per parameter, listed in the same order as
    # self.params (required by the zip() in the update rules below).
    # NOTE(review): the weight deltas are initialized with random
    # normals rather than zeros -- confirm this is intentional.
    dwiig = shared_normal(n_input, n_memblock, 0.01,
                          "dwiig0"), shared_normal(
                              n_input, n_memblock, 0.01,
                              "dwiig1")  # Weights from inputs to gates
    dwmig = shared_normal(
        n_memblock, n_memblock, 0.01, "dwmig0"), shared_normal(
            n_memblock, n_memblock, 0.01,
            "dwmig1")  # Weights from cells to gates - peepholes
    #dbig = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"dbig1")
    dbig = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "dbig0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbig1")
    dwifg = shared_normal(n_input, n_memblock, 0.01,
                          "dwifg0"), shared_normal(n_input, n_memblock,
                                                   0.01, "dwifg1")
    dwmfg = shared_normal(n_memblock, n_memblock, 0.01,
                          "dwmfg0"), shared_normal(n_memblock, n_memblock,
                                                   0.01, "dwmfg1")
    #dbfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"dbfg1")
    dbfg = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "dbfg0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbfg1")
    dwiog = shared_normal(n_input, n_memblock, 0.01,
                          "dwiog0"), shared_normal(n_input, n_memblock,
                                                   0.01, "dwiog1")
    dwmog = shared_normal(n_memblock, n_memblock, 0.01,
                          "dwmog0"), shared_normal(n_memblock, n_memblock,
                                                   0.01, "dwmog1")
    #dbog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"dbog1")
    dbog = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "dbog0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbog1")
    dwim = shared_normal(n_input, n_memblock, 0.01, "dwim0"), shared_normal(
        n_input, n_memblock, 0.01, "dwim1")  # Weight from input to mem
    #dbm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"dbm1") # Bias from input to mem
    dbm = theano.shared(
        numpy.zeros(n_memblock, dtype=theano.config.floatX),
        "dbm0"), theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbm1")
    dwmo = shared_normal(n_memblock, n_output, 0.01,
                         "dwmo0"), shared_normal(
                             n_memblock, n_output, 0.01,
                             "dwmo1")  # Weight from input to mem
    dslo = theano.shared(numpy.random.normal(scale=0.01),
                         name="dslo0"), theano.shared(
                             numpy.random.normal(scale=0.01), name="dslo1")
    dbo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),
                        "dbo")  # Bias from input to mem
    ''' END OF DELTAS '''
    self.deltas = dwiig[0], dwiig[1], dbig[0], dbig[1], dwifg[0], dwifg[
        1], dbfg[0], dbfg[1], dwiog[0], dwiog[1], dbog[0], dbog[1], dwmig[
            0], dwmig[1], dwmfg[0], dwmfg[1], dwmog[0], dwmog[1], dwim[
                0], dwim[1], dbm[0], dbm[1], dwmo[0], dwmo[1], dslo[
                    0], dslo[1], dbo
    # Initial cell state shared by both scan passes.
    init_mem = shared_zeros(n_memblock)

    # EXPRESSIONS - Forward
    def recurrence(input, pmem, i):
        # One LSTM step for direction i (0=forward, 1=backward).  i
        # arrives as a theano constant (scan non_sequence); .value
        # recovers the Python int used to index the weight pairs.
        i = i.value
        ingate = sig(T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i])
        forgate = sig(
            T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i])
        #mem = forgate * pmem + ingate * T.tanh(T.dot(input, wim[i]) + bm[i]) # Use sig or tan???
        mem = T.tanh(forgate * pmem + ingate *
                     T.tanh(T.dot(input, wim[i]) + bm[i])
                     )  # instead of identity, use tanh for mem out
        outgate = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
        layerout = T.tanh(T.dot(outgate * mem, wmo[i]))
        #print layerout.shape.eval()
        return mem, layerout

    #Forward Pass
    (_, output_sequencef), updf = theano.scan(fn=recurrence,
                                              sequences=input_sequence,
                                              non_sequences=0,
                                              outputs_info=[init_mem, None])
    (_, output_sequencebp), updb = theano.scan(fn=recurrence,
                                               sequences=input_sequence,
                                               non_sequences=1,
                                               outputs_info=[init_mem, None],
                                               go_backwards=True)
    # The backward scan emits outputs in reverse time order; flip so
    # both directions align per timestep.
    output_sequenceb = output_sequencebp[::-1]
    # Mix the two directions with scalar weights, pre-sigmoid.
    presig_output_sequence, train_updates = theano.scan(
        fn=lambda x, y: (x * slo[0] + y * slo[1] + bo),
        sequences=[output_sequencef, output_sequenceb],
        outputs_info=[None])
    # avoid log(0) for log(scan(sigmoid()))
    output_sequence = sig(presig_output_sequence)
    # output_sequence become a batch of output vectors
    train_updates.update(updf)
    train_updates.update(updb)
    # L2 penalty accumulated over every parameter.
    l2 = 0
    for p in self.params:
        l2 += T.sum(p * p)
    # Loss Function
    outloss = T.nnet.binary_crossentropy(
        output_sequence, gold_sequence).mean(
        ) + l2 * l2rate  # TODO: check if the dimensions match here
    # consider using multi-category? because binary allows multiple 1's in the vector
    # Backward Pass
    gradient = T.grad(outloss,
                      self.params,
                      consider_constant=[input_sequence, gold_sequence])
    # Momentum SGD: apply the step to each parameter, then refresh the
    # corresponding momentum buffer (orders of params/deltas must match).
    train_updates.update(
        ((p, p + m * d - lr * g)
         for p, g, d in zip(self.params, gradient, self.deltas)))
    train_updates.update(
        ((d, m * d - lr * g)
         for p, g, d in zip(self.params, gradient, self.deltas)))
    # Rounded predictions vs. gold for evaluation counts.
    target = T.iround(gold_sequence)
    output = T.iround(output_sequence)
    tp = T.sum(T.and_(target, output))
    # NOTE(review): by the usual definitions these two look swapped
    # (precision divides by predicted positives, recall by gold
    # positives); f is symmetric so it is unaffected.  p/r/f are not
    # exported by any compiled function below.
    p = tp / (T.sum(target))
    r = tp / (T.sum(output))
    f = (2 * p * r) / (p + r)
    ct = T.sum(target)
    co = T.sum(output)
    #self.train_function = theano.function([input_sequence,gold_sequence], [output_sequence], updates=train_updates)
    self.train_function = theano.function([input_sequence, gold_sequence],
                                          [], updates=train_updates)
    #self.validate_function = theano.function([input_sequence,gold_sequence], [outloss,output_sequence])
    self.test_function = theano.function([input_sequence, gold_sequence],
                                         [outloss, ct, co, tp])
    self.generate_function = theano.function([input_sequence], output)
def __init__(self, n_input=3, n_memblock=100, n_output=2, lr=0.0001, m=0.9):
    """Build a bidirectional single-layer LSTM graph (simple variant:
    additive direction merge, no L2 penalty) and compile its theano
    functions (train / test / generate).

    n_input:    width of each input row (one row per timestep)
    n_memblock: number of LSTM memory cells per direction
    n_output:   width of each output row
    lr:         SGD learning rate
    m:          momentum coefficient
    """
    # Symbolic sequences: one matrix row per timestep.
    input_sequence = T.matrix()
    gold_sequence = T.matrix()  # 1, n_output
    #input_sequence.tag.test_value = [[0,0,1],[0,1,0],[1,0,0]]
    #gold_sequence.tag.test_value = [[1,0],[0,1],[0,0]]
    ''' START WEIGHTS - 0=forward; 1=backward'''
    # Every weight/bias is a pair: index 0 = forward, 1 = backward pass.
    wiig = shared_normal(n_input, n_memblock, 0.01,"wiig0"),shared_normal(n_input, n_memblock, 0.01,"wiig1") # Weights from inputs to gates
    wmig = shared_normal(n_memblock, n_memblock, 0.01,"wmig0"),shared_normal(n_memblock, n_memblock, 0.01,"wmig1") # Weights from cells to gates - peepholes
    #big = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"big1")
    big = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"big0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"big1")
    wifg = shared_normal(n_input, n_memblock, 0.01,"wifg0"),shared_normal(n_input, n_memblock, 0.01,"wifg1")
    wmfg = shared_normal(n_memblock, n_memblock, 0.01,"wmfg0"),shared_normal(n_memblock, n_memblock, 0.01,"wmfg1")
    #bfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"bfg1")
    bfg = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bfg0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bfg1")
    wiog = shared_normal(n_input, n_memblock, 0.01,"wiog0"),shared_normal(n_input, n_memblock, 0.01,"wiog1")
    wmog = shared_normal(n_memblock, n_memblock, 0.01,"wmog0"),shared_normal(n_memblock, n_memblock, 0.01,"wmog1")
    #bog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"bog1")
    bog = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bog0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bog1")
    wim = shared_normal(n_input, n_memblock, 0.01,"wim0"),shared_normal(n_input, n_memblock, 0.01,"wim1") # Weight from input to mem
    #bm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"bm1") # Bias from input to mem
    bm = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bm0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bm1")
    wmo = shared_normal(n_memblock, n_output, 0.01,"wmo0"),shared_normal(n_memblock, n_output, 0.01,"wmo1") # Weight from input to mem
    bo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),"bo") # Bias from input to mem
    ''' END OF WEIGHTS '''
    self.params = wiig[0], big[0], wifg[0], bfg[0], wiog[0], bog[0], wmig[0], wmfg[0], wmog[0], wim[0], bm[0], wmo[0], wiig[1], big[1], wifg[1], bfg[1], wiog[1], bog[1], wmig[1], wmfg[1], wmog[1], wim[1], bm[1], wmo[1], bo
    ''' START DELTAS - 0=forward; 1=backward'''
    # Momentum buffers, one per parameter, listed in the same order as
    # self.params (required by the zip() in the update rules below).
    # NOTE(review): the weight deltas are initialized with random
    # normals rather than zeros -- confirm this is intentional.
    dwiig = shared_normal(n_input, n_memblock, 0.01,"dwiig0"),shared_normal(n_input, n_memblock, 0.01,"dwiig1") # Weights from inputs to gates
    dwmig = shared_normal(n_memblock, n_memblock, 0.01,"dwmig0"),shared_normal(n_memblock, n_memblock, 0.01,"dwmig1") # Weights from cells to gates - peepholes
    #dbig = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"dbig1")
    dbig = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbig0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbig1")
    dwifg = shared_normal(n_input, n_memblock, 0.01,"dwifg0"),shared_normal(n_input, n_memblock, 0.01,"dwifg1")
    dwmfg = shared_normal(n_memblock, n_memblock, 0.01,"dwmfg0"),shared_normal(n_memblock, n_memblock, 0.01,"dwmfg1")
    #dbfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"dbfg1")
    dbfg = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbfg0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbfg1")
    dwiog = shared_normal(n_input, n_memblock, 0.01,"dwiog0"),shared_normal(n_input, n_memblock, 0.01,"dwiog1")
    dwmog = shared_normal(n_memblock, n_memblock, 0.01,"dwmog0"),shared_normal(n_memblock, n_memblock, 0.01,"dwmog1")
    #dbog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"dbog1")
    dbog = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbog0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbog1")
    dwim = shared_normal(n_input, n_memblock, 0.01,"dwim0"),shared_normal(n_input, n_memblock, 0.01,"dwim1") # Weight from input to mem
    #dbm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"dbm1") # Bias from input to mem
    dbm = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbm0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbm1")
    dwmo = shared_normal(n_memblock, n_output, 0.01,"dwmo0"),shared_normal(n_memblock, n_output, 0.01,"dwmo1") # Weight from input to mem
    dbo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),"dbo") # Bias from input to mem
    ''' END OF DELTAS '''
    self.deltas = dwiig[0], dbig[0], dwifg[0], dbfg[0], dwiog[0], dbog[0], dwmig[0], dwmfg[0], dwmog[0], dwim[0], dbm[0], dwmo[0], dwiig[1], dbig[1], dwifg[1], dbfg[1], dwiog[1], dbog[1], dwmig[1], dwmfg[1], dwmog[1], dwim[1], dbm[1], dwmo[1], dbo
    # Initial cell state shared by both scan passes.
    init_mem = shared_zeros(n_memblock)

    # EXPRESSIONS - Forward
    def recurrence(input, pmem, i):
        # One LSTM step for direction i (0=forward, 1=backward).  i
        # arrives as a theano constant (scan non_sequence); .value
        # recovers the Python int used to index the weight pairs.
        i = i.value
        ingate = sig(T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i])
        forgate = sig(T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i])
        mem = forgate * pmem + ingate * T.tanh(T.dot(input, wim[i]) + bm[i]) # Use sig or tan???
        outgate = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
        layerout = T.dot(outgate * mem, wmo[i])
        #output = sig(T.dot(outgate * mem, wmo) + bo)
        return mem, layerout

    #Forward Pass
    (mem_sequencef, output_sequencef), updf = theano.scan(fn=recurrence, sequences = input_sequence, non_sequences = 0, outputs_info = [init_mem, None])
    (mem_sequenceb, output_sequenceb), updb = theano.scan(fn=recurrence, sequences = input_sequence, non_sequences = 1, outputs_info = [init_mem, None], go_backwards=True)
    # The backward scan emits outputs in reverse time order; flip so
    # both directions align per timestep.
    output_sequenceb = output_sequenceb[::-1]
    # Merge directions additively and squash with the logistic sigmoid.
    output_sequence, train_updates = theano.scan(fn=lambda x, y: sig(x + y + bo), sequences = [output_sequencef, output_sequenceb], outputs_info=[None])
    train_updates.update(updf)
    train_updates.update(updb)
    # output_sequence become a batch of output vectors
    # Loss Function
    outloss = T.nnet.binary_crossentropy(output_sequence, gold_sequence).mean() # TODO: check if the dimensions match here
    # consider using multi-category? because binary allows multiple 1's in the vector
    # Backward Pass
    gradient = T.grad(outloss, self.params, consider_constant=[input_sequence, gold_sequence])
    # Momentum SGD: apply the step to each parameter, then refresh the
    # corresponding momentum buffer (orders of params/deltas must match).
    train_updates.update(((p, p + m * d - lr * g) for p, g, d in zip(self.params, gradient, self.deltas)))
    train_updates.update(((d, m * d - lr * g) for p, g, d in zip(self.params, gradient, self.deltas)))
    # Rounded predictions vs. gold for evaluation counts.
    target = T.iround(gold_sequence)
    output = T.iround(output_sequence)
    tp = T.sum(T.and_(target,output))
    # NOTE(review): by the usual definitions these two look swapped
    # (precision divides by predicted positives, recall by gold
    # positives); f is symmetric so it is unaffected.  p/r/f are not
    # exported by any compiled function below.
    p = tp/(T.sum(target))
    r = tp/(T.sum(output))
    f = ( 2 * p * r )/(p+r)
    ct = T.sum(target)
    co = T.sum(output)
    #self.train_function = theano.function([input_sequence,gold_sequence], [output_sequence], updates=train_updates)
    self.train_function = theano.function([input_sequence,gold_sequence], [], updates=train_updates)
    #self.validate_function = theano.function([input_sequence,gold_sequence], [outloss,output_sequence])
    self.test_function = theano.function([input_sequence,gold_sequence], [outloss, ct, co, tp])
    self.generate_function = theano.function([input_sequence], output)