Example #1
	def emit_new_weight(self, inp_h, weight_h, memory_h):
		#content-based addressing: compare an emitted key against every memory row

		key = T.dot(inp_h, self.key_w)+self.key_b
		beta = T.nnet.softplus(T.dot(inp_h, self.beta_w )+self.beta_b)

		g = T.nnet.sigmoid(T.dot(inp_h, self.g_w)+self.g_b)

		#gamma = T.nnet.softplus(T.dot(inp_h,self.gamma_w)+self.gamma_b)
				
		#cosine similarity against memory, sharpened by beta, normalised to a distribution
		weight_c = tools.vector_softmax(beta*tools.cos_sim(key, memory_h))
		#location addressing
		#interpolating
		
		weight_g = g*weight_c+ (1-g)*weight_h

		#"neural" location addressing: a learned transform replaces the NTM shift and
		#sharpen steps; note the tanh output is not renormalised to a distribution
		weight_location = T.tanh(T.dot(weight_g, self.location_w)+self.location_b)

		weight_new = weight_location
		
		#erase and add vectors for the write step: erase is squashed to (0,1), add is unbounded
		erase = T.nnet.sigmoid(T.dot(inp_h,self.erase_w)+self.erase_b)
		add = T.dot(inp_h,self.add_w)+self.add_b
		#if test:
		#	return key, beta,weight_c,g,weight_g,shift,weight_shift,gamma,weight_gamma,weight_new
		return weight_new, erase, add
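
The two tools helpers used above are not shown in these examples. A minimal sketch of what they are assumed to compute, reconstructed from the commented-out block in Example #4 (key_normal, memory_normal, weight_cnormal); the real implementations live in tools.py, and the 1e-6 epsilon stands in for tools.mini:

import theano.tensor as T

def vector_softmax(v):
	#numerically stable softmax over a 1-D tensor
	e = T.exp(v - T.max(v))
	return e / T.sum(e)

def cos_sim(key, memory):
	#cosine similarity between a key vector and every memory row
	key_norm = T.sqrt(T.sum(key ** 2)) + 1e-6	#epsilon assumed; tools.mini in the original
	mem_norm = T.sqrt(T.sum(memory ** 2, axis=1)) + 1e-6
	return T.dot(memory, key) / (key_norm * mem_norm)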
Example #2
	def __init__(self, vector_size, voc_size, head_type, head_num, controller_type, controller_sizes, memory_size,shift_width = 3, activation = T.tanh):
		self.controller = controller_type(controller_sizes)
		#+1 row for symbol 0, which pred_t treats as the query marker
		embedding = tools.initial_weights(voc_size[0]+1, vector_size)
		self.embedding = theano.shared(value = embedding, name = 'embedding', borrow=True)
		input_w = tools.initial_weights(vector_size, controller_sizes[0])
		self.input_w = theano.shared(value = input_w, name = 'input_w', borrow=True)
		input_b = 0.*tools.initial_weights(controller_sizes[0])
		self.input_b = theano.shared(value = input_b, name = 'input_b', borrow=True)
		read_w = tools.initial_weights(memory_size[1], controller_sizes[0])
		self.read_w = theano.shared(value = read_w, name = 'read_w', borrow=True)
		output_w = tools.initial_weights(controller_sizes[-1], voc_size[1])
		self.output_w = theano.shared(value=output_w,name='Controller_outw', borrow=True)
		output_b = 0.*tools.initial_weights(voc_size[1])
		self.output_b = theano.shared(value=output_b,name='Controller_outb', borrow=True)
		memory_init_p = 2*(numpy.random.rand(memory_size[0],memory_size[1])-0.5)
		weight_init_p = numpy.random.randn(memory_size[0])
		self.memory_init = theano.shared(value = memory_init_p, name = 'memory_init', borrow=True)
		self.weight_init = theano.shared(value = weight_init_p, name = 'weight_init', borrow=True)
		self.params = self.controller.params+[self.embedding, self.input_w, self.read_w, self.input_b, self.weight_init,self.memory_init,self.output_w, self.output_b]

		memory_init = self.memory_init
		weight_init = tools.vector_softmax(self.weight_init)

		self.heads = []
		for i in xrange(head_num):
			if head_type == Head_neural:
				self.heads.append(head_type(controller_sizes[-1], memory_size,i))
			else:
				self.heads.append(head_type(controller_sizes[-1], memory_size, shift_width,i))
			self.params += self.heads[i].params
		print self.params

		def pred_t(input_voc_t, weight_tm1, memory_tm1):
			#one scan step: embed the token, read from memory, run the controller
			rawinput_t = self.embedding[input_voc_t]
			input_t = T.dot(rawinput_t,self.input_w)
			#read vector: r = w_{t-1}^T M_{t-1}, projected to the controller input size
			read_m = T.dot(weight_tm1, memory_tm1)
			read_t = T.dot(read_m,self.read_w)
			controller_input = activation(input_t+read_t+self.input_b)
			hid = self.controller.getY(controller_input)
			output = T.nnet.softmax(T.dot(hid, self.output_w)+self.output_b)
			#emit the argmax prediction only at the query symbol 0, otherwise emit 0
			result = T.switch(T.eq(input_voc_t, 0),T.argmax(output,axis=1), theano.shared(0))
			#test = controller_input
			
			#apply each head in turn, threading the weights and memory through
			memory_inter = memory_tm1
			weight_inter = weight_tm1
			for head in self.heads:
				weight_inter, erase, add= head.emit_new_weight(hid, weight_inter, memory_inter)
				#write to memory
				weight_tdim = weight_inter.dimshuffle((0, 'x'))
				erase_dim = erase.dimshuffle(('x', 0))
				add_dim = add.dimshuffle(('x', 0))
				M_erased = memory_inter*(1-(weight_tdim*erase_dim))
				memory_inter = M_erased+(weight_tdim*add_dim)

			#testing = weight_tm1
			#testing2 = rawinput_t
			memory_t = memory_inter
			weight_t = weight_inter
			

			return weight_t, memory_t, output,result


		input = T.lvector()
		output = T.lvector()

		pred, _ = theano.scan(fn = pred_t,
							sequences = [input],
							outputs_info = [weight_init, memory_init, None,None])

		#negative log-probabilities per step, sliced to the steps scored against output
		p_output = -T.log(pred[-2])[output.shape[0]-1:]
		#output = output.reshape(output.shape[0],1)
		def cost_step(po, o, cost_tm1):
			#accumulate the negative log-probability of the target symbol o
			cost = cost_tm1+po[0][o]
			return cost
		cost0 = theano.shared(0.)
		costs,_ = theano.scan(fn = cost_step,
			sequences = [p_output, output],
			outputs_info = [cost0]
			)
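		#note: this scan is equivalent to a single advanced-indexing expression,
		#cost = T.sum(p_output[T.arange(output.shape[0]), 0, output]),
		#assuming output is no longer than p_output along the first axis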

		#L2 penalty over all parameters
		l2 = T.sum(0)
		for param_i in self.params:
			l2 = l2+(param_i**2).sum()

		#only costs[-1], the total NLL plus the penalty, is differentiated below
		costs += 1e-4*l2

		grads = T.grad(costs[-1], self.params)
		grads_clip = [T.clip(grad,-100,100) for grad in grads]
		updates = tools.adadelta(self.params, grads_clip, 0.95, 1e-6)

		self.predict = theano.function(inputs = [input], outputs =[pred[-1]])
		self.train = theano.function(inputs= [input, output], outputs = costs[-1], updates = updates)
		self.test = theano.function(inputs= [input, output], outputs = costs[-1])
		self.getweight = theano.function(inputs = [input], outputs = [pred[0]])
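
A hypothetical driver for the class above, to show the expected calling convention; the class name NTMSeq and the LSTM controller type are placeholders (Head_neural is the head class referenced above), and all sizes are illustrative only:

import numpy

model = NTMSeq(vector_size=64, voc_size=(50, 50), head_type=Head_neural,
		head_num=1, controller_type=LSTM, controller_sizes=[100, 100],
		memory_size=(128, 20))
xs = numpy.array([5, 12, 3, 0], dtype='int64')	#symbol 0 marks the query step
ys = numpy.array([3], dtype='int64')
cost = model.train(xs, ys)	#one adadelta step; returns the scalar cost
preds = model.predict(xs)	#argmax prediction at each step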
Example #3
	def __init__(self, vector_size, head_type,head_num, controller_type, controller_sizes, memory_size, shift_width = 3, activation = T.tanh):
		self.lr = 0.01
		self.controller = controller_type(controller_sizes)
		input_w = tools.initial_weights(vector_size, controller_sizes[0])
		self.input_w = theano.shared(value = input_w, name = 'input_w', borrow=True)
		input_b = 0.*tools.initial_weights(controller_sizes[0])
		self.input_b = theano.shared(value = input_b, name = 'input_b', borrow=True)
		read_w = tools.initial_weights(memory_size[1], controller_sizes[0])
		self.read_w = theano.shared(value = read_w, name = 'read_w', borrow=True)
		output_w = tools.initial_weights(controller_sizes[-1], vector_size)
		self.output_w = theano.shared(value=output_w,name='Controller_outw', borrow=True)
		#the bias must match the output dimension (vector_size) produced by output_w
		output_b = 0.*tools.initial_weights(vector_size)
		self.output_b = theano.shared(value=output_b,name='Controller_outb', borrow=True)
		memory_init_p = 2*(numpy.random.rand(memory_size[0],memory_size[1])-0.5)
		weight_init_p = numpy.random.randn(memory_size[0])
		self.memory_init = theano.shared(value = memory_init_p, name = 'memory_init', borrow=True)
		self.weight_init = theano.shared(value = weight_init_p, name = 'weight_init', borrow=True)
		self.params = self.controller.params+[self.input_w, self.read_w, self.input_b, self.weight_init,self.memory_init, self.output_w, self.output_b]

		self.heads = []
		for i in xrange(head_num):
			if head_type == Head_neural:
				self.heads.append(head_type(controller_sizes[-1], memory_size,i))
			else:
				self.heads.append(head_type(controller_sizes[-1], memory_size, shift_width,i))
			self.params += self.heads[i].params

		#memory_init = tools.initial_weights(memory_size)
		memory_init = self.memory_init
		#weight_init_s = T.nnet.sigmoid(self.weight_init)
		weight_init = tools.vector_softmax(self.weight_init)
		print self.params

		#def weighting(weight, value):
		#	return weight*value

		def pred_t(rawinput_t, weight_tm1, memory_tm1):
			#one scan step: project the input, read from memory, run the controller
			input_t = T.dot(rawinput_t,self.input_w)
			#read vector: r = w_{t-1}^T M_{t-1}, projected to the controller input size
			read_m = T.dot(weight_tm1, memory_tm1)
			read_t = T.dot(read_m,self.read_w)
			controller_input = activation(input_t+read_t+self.input_b)
			hid = self.controller.getY(controller_input)
			#per-bit sigmoid output for the binary copy target
			output = T.nnet.sigmoid(T.dot(hid, self.output_w)+self.output_b)
			result = output
			
			#result = read_m
			#emit the weights
			
			#apply each head in turn, threading the weights and memory through
			memory_inter = memory_tm1
			weight_inter = weight_tm1
			for head in self.heads:
				weight_inter, erase, add= head.emit_new_weight(hid, weight_inter, memory_inter)
				#write to memory
				weight_tdim = weight_inter.dimshuffle((0, 'x'))
				erase_dim = erase.dimshuffle(('x', 0))
				add_dim = add.dimshuffle(('x', 0))
				M_erased = memory_inter*(1-(weight_tdim*erase_dim))
				memory_inter = M_erased+(weight_tdim*add_dim)

			#testing = weight_tm1
			#testing2 = rawinput_t
			memory_t = memory_inter
			weight_t = weight_inter

			return weight_t, memory_t, result

		input = T.matrix()
		output = T.matrix()
		#the first half of the sequence is the pattern, the second half the recall phase
		seqlength = input.shape[0]//2
		#tmp = T.dvector()
		#testinfo = self.controller.getY(input[1])
		#testinfo = input.shape

		pred, _ = theano.scan(fn = pred_t, 
							sequences = [input],
							outputs_info = [weight_init, memory_init,None ])

		
		#score only the recall phase; the affine clamp keeps predictions strictly inside (0,1) before the log
		entropy = T.sum(T.nnet.binary_crossentropy(5e-6+(1-1e-5)*pred[-1][seqlength+1:], output[seqlength+1:]),axis = 1)
		
		
		#costs = (pred[-1]-output) ** 2
		#cost_sq = T.sum(costs)


		#L2 penalty over all parameters
		l2 = T.sum(0)
		for param_i in self.params:
			l2 = l2+(param_i**2).sum()
		cost = T.sum(entropy) +1e-3*l2


		grads = [T.grad(cost, param_i) for param_i in self.params]
		grads_clip = [T.clip(grad,-10,10) for grad in grads]
		#params_up = [param_i for param_i, grad_i in zip(self.params, grads_clip)]
		#new_value = [param_i-self.lr*grad_i for param_i, grad_i in zip(self.params, grads_clip)]
		#SGD
		#updates = [(param_i, param_i-self.lr*grad_i) for param_i, grad_i in zip(self.params, grads_clip)]
		#updates = zip(params_up, new_value)
		
		#adadelta
		updates = tools.adadelta(self.params, grads_clip, 0.95, 1e-6)
		#updates = tools.adadelta_another(self.params,grads_clip)

		#despite the name, this returns the current L2 norm of the parameters
		self.test = theano.function(inputs = [], outputs = l2)
		self.predict = theano.function(inputs = [input], outputs = pred)
		self.grads = theano.function(inputs = [input, output], outputs = grads)
		#self.train = theano.function(inputs = [input,output], outputs = [input,output, costs, cost, pred[0],pred[2],pred[-1],grads[5],grads_clip[5], grads[6]], updates = updates)
		self.train = theano.function(inputs = [input,output], outputs = cost, updates = updates)#,mode=theano.compile.MonitorMode(post_func=tools.detect_nan))
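
The per-head loop in pred_t implements the erase/add write rule of the Neural Turing Machine (Graves et al., 2014): each memory row i is scaled elementwise by 1 - w(i)*e and then has w(i)*a added. A small numpy shape check of the same outer-product form used above; the sizes are illustrative only:

import numpy

N, M = 128, 20	#memory_size[0], memory_size[1]
memory = numpy.random.rand(N, M)
w = numpy.random.rand(N)
w = w / w.sum()	#attention weights over memory rows
e = numpy.random.rand(M)	#erase vector, squashed to (0,1) by the sigmoid
a = numpy.random.randn(M)	#add vector, unbounded
M_erased = memory * (1 - numpy.outer(w, e))
memory_t = M_erased + numpy.outer(w, a)
assert memory_t.shape == (N, M)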
Example #4
	def emit_new_weight(self, inp_h, weight_h, memory_h):
		#content-based addressing: compare an emitted key against every memory row
		
		
		key = T.dot(inp_h, self.key_w)+self.key_b
		beta = T.nnet.softplus(T.dot(inp_h, self.beta_w )+self.beta_b)

		g = T.nnet.sigmoid(T.dot(inp_h, self.g_w)+self.g_b)

		#shift and sharpening parameters (the location-addressing path below is disabled)
		#shift = tools.vector_softmax(T.dot(inp_h,self.shift_w)+self.shift_b)
		#shift = shift.dimshuffle((0,'x'))

		#gamma = T.nnet.softplus(T.dot(inp_h,self.gamma_w)+self.gamma_b)+1.
		

		#manual version of tools.cos_sim followed by normalisation, kept for reference:
		'''
		key_normal = key/(T.sqrt(T.sum(key**2)) + tools.mini)
		memory_mod = T.sqrt(T.sum(memory**2, axis = 1).dimshuffle((0,'x')))
		memory_normal = memory/(memory_mod  + tools.mini)	

		weight_c = T.exp(beta*T.dot(memory_normal, key_normal))
		weight_cnormal = weight_c/T.sum(weight_c  + tools.mini)
		'''

		
		#cosine similarity against memory, sharpened by beta, normalised to a distribution
		weight_c = tools.vector_softmax(beta*tools.cos_sim(key, memory_h))
		#location addressing
		#interpolating (disabled; g is computed above but never used)

		#weight_g = g*weight_c+ (1-g)*weight_h
		#weight_conv = theano.tensor.signal.conv(weight_g.reshape(memory.shape[0],1),
		#				shift_normal.reshape(self.shift_width, 1))

		#code from shaw

		#shift_normal = shift/T.sum(shift)

		

		#abandoned scalar-loop version of the circular shift; superseded by the
		#vectorised weight_g[self.shift_conv] form below
		'''
		wlength = weight_g.shape[0]
		shift_sidewidth = self.shift_width/2

		def cal_shift(i):
			weight_pos = 0.
			for j in range(0,self.shift_width):
				pos = (i+j-shift_sidewidth) % wlength
				weight_pos += shift[j]*weight_g[pos]
			return weight_pos
		'''

		#weight_shift = T.sum(shift*weight_g[self.shift_conv], axis = 0)
		#sharpening (disabled)
		'''
		weight_gamma = weight_shift ** gamma
		weight_new = weight_gamma/T.sum(weight_gamma)
		'''
		#location addressing is disabled in this head: the content weights are used directly
		weight_new = weight_c

		#erase and add vectors for the write step: erase is squashed to (0,1), add is unbounded
		erase = T.nnet.sigmoid(T.dot(inp_h,self.erase_w)+self.erase_b)
		add = T.dot(inp_h,self.add_w)+self.add_b
		#if test:
		#	return key, beta,weight_c,g,weight_g,shift,weight_shift,gamma,weight_gamma,weight_new
		return weight_new, erase, add
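
The commented-out fragments above sketch the full NTM location addressing (interpolation, circular shift, sharpening) that this head bypasses. A hedged Theano reconstruction of the missing path, assuming shift_conv is a precomputed (shift_width, N) integer matrix of circularly shifted row indices, as the expression weight_g[self.shift_conv] suggests; make_shift_conv is a hypothetical helper, not part of the original tools module:

import numpy
import theano.tensor as T

def make_shift_conv(n, shift_width):
	#hypothetical helper: row j holds the indices (i + j - shift_width//2) mod n
	offsets = numpy.arange(shift_width) - shift_width//2
	return (numpy.arange(n)[None, :] + offsets[:, None]) % n

def location_address(weight_g, shift, gamma, shift_conv):
	#circular shift: each slot is a shift-weighted sum of the interpolated
	#weights at the shifted positions
	weight_shift = T.sum(shift.dimshuffle((0, 'x'))*weight_g[shift_conv], axis=0)
	#sharpening: raise to gamma >= 1 and renormalise
	weight_gamma = weight_shift ** gamma
	return weight_gamma/T.sum(weight_gamma)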