def output(self, input, n_batch=None):
    """Build the symbolic output of the unpool + convolution layer.

    The input is upsampled by ``self.poolsize`` (every value repeated along
    axes 2 and 3, then multiplied by ``self.mask``), convolved with
    ``self.W`` through ``dnn.dnn_conv``, shifted by the bias ``self.b`` and
    passed through ``self.activation`` when one is set.

    :param input: symbolic tensor; axes 2 and 3 are the ones unpooled.
    :param n_batch: accepted for interface compatibility only. ``dnn_conv``
        is not given a static image shape, so the value is not used.
    :return: ``self.activation(lin_output)``, or the linear output when
        ``self.activation`` is None.
    :raises Exception: if ``self.border_mode`` is not ``'same'``.
    """
    # Only 'same' is implemented; reject anything else before building
    # any part of the graph.
    if self.border_mode != 'same':
        raise Exception('Unknown conv type')

    # --- Unpool
    if self.poolsize[0] == 1 and self.poolsize[1] == 1:
        unpool_out = input
    else:
        rows_repeated = Textra.repeat(input, self.poolsize[0], axis=2)
        unpool_out = Textra.repeat(
            rows_repeated, self.poolsize[1], axis=3) * self.mask

    # --- Unpool + conv: convolve the unpooled feature maps with filters
    conv_out = dnn.dnn_conv(
        img=unpool_out,
        kerns=self.W,
        subsample=(1, 1),
        border_mode=self.border,
    )

    # The bias is a vector (1D array); dimshuffle it to
    # (1, n_filters, 1, 1) so it broadcasts across mini-batches and
    # feature map width & height.
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        return lin_output
    return self.activation(lin_output)
def test_repeatOp(self):
    """Compare ``repeat`` with ``np.repeat`` for scalar and vector counts.

    For every rank in ``range(3)``, every axis from
    ``self._possible_axis`` and every dtype in ``tensor.discrete_dtypes``:
    dtypes listed in ``self.numpy_unsupported_dtypes`` must raise
    ``TypeError``; all others are compiled and checked against NumPy, first
    with a scalar repeat count and then with a vector of counts.
    """
    for ndim in range(3):
        x = T.TensorType(config.floatX, [False] * ndim)()
        a = np.random.random((10, ) * ndim).astype(config.floatX)

        for axis in self._possible_axis(ndim):
            for dtype in tensor.discrete_dtypes:
                r_var = T.scalar(dtype=dtype)
                r = np.asarray(3, dtype=dtype)
                if dtype in self.numpy_unsupported_dtypes:
                    self.assertRaises(TypeError,
                                      repeat, x, r_var, axis=axis)
                    continue

                # Scalar repeat count.
                f = theano.function([x, r_var],
                                    repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))

                # Vector of repeat counts. randint's upper bound is
                # exclusive, so (1, 6) draws from 1..5 exactly like the
                # deprecated random_integers(5) did.
                r_var = T.vector(dtype=dtype)
                n_counts = a.size if axis is None else 10
                r = np.random.randint(1, 6, size=n_counts).astype(dtype)

                f = theano.function([x, r_var],
                                    repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))
def reverseConv(self, activations, img_shape, flipped_filter, dim2=1):
    """Build the reverse (unpool + transposed-filter convolution) graph.

    Stores intermediate results on the instance (``zp``, ``lengthen``,
    ``w_shape``, ``changed_W``) and the final cropped result in
    ``self.reverseOutput``. Nothing is returned.

    :param activations: symbolic tensor to push back through the layer.
    :param img_shape: passed to ``conv.conv2d`` as ``image_shape``.
    :param flipped_filter: passed to ``conv.conv2d`` as ``filter_shape``.
    :param dim2: when truthy the last axis is cropped as well; otherwise
        only axis 2 is cropped.
    """
    # Reverse max pooling first.
    out_shape = self.output.shape
    self.zp = activations.reshape(
        (out_shape[0] * out_shape[1] * out_shape[2], out_shape[3]))
    rows_repeated = repeat(activations, self.poolsize[0], axis=2)
    self.lengthen = repeat(rows_repeated, self.poolsize[1], axis=3)

    self.w_shape = self.W.shape
    self.changed_W = self.W.dimshuffle(1, 0, 2, 3)

    # Reverse the convolutional step: swapped filter axes, spatially
    # flipped kernels, 'full' border.
    full_out = conv.conv2d(
        input=self.lengthen,
        filters=self.changed_W[:, :, ::-1, ::-1],
        filter_shape=flipped_filter,
        image_shape=img_shape,
        border_mode='full')

    # Crop amounts that turn the 'full' result into a "same"-sized one
    # along axis 2.
    # NOTE(review): when an end offset is 0 (filter size 1) the slice
    # ``start:-0`` is empty -- confirm such filters are never used.
    half_rows = (self.filter_shape[2] - 1) / 2.0
    s1 = numpy.floor(half_rows).astype(int)
    e1 = numpy.ceil(half_rows).astype(int)

    # Time must be the same: forward = time is same, frequency is valid;
    # backward = time is same, frequency is full.
    if not dim2:
        cropped = full_out[:, :, s1:-e1, :]
    else:
        # Crop amounts for the last axis.
        half_cols = (self.filter_shape[3] - 1) / 2.0
        s2 = numpy.floor(half_cols).astype(int)
        e2 = numpy.ceil(half_cols).astype(int)
        if s1 == e1:
            cropped = full_out[:, :, :, s2:-e2]
        else:
            cropped = full_out[:, :, s1:-e1, s2:-e2]

    self.reverseOutput = cropped
def fawn_recurrent(inpt_mean, inpt_var, weights_mean, weights_var, f,
                   initial_mean, initial_var):
    """Scan a mean/variance propagating step over an input sequence.

    At every step the pre-activation mean and variance are computed from
    the current input and the weight mean/variance, then pushed through
    the transfer function looked up from ``f`` in ``transfer_``.

    Returns the tuple ``(hidden_in_mean_rec, hidden_in_var_rec,
    hidden_mean_rec, hidden_var_rec)`` produced by ``theano.scan``.
    """
    transfer = lookup(f, transfer_)

    def step(x_mean, x_var, him_m1, hiv_m1, hom_m1, hov_m1):
        # The four previous-state arguments are supplied by scan but are
        # not read here.
        pre_mean = T.dot(x_mean, weights_mean)
        pre_var = (T.dot(x_mean ** 2, weights_var)
                   + T.dot(x_var, weights_mean ** 2)
                   + T.dot(x_var, weights_var))
        out_mean, out_var = transfer(pre_mean, pre_var)
        return pre_mean, pre_var, out_mean, out_var

    # A 1D initial state is copied once per entry of the sequence's
    # axis 1.
    if initial_mean.ndim == 1:
        initial_mean = repeat(
            initial_mean.dimshuffle('x', 0), inpt_mean.shape[1], axis=0)
    if initial_var.ndim == 1:
        initial_var = repeat(
            initial_var.dimshuffle('x', 0), inpt_mean.shape[1], axis=0)

    first_inpt = inpt_mean[0]
    results, _ = theano.scan(
        step,
        sequences=[inpt_mean, inpt_var],
        outputs_info=[T.zeros_like(first_inpt),
                      T.zeros_like(first_inpt),
                      initial_mean,
                      initial_var])
    (hidden_in_mean_rec, hidden_in_var_rec,
     hidden_mean_rec, hidden_var_rec) = results

    return (hidden_in_mean_rec, hidden_in_var_rec,
            hidden_mean_rec, hidden_var_rec)
def test_repeatOp(self):
    """Compare ``repeat`` with ``np.repeat`` for scalar and vector counts.

    For every rank in ``range(3)``, every axis from
    ``self._possible_axis`` and every dtype in ``tensor.discrete_dtypes``:
    dtypes listed in ``self.numpy_unsupported_dtypes`` must raise
    ``TypeError``; all others are compiled and checked against NumPy, first
    with a scalar repeat count and then with a vector of counts.
    """
    for ndim in range(3):
        x = T.TensorType(config.floatX, [False] * ndim)()
        a = np.random.random((10, ) * ndim).astype(config.floatX)

        for axis in self._possible_axis(ndim):
            for dtype in tensor.discrete_dtypes:
                r_var = T.scalar(dtype=dtype)
                r = np.asarray(3, dtype=dtype)
                if dtype in self.numpy_unsupported_dtypes:
                    self.assertRaises(TypeError,
                                      repeat, x, r_var, axis=axis)
                    continue

                # Scalar repeat count.
                f = theano.function([x, r_var],
                                    repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))

                # Vector of repeat counts. randint's upper bound is
                # exclusive, so (1, 6) draws from 1..5 exactly like the
                # deprecated random_integers(5) did.
                r_var = T.vector(dtype=dtype)
                n_counts = a.size if axis is None else 10
                r = np.random.randint(1, 6, size=n_counts).astype(dtype)

                f = theano.function([x, r_var],
                                    repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))
def output(self, input, n_batch=None):
    """Build the symbolic output of the unpool + convolution layer.

    The input is upsampled by ``self.poolsize`` (every value repeated along
    axes 2 and 3, then multiplied by ``self.mask``), convolved with
    ``self.W`` through ``dnn.dnn_conv``, shifted by the bias ``self.b`` and
    passed through ``self.activation`` when one is set.

    :param input: symbolic tensor; axes 2 and 3 are the ones unpooled.
    :param n_batch: accepted for interface compatibility only. ``dnn_conv``
        is not given a static image shape, so the value is not used.
    :return: ``self.activation(lin_output)``, or the linear output when
        ``self.activation`` is None.
    :raises Exception: if ``self.border_mode`` is not ``'same'``.
    """
    # Only 'same' is implemented; reject anything else before building
    # any part of the graph.
    if self.border_mode != 'same':
        raise Exception('Unknown conv type')

    # --- Unpool
    if self.poolsize[0] == 1 and self.poolsize[1] == 1:
        unpool_out = input
    else:
        rows_repeated = Textra.repeat(input, self.poolsize[0], axis=2)
        unpool_out = Textra.repeat(
            rows_repeated, self.poolsize[1], axis=3) * self.mask

    # --- Unpool + conv: convolve the unpooled feature maps with filters
    conv_out = dnn.dnn_conv(
        img=unpool_out,
        kerns=self.W,
        subsample=(1, 1),
        border_mode=self.border,
    )

    # The bias is a vector (1D array); dimshuffle it to
    # (1, n_filters, 1, 1) so it broadcasts across mini-batches and
    # feature map width & height.
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        return lin_output
    return self.activation(lin_output)
def drop_output(self, input, drop=0, rng=None, p=0.5, n_batch=None):
    """Build the layer output with optional dropout applied.

    The input is unpooled by ``self.poolsize`` (values repeated along axes
    2 and 3 and multiplied by ``self.mask``), convolved with ``self.W``,
    shifted by the bias and passed through ``self.activation`` when set.
    The result is then switched symbolically between its dropped-out and
    plain versions.

    :param input: symbolic tensor; axes 2 and 3 are the ones unpooled.
    :param drop: dropout is selected wherever ``drop`` is non-zero.
    :param rng: random generator handed to ``nonlinearity.dropout``.
    :param p: probability argument handed to ``nonlinearity.dropout``.
    :param n_batch: optional batch size overriding ``self.image_shape[0]``
        in the shape passed to ``conv2d``. The body always read this name
        but it was missing from the signature, so every call raised
        ``NameError``.
    :return: ``T.switch(T.neq(drop, 0), dropped_output, output)``.
    :raises Exception: if ``self.border_mode`` is not one of ``'valid'``,
        ``'same'`` or ``'full'``.
    """
    # --- Unpool
    if self.poolsize[0] == 1 and self.poolsize[1] == 1:
        unpool_out = input
    else:
        rows_repeated = Textra.repeat(input, self.poolsize[0], axis=2)
        unpool_out = Textra.repeat(
            rows_repeated, self.poolsize[1], axis=3) * self.mask

    image_shape = list(self.image_shape)
    if n_batch is not None:
        image_shape[0] = n_batch

    # --- Unpool + conv: convolve input feature maps with filters
    if self.border_mode not in ('valid', 'same', 'full'):
        raise Exception('Unknown conv type')

    # 'same' is obtained by cropping a 'full' convolution.
    conv_border = 'valid' if self.border_mode == 'valid' else 'full'
    conv_out = conv.conv2d(
        input=unpool_out,
        filters=self.W,
        filter_shape=self.filter_shape,
        image_shape=image_shape,
        border_mode=conv_border
    )
    if self.border_mode == 'same':
        # Floor division keeps the padding an integer on Python 3 too.
        pad_rows = (self.filter_shape[2] - 1) // 2
        pad_cols = (self.filter_shape[3] - 1) // 2
        # Explicit end indices: ``p:-p`` would be an empty slice for p == 0.
        conv_out = conv_out[:, :,
                            pad_rows:conv_out.shape[2] - pad_rows,
                            pad_cols:conv_out.shape[3] - pad_cols]

    # The bias is a vector (1D array); dimshuffle it to
    # (1, n_filters, 1, 1) so it broadcasts across mini-batches and
    # feature map width & height.
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        output = lin_output
    else:
        output = self.activation(lin_output)

    dropped_output = nonlinearity.dropout(rng, output, p)
    return T.switch(T.neq(drop, 0), dropped_output, output)
def unpool(self, input):
    """Upsample ``input`` and weight it by the pooling gradient.

    ``input`` is repeated ``self.poolsize[0]`` times along axis 2 and
    ``self.poolsize[1]`` times along axis 3, then multiplied elementwise
    by the gradient of ``sum(self.pool_out)`` with respect to
    ``self.pool_in``.
    """
    pool_grad = T.grad(T.sum(self.pool_out), wrt=self.pool_in)
    rows_repeated = repeat(input, self.poolsize[0], 2)
    upsampled = repeat(rows_repeated, self.poolsize[1], 3)
    return pool_grad * upsampled
def gen(Z, w, w1, w2, w3):
    """Map ``Z`` through two dense and two upsample + conv stages.

    Two batch-normalised ReLU dense layers are followed by a reshape to
    ``(batch, nkerns[2], 7, 7)``. Each of the two remaining stages doubles
    axes 2 and 3 by repetition and applies a 'half'-border convolution;
    the last one ends in ``tanh``.
    """
    def _double_spatial(t):
        # Repeat every entry twice along axis 2, then along axis 3.
        return repeat(repeat(t, 2, 2), 2, 3)

    dense0 = ReLU(batchnorm(T.dot(Z, w)))
    dense1 = ReLU(batchnorm(T.dot(dense0, w1)))
    maps = dense1.reshape((dense1.shape[0], nkerns[2], 7, 7))

    conv2 = ReLU(batchnorm(
        conv2d(_double_spatial(maps), w2, border_mode='half')))
    return T.tanh(conv2d(_double_spatial(conv2), w3, border_mode='half'))
def test_repeatOp(self):
    """Compare ``repeat`` with ``np.repeat`` for several kinds of counts.

    For ranks 1 and 3, every axis from ``self._possible_axis`` and every
    dtype in ``tensor.integer_dtypes`` the repeat count is given as a
    scalar, a vector, a one-element list and a broadcastable length-1
    tensor. Unsupported dtypes must raise ``TypeError``; the last two
    forms must not leave a ``RepeatOp`` in the compiled graph.
    """
    def _has_no_repeat_op(fn):
        return not np.any([isinstance(n.op, RepeatOp)
                           for n in fn.maker.fgraph.toposort()])

    for ndim in [1, 3]:
        x = T.TensorType(config.floatX, [False] * ndim)()
        a = np.random.random((10, ) * ndim).astype(config.floatX)

        for axis in self._possible_axis(ndim):
            for dtype in tensor.integer_dtypes:
                # --- scalar repeat count
                r_var = T.scalar(dtype=dtype)
                r = np.asarray(3, dtype=dtype)
                scalar_rejected = (
                    dtype == 'uint64' or
                    (dtype in self.numpy_unsupported_dtypes and
                     r_var.ndim == 1))
                if scalar_rejected:
                    self.assertRaises(TypeError,
                                      repeat, x, r_var, axis=axis)
                    continue

                f = theano.function([x, r_var],
                                    repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))

                # --- vector of repeat counts
                r_var = T.vector(dtype=dtype)
                if axis is None:
                    r = np.random.randint(1, 6, size=a.size).astype(dtype)
                else:
                    r = np.random.randint(1, 6, size=(10,)).astype(dtype)

                if (dtype in self.numpy_unsupported_dtypes and
                        r_var.ndim == 1):
                    self.assertRaises(TypeError,
                                      repeat, x, r_var, axis=axis)
                else:
                    f = theano.function([x, r_var],
                                        repeat(x, r_var, axis=axis))
                    assert np.allclose(np.repeat(a, r, axis=axis),
                                       f(a, r))

                # --- r is a list holding a single integer, e.g. [3]
                r = np.random.randint(1, 11, size=()).astype(dtype) + 2
                f = theano.function([x], repeat(x, [r], axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a))
                assert _has_no_repeat_op(f)

                # --- r is a tensor whose broadcastable pattern is (True,)
                r_var = theano.tensor.TensorType(broadcastable=(True,),
                                                 dtype=dtype)()
                r = np.random.randint(1, 6, size=(1,)).astype(dtype)
                f = theano.function([x, r_var],
                                    repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r[0], axis=axis), f(a, r))
                assert _has_no_repeat_op(f)
def drop_output(self, input, drop=0, rng=None, p=0.5, n_batch=None):
    """Build the layer output with optional dropout applied.

    The input is unpooled by ``self.poolsize`` (values repeated along axes
    2 and 3 and multiplied by ``self.mask``), convolved with ``self.W``,
    shifted by the bias and passed through ``self.activation`` when set.
    The result is then switched symbolically between its dropped-out and
    plain versions.

    :param input: symbolic tensor; axes 2 and 3 are the ones unpooled.
    :param drop: dropout is selected wherever ``drop`` is non-zero.
    :param rng: random generator handed to ``nonlinearity.dropout``.
    :param p: probability argument handed to ``nonlinearity.dropout``.
    :param n_batch: optional batch size overriding ``self.image_shape[0]``
        in the shape passed to ``conv2d``. The body always read this name
        but it was missing from the signature, so every call raised
        ``NameError``.
    :return: ``T.switch(T.neq(drop, 0), dropped_output, output)``.
    :raises Exception: if ``self.border_mode`` is not one of ``'valid'``,
        ``'same'`` or ``'full'``.
    """
    # --- Unpool
    if self.poolsize[0] == 1 and self.poolsize[1] == 1:
        unpool_out = input
    else:
        rows_repeated = Textra.repeat(input, self.poolsize[0], axis=2)
        unpool_out = Textra.repeat(
            rows_repeated, self.poolsize[1], axis=3) * self.mask

    image_shape = list(self.image_shape)
    if n_batch is not None:
        image_shape[0] = n_batch

    # --- Unpool + conv: convolve input feature maps with filters
    if self.border_mode not in ('valid', 'same', 'full'):
        raise Exception('Unknown conv type')

    # 'same' is obtained by cropping a 'full' convolution.
    conv_border = 'valid' if self.border_mode == 'valid' else 'full'
    conv_out = conv.conv2d(input=unpool_out,
                           filters=self.W,
                           filter_shape=self.filter_shape,
                           image_shape=image_shape,
                           border_mode=conv_border)
    if self.border_mode == 'same':
        # Floor division keeps the padding an integer on Python 3 too.
        pad_rows = (self.filter_shape[2] - 1) // 2
        pad_cols = (self.filter_shape[3] - 1) // 2
        # Explicit end indices: ``p:-p`` would be an empty slice for p == 0.
        conv_out = conv_out[:, :,
                            pad_rows:conv_out.shape[2] - pad_rows,
                            pad_cols:conv_out.shape[3] - pad_cols]

    # The bias is a vector (1D array); dimshuffle it to
    # (1, n_filters, 1, 1) so it broadcasts across mini-batches and
    # feature map width & height.
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        output = lin_output
    else:
        output = self.activation(lin_output)

    dropped_output = nonlinearity.dropout(rng, output, p)
    return T.switch(T.neq(drop, 0), dropped_output, output)
def output(self, dropout_active=False):
    """Scan ``self.op.step`` over the embedded input; return the last step.

    The initial state is ``self.op.id`` repeated ``X.shape[1]`` times along
    axis 0. ``dropout_active`` is accepted but not read by this method.
    """
    X = self.embedded()
    initial_state = repeat(self.op.id, X.shape[1], axis=0)
    steps, _ = theano.scan(
        self.op.step,
        sequences=[X],
        outputs_info=[initial_state],
    )
    return steps[-1]
def step(time_idx, lstm_hidden):
    """Attention read over the hidden states that precede ``time_idx``.

    Returns the controller output, the entropy of the final attention
    weights and the entropy of the softmax-normalised key of the last
    head.
    """
    # Memory = the initial memory (copied along axis 1 to match
    # lstm_hidden) followed by every hidden state before time_idx.
    initial_memory = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                            lstm_hidden.shape[1], axis=1)
    memory = T.concatenate(
        [initial_memory, lstm_hidden[:time_idx, :, :]], axis=0)
    M_curr = memory.transpose((1, 0, 2))

    input_curr = lstm_hidden[time_idx, :, :]

    # Every head refines the weights left by the previous one; the
    # attention weights and key of the last head are the ones kept.
    weight_curr = T.zeros([input_curr.shape[0], time_idx + 1])
    for head in heads:
        weight_curr, att_w_curr, key = build_head_curr(
            weight_curr, M_curr, head, input_curr)

    # NOTE(review): a weight of exactly 0 would yield 0 * log(0) = NaN --
    # confirm build_head_curr never returns exact zeros.
    entropy = T.sum(-1 * (weight_curr * T.log(weight_curr)), axis=1)

    key_normalize = T.nnet.softmax(key)
    key_entropy = T.sum(
        -1 * (key_normalize * T.log(key_normalize)), axis=1)

    att_M_curr = att_w_curr.dimshuffle(0, 'x', 1) * M_curr
    read_curr = build_read(att_M_curr, weight_curr)
    output = controller(input_curr, read_curr)
    return output, entropy, key_entropy
def step(time_idx, lstm_hidden):
    """Attention read over the hidden states that precede ``time_idx``.

    Returns the controller output, the entropy of the attention weights
    and those weights zero-padded along axis 1 to ``lstm_hidden.shape[0]``
    columns.
    """
    # Memory = the initial memory (copied along axis 1 to match
    # lstm_hidden) followed by every hidden state before time_idx.
    initial_memory = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                            lstm_hidden.shape[1], axis=1)
    memory = T.concatenate(
        [initial_memory, lstm_hidden[:time_idx, :, :]], axis=0)
    M_curr = memory.transpose((1, 0, 2))

    input_curr = lstm_hidden[time_idx, :, :]

    # Every head refines the weights left by the previous one; the
    # attention weights of the last head are the ones kept.
    weight_curr = T.zeros([input_curr.shape[0], time_idx + 1])
    for head in heads:
        weight_curr, att_w_curr = build_head_curr(
            weight_curr, M_curr, head, input_curr)

    # Pad with zeros so the weights have the same width at every step.
    n_missing = lstm_hidden.shape[0] - weight_curr.shape[1]
    pad_matrix = T.zeros((input_curr.shape[0], n_missing),
                         dtype='float32')
    weight_pad = T.concatenate([weight_curr, pad_matrix], axis=1)

    # NOTE(review): a weight of exactly 0 would yield 0 * log(0) = NaN --
    # confirm build_head_curr never returns exact zeros.
    entropy = T.sum(-1 * (weight_curr * T.log(weight_curr)), axis=1)

    att_M_curr = att_w_curr.dimshuffle(0, 'x', 1) * M_curr
    read_curr = build_read(att_M_curr, weight_curr)
    output = controller(input_curr, read_curr)
    return output, entropy, weight_pad
def output(self, dropout_active=False):
    """Scan ``self.op.step`` over the embedded input; return the last step.

    The initial state is ``self.op.id`` repeated ``X.shape[1]`` times along
    axis 0. ``dropout_active`` is accepted but not read by this method.
    """
    X = self.embedded()
    initial_state = repeat(self.op.id, X.shape[1], axis=0)
    steps, _ = theano.scan(
        self.op.step,
        sequences=[X],
        outputs_info=[initial_state],
    )
    return steps[-1]
def get_output_for(self, input, **kwargs):
    """Unpool ``data`` with a randomly mixed mask.

    ``input`` is the pair ``(data, mask_max)``. For every position a
    binomial draw with probability ``self.noise`` picks between a fixed
    mask (1 at the first entry of each ``self.factor`` window, 0
    elsewhere) and ``mask_max``. ``data`` is repeated by ``self.factor``
    along axes 2 and 3 and multiplied by the mixed mask.
    """
    data, mask_max = input

    # Fixed mask: one window with a single 1 at [0, 0], tiled over the
    # non-batch part of the first input shape, plus a leading axis.
    window = np.zeros(self.factor, dtype=np.float32)
    window[0, 0] = 1
    tiled = np.tile(window.reshape((1, ) + self.factor),
                    self.input_shapes[0][1:])
    mask_fixed = T.shape_padleft(tiled, n_ones=1)

    # Random selector; the stream is seeded from a fixed NumPy seed, so
    # the seed handed to RandomStreams is the same on every call.
    seed_source = np.random.RandomState(1234)
    streams = theano.tensor.shared_randomstreams.RandomStreams(
        seed_source.randint(999999))
    draws = streams.binomial(n=1, p=self.noise,
                             size=self.input_shapes[1][1:])
    selector = T.shape_padleft(T.cast(draws, dtype='float32'), n_ones=1)

    mask = selector * mask_fixed + (1 - selector) * mask_max

    rows_repeated = Textra.repeat(data, self.factor[0], axis=2)
    return Textra.repeat(rows_repeated, self.factor[1], axis=3) * mask
def test_infer_shape(self):
    """Check shape inference of ``RepeatOp`` for scalar and vector counts.

    For ranks 1 and 3, every axis from ``self._possible_axis`` and the
    dtypes int8, uint8 and uint64: unsupported dtypes must make ``repeat``
    raise ``TypeError`` for a vector count; the others go through
    ``self._compile_and_check`` with a scalar and then a vector count.
    """
    for ndim in [1, 3]:
        x = T.TensorType(config.floatX, [False] * ndim)()
        shp = (np.arange(ndim) + 1) * 3
        a = np.random.random(shp).astype(config.floatX)

        for axis in self._possible_axis(ndim):
            for dtype in ["int8", "uint8", "uint64"]:
                r_var = T.scalar(dtype=dtype)
                r = np.asarray(3, dtype=dtype)

                if dtype in self.numpy_unsupported_dtypes:
                    r_var = T.vector(dtype=dtype)
                    with pytest.raises(TypeError):
                        repeat(x, r_var)
                    continue

                # Scalar repeat count.
                self._compile_and_check(
                    [x, r_var],
                    [RepeatOp(axis=axis)(x, r_var)],
                    [a, r],
                    self.op_class,
                )

                # Vector of counts, one per element along the axis.
                r_var = T.vector(dtype=dtype)
                if axis is None:
                    n_counts = a.size
                elif a.size > 0:
                    n_counts = a.shape[axis]
                else:
                    n_counts = (10, )
                r = np.random.randint(1, 6, size=n_counts).astype(dtype)

                self._compile_and_check(
                    [x, r_var],
                    [RepeatOp(axis=axis)(x, r_var)],
                    [a, r],
                    self.op_class,
                )
def drop_output(self, input, drop=0, rng=None, p=0.5):
    """Build the layer output with optional dropout applied.

    The input is unpooled by ``self.poolsize`` (values repeated along axes
    2 and 3 and multiplied by ``self.mask``), convolved with ``self.W``
    through ``dnn.dnn_conv``, optionally contrast-normalised across
    channels when ``self.cnorm`` is set, shifted by the bias and passed
    through ``self.activation`` when set. The result is then switched
    symbolically between its dropped-out and plain versions.

    :param input: symbolic tensor; axes 2 and 3 are the ones unpooled.
    :param drop: dropout is selected wherever ``drop`` is non-zero.
    :param rng: random generator handed to ``nonlinearity.dropout``.
    :param p: probability argument handed to ``nonlinearity.dropout``.
    :return: ``T.switch(T.neq(drop, 0), dropped_output, output)``.
    :raises Exception: if ``self.border_mode`` is not ``'same'``.
    """
    # Only 'same' is implemented; reject anything else up front.
    if self.border_mode != 'same':
        raise Exception('Unknown conv type')

    # --- Unpool
    if self.poolsize[0] == 1 and self.poolsize[1] == 1:
        unpool_out = input
    else:
        rows_repeated = Textra.repeat(input, self.poolsize[0], axis=2)
        unpool_out = Textra.repeat(
            rows_repeated, self.poolsize[1], axis=3) * self.mask

    # No static image shape is built here: dnn_conv does not take one,
    # and the code that used to build it read an undefined ``n_batch``.
    conv_out = dnn.dnn_conv(
        img=unpool_out,
        kerns=self.W,
        subsample=(1, 1),
        border_mode=self.border,
    )

    if self.cnorm:
        # Floor division keeps the window size an integer on Python 3.
        cnorm_size = self.filter_shape[0] // 8 + 1
        # Single-argument form prints the same text on Python 2 and 3.
        print('cnorm size %s' % (cnorm_size,))
        conv_out = ContrastCrossChannels.ContrastCrossChannels(
            input=conv_out, n=cnorm_size)

    # The bias is a vector (1D array); dimshuffle it to
    # (1, n_filters, 1, 1) so it broadcasts across mini-batches and
    # feature map width & height.
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        output = lin_output
    else:
        output = self.activation(lin_output)

    dropped_output = nonlinearity.dropout(rng, output, p)
    return T.switch(T.neq(drop, 0), dropped_output, output)
def fawn_recurrent(
        inpt_mean, inpt_var, weights_mean, weights_var, f,
        initial_mean, initial_var):
    """Scan a mean/variance propagating step over an input sequence.

    At every step the pre-activation mean and variance are computed from
    the current input and the weight mean/variance, then pushed through
    the transfer function looked up from ``f`` in ``transfer_``.

    Returns the tuple ``(hidden_in_mean_rec, hidden_in_var_rec,
    hidden_mean_rec, hidden_var_rec)`` produced by ``theano.scan``.
    """
    transfer = lookup(f, transfer_)

    def step(x_mean, x_var, him_m1, hiv_m1, hom_m1, hov_m1):
        # The four previous-state arguments are supplied by scan but are
        # not read here.
        pre_mean = T.dot(x_mean, weights_mean)
        pre_var = (T.dot(x_mean ** 2, weights_var)
                   + T.dot(x_var, weights_mean ** 2)
                   + T.dot(x_var, weights_var))
        out_mean, out_var = transfer(pre_mean, pre_var)
        return pre_mean, pre_var, out_mean, out_var

    # A 1D initial state is copied once per entry of the sequence's
    # axis 1.
    if initial_mean.ndim == 1:
        initial_mean = repeat(
            initial_mean.dimshuffle('x', 0), inpt_mean.shape[1], axis=0)
    if initial_var.ndim == 1:
        initial_var = repeat(
            initial_var.dimshuffle('x', 0), inpt_mean.shape[1], axis=0)

    first_inpt = inpt_mean[0]
    results, _ = theano.scan(
        step,
        sequences=[inpt_mean, inpt_var],
        outputs_info=[T.zeros_like(first_inpt),
                      T.zeros_like(first_inpt),
                      initial_mean,
                      initial_var])
    (hidden_in_mean_rec, hidden_in_var_rec,
     hidden_mean_rec, hidden_var_rec) = results

    return (hidden_in_mean_rec, hidden_in_var_rec,
            hidden_mean_rec, hidden_var_rec)
def output(self, dropout_active=False):
    """Run the recurrent layer over the output of ``self.l_in``.

    Dropout with rate ``self.p_drop`` is applied to the incoming
    activations when ``dropout_active`` is set and the rate is positive.
    The projected input is scanned with ``self.step``, starting from
    ``self.h0`` repeated ``x_in.shape[1]`` times along axis 0.

    Returns every scan step when ``self.seq_output`` is set, otherwise
    only the last one.
    """
    X = self.l_in.output(dropout_active=dropout_active)
    if self.p_drop > 0. and dropout_active:
        X = dropout(X, self.p_drop)

    x_in = T.dot(X, self.w_in) + self.b_in
    initial_state = repeat(self.h0, x_in.shape[1], axis=0)
    steps, _ = theano.scan(
        self.step,
        sequences=[x_in],
        outputs_info=[initial_state],
        non_sequences=[self.w_rec],
        truncate_gradient=self.truncate_gradient)

    return steps if self.seq_output else steps[-1]
def step(time_idx, lstm_hidden, input_hidden, weighted_mem):
    """Attention read at ``time_idx`` over two parallel memories.

    ``lstm_hidden`` is used to generate the attention weights; the read
    itself is taken from ``weighted_mem``. Returns the controller output.
    """
    def _memory_before(initial, history):
        # Initial memory copied along axis 1 to match lstm_hidden,
        # followed by the history before time_idx, with the first two
        # axes swapped.
        padded = repeat(initial.dimshuffle((0, 'x', 1)),
                        lstm_hidden.shape[1], axis=1)
        stacked = T.concatenate(
            [padded, history[:time_idx, :, :]], axis=0)
        return stacked.transpose((1, 0, 2))

    M_curr = _memory_before(P.memory_init, lstm_hidden)
    weighted_M_curr = _memory_before(P.weighted_memory_init, weighted_mem)

    input_curr = input_hidden[time_idx, :, :]

    # Every head refines the weights left by the previous one.
    weight_curr = T.zeros([input_curr.shape[0], time_idx + 1])
    for head in heads:
        weight_curr = build_head_curr(
            weight_curr, M_curr, head, input_curr)

    read_curr = build_read(weighted_M_curr, weight_curr)
    return controller(input_curr, read_curr)
def output(self, dropout_active=False):
    """Run the recurrent layer over the output of ``self.l_in``.

    Dropout with rate ``self.p_drop`` is applied to the incoming
    activations when ``dropout_active`` is set and the rate is positive.
    The projected input is scanned with ``self.step``, starting from
    ``self.h0`` repeated ``x_in.shape[1]`` times along axis 0.

    Returns every scan step when ``self.seq_output`` is set, otherwise
    only the last one.
    """
    X = self.l_in.output(dropout_active=dropout_active)
    if self.p_drop > 0. and dropout_active:
        X = dropout(X, self.p_drop)

    x_in = T.dot(X, self.w_in) + self.b_in
    initial_state = repeat(self.h0, x_in.shape[1], axis=0)
    steps, _ = theano.scan(
        self.step,
        sequences=[x_in],
        outputs_info=[initial_state],
        non_sequences=[self.w_rec],
        truncate_gradient=self.truncate_gradient)

    return steps if self.seq_output else steps[-1]
def gen(Z, w, w1, w2, w3, w4):
    """Map ``Z`` through a dense layer and four 'half'-border convolutions.

    The dense output is reshaped to ``(batch, nkerns[3], 4, 4)`` and
    convolved once at that size. Each of the remaining three stages first
    doubles axes 2 and 3 by repetition; the last one ends in ``tanh``, the
    others in batch-normalised ReLU.
    """
    def _double_spatial(t):
        # Repeat every entry twice along axis 2, then along axis 3.
        return repeat(repeat(t, 2, 2), 2, 3)

    def _conv_block(t, kernels):
        return ReLU(batchnorm(conv2d(t, kernels, border_mode='half')))

    dense = ReLU(batchnorm(T.dot(Z, w)))
    maps = dense.reshape((dense.shape[0], nkerns[3], 4, 4))

    stage1 = _conv_block(maps, w1)
    stage2 = _conv_block(_double_spatial(stage1), w2)
    stage3 = _conv_block(_double_spatial(stage2), w3)
    return T.tanh(conv2d(_double_spatial(stage3), w4, border_mode='half'))
def output(self, dropout_active=False):
    """Run the gated recurrent layer over the output of ``self.l_in``.

    Dropout with rate ``self.p_drop`` is applied to the incoming
    activations when ``dropout_active`` is set and the rate is positive.
    Three input projections (z, r, h) are scanned with ``self.step``,
    starting from ``self.h0`` repeated ``x_h.shape[1]`` times along axis 0.

    Returns every scan step when ``self.seq_output`` is set, otherwise
    only the last one.
    """
    X = self.l_in.output(dropout_active=dropout_active)
    if self.p_drop > 0. and dropout_active:
        X = dropout(X, self.p_drop)

    # Input projections for the z, r and h parts, in that order.
    x_z, x_r, x_h = [
        T.dot(X, weight) + bias
        for weight, bias in ((self.w_z, self.b_z),
                             (self.w_r, self.b_r),
                             (self.w_h, self.b_h))
    ]

    initial_state = repeat(self.h0, x_h.shape[1], axis=0)
    steps, _ = theano.scan(
        self.step,
        sequences=[x_z, x_r, x_h],
        outputs_info=[initial_state],
        non_sequences=[self.u_z, self.u_r, self.u_h],
        truncate_gradient=self.truncate_gradient)

    return steps if self.seq_output else steps[-1]
def output(self, dropout_active=False):
    """Run the gated recurrent layer over the output of ``self.l_in``.

    Dropout with rate ``self.p_drop`` is applied to the incoming
    activations when ``dropout_active`` is set and the rate is positive.
    Three input projections (z, r, h) are scanned with ``self.step``,
    starting from ``self.h0`` repeated ``x_h.shape[1]`` times along axis 0.

    Returns every scan step when ``self.seq_output`` is set, otherwise
    only the last one.
    """
    X = self.l_in.output(dropout_active=dropout_active)
    if self.p_drop > 0. and dropout_active:
        X = dropout(X, self.p_drop)

    # Input projections for the z, r and h parts, in that order.
    x_z, x_r, x_h = [
        T.dot(X, weight) + bias
        for weight, bias in ((self.w_z, self.b_z),
                             (self.w_r, self.b_r),
                             (self.w_h, self.b_h))
    ]

    initial_state = repeat(self.h0, x_h.shape[1], axis=0)
    steps, _ = theano.scan(
        self.step,
        sequences=[x_z, x_r, x_h],
        outputs_info=[initial_state],
        non_sequences=[self.u_z, self.u_r, self.u_h],
        truncate_gradient=self.truncate_gradient)

    return steps if self.seq_output else steps[-1]
def recurrent_layer(hidden_inpt, hidden_to_hidden, f, initial_hidden):
    """Scan a simple recurrence over ``hidden_inpt``.

    Each step applies ``f`` to the previous pre-activation, multiplies by
    ``hidden_to_hidden`` and adds the current input.

    Returns ``(hidden_in_rec, hidden_rec)``: the pre-activations of every
    step and ``f`` applied to them.
    """
    def step(x, prev_pre_activation):
        prev_hidden = f(prev_pre_activation)
        return T.dot(prev_hidden, hidden_to_hidden) + x

    # Make one copy of the initial hidden state per sample, then force
    # the result to (hidden_inpt.shape[1], hidden_inpt.shape[2]).
    n_samples = hidden_inpt.shape[1]
    tiled = repeat(initial_hidden, n_samples, axis=0)
    initial_hidden_b = tiled.reshape((n_samples, hidden_inpt.shape[2]))

    hidden_in_rec, _ = theano.scan(
        step,
        sequences=hidden_inpt,
        outputs_info=[initial_hidden_b])

    return hidden_in_rec, f(hidden_in_rec)
def recurrent_layer_stateful(hidden_inpt, hidden_to_hidden, f,
                             initial_hidden):
    """Scan a recurrence whose transfer function carries its own state.

    ``f`` takes ``(previous_state, pre_activation)`` and returns
    ``(state, hidden)``. The state sequence starts from zeros shaped like
    the tiled initial hidden state.

    Returns ``(states, hidden_in_rec, hidden_rec)`` from ``theano.scan``.
    """
    def step(x, prev_state, prev_pre_activation, prev_hidden):
        # prev_pre_activation is supplied by scan but not read.
        pre_activation = T.dot(prev_hidden, hidden_to_hidden)
        pre_activation += x
        state, hidden = f(prev_state, pre_activation)
        return state, pre_activation, hidden

    # One copy of the initial hidden vector per entry of axis 1.
    initial_hidden_b = repeat(
        initial_hidden.dimshuffle('x', 0), hidden_inpt.shape[1], axis=0)

    initial_values = [
        T.zeros_like(initial_hidden_b),
        T.zeros_like(hidden_inpt[0]),
        initial_hidden_b,
    ]
    (states, hidden_in_rec, hidden_rec), _ = theano.scan(
        step,
        sequences=hidden_inpt,
        outputs_info=initial_values)

    return states, hidden_in_rec, hidden_rec
def recurrent_layer(hidden_inpt, hidden_to_hidden, f, initial_hidden):
    """Scan a simple recurrence over ``hidden_inpt``.

    Each step applies ``f`` to the previous pre-activation, multiplies by
    ``hidden_to_hidden`` and adds the current input.

    Returns ``(hidden_in_rec, hidden_rec)``: the pre-activations of every
    step and ``f`` applied to them.
    """
    def step(x, prev_pre_activation):
        prev_hidden = f(prev_pre_activation)
        return T.dot(prev_hidden, hidden_to_hidden) + x

    # Make one copy of the initial hidden state per sample, then force
    # the result to (hidden_inpt.shape[1], hidden_inpt.shape[2]).
    n_samples = hidden_inpt.shape[1]
    tiled = repeat(initial_hidden, n_samples, axis=0)
    initial_hidden_b = tiled.reshape((n_samples, hidden_inpt.shape[2]))

    hidden_in_rec, _ = theano.scan(
        step,
        sequences=hidden_inpt,
        outputs_info=[initial_hidden_b])

    return hidden_in_rec, f(hidden_in_rec)
def recurrent_layer_stateful(hidden_inpt, hidden_to_hidden, f,
                             initial_hidden):
    """Scan a recurrence whose transfer function carries its own state.

    ``f`` takes ``(previous_state, pre_activation)`` and returns
    ``(state, hidden)``. The state sequence starts from zeros shaped like
    the tiled initial hidden state.

    Returns ``(states, hidden_in_rec, hidden_rec)`` from ``theano.scan``.
    """
    def step(x, prev_state, prev_pre_activation, prev_hidden):
        # prev_pre_activation is supplied by scan but not read.
        pre_activation = T.dot(prev_hidden, hidden_to_hidden)
        pre_activation += x
        state, hidden = f(prev_state, pre_activation)
        return state, pre_activation, hidden

    # One copy of the initial hidden vector per entry of axis 1.
    initial_hidden_b = repeat(
        initial_hidden.dimshuffle('x', 0), hidden_inpt.shape[1], axis=0)

    initial_values = [
        T.zeros_like(initial_hidden_b),
        T.zeros_like(hidden_inpt[0]),
        initial_hidden_b,
    ]
    (states, hidden_in_rec, hidden_rec), _ = theano.scan(
        step,
        sequences=hidden_inpt,
        outputs_info=initial_values)

    return states, hidden_in_rec, hidden_rec
def step(time_idx, lstm_hidden):
    """Attention read over the hidden states that precede ``time_idx``.

    Returns the controller output for the hidden state at ``time_idx``.
    """
    # Memory = the initial memory (copied along axis 1 to match
    # lstm_hidden) followed by every hidden state before time_idx.
    initial_memory = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                            lstm_hidden.shape[1], axis=1)
    memory = T.concatenate(
        [initial_memory, lstm_hidden[:time_idx, :, :]], axis=0)
    M_curr = memory.transpose((1, 0, 2))

    input_curr = lstm_hidden[time_idx, :, :]

    # Every head refines the weights left by the previous one; the
    # attention weights of the last head are the ones kept.
    weight_curr = T.zeros([input_curr.shape[0], time_idx + 1])
    for head in heads:
        weight_curr, att_w_curr = build_head_curr(
            weight_curr, M_curr, head, input_curr)

    att_M_curr = att_w_curr.dimshuffle(0, 'x', 1) * M_curr
    read_curr = build_read(att_M_curr, weight_curr)
    return controller(input_curr, read_curr)
def output(self, pool=True):
    """Run the gated recurrent layer over ``self.input``.

    ``self.input`` is indexed ``(sample, time, feature)``: it is
    dimshuffled so that scan iterates over axis 1, and shuffled back
    afterwards. When ``self.backward`` is set the sequence (and the mask,
    if any) is reversed along the time axis first; results are returned in
    that reversed order.

    Changes from the earlier behaviour:

    * ``self.mask`` is no longer overwritten. The reversed mask is kept in
      a local, so calling this method twice no longer un-flips the mask
      relative to the input.
    * ``self.backward`` with ``self.mask is None`` no longer crashes.
    * The final branch returns the last *time step* of every sample
      (``out[:, -1]``). ``out[-1]`` selected the last sample instead,
      because ``out`` is batch-major at that point.

    :param pool: when true, return the average over time (mask-weighted
        if a mask is set).
    :return: the pooled output when ``pool`` is set; otherwise the full
        (masked) sequence if ``self.seq_output`` is set, else the last
        time step.
    """
    X = self.input
    mask = self.mask
    if self.backward:
        # Flip along the second (time) axis.
        X = X[:, ::-1]
        if mask is not None:
            mask = mask[:, ::-1]

    # Shuffle dimensions so scan runs over axis 1.
    X = X.dimshuffle(1, 0, 2)
    if mask is not None:
        seq_input = [mask.dimshuffle(1, 0), X]
        step = self.step_masked
    else:
        seq_input = [X]
        step = self.step

    out, _ = theano.scan(
        step,
        sequences=seq_input,
        outputs_info=[repeat(self.h0, X.shape[1], axis=0)],
        non_sequences=[self.u_z, self.u_r, self.u_h],
        truncate_gradient=self.truncate_gradient
    )
    # Shuffle dimensions back to (sample, time, feature).
    out = out.dimshuffle(1, 0, 2)

    if pool:
        if mask is None:
            return T.mean(out, axis=1)
        # Average over the unmasked time steps only.
        summed = (out * mask[:, :, None]).sum(axis=1)
        return summed / mask.sum(axis=1)[:, None]

    if self.seq_output:
        if mask is None:
            return out
        return out * mask[:, :, None]

    return out[:, -1]
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             border_mode='same', activation=None, mask=None):
    """
    Allocate an unpool + convolution layer with shared variable internal
    parameters.

    The input is unpooled (values repeated by ``poolsize`` along axes 2
    and 3 and multiplied by ``mask``), passed through ``activation`` when
    one is given, convolved with the filters and shifted by the bias. The
    result is stored in ``self.output``.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width); handed to conv2d
                         together with the *unpooled* tensor

    :type poolsize: tuple or list of length 2
    :param poolsize: the unpooling factor (#rows, #cols)

    :param border_mode: 'valid', 'same' or 'full'
    :param activation: optional function applied to the unpooled tensor
                       before the convolution
    :param mask: optional tensor multiplied with the repeated input; by
                 default a mask that keeps only the first entry of every
                 poolsize window

    :raises Exception: if ``border_mode`` is not a known mode
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input
    pool_rows, pool_cols = poolsize[0], poolsize[1]

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" *
    # pooling size (multiplied, not divided, because this layer unpools)
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) *
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
        borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # --- Unpool
    if pool_rows == 1 and pool_cols == 1:
        self.unpool_out = input
    else:
        upsampled = Textra.repeat(
            Textra.repeat(input, pool_rows, axis=2), pool_cols, axis=3)
        if mask is None:
            # Default mask: 1 where both the row and the column index are
            # multiples of the pool size, 0 elsewhere. It is built from
            # the symbolic shape of the upsampled tensor; the previous
            # code tiled a NumPy window over an undefined ``input_shape``
            # and could not concatenate a tuple poolsize to a list.
            tt = theano.tensor
            row_hits = tt.eq(tt.arange(upsampled.shape[2]) % pool_rows, 0)
            col_hits = tt.eq(tt.arange(upsampled.shape[3]) % pool_cols, 0)
            window_grid = (row_hits.dimshuffle(0, 'x') *
                           col_hits.dimshuffle('x', 0))
            mask = tt.cast(window_grid, 'float32').dimshuffle('x', 'x', 0, 1)
        self.unpool_out = upsampled * mask

    relu_output = (self.unpool_out if activation is None
                   else activation(self.unpool_out))

    # --- Unpool + conv: convolve input feature maps with filters
    if border_mode not in ('valid', 'same', 'full'):
        raise Exception('Unknown conv type')

    # 'same' is obtained by cropping a 'full' convolution.
    conv_out = conv.conv2d(
        input=relu_output,
        filters=self.W,
        filter_shape=filter_shape,
        image_shape=image_shape,
        border_mode='valid' if border_mode == 'valid' else 'full')
    if border_mode == 'same':
        # Floor division keeps the padding an integer on Python 3 too.
        pad_rows = (filter_shape[2] - 1) // 2
        pad_cols = (filter_shape[3] - 1) // 2
        # Explicit end indices: ``p:-p`` would be an empty slice for p == 0.
        conv_out = conv_out[:, :,
                            pad_rows:conv_out.shape[2] - pad_rows,
                            pad_cols:conv_out.shape[3] - pad_cols]

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

    # store parameters of this layer
    self.params = [self.W, self.b]
def recurrent_layer(in_mean, in_var, weights, f, initial_hidden_mean,
                    initial_hidden_var, p_dropout):
    """Return theano variables representing a recurrent layer.

    Parameters
    ----------

    in_mean : Theano variable
        Sequence tensor of shape ``(t, n ,d)``. Represents the mean of the
        input to the layer.

    in_var : Theano variable
        Sequence tensor. Represents the variance of the input to the layer.
        Either (a) same shape as the mean or (b) scalar.

    weights : Theano variable
        Theano matrix of shape ``(d, d)``. Represents the recurrent weight
        matrix the hiddens are right multiplied with.

    f : function
        Function that takes the mean and the variance of the hidden
        pre-activations and returns the mean and the variance after the
        transfer. Meant as transfer function of the layer.

    initial_hidden_mean : Theano variable
        Mean of the initial hidden state. A vector is repeated
        ``in_mean.shape[1]`` times along a new leading axis.

    initial_hidden_var : Theano variable
        Variance of the initial hidden state; a vector is handled like
        ``initial_hidden_mean``.

    p_dropout : Theano variable
        Scalar representing the probability that unit is dropped out.

    Returns
    -------

    hidden_in_mean_rec : Theano variable
        Theano sequence tensor representing the mean of the hidden
        activations before the application of ``f``.

    hidden_in_var_rec : Theano variable
        Theano sequence tensor representing the variance of the hidden
        activations before the application of ``f``.

    hidden_mean_rec : Theano variable
        Theano sequence tensor representing the mean of the hidden
        activations after the application of ``f``.

    hidden_var_rec : Theano variable
        Theano sequence tensor representing the variance of the hidden
        activations after the application of ``f``.
    """
    def step(x_mean, x_var, him_m1, hiv_m1, hom_m1, hov_m1):
        # him_m1 and hiv_m1 are supplied by scan but not read.
        # NOTE(review): the mean is scaled by p_dropout while the variance
        # terms use p_keep -- confirm which factor is intended.
        pre_mean = T.dot(hom_m1, weights) * p_dropout + x_mean

        p_keep = 1 - p_dropout
        dropout_var = p_dropout * (1 - p_dropout)
        element_var = (hov_m1 * dropout_var
                       + (hom_m1 ** 2) * dropout_var
                       + hov_m1 * p_keep ** 2)
        pre_var = T.dot(element_var, weights ** 2) + x_var

        post_mean, post_var = f(pre_mean, pre_var)
        return pre_mean, pre_var, post_mean, post_var

    # A 1D initial state is copied once per entry of axis 1 of the input.
    if initial_hidden_mean.ndim == 1:
        initial_hidden_mean = repeat(
            initial_hidden_mean.dimshuffle('x', 0), in_mean.shape[1], axis=0)
    if initial_hidden_var.ndim == 1:
        initial_hidden_var = repeat(
            initial_hidden_var.dimshuffle('x', 0), in_mean.shape[1], axis=0)

    first_inpt = in_mean[0]
    results, _ = theano.scan(
        step,
        sequences=[in_mean, in_var],
        outputs_info=[T.zeros_like(first_inpt),
                      T.zeros_like(first_inpt),
                      initial_hidden_mean,
                      initial_hidden_var])
    (hidden_in_mean_rec, hidden_in_var_rec,
     hidden_mean_rec, hidden_var_rec) = results

    return (hidden_in_mean_rec, hidden_in_var_rec,
            hidden_mean_rec, hidden_var_rec)
def __init__(self, n_state, n_action, scale_action=1.0,
             mean_learning_rate=0.01, sigma_learning_rate=0.001,
             gamma=0.99):
    """Set up the policy network and its compiled Theano functions.

    Stores the hyper-parameters, creates empty history lists, builds a
    network with a mean head and a sigma head on top of two shared dense
    layers, and compiles three functions: ``action_dist`` (state ->
    mean, sigma) plus one Adam update function per head.
    """
    self.n_state = n_state
    self.n_action = n_action
    self.scale_action = scale_action
    self.mean_learning_rate = mean_learning_rate
    self.sigma_learning_rate = sigma_learning_rate
    self.gamma = gamma

    self.episode_state_history = []
    self.episode_action_history = []
    self.episode_reward_history = []
    self.all_states = []
    self.all_actions = []
    self.all_rewards = []

    def action_nonlinearity(x):
        return self.scale_action * tanh(x)

    def _dense(incoming, num_units, nonlinearity):
        # Every dense layer uses the same weight and bias initialisers.
        return DenseLayer(incoming,
                          num_units=num_units,
                          nonlinearity=nonlinearity,
                          W=Normal(0.1, 0.0),
                          b=Constant(0.0))

    # Neural Network for the policy
    def policy_network(state):
        trunk = InputLayer(input_var=state, shape=(None, n_state))
        trunk = _dense(trunk, n_state, tanh)
        trunk = _dense(trunk, n_state, tanh)
        mean = _dense(trunk, n_action, action_nonlinearity)
        sigma = _dense(trunk, n_action, T.exp)
        return mean, sigma

    # Defining the system variables (state, action, reward)
    self.X_state = T.fmatrix()
    self.X_action = T.fmatrix()
    self.X_reward = T.fmatrix()

    # Policy and distribution functions
    self.policy_mean_, self.policy_sigma_ = policy_network(self.X_state)
    self.policy_mean = get_output(self.policy_mean_)
    self.policy_sigma = get_output(self.policy_sigma_)
    self.action_dist = theano.function(
        inputs=[self.X_state],
        outputs=[self.policy_mean, self.policy_sigma],
        allow_input_downcast=True)

    # log policy grads
    # d_f / d_u = (action - mu) / sigma ^2
    # d_f / d_sigma = - 1 / sigma + (action - mu) ^ 2 / sigma ^3
    # E[d_J / d_u] = (d_f / d_u) * R
    # E[d_J / d_sigma] = (d_f / d_sigma) * R
    squared_error = (self.X_action - self.policy_mean) ** 2
    per_action = (-2 * T.log(self.policy_sigma)
                  + squared_error * self.policy_sigma ** -2)
    # The reward is copied n_action times along axis 1 before weighting.
    weighted = per_action * repeat(self.X_reward, n_action, axis=1)
    self.policy = weighted.mean()

    # Parameters to optimize
    self.mean_params = get_all_params(self.policy_mean_)
    self.sigma_params = get_all_params(self.policy_sigma_)

    # Gradients w.r.t. Parameters
    self.mean_grads = T.grad(self.policy, self.mean_params)
    self.sigma_grads = T.grad(self.policy, self.sigma_params)

    # Update equations
    self.mean_updates = adam(self.mean_grads,
                             self.mean_params,
                             learning_rate=self.mean_learning_rate)
    self.sigma_updates = adam(self.sigma_grads,
                              self.sigma_params,
                              learning_rate=self.sigma_learning_rate)

    def _compile_update(updates):
        # Both update functions take (state, action, reward) and return
        # nothing; they differ only in the updates they apply.
        return theano.function(
            inputs=[self.X_state, self.X_action, self.X_reward],
            outputs=None,
            updates=updates,
            allow_input_downcast=True)

    self.update_mean_network = _compile_update(self.mean_updates)
    self.update_sigma_network = _compile_update(self.sigma_updates)
def make_train(image_size, word_size, first_hidden_size, proj_size,
               reg_lambda):
    """Build the image/triplet ranking model and compile its functions.

    One image projection is scored (dot product) against the encoding of
    a correct triplet and against a batch of negative triplets; the cost
    is a hinge on ``1 - correct + negative`` averaged over the negatives,
    plus ``reg_lambda`` times the summed squares of all parameters.

    Returns
    -------
    tuple
        ``(P, train, valid, image_project_fun, test)`` where ``P`` is the
        parameter container and the rest are compiled Theano functions.
    """
    def float32_vectors():
        return [T.vector(dtype='float32') for _ in range(3)]

    def float32_matrices():
        return [T.matrix(dtype='float32') for _ in range(3)]

    # initialize model
    P = Parameters()
    image_projecting = image_project.build(P, image_size, proj_size)
    batched_triplet_encoding, vector_triplet_encoding = \
        triplet_encoding.build(P, word_size, first_hidden_size, proj_size)

    image_vector = T.vector()

    # training inputs: one correct [E, R, E] triplet, a batch of negatives
    correct_triplet = float32_vectors()
    negative_triplet = float32_matrices()

    image_projection_vector = image_projecting(image_vector)
    image_row = image_projection_vector.dimshuffle(('x', 0))

    # One copy of the image projection per negative triplet.
    image_projection_matrix = repeat(
        image_row, negative_triplet[0].shape[0], axis=0)

    correct_encoding = vector_triplet_encoding(*correct_triplet)
    negative_encoding = batched_triplet_encoding(*negative_triplet)

    correct_score = T.dot(image_projection_vector, correct_encoding)
    negative_scores = T.batched_dot(image_projection_matrix,
                                    negative_encoding)

    # margin cost
    zeros = T.zeros_like(negative_scores)
    margins = 1 - correct_score + negative_scores
    cost_vector = T.switch(T.gt(zeros, margins), zeros, margins)

    # regularizer cost
    params = P.values()
    l2 = sum(((p ** 2).sum() for p in params), T.sum(0))

    cost = (T.sum(cost_vector) / T.shape(negative_triplet[0])[0] +
            reg_lambda * l2)

    # assume word vector has been put into P  (marked "unsolved" originally)
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]
    lr = T.scalar(name='learning rate', dtype='float32')

    scoring_inputs = [image_vector] + correct_triplet + negative_triplet
    train = theano.function(
        inputs=scoring_inputs + [lr],
        outputs=cost,
        updates=updates.rmsprop(params, grads, learning_rate=lr),
        allow_input_downcast=True)

    # valid: same cost, no parameter updates
    valid = theano.function(
        inputs=list(scoring_inputs),
        outputs=cost,
        allow_input_downcast=True)

    # visualize
    image_project_fun = theano.function(
        inputs=[image_vector],
        outputs=image_projection_vector,
        allow_input_downcast=True)

    # testing: score the image against every candidate triplet
    all_triplet = float32_matrices()
    image_projection_matrix_test = repeat(
        image_row, all_triplet[0].shape[0], axis=0)
    all_encoding = batched_triplet_encoding(*all_triplet)
    all_scores = T.batched_dot(image_projection_matrix_test, all_encoding)
    test = theano.function(
        inputs=[image_vector] + all_triplet,
        outputs=all_scores,
        allow_input_downcast=True)

    return P, train, valid, image_project_fun, test
def repeat(self, repeats, axis=None):
    """Method form of `theano.tensor.repeat`; see that function for details."""
    # Imported at call time, as in the original implementation.
    from theano.tensor import extra_ops
    return extra_ops.repeat(self, repeats, axis)
def get_output_for(self, input, **kwargs):
    """Repeat ``input`` along axis 2 and then along axis 3.

    Axis 2 is repeated ``self.factor[0]`` times and axis 3
    ``self.factor[1]`` times. Extra keyword arguments are accepted and
    ignored.
    """
    factor_axis2 = self.factor[0]
    factor_axis3 = self.factor[1]
    stretched = Textra.repeat(input, factor_axis2, axis=2)
    return Textra.repeat(stretched, factor_axis3, axis=3)
def get_timit_waveform():
    """Load TIMIT waveforms and one-hot phoneme labels for train and test.

    File names are read from ``train.list`` / ``test.list`` in the current
    directory, audio from the hard-coded TimitWav directories, and phoneme
    labels from ``TIMIT_TRAIN.mat`` / ``TIMIT_TEST.mat``.

    Returns
    -------
    tuple
        ``(train_audio, train_phn, test_audio, test_phn)``. The audio
        entries are lists holding the second element returned by
        ``wavfile.read`` for each file; the label entries are lists of
        one-hot arrays (41 columns) with every row repeated 160 times.
    """
    # Each label row is stretched by this factor.  The original comments
    # disagreed with each other ("1600" / "16000") and with the code; 160
    # is the value the code actually used.
    label_repeats = 160

    def read_filenames(list_path):
        # One file name per line; only the trailing newline is stripped.
        with open(list_path) as handle:
            return [line.rstrip('\n') for line in handle]

    def load_audio(prefix, filenames):
        # wavfile.read returns a pair; only the second item is kept
        # (presumably (sample_rate, samples) as in scipy.io.wavfile).
        return [wavfile.read(prefix + name)[1] for name in filenames]

    def load_phonemes(mat_path, count):
        # One-hot encode row 2 of each referenced label table.
        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)
        encoded_labels = []
        # The context manager closes the HDF5 file once labels are read.
        with h5py.File(mat_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(count):
                ref = phn_data[i][0]
                labels = phn[ref]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))
                encoded = enc.fit_transform(phonemes)
                # numpy.repeat gives the same array as building and
                # evaluating a Theano repeat graph, without the
                # per-utterance compilation cost.
                encoded_labels.append(
                    numpy.repeat(encoded, label_repeats, axis=0))
        return encoded_labels

    # training data
    train_audio = load_audio(
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/train/',
        read_filenames('train.list'))
    train_phn = load_phonemes('TIMIT_TRAIN.mat', len(train_audio))

    print('training done...')

    # test data
    test_audio = load_audio(
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/test/',
        read_filenames('test.list'))
    test_phn = load_phonemes('TIMIT_TEST.mat', len(test_audio))

    return train_audio, train_phn, test_audio, test_phn
def get_timit_specs_images(window_size):
    """Load TIMIT spectrograms as sliding windows plus phoneme labels.

    The z-scored stimulus is cut into consecutive chunks of 5000 frames;
    within each chunk every run of ``window_size`` consecutive frames
    starting at offsets ``0 .. 5000 - window_size - 1`` becomes one window.

    Parameters
    ----------
    window_size : int
        Number of consecutive frames per window.

    Returns
    -------
    tuple
        ``(train_stim_windows, train_phn, test_stim_windows, test_phn)``.
        The window arrays have shape
        ``(n_chunks, 5000 - window_size, window_size, 60)``; the label
        entries are lists of one-hot arrays with each row repeated 10 times.
    """
    chunk_len = 5000
    n_features = 60       # width of one stimulus frame, as hard-coded before
    label_repeats = 10
    # Floor division: identical to the old Python 2 ``/`` on ints and
    # still an int under Python 3.
    half = window_size // 2

    def load_stimulus(mat_path):
        # Materialise the dataset before the file is closed.
        with h5py.File(mat_path, 'r') as stim_file:
            return numpy.transpose(
                numpy.asarray(stim_file['stimulus_zscore']))

    def build_windows(stim):
        n_chunks = stim.shape[0] // chunk_len
        per_chunk = chunk_len - window_size
        windows = numpy.zeros(
            (n_chunks, per_chunk, window_size, n_features))
        for j in range(n_chunks):
            base = j * chunk_len
            for offset in range(per_chunk):
                start = base + offset
                # Index by the offset inside the chunk.  The original used
                # the absolute frame index here, which is out of bounds
                # for every chunk after the first.
                windows[j, offset] = stim[start:start + window_size]
        return windows

    def read_filenames(list_path):
        with open(list_path) as handle:
            return [line.rstrip('\n') for line in handle]

    def load_phonemes(mat_path, count):
        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)
        encoded_labels = []
        with h5py.File(mat_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(count):
                ref = phn_data[i][0]
                labels = phn[ref]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))
                encoded = enc.fit_transform(phonemes)
                # Plain NumPy repeat instead of compiling a Theano graph
                # per utterance; the resulting array is the same.
                encoded_labels.append(
                    numpy.repeat(encoded, label_repeats, axis=0))
        # NOTE(review): this trims ``half`` whole entries from each end of
        # the per-file label list (not frames within an utterance), exactly
        # as the original did -- confirm that this is the intended
        # alignment with the windows.
        return encoded_labels[half:len(encoded_labels) - half]

    # training spectrograms and labels
    train_stim = load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat')
    train_stim_windows = build_windows(train_stim)
    train_phn = load_phonemes('TIMIT_TRAIN.mat',
                              len(read_filenames('train.list')))

    # testing spectrograms and labels
    test_stim = load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat')
    test_stim_windows = build_windows(test_stim)
    test_phn = load_phonemes('TIMIT_TEST.mat',
                             len(read_filenames('test.list')))

    return train_stim_windows, train_phn, test_stim_windows, test_phn
def get_timit_specs():
    """Load TIMIT spectrograms and one-hot phoneme labels for train and test.

    Returns
    -------
    tuple
        ``(train_stim, train_phn, test_stim, test_phn)``. The stimulus
        entries are the transposed ``stimulus_zscore`` datasets; the label
        entries are lists of one-hot arrays (41 columns) with each row
        repeated 10 times, one list item per line of ``train.list`` /
        ``test.list``.
    """
    label_repeats = 10

    def load_stimulus(mat_path):
        # Materialise the dataset so the file can be closed right away.
        with h5py.File(mat_path, 'r') as stim_file:
            return numpy.transpose(
                numpy.asarray(stim_file['stimulus_zscore']))

    def read_filenames(list_path):
        with open(list_path) as handle:
            return [line.rstrip('\n') for line in handle]

    def load_phonemes(mat_path, count):
        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)
        encoded_labels = []
        with h5py.File(mat_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(count):
                ref = phn_data[i][0]
                labels = phn[ref]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))
                encoded = enc.fit_transform(phonemes)
                # Plain NumPy repeat instead of compiling and evaluating a
                # Theano graph for every utterance.
                encoded_labels.append(
                    numpy.repeat(encoded, label_repeats, axis=0))
        return encoded_labels

    # training spectrograms and labels
    train_stim = load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat')
    train_phn = load_phonemes('TIMIT_TRAIN.mat',
                              len(read_filenames('train.list')))

    # testing spectrograms and labels
    test_stim = load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat')
    test_phn = load_phonemes('TIMIT_TEST.mat',
                             len(read_filenames('test.list')))

    return train_stim, train_phn, test_stim, test_phn
def make_train(image_size, word_size, first_hidden_size, proj_size,
               reg_lambda):
    """Build the ranking model plus a "default slot" model and compile both.

    Main model: one image projection is scored (dot product) against the
    encoding of a correct triplet and of a batch of negative triplets; the
    cost is a hinge on ``1 - correct + negative`` averaged over negatives
    plus ``reg_lambda`` times the summed squares of all parameters in ``P``.

    Default model: three learned vectors (``left``, ``relation``,
    ``right``) stand in for the two slots that are *not* supplied, so each
    of the three slots is trained and scored on its own. Only the
    ``P_default`` parameters are updated by ``train_default``.

    Returns
    -------
    tuple
        ``(P, P_default, train, valid, test, train_default, test_default)``.
    """
    slot_names = ('left', 'relation', 'right')      # [E, R, E] order

    def float32_vectors():
        return [T.vector(dtype='float32') for _ in range(3)]

    def float32_matrices():
        return [T.matrix(dtype='float32') for _ in range(3)]

    def tile_rows(vector, like_matrix):
        # Stack `vector` once for every row of `like_matrix`.
        return repeat(vector.dimshuffle(('x', 0)),
                      like_matrix.shape[0], axis=0)

    def hinge(correct_score, negative_scores):
        # max(0, 1 - correct + negative), element-wise.
        zeros = T.zeros_like(negative_scores)
        margins = 1 - correct_score + negative_scores
        return T.switch(T.gt(zeros, margins), zeros, margins)

    def l2_penalty(values):
        total = T.sum(0)
        for p in values:
            total = total + (p ** 2).sum()
        return total

    # ---- main model ------------------------------------------------------
    P = Parameters()
    image_projecting = image_project.build(P, image_size, proj_size)
    batched_triplet_encoding, vector_triplet_encoding = \
        triplet_encoding.build(P, word_size, first_hidden_size, proj_size)

    image_vector = T.vector()
    correct_triplet = float32_vectors()
    negative_triplet = float32_matrices()

    image_projection_vector = image_projecting(image_vector)
    image_projection_matrix = tile_rows(image_projection_vector,
                                        negative_triplet[0])

    correct_encoding = vector_triplet_encoding(*correct_triplet)
    negative_encoding = batched_triplet_encoding(*negative_triplet)
    correct_score = T.dot(image_projection_vector, correct_encoding)
    negative_scores = T.batched_dot(image_projection_matrix,
                                    negative_encoding)

    cost_vector = hinge(correct_score, negative_scores)
    params = P.values()
    cost = (T.sum(cost_vector) / T.shape(negative_triplet[0])[0] +
            reg_lambda * l2_penalty(params))

    # assume word vector has been put into P  (marked "unsolved" originally)
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]
    lr = T.scalar(name='learning rate', dtype='float32')

    train = theano.function(
        inputs=[image_vector] + correct_triplet + negative_triplet + [lr],
        outputs=cost,
        updates=updates.rmsprop(params, grads, learning_rate=lr),
        allow_input_downcast=True)

    valid = theano.function(
        inputs=[image_vector] + correct_triplet + negative_triplet,
        outputs=cost,
        allow_input_downcast=True)

    all_triplet = float32_matrices()
    all_scores = T.batched_dot(
        tile_rows(image_projection_vector, all_triplet[0]),
        batched_triplet_encoding(*all_triplet))
    test = theano.function(
        inputs=[image_vector] + all_triplet,
        outputs=all_scores,
        allow_input_downcast=True)

    # ---- default-slot model ----------------------------------------------
    P_default = Parameters()
    # Uniform in [-1, 1); drawn in the original order (left, right,
    # relation) so seeded runs initialise identically.
    P_default['left'] = 2 * (np.random.rand(word_size) - 0.5)
    P_default['right'] = 2 * (np.random.rand(word_size) - 0.5)
    P_default['relation'] = 2 * (np.random.rand(word_size) - 0.5)

    correct_triplet_d = float32_vectors()
    negative_triplet_d = float32_matrices()

    default_cost = 0
    for i in range(3):
        negatives_i = negative_triplet_d[i]
        # Slot i comes from the inputs; the other two slots use the
        # learned default vectors (tiled per row for the negatives).
        correct_slots = [
            correct_triplet_d[i] if k == i else P_default[name]
            for k, name in enumerate(slot_names)]
        negative_slots = [
            negatives_i if k == i
            else tile_rows(P_default[name], negatives_i)
            for k, name in enumerate(slot_names)]

        correct_score_d = T.dot(
            image_projection_vector,
            vector_triplet_encoding(*correct_slots))
        negative_scores_d = T.batched_dot(
            tile_rows(image_projection_vector, negatives_i),
            batched_triplet_encoding(*negative_slots))

        # Average over this slot's own negatives.  The original divided by
        # the shape of `negative_triplet[i]`, which is not an input of
        # `train_default`, so compilation failed with a missing input.
        default_cost = default_cost + (
            T.sum(hinge(correct_score_d, negative_scores_d)) /
            T.shape(negatives_i)[0])

    params_d = P_default.values()
    default_cost = default_cost + 0.01 * l2_penalty(params_d)
    grads_d = [T.clip(g, -100, 100)
               for g in T.grad(default_cost, wrt=params_d)]

    train_default = theano.function(
        inputs=([image_vector] + correct_triplet_d + negative_triplet_d +
                [lr]),
        outputs=default_cost,
        updates=updates.rmsprop(params_d, grads_d, learning_rate=lr),
        allow_input_downcast=True)

    all_triplet_d = float32_matrices()
    result = [[], [], []]
    for i in range(3):
        candidates_i = all_triplet_d[i]
        candidate_slots = [
            candidates_i if k == i
            else tile_rows(P_default[name], candidates_i)
            for k, name in enumerate(slot_names)]
        # Tile by the candidates of *this* function.  The original used
        # `all_triplet[i]`, which is not an input of `test_default`.
        result[i] = T.batched_dot(
            tile_rows(image_projection_vector, candidates_i),
            batched_triplet_encoding(*candidate_slots))

    test_default = theano.function(
        inputs=[image_vector] + all_triplet_d,
        outputs=result,
        allow_input_downcast=True)

    return P, P_default, train, valid, test, train_default, test_default
def recurrent_layer(in_mean, in_var, weights, f, initial_hidden_mean,
                    initial_hidden_var, p_dropout):
    """Return theano variables for a recurrent layer over means and variances.

    Parameters
    ----------

    in_mean : Theano variable
        Sequence tensor of shape ``(t, n, d)`` holding the mean of the
        layer input.

    in_var : Theano variable
        Variance of the layer input; either shaped like ``in_mean`` or a
        scalar.

    weights : Theano variable
        Matrix of shape ``(d, d)``; the previous hidden state is right
        multiplied with it.

    f : function
        Transfer function. It is called as ``f(mean, var)`` and must
        return the pair ``(mean, var)`` after the transfer.

    initial_hidden_mean : Theano variable
        Initial hidden mean. A vector is repeated once per sample
        (``in_mean.shape[1]`` times); anything else is used as given.

    initial_hidden_var : Theano variable
        Initial hidden variance, handled like ``initial_hidden_mean``.

    p_dropout : Theano variable
        Scalar dropout parameter used to scale the recurrent mean and to
        derive the variance terms.

    Returns
    -------

    hidden_in_mean_rec : Theano variable
        Sequence of hidden means before the application of ``f``.

    hidden_in_var_rec : Theano variable
        Sequence of hidden variances before the application of ``f``.

    hidden_mean_rec : Theano variable
        Sequence of hidden means after the application of ``f``.

    hidden_var_rec : Theano variable
        Sequence of hidden variances after the application of ``f``.
    """
    def per_sample(initial):
        # A 1-d initial state is broadcast to one row per sample.
        if initial.ndim != 1:
            return initial
        return repeat(initial.dimshuffle('x', 0), in_mean.shape[1], axis=0)

    def step(mean_t, var_t, prev_in_mean, prev_in_var, prev_out_mean,
             prev_out_var):
        # The two pre-transfer values from the previous step are part of
        # the scan signature but do not enter the recurrence.
        p_keep = 1 - p_dropout
        dropout_var = p_dropout * (1 - p_dropout)

        pre_mean = T.dot(prev_out_mean, weights) * p_dropout + mean_t
        element_var = (prev_out_var * dropout_var +
                       (prev_out_mean ** 2) * dropout_var +
                       prev_out_var * p_keep ** 2)
        pre_var = T.dot(element_var, weights ** 2) + var_t

        post_mean, post_var = f(pre_mean, pre_var)
        return pre_mean, pre_var, post_mean, post_var

    initial_hidden_mean = per_sample(initial_hidden_mean)
    initial_hidden_var = per_sample(initial_hidden_var)

    scan_outputs, _ = theano.scan(
        step,
        sequences=[in_mean, in_var],
        outputs_info=[T.zeros_like(initial_hidden_mean),
                      T.zeros_like(initial_hidden_var),
                      initial_hidden_mean,
                      initial_hidden_var])
    (hidden_in_mean_rec, hidden_in_var_rec,
     hidden_mean_rec, hidden_var_rec) = scan_outputs

    return (hidden_in_mean_rec, hidden_in_var_rec,
            hidden_mean_rec, hidden_var_rec)
def deconv_and_depool(X, w, b=None, activation=rectify):
    """Double ``X`` along axes 2 and 3, then apply ``deconv`` and ``activation``.

    Each entry is repeated twice along axis 2 and then twice along axis 3;
    the result is passed to ``deconv(X, w, b)`` and the activation is
    applied to its output.
    """
    for axis in (2, 3):
        X = repeat(X, repeats=2, axis=axis)
    deconvolved = deconv(X, w, b)
    return activation(deconvolved)
def output_random_generation(self, input, n_batch=144):
    """Unpool ``input`` with a fixed mask, convolve, add bias, activate.

    Unpooling repeats ``input`` along axes 2 and 3 by ``self.poolsize``
    and multiplies by ``self.random_mask``, which keeps only the first
    (``[0, 0]``) position of every pooling window. The mask is created on
    the first call and cached on the instance.

    Parameters
    ----------
    input : Theano variable
        4-d tensor to unpool (axes 2 and 3 are the ones repeated).
    n_batch : int
        Batch size used when the mask is first built.
        NOTE(review): the cached mask keeps the batch size of the first
        call; later calls with a different ``n_batch`` reuse it unchanged.

    Returns
    -------
    Theano variable
        ``activation(conv + bias)``, or ``conv + bias`` if
        ``self.activation`` is ``None``.

    Raises
    ------
    Exception
        If ``self.border_mode`` is not ``'same'``.
    """
    pool = list(self.poolsize)      # accept tuples as well as lists

    ###--- Unpool
    if self.random_mask is None:
        # Number of pooling windows per axis.  Floor division keeps these
        # ints on Python 3 as well (np.tile needs integer repetitions).
        mask_reps = list(self.image_shape)
        mask_reps[0] = n_batch
        mask_reps[2] //= pool[0]
        mask_reps[3] //= pool[1]

        window = np.zeros(pool, dtype=np.float32)
        window[0, 0] = 1
        self.random_mask = theano.shared(
            np.tile(window.reshape([1, 1] + pool), mask_reps))

    if pool[0] == 1 and pool[1] == 1:
        unpool_out = input
    else:
        stretched = Textra.repeat(input, pool[0], axis=2)
        stretched = Textra.repeat(stretched, pool[1], axis=3)
        unpool_out = stretched * self.random_mask

    ###--- Unpool + conv
    # convolve input feature maps with filters
    if self.border_mode != 'same':
        raise Exception('Unknown conv type')
    conv_out = dnn.dnn_conv(
        img=unpool_out,
        kerns=self.W,
        subsample=(1, 1),
        border_mode=self.border,
    )

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        return lin_output
    return self.activation(lin_output)
xtp1 = T.cast(T.argmax(srng.multinomial(n=1, pvals=ot), axis=1), floatX) s_updates = OrderedDict() for s, st, num_h in zip(ss, sts, args.num_hs): if T.lt(xt.shape[0], s.shape[0]): pad = T.zeros((s.shape[0] - xt.shape[0], num_h), dtype=floatX) st = T.concatenate([st, pad], axis=0) s_updates[s] = st return [ot, xtp1], s_updates [o, _], gru_train_updates = theano.scan(gru_step, outputs_info=[None, None], sequences=[X.T] + dropout_masks) o = o.dimshuffle((1, 0, 2)) p_hat = o[repeat(T.arange(o.shape[0]).dimshuffle(0, "x"), o.shape[1], axis=1), repeat(T.arange(o.shape[1]).dimshuffle("x", 0), o.shape[0], axis=0), T.cast(y, "int32")] y_mask = T.neq(y, -1) # Evolves into c_fagrigus cross_entropy = -T.mean( T.sum(T.log(p_hat) * y_mask, axis=1) / T.sum(y_mask, axis=1)) def perplexity(y, o): p_hat = o[ np.repeat(np.arange(o.shape[0]).reshape((-1, 1)), o.shape[1], axis=1), np.repeat(np.arange(o.shape[1]).reshape( (1, -1)), o.shape[0], axis=0), np.cast["int32"](y)] y_mask = y != -1 cross_entropy = -np.mean( np.sum(np.log(p_hat) * y_mask, axis=1) / np.sum(y_mask, axis=1))
def cnn_creator(kernel):
    """Compile train / predict / cost functions for a 4-layer conv net.

    ``kernel`` must hold eight entries laid out as
    ``[w1, b1, w2, b2, w3, b3, w4, b4]``. Every layer is convolution +
    bias, 2x2 max pooling and a leaky ReLU. The last layer's output is
    repeated 16 times along axes 2 and 3, passed through a softmax over
    axis 1 and clipped away from 0 and 1. Training uses Adadelta-style
    updates (``rho = 0.95``) on all eight parameters.

    Returns
    -------
    tuple
        ``(f_train, f_cnn, f_cost)``: ``f_train(src, grt)`` returns the
        cost and applies the updates, ``f_cnn(src)`` returns the arg-max
        over axis 1, ``f_cost(src, grt)`` returns the cost only.

    Raises
    ------
    Exception
        If ``kernel`` does not have exactly eight entries.
    """
    if kernel.shape[0] != 8:
        raise Exception('Expected cnn kernel with 8 subkernels.'
                        '\nReceived kernel has {0} '
                        'subkernel(s).'.format(kernel.shape[0]))

    src_data = T.tensor4(name="source_data")
    grt_data = T.tensor4(name="ground_truth_data")

    # ***********************************************************
    # Weights sit at the even positions, biases at the odd ones.
    weights = [kernel[i] for i in (0, 2, 4, 6)]
    biases = [kernel[i] for i in (1, 3, 5, 7)]
    params = weights + biases            # w1..w4, b1..b4
    param_shapes = [param.eval().shape for param in params]

    # ***********************************************************
    def relu(value, alpha=0.05):
        return T.switch(value > 0, value, alpha * value)

    def softmax4d(value):
        # Softmax over axis 1, shifted by the per-position maximum.
        e_x = theano.tensor.exp(value - value.max(axis=1, keepdims=True))
        return e_x / e_x.sum(axis=1, keepdims=True)

    def create_param(w_shape):
        zeros = numpy.zeros(w_shape)
        return theano.shared(
            numpy.asarray(zeros, dtype=theano.config.floatX),
            borrow=True)

    # ***********************************************************
    layer_out = src_data
    for w, b in zip(weights, biases):
        conv_out = (nnet.conv2d(input=layer_out, filters=w) +
                    b.dimshuffle('x', 0, 'x', 'x'))
        pooled = downsample.max_pool_2d(conv_out, (2, 2))
        layer_out = relu(pooled)

    scaled_up_y = ops.repeat(layer_out, 16, axis=2)
    scaled_up_y_x = ops.repeat(scaled_up_y, 16, axis=3)

    eps = 1e-7
    clipped_softmax = softmax4d(scaled_up_y_x).clip(eps, 1 - eps)

    # ***********************************************************
    max_val = clipped_softmax.argmax(axis=1, keepdims=True)

    # ***********************************************************
    # Move axis 1 last and flatten to rows of 3 before the cross-entropy.
    rs_softmax = clipped_softmax.dimshuffle(0, 2, 3, 1).reshape((-1, 3))
    rs_grt_data = grt_data.dimshuffle(0, 2, 3, 1).reshape((-1, 3))
    cost = T.mean(T.nnet.categorical_crossentropy(rs_softmax, rs_grt_data))

    # ***********************************************************
    gparams = [T.grad(cost, param) for param in params]

    # ***********************************************************
    # Running averages of squared gradients and of squared steps, one
    # zero-initialised shared variable per parameter.
    prev_eg2s = [create_param(shape) for shape in param_shapes]
    prev_edx2s = [create_param(shape) for shape in param_shapes]

    rho = 0.95
    ada_eps = 1e-9
    cur_eg2s = [rho * prev_eg2 + (1.0 - rho) * T.sqr(gparam)
                for prev_eg2, gparam in zip(prev_eg2s, gparams)]
    dxs = [T.sqrt(edx2 + ada_eps) / T.sqrt(eg2 + ada_eps) * gparam
           for edx2, eg2, gparam in zip(prev_edx2s, cur_eg2s, gparams)]
    cur_edx2s = [rho * prev_edx2 + (1.0 - rho) * T.sqr(dx)
                 for prev_edx2, dx in zip(prev_edx2s, dxs)]

    learning_rate = 1
    cur_params = [param - learning_rate * dx
                  for param, dx in zip(params, dxs)]

    updates = (list(zip(params, cur_params)) +
               list(zip(prev_eg2s, cur_eg2s)) +
               list(zip(prev_edx2s, cur_edx2s)))

    # ***********************************************************
    f_cnn = theano.function([src_data], theano.Out(max_val, borrow=True))
    f_cost = theano.function(inputs=[src_data, grt_data], outputs=cost)
    f_train = theano.function(inputs=[src_data, grt_data], outputs=cost,
                              updates=updates)

    return f_train, f_cnn, f_cost
def output_random_generation(self, input, n_batch=144):
    """Unpool ``input`` with a fixed mask, convolve, add bias, activate.

    Unpooling repeats ``input`` along axes 2 and 3 by ``self.poolsize``
    and multiplies by ``self.random_mask``, which keeps only the first
    (``[0, 0]``) position of every pooling window. The mask is created on
    the first call and cached on the instance.

    Parameters
    ----------
    input : Theano variable
        4-d tensor to unpool (axes 2 and 3 are the ones repeated).
    n_batch : int
        Batch size written into the image shape given to ``conv2d`` and
        used when the mask is first built.
        NOTE(review): the cached mask keeps the batch size of the first
        call; later calls with a different ``n_batch`` reuse it unchanged.

    Returns
    -------
    Theano variable
        ``activation(conv + bias)``, or ``conv + bias`` if
        ``self.activation`` is ``None``.

    Raises
    ------
    Exception
        If ``self.border_mode`` is not 'valid', 'same' or 'full'.
    """
    pool = list(self.poolsize)      # accept tuples as well as lists

    ###--- Unpool
    image_shape = list(self.image_shape)
    image_shape[0] = n_batch

    if self.random_mask is None:
        # Temporarily turn the spatial sizes into window counts.  Floor
        # division keeps them ints on Python 3 too (np.tile needs integer
        # repetitions); on Python 2 it matches the old ``/=``.
        image_shape[2] //= pool[0]
        image_shape[3] //= pool[1]
        window = np.zeros(pool, dtype=np.float32)
        window[0, 0] = 1
        self.random_mask = theano.shared(
            np.tile(window.reshape([1, 1] + pool), image_shape))
        image_shape[2] *= pool[0]
        image_shape[3] *= pool[1]

    if pool[0] == 1 and pool[1] == 1:
        unpool_out = input
    else:
        stretched = Textra.repeat(input, pool[0], axis=2)
        stretched = Textra.repeat(stretched, pool[1], axis=3)
        unpool_out = stretched * self.random_mask

    ###--- Unpool + conv
    # convolve input feature maps with filters
    if self.border_mode not in ('valid', 'same', 'full'):
        raise Exception('Unknown conv type')

    # 'same' is computed as a 'full' convolution that is cropped below.
    conv_out = conv.conv2d(
        input=unpool_out,
        filters=self.W,
        filter_shape=self.filter_shape,
        image_shape=image_shape,
        border_mode='valid' if self.border_mode == 'valid' else 'full')

    if self.border_mode == 'same':
        pad_axis2 = (self.filter_shape[2] - 1) // 2
        pad_axis3 = (self.filter_shape[3] - 1) // 2
        # A zero pad must keep the whole axis: ``x[0:-0]`` would be empty.
        crop_axis2 = slice(pad_axis2, -pad_axis2) if pad_axis2 else slice(None)
        crop_axis3 = slice(pad_axis3, -pad_axis3) if pad_axis3 else slice(None)
        conv_out = conv_out[:, :, crop_axis2, crop_axis3]

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
    if self.activation is None:
        return lin_output
    return self.activation(lin_output)
def get_timit_waveform():
    """Load TIMIT waveforms and one-hot phoneme labels for train and test.

    File names are read from ``train.list`` / ``test.list`` in the current
    directory, audio from the hard-coded TimitWav directories, and phoneme
    labels from ``TIMIT_TRAIN.mat`` / ``TIMIT_TEST.mat``.

    Returns
    -------
    tuple
        ``(train_audio, train_phn, test_audio, test_phn)``. The audio
        entries are lists holding the second element returned by
        ``wavfile.read`` for each file; the label entries are lists of
        one-hot arrays (41 columns) with every row repeated 160 times.
    """
    # Each label row is stretched by this factor.  The original comments
    # disagreed with each other ("1600" / "16000") and with the code; 160
    # is the value the code actually used.
    label_repeats = 160

    def read_filenames(list_path):
        # One file name per line; only the trailing newline is stripped.
        with open(list_path) as handle:
            return [line.rstrip('\n') for line in handle]

    def load_audio(prefix, filenames):
        # wavfile.read returns a pair; only the second item is kept
        # (presumably (sample_rate, samples) as in scipy.io.wavfile).
        return [wavfile.read(prefix + name)[1] for name in filenames]

    def load_phonemes(mat_path, count):
        # One-hot encode row 2 of each referenced label table.
        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)
        encoded_labels = []
        # The context manager closes the HDF5 file once labels are read.
        with h5py.File(mat_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(count):
                ref = phn_data[i][0]
                labels = phn[ref]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))
                encoded = enc.fit_transform(phonemes)
                # numpy.repeat gives the same array as building and
                # evaluating a Theano repeat graph, without the
                # per-utterance compilation cost.
                encoded_labels.append(
                    numpy.repeat(encoded, label_repeats, axis=0))
        return encoded_labels

    # training data
    train_audio = load_audio(
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/train/',
        read_filenames('train.list'))
    train_phn = load_phonemes('TIMIT_TRAIN.mat', len(train_audio))

    print('training done...')

    # test data
    test_audio = load_audio(
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/test/',
        read_filenames('test.list'))
    test_phn = load_phonemes('TIMIT_TEST.mat', len(test_audio))

    return train_audio, train_phn, test_audio, test_phn
def __init__(self, model, learning_rate=0.1, pred_given=None,
             arg0_given=None, arg1_given=None, arg2_given=None):
    """Set up gradient-based sampling of a right-hand-side (RHS) event.

    One shared row vector is created for the predicate and one for each of
    the three arguments.  The model's prediction graph is cloned so that the
    concatenation of those vectors (repeated once per context row) feeds the
    RHS projection, and two functions are compiled: ``self.optimize``
    (one gradient step on ``-log(mean prediction)``) and
    ``self.score_vector`` (the mean prediction only).

    :param model: object providing ``pair_projection_model``,
        ``pred_vocab`` and ``arg_vocab``.
    :param learning_rate: default value of the learning-rate input of
        ``self.optimize``.
    :param pred_given: predicate to hold fixed, or ``None`` to optimize it.
    :param arg0_given: argument 0 to hold fixed, ``"--"`` to fix it to
        empty, or ``None`` to optimize it.
    :param arg1_given: as ``arg0_given``, for argument 1.
    :param arg2_given: as ``arg0_given``, for argument 2.
    :raises ValueError: if all four components are given, leaving nothing
        to optimize.
    """
    self.learning_rate = learning_rate
    self.model = model
    self.network = model.pair_projection_model
    self.event_network = self.network.event_network
    self.pred_given = pred_given
    self.arg0_given = arg0_given
    self.arg1_given = arg1_given
    self.arg2_given = arg2_given

    event_net = self.event_network
    float_type = theano.config.floatX
    self.learning_rate_var = T.scalar("learning_rate", dtype=float_type)

    def zero_row(shape):
        # Shared variable holding one all-zero row of the given shape.
        return theano.shared(numpy.zeros(shape, dtype=float_type),
                             borrow=True)

    # Create variables for the unobserved inputs (RHS), which we're sampling
    self.pred_size = (1, event_net.pred_vector_size)
    self.rhs_pred = zero_row(self.pred_size)
    self.arg_size = (1, event_net.arg_vector_size)
    self.rhs_arg0 = zero_row(self.arg_size)
    self.rhs_arg1 = zero_row(self.arg_size)
    self.rhs_arg2 = zero_row(self.arg_size)
    self.arg_vectors = [self.rhs_arg0, self.rhs_arg1, self.rhs_arg2]
    self.input_vector_size = self.pred_size[1] + 3 * self.arg_size[1]

    rhs_vector = T.concatenate(
        [self.rhs_pred, self.rhs_arg0, self.rhs_arg1, self.rhs_arg2],
        axis=1)

    # Rebuild the prediction function so that it uses our new vectors on
    # the RHS.  They are repeated over the first dimension: a single RHS
    # vector is compared to all LHS vectors (context).
    context_rows = event_net.predicate_input_a.shape[0]
    rhs_projection = theano.clone(
        event_net.projection_layer,
        replace={
            event_net.input_vector:
                extra_ops.repeat(rhs_vector, context_rows, axis=0),
        })
    prediction = theano.clone(
        self.network.prediction,
        replace={self.network.input_b: rhs_projection},
        share_inputs=True)

    # The prediction value is the coherence output, used as the objective
    # to maximize, averaged over the context inputs.
    chain_coherence = T.mean(prediction)

    # Only optimize the positions that haven't been fixed
    candidates = (
        (pred_given, self.rhs_pred),
        (arg0_given, self.rhs_arg0),
        (arg1_given, self.rhs_arg1),
        (arg2_given, self.rhs_arg2),
    )
    self.params = [vector for given, vector in candidates if given is None]
    if len(self.params) == 0:
        raise ValueError(
            "all RHS event components have been fixed, so there's nothing left to sample!"
        )

    # Differentiate cost w.r.t. the RHS vectors to get the updates
    cost = -T.log(chain_coherence)
    updates = []
    for param in self.params:
        gradient = T.grad(cost, param)
        updates.append((param, param - self.learning_rate_var * gradient))

    lhs_inputs = [
        event_net.predicate_input_a,
        event_net.arg0_input_a,
        event_net.arg1_input_a,
        event_net.arg2_input_a,
    ]
    self.optimize = theano.function(
        inputs=lhs_inputs + [
            theano.Param(self.learning_rate_var,
                         default=self.learning_rate),
        ],
        outputs=[cost, chain_coherence],
        updates=updates,
    )
    self.score_vector = theano.function(
        inputs=list(lhs_inputs),
        outputs=chain_coherence,
    )

    self.positions = [
        self.rhs_pred, self.rhs_arg0, self.rhs_arg1, self.rhs_arg2
    ]
    self.givens = [pred_given, arg0_given, arg1_given, arg2_given]
    self.vector_vocabs = [
        event_net.predicate_vectors.get_value(),
        event_net.argument0_vectors.get_value(),
        event_net.argument1_vectors.get_value(),
        event_net.argument2_vectors.get_value(),
    ]

    # Set the non-updated input vectors to the right values
    if pred_given is not None:
        if pred_given not in self.model.pred_vocab:
            warnings.warn(
                "predicate '%s' not in vocabulary: not constraining sample on predicate"
                % pred_given)
        self.set_given(0, pred_given)

    fixed_args = (
        (1, arg0_given,
         "arg '%s' not in vocabulary: not constraining sample on arg0"),
        (2, arg1_given,
         "arg '%s' not in vocabulary: not constraining sample on arg1"),
        (3, arg2_given,
         "arg '%s' not in vocabulary: not constraining sample on arg2"),
    )
    for position, given, message in fixed_args:
        if given is None:
            continue
        if given == "--":
            # Special value meaning fix to empty
            self.set_given(position, None)
            continue
        if given not in self.model.arg_vocab:
            warnings.warn(message % given)
        self.set_given(position, given)
def from_model(model, neighbour_finder=None, learning_rate=0.1,
               num_samples=1, slimline_model=None):
    """Compile the sampling functions for ``model`` and wrap them in a sampler.

    A shared ``(num_samples, projection_size)`` matrix of zeros stands in for
    the unobserved right-hand-side (RHS) projection.  The model's prediction
    graph is cloned so that this matrix replaces ``input_b`` and the four
    left-hand-side inputs are tiled ``num_samples`` times.

    :param model: object providing ``pair_projection_model``.
    :param neighbour_finder: passed through to the sampler unchanged.
    :param learning_rate: default value of the learning-rate input of the
        compiled optimize function; also passed to the sampler.
    :param num_samples: number of RHS rows optimized at once.
    :param slimline_model: if not ``None``, handed to the sampler in place
        of ``model``.
    :return: a ``NextEventProjectionSampler``.
    """
    network = model.pair_projection_model
    event_net = network.event_network
    lhs_inputs = [
        event_net.predicate_input_a,
        event_net.arg0_input_a,
        event_net.arg1_input_a,
        event_net.arg2_input_a,
    ]

    learning_rate_var = T.scalar("learning_rate",
                                 dtype=theano.config.floatX)

    # Variable for the unobserved input vector (RHS), which we're sampling
    projection_size = event_net.projection_size
    initial_rhs = numpy.zeros((num_samples, projection_size),
                              dtype=theano.config.floatX)
    rhs_projection = theano.shared(initial_rhs, borrow=True)

    # The RHS rows are repeated over the first dimension and the LHS inputs
    # are tiled, so that each RHS row is compared to all LHS rows (context).
    replacements = {
        network.input_b: extra_ops.repeat(
            rhs_projection, event_net.predicate_input_a.shape[0], axis=0),
    }
    for lhs_input in lhs_inputs:
        replacements[lhs_input] = T.tile(lhs_input, (num_samples, ))
    prediction = theano.clone(network.prediction,
                              replace=replacements,
                              share_inputs=True)

    # The mean prediction (coherence) is the objective to maximize.
    chain_coherence = T.mean(prediction)
    cost = -T.log(chain_coherence)

    # Plain gradient step on the RHS matrix.
    gradient = T.grad(cost, rhs_projection)
    updates = [(rhs_projection,
                rhs_projection - learning_rate_var * gradient)]

    optimize = theano.function(
        inputs=lhs_inputs + [
            theano.Param(learning_rate_var, default=learning_rate),
        ],
        outputs=[cost, chain_coherence],
        updates=updates,
    )
    score_vector = theano.function(
        inputs=list(lhs_inputs),
        outputs=chain_coherence,
    )

    # Use the slimline version of the model, when supplied, as the
    # reference kept for sampling purposes.
    sampler_model = model if slimline_model is None else slimline_model
    return NextEventProjectionSampler(projection_size, rhs_projection,
                                      optimize, score_vector, sampler_model,
                                      neighbour_finder, learning_rate,
                                      num_samples)
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             border_mode='same', activation=None, mask=None):
    """
    Allocate an unpooling + convolution layer with shared variable
    internal parameters.

    The input is first unpooled (each value repeated ``poolsize`` times
    along the two spatial axes and multiplied by ``mask``), then passed
    through ``activation`` if one is given, then convolved with ``W``, and
    finally the bias is added.  No activation is applied after the bias.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor (before unpooling)

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width), as handed to the
                         convolution, i.e. after unpooling

    :type poolsize: tuple or list of length 2
    :param poolsize: the unpooling factor (#rows, #cols)

    :type border_mode: str
    :param border_mode: 'valid', 'same' or 'full'; 'same' is a 'full'
                        convolution cropped by (filter size - 1) // 2 on
                        every side

    :param activation: callable applied to the unpooled input before the
                       convolution, or None

    :param mask: unpooling mask; when None, a mask keeping only the
                 top-left cell of every pooling window is created

    :raises Exception: if ``border_mode`` is not one of the three values
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # "num output feature maps * filter height * filter width" multiplied
    # by the pooling size (this layer unpools, hence * instead of /)
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) *
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX
        ),
        borrow=True
    )

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    ###--- Unpool
    if poolsize[0] == 1 and poolsize[1] == 1:
        self.unpool_out = input
    else:
        if mask is None:
            window = np.zeros(tuple(poolsize), dtype=np.float32)
            window[0, 0] = 1
            # Tiling counts: the pre-unpool shape, so that the tiled mask
            # has the same shape as the unpooled input.
            input_shape = [image_shape[0], image_shape[1],
                           image_shape[2] // poolsize[0],
                           image_shape[3] // poolsize[1]]
            # list(...) so that the default tuple pool size works too.
            mask = theano.shared(
                np.tile(window.reshape([1, 1] + list(poolsize)),
                        input_shape))
        repeated = Textra.repeat(input, poolsize[0], axis=2)
        repeated = Textra.repeat(repeated, poolsize[1], axis=3)
        self.unpool_out = repeated * mask

    if activation is None:
        relu_output = self.unpool_out
    else:
        relu_output = activation(self.unpool_out)

    ###--- Unpool + conv
    # convolve input feature maps with filters
    if border_mode not in ('valid', 'same', 'full'):
        raise Exception('Unknown conv type')

    conv_out = conv.conv2d(
        input=relu_output,
        filters=self.W,
        filter_shape=filter_shape,
        image_shape=image_shape,
        border_mode='valid' if border_mode == 'valid' else 'full'
    )
    if border_mode == 'same':
        pad_rows = (filter_shape[2] - 1) // 2
        pad_cols = (filter_shape[3] - 1) // 2
        # The stop index is taken from the symbolic shape because a slice
        # written as ``pad:-pad`` selects nothing when the padding is 0.
        conv_out = conv_out[:, :,
                            pad_rows:conv_out.shape[2] - pad_rows,
                            pad_cols:conv_out.shape[3] - pad_cols]

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

    # store parameters of this layer
    self.params = [self.W, self.b]
def get_timit_specs():
    """Load TIMIT spectrograms together with repeated one-hot phoneme labels.

    The spectrograms are the transposed ``stimulus_zscore`` datasets of the
    hard-coded ``timit_stim_1.mat`` files; the labels come from
    ``TIMIT_TRAIN.mat`` / ``TIMIT_TEST.mat``, one entry per line of
    ``train.list`` / ``test.list``.

    :return: ``(train_stim, train_phn, test_stim, test_phn)``.  The label
        lists hold, per utterance, the one-hot encoded phonemes with every
        row repeated 10 times.
    """
    def load_split(stim_path, list_path, label_path):
        # Load one data split: its spectrogram, then its labels.
        with h5py.File(stim_path) as stim_file:
            # numpy.transpose copies the dataset into memory, so the file
            # can be closed right away.
            stim = numpy.transpose(stim_file['stimulus_zscore'])

        with open(list_path) as listing:
            filenames = [line.rstrip('\n') for line in listing]

        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)
        labels_per_file = []
        with h5py.File(label_path) as label_file:
            refs = label_file['data']
            for i in range(len(filenames)):
                labels = label_file[refs[i][0]]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))
                # need to encode and repeat for each sample
                encoded = enc.fit_transform(phonemes)
                # Every label row is repeated 10 times; numpy.repeat gives
                # the same array as building and evaluating a symbolic
                # repeat for every utterance.
                labels_per_file.append(numpy.repeat(encoded, 10, axis=0))
        return stim, labels_per_file

    train_stim, train_phn = load_split(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat',
        'train.list',
        'TIMIT_TRAIN.mat')
    test_stim, test_phn = load_split(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat',
        'test.list',
        'TIMIT_TEST.mat')

    return train_stim, train_phn, test_stim, test_phn
def get_interpolated_hiddens(old_hidden, n_timesteps, n_samples,
                             interpolation_mask, number_cons_hiddens,
                             hidden_size=1024, batch_size=32):
    '''
    Interpolate between consecutive hidden states.

    Given hiddens h1, h2, ..., hn the consecutive pairs [h1, h2], [h2, h3],
    ..., [h_n-1, h_n] are formed (n-1 pairs).  For every pair (a, b) and
    every weight alpha in interpolation_mask the state
    alpha * a + (1 - alpha) * b is produced, e.g. with
    alphas = [1, 0.8, 0.6, 0.4, 0.2].  The results of all pairs are
    concatenated along the time axis, and len(interpolation_mask) all-zero
    steps are prepended.

    old_hidden: symbolic hidden-state tensor,
        number_of_hiddens * batch_size * hidden_size.
    n_timesteps: every per-sample result is reshaped to
        n_timesteps - 2 steps before stacking.
    n_samples: batch dimension used when reshaping the pairs.
    interpolation_mask: sequence of interpolation weights (its len() is
        taken).
    number_cons_hiddens: number of consecutive pairs (number of hiddens
        minus one).
    hidden_size: size of one hidden state (previously fixed at 1024).
    batch_size: number of samples the Python-level loop is unrolled over
        (previously fixed at 32); it has to be a Python int -- presumably
        equal to n_samples, TODO confirm.

    Returns the interpolated hiddens, batch first.
    '''
    alpha = interpolation_mask
    num_cons_hiddens = number_cons_hiddens
    num_reduced_hiddens = num_cons_hiddens + 1
    number_interp = len(interpolation_mask)

    # Put the batch axis first, then duplicate every step and shift by one
    # so that neighbouring entries form the pairs (h_t, h_t+1).
    X = old_hidden.dimshuffle(1, 0, 2)
    pairs = repeat(X, 2, axis=1)
    pairs = tensor.roll(pairs, -1, axis=1)
    pairs = pairs[:, 0:2 * num_reduced_hiddens - 2, :]
    pairs = pairs.reshape([n_samples, num_cons_hiddens, 2, hidden_size])

    def _step_slice(m_, interp_mask):
        # m_ holds one pair; one interpolated state per mask entry.
        interp_ret = []
        for i in range(number_interp):
            interp_ret.append(interp_mask[i] * m_[0] +
                              (1 - interp_mask[i]) * m_[1])
        return interp_ret

    def step_batch(m_, alpha):
        # Scan over the pairs of one sample.
        rval, updates = theano.scan(_step_slice,
                                    sequences=m_,
                                    non_sequences=[alpha])
        return rval

    # Outer scan over the samples; rval[j] holds the j-th interpolation.
    rval, updates = theano.scan(step_batch,
                                sequences=pairs,
                                non_sequences=[alpha])

    out = []
    out_batch = []
    for batch_index in range(batch_size):
        for i in range(num_cons_hiddens):
            something = [rval[j][batch_index][i]
                         for j in range(number_interp)]
            if i == 0:
                out = something
            else:
                out = tensor.concatenate([out, something], axis=0)
        if batch_index == 0:
            out_batch = out
        elif batch_index == 1:
            out_batch = tensor.stacklists([out_batch, out])
        else:
            out = tensor.reshape(out, [1, n_timesteps - 2, hidden_size])
            out_batch = tensor.concatenate([out_batch, out])

    zero_pad = tensor.zeros(
        [out_batch.shape[0], number_interp, out_batch.shape[2]])
    out_batch = tensor.concatenate([zero_pad, out_batch], axis=1)
    return out_batch
def __init__(self, data, image_shape, filter_shape, poolsize, sparse_coeff,
             activation='sigmoid', tied_weight=False, is_linear=False,
             do_max_pool=False):
    """Build a convolutional auto-encoder and compile its Theano functions.

    A 'full' convolution layer encodes ``self.input`` and a 'valid'
    convolution layer reconstructs it.  The cost is half the mean squared
    reconstruction error (summed over channel, height and width) plus
    ``sparse_coeff`` times the mean L1 norm of the hidden feature maps, and
    is minimized with ``adadelta_updates``.

    Three functions taking a mini-batch index are compiled:
    ``self.train``, ``self.get_activation`` and ``self.get_reconstruction``.

    :param data: indexable by a symbolic slice; mini-batch ``i`` is
        ``data[i * batchsize:(i + 1) * batchsize]``.
    :param image_shape: (batch size, channels, height, width) of the input.
    :param filter_shape: indexed as (number of filters, -, filter height,
        filter width).
    :param poolsize: pooling size of the max-pooling behind
        ``self.activation``.
    :param sparse_coeff: weight of the sparsity term; when it is not 0 the
        hidden feature maps are also L2-normalized, first across feature
        maps and then per feature map.
    :param activation: forwarded to the layers as ``act``.
    :param tied_weight: if true, the decoder filters are initialized from
        the encoder filters.
    :param is_linear: if true, the reconstruction layer uses
        ``act='linear'``.
    :param do_max_pool: forwarded to the hidden layer as ``if_pool``; when
        true the hidden output is repeated twice along both spatial axes
        before reconstruction.
    """
    rng = np.random.RandomState(None)
    self.data = data
    self.batchsize = image_shape[0]
    self.in_channels = image_shape[1]
    self.in_height = image_shape[2]
    self.in_width = image_shape[3]

    self.flt_channels = filter_shape[0]
    self.flt_height = filter_shape[2]
    self.flt_width = filter_shape[3]

    self.input = T.ftensor4('input')

    hidden_layer = ConvolutionLayer(rng,
                                    input=self.input,
                                    filter_shape=filter_shape,
                                    act=activation,
                                    border_mode='full',
                                    if_pool=do_max_pool)

    full_height = self.in_height + self.flt_height - 1
    full_width = self.in_width + self.flt_width - 1
    self.hidden_image_shape = (self.batchsize, self.flt_channels,
                               full_height, full_width)
    # Floor division keeps the pooled extents integral on Python 3 as well.
    self.hidden_pooled_image_shape = (self.batchsize, self.flt_channels,
                                      full_height // 2, full_width // 2)
    self.hidden_filter_shape = (self.in_channels, self.flt_channels,
                                self.flt_height, self.flt_width)

    def upsample(feature_maps):
        # Repeat every value twice along both spatial axes.
        doubled = repeat(feature_maps, repeats=2, axis=2)
        return repeat(doubled, repeats=2, axis=3)

    if sparse_coeff == 0:
        hidden_code = hidden_layer.output
    else:
        feature_map = hidden_layer.output
        # first across featuremap, then per featuremap
        examp_sparsity = feature_map.norm(2, axis=1)
        examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
        feature_map1 = np.divide(feature_map, examp_sparsity+1e-9)

        feature_map1_vec = feature_map1.reshape(
            (feature_map1.shape[0], feature_map1.shape[1],
             feature_map1.shape[2]*feature_map1.shape[3]))
        feat_sparsity = feature_map1_vec.norm(2, axis=2)
        feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
        hidden_code = np.divide(feature_map1, feat_sparsity+1e-9)

    if do_max_pool:
        hidden_layer_output = upsample(hidden_code)
    else:
        hidden_layer_output = hidden_code

    recon_layer = ConvolutionLayer(
        rng,
        input=hidden_layer_output,
        filter_shape=self.hidden_filter_shape,
        act='linear' if is_linear else activation,
        border_mode='valid')

    self.tied_weight = tied_weight
    if self.tied_weight:
        # This copies the encoder filters (first two axes swapped) into the
        # decoder once; the two layers keep separate shared variables.
        weight = hidden_layer.W.get_value()
        recon_layer.W.set_value(weight.transpose(1, 0, 2, 3), borrow=True)

    self.layers = [hidden_layer, recon_layer]
    self.params = sum([layer.params for layer in self.layers], [])

    L1_sparsity = hidden_layer_output.norm(1, axis=(2, 3))
    sparse_filter = T.mean(L1_sparsity, axis=(0, 1))

    # sum over channel, height, width
    L = T.sum(T.pow(T.sub(recon_layer.output, self.input), 2),
              axis=(1, 2, 3))
    cost = 0.5*T.mean(L) + sparse_coeff * sparse_filter

    grads = T.grad(cost, self.params)
    updates = adadelta_updates(self.params, grads, rho=0.95, eps=1e-6)

    index = T.lscalar('index')
    batch_begin = index * self.batchsize
    batch_end = batch_begin + self.batchsize

    def batch_function(outputs, name, updates=None):
        # Compile a function of the mini-batch index reading from self.data.
        return theano.function(
            inputs=[index],
            outputs=outputs,
            updates=updates,
            givens={
                self.input: self.data[batch_begin:batch_end]
            },
            name=name)

    self.train = batch_function(cost, "train cae model", updates=updates)

    self.activation = downsample.max_pool_2d(
        input=hidden_layer.output,
        ds=poolsize,
        ignore_border=True)

    # With do_max_pool the hidden layer output is returned as is; otherwise
    # the max-pooled activation is returned.
    self.get_activation = batch_function(
        hidden_layer.output if do_max_pool else self.activation,
        'get hidden activation')

    self.get_reconstruction = batch_function(recon_layer.output,
                                             'get reconstruction')
def get_timit_specs_images(window_size):
    """Load TIMIT spectrograms as sliding windows, plus phoneme labels.

    Each spectrogram (the transposed ``stimulus_zscore`` dataset of the
    hard-coded ``timit_stim_1.mat`` files) is cut into chunks of 5000
    frames; inside every chunk all ``5000 - window_size`` windows of
    ``window_size`` consecutive frames are collected.  Frames beyond the
    last complete chunk are ignored.

    :param window_size: number of frames per window.
    :return: ``(train_stim_windows, train_phn, test_stim_windows,
        test_phn)``.  The window arrays have shape
        ``(n_chunks, 5000 - window_size, window_size, n_features)``; the
        label lists hold, per utterance, the one-hot encoded phonemes with
        every row repeated 10 times, with ``window_size // 2`` entries
        dropped from each end of the list.
    """
    chunk_len = 5000
    half = window_size // 2

    def load_windows(stim_path):
        # Read one spectrogram and cut it into per-chunk sliding windows.
        with h5py.File(stim_path) as stim_file:
            # numpy.transpose copies the dataset into memory, so the file
            # can be closed right away.
            stim = numpy.transpose(stim_file['stimulus_zscore'])

        n_chunks = stim.shape[0] // chunk_len
        per_chunk = chunk_len - window_size
        windows = numpy.zeros(
            (n_chunks, per_chunk, window_size, stim.shape[1]))
        for j in range(n_chunks):
            first = j * chunk_len
            # The second axis is indexed by the offset inside the chunk;
            # using the absolute frame index runs past the end of that
            # axis for every chunk after the first.
            for offset in range(per_chunk):
                start = first + offset
                windows[j][offset] = stim[start:start + window_size]
        return windows

    def load_labels(list_path, label_path):
        # Read the repeated one-hot labels of one data split.
        with open(list_path) as listing:
            filenames = [line.rstrip('\n') for line in listing]

        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)
        labels_per_file = []
        with h5py.File(label_path) as label_file:
            refs = label_file['data']
            for i in range(len(filenames)):
                labels = label_file[refs[i][0]]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))
                # need to encode and repeat for each sample
                encoded = enc.fit_transform(phonemes)
                # Every label row is repeated 10 times; numpy.repeat gives
                # the same array as building and evaluating a symbolic
                # repeat for every utterance.
                labels_per_file.append(numpy.repeat(encoded, 10, axis=0))
        # NOTE(review): this trims entries of the per-utterance list, not
        # frames inside an utterance -- confirm that this is intended.
        return labels_per_file[half:len(labels_per_file) - half]

    train_stim_windows = load_windows(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat')
    train_phn = load_labels('train.list', 'TIMIT_TRAIN.mat')

    test_stim_windows = load_windows(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat')
    test_phn = load_labels('test.list', 'TIMIT_TEST.mat')

    return train_stim_windows, train_phn, test_stim_windows, test_phn
def get_interpolated_hiddens(old_hidden, n_timesteps, n_samples,
                             interpolation_mask, number_cons_hiddens,
                             hidden_size=1024, batch_size=32):
    '''
    Interpolate between consecutive hidden states.

    Given hiddens h1, h2, ..., hn the consecutive pairs [h1, h2], [h2, h3],
    ..., [h_n-1, h_n] are formed (n-1 pairs).  For every pair (a, b) and
    every weight alpha in interpolation_mask the state
    alpha * a + (1 - alpha) * b is produced, e.g. with
    alphas = [1, 0.8, 0.6, 0.4, 0.2].  The results of all pairs are
    concatenated along the time axis, and len(interpolation_mask) all-zero
    steps are prepended.

    old_hidden: symbolic hidden-state tensor,
        number_of_hiddens * batch_size * hidden_size.
    n_timesteps: every per-sample result is reshaped to
        n_timesteps - 2 steps before stacking.
    n_samples: batch dimension used when reshaping the pairs.
    interpolation_mask: sequence of interpolation weights (its len() is
        taken).
    number_cons_hiddens: number of consecutive pairs (number of hiddens
        minus one).
    hidden_size: size of one hidden state (previously fixed at 1024).
    batch_size: number of samples the Python-level loop is unrolled over
        (previously fixed at 32); it has to be a Python int -- presumably
        equal to n_samples, TODO confirm.

    Returns the interpolated hiddens, batch first.
    '''
    alpha = interpolation_mask
    num_cons_hiddens = number_cons_hiddens
    num_reduced_hiddens = num_cons_hiddens + 1
    number_interp = len(interpolation_mask)

    # Put the batch axis first, then duplicate every step and shift by one
    # so that neighbouring entries form the pairs (h_t, h_t+1).
    X = old_hidden.dimshuffle(1, 0, 2)
    pairs = repeat(X, 2, axis=1)
    pairs = tensor.roll(pairs, -1, axis=1)
    pairs = pairs[:, 0:2 * num_reduced_hiddens - 2, :]
    pairs = pairs.reshape([n_samples, num_cons_hiddens, 2, hidden_size])

    def _step_slice(m_, interp_mask):
        # m_ holds one pair; one interpolated state per mask entry.
        interp_ret = []
        for i in range(number_interp):
            interp_ret.append(interp_mask[i] * m_[0] +
                              (1 - interp_mask[i]) * m_[1])
        return interp_ret

    def step_batch(m_, alpha):
        # Scan over the pairs of one sample.
        rval, updates = theano.scan(_step_slice,
                                    sequences=m_,
                                    non_sequences=[alpha])
        return rval

    # Outer scan over the samples; rval[j] holds the j-th interpolation.
    rval, updates = theano.scan(step_batch,
                                sequences=pairs,
                                non_sequences=[alpha])

    out = []
    out_batch = []
    for batch_index in range(batch_size):
        for i in range(num_cons_hiddens):
            something = [rval[j][batch_index][i]
                         for j in range(number_interp)]
            if i == 0:
                out = something
            else:
                out = tensor.concatenate([out, something], axis=0)
        if batch_index == 0:
            out_batch = out
        elif batch_index == 1:
            out_batch = tensor.stacklists([out_batch, out])
        else:
            out = tensor.reshape(out, [1, n_timesteps - 2, hidden_size])
            out_batch = tensor.concatenate([out_batch, out])

    zero_pad = tensor.zeros(
        [out_batch.shape[0], number_interp, out_batch.shape[2]])
    out_batch = tensor.concatenate([zero_pad, out_batch], axis=1)
    return out_batch