def build():
    # constants
    floatX = config.floatX

    # load the trained parameters
    file = gzip.GzipFile("params.zip", 'rb')
    params = load(file)
    file.close()
    print params

    W = [[None, None], [None, None], [None, None]]
    b = [[None, None], [None, None], [None, None]]
    (W[0][0], b[0][0], W[0][1], b[0][1],
     W[1][0], b[1][0], W[1][1], b[1][1],
     W[2][0], b[2][0], W[2][1], b[2][1], Wh, bh, Ws, bs) = params

    # ---------------------------- FLIP KERNELS --------------------------------
    W = array(W)
    W_new = [[None, None], [None, None], [None, None]]
    for i in range(W.shape[0]):
        for j in range(W.shape[1]):
            w = W[i, j].get_value()
            print w.shape, w.dtype
            for k in range(w.shape[0]):
                for l in range(w.shape[1]):
                    for m in range(w.shape[2]):
                        w[k, l, m] = cv2.flip(w[k, l, m], -1)
            W_new[i][j] = shared(array(w, dtype=floatX), borrow=True)
    W = W_new
    # ---------------------------- FLIP KERNELS --------------------------------

    rng = random.RandomState(1337)  # fixed seed so results are always the same
    batch_size = 1
    in_shape = (1, 2, 2, 32, 64, 64)  # (batchsize, convnets, maps, frames, h, w) input video shape
    traj_shape = (batch_size, 3, 32)  # (batchsize, coords, frames) input trajectory shape

    # hyper parameters
    # --------------------------------------------------------------------------

    # use techniques/methods
    class use:
        drop = True        # dropout
        depth = True       # use depth map as input
        aug = False        # data augmentation
        load = False       # load params.p file
        traj = False       # trajectory
        trajconv = False   # convolutions on trajectory
        valid2 = False
        fast_conv = False
        norm_div = False
        norm = True        # normalization layer
        mom = True         # momentum

    # regularization
    class reg:
        L1_traj = .0  # degree/amount of regularization
        L2_traj = .0  # 1: only L1, 0: only L2
        L1_vid = .0   # degree/amount of regularization
        L2_vid = .0   # 1: only L1, 0: only L2

    class trajconv:
        append = False   # append convolution result to original trajectory
        filter_size = 5
        layers = 3       # number of convolution layers
        res_shape = traj_shape[-1] - layers * (filter_size - 1)

    class net:
        shared_stages = []    # stages where weights are shared
        shared_convnets = []  # convnets that share weights with neighbouring convnet
        n_convnets = 2        # number of convolutional networks in the architecture
        maps = [2, 16, 32, 64]  # feature maps in each convolutional network
        # maps = [2, 5, 25, 25]  # feature maps in each convolutional network
        kernels = [(1, 7, 7), (1, 8, 8), (1, 6, 6)]  # convolution kernel shapes
        pools = [(2, 2, 2), (2, 2, 2), (2, 2, 2)]    # pool/subsampling shapes
        hidden_traj = 200  # hidden units in MLP
        hidden_vid = 300   # hidden units in MLP
        W_scale = 0.01
        b_scale = 0.1
        norm_method = "lcn"  # normalisation method: lcn = local contrast normalisation
        pool_method = "max"  # maxpool
        fusion = "early"     # early or late fusion
        hidden = hidden_traj + hidden_vid if fusion == "late" else 500  # hidden units in MLP
        n_class = 21

    activation = relu
    n_stages = len(net.kernels)
    video_shapes = [in_shape[-3:]]

    def _shared(val, borrow=True):
        return shared(array(val, dtype=floatX), borrow=borrow)

    def ndtensor(n):
        return TensorType(floatX, (False,) * n)  # n-dimensional tensor

    # resulting video shapes after each conv+pool stage
    for i in xrange(n_stages):
        k, p, v = array(net.kernels[i]), array(net.pools[i]), array(video_shapes[i])
        video_shapes.append(tuple((v - k + 1) / p))
    n_in_MLP = net.maps[-1] * net.n_convnets * prod(video_shapes[-1])

    def conv_args(stage, i):
        """ConvLayer arguments, stage: stage index, i: convnet index"""
        args = {
            'batch_size': 1,
            'activation': activation,
            'rng': rng,
            'n_in_maps': net.maps[stage],
            'n_out_maps': net.maps[stage + 1],
            'kernel_shape': net.kernels[stage],
            'video_shape': video_shapes[stage],
            'fast_conv': use.fast_conv,
            'layer_name': "Conv" + str(stage),
            'W_scale': net.W_scale,
            'b_scale': net.b_scale,
            'stride': 1,
            'W': W[stage][i],
            'b': b[stage][i]
        }
        return args
    # print conv_args(0, 0)

    x = ndtensor(len(in_shape))(name='x')  # video input

    def var_norm(_x, imgs=True, axis=[-3, -2, -1]):
        # zero mean, unit variance (per image if imgs=True)
        if imgs:
            return (_x - T.mean(_x, axis=axis, keepdims=True)) \
                / T.maximum(1e-4, T.std(_x, axis=axis, keepdims=True))
        return (_x - T.mean(_x)) / T.maximum(1e-4, T.std(_x))

    def std_norm(_x, axis=[-3, -2, -1]):
        # unit variance only
        return _x / T.maximum(1e-4, T.std(_x, axis=axis, keepdims=True))

    out = [x[:, 0], x[:, 1]]
    for stage in xrange(n_stages):
        for i in xrange(len(out)):  # for each convnet of the stage
            if stage == 0:
                gray_norm = NormLayer(out[i][:, 0:1], method="lcn",
                                      use_divisor=False).output
                gray_norm = std_norm(gray_norm)
                depth_norm = var_norm(out[i][:, 1:])
                out[i] = T.concatenate([gray_norm, depth_norm], axis=1)
            else:
                out[i] = NormLayer(out[i], method="lcn", use_divisor=False).output
                out[i] = std_norm(out[i])
            out[i] = ConvLayer(out[i], **conv_args(stage, i)).output
            out[i] = PoolLayer(out[i], net.pools[stage],
                               method=net.pool_method).output

    out = [out[i].flatten(2) for i in range(len(out))]
    out = T.concatenate(out, axis=1)

    # hidden layer
    out = HiddenLayer(out, W=Wh, b=bh, n_in=n_in_MLP, n_out=net.hidden,
                      rng=rng, activation=activation).output

    logreg = LogRegr(out, W=Ws, b=bs, rng=rng, activation=activation,
                     n_in=net.hidden, n_out=net.n_class)
    pred = logreg.p_y_given_x

    x_ = _shared(empty(in_shape))

    print "compiling..."
    eval_model = function([], [pred], givens={x: x_}, on_unused_input='ignore')
    print "compiling done"

    return eval_model, x_
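# A quick standalone check (ours, not part of the original script) of what the
# kernel flip above does: cv2.flip(w, -1) reverses a 2D array along both axes,
# i.e. it equals w[::-1, ::-1]. Flipping every 2D kernel slice this way
# converts weights between cross-correlation and true-convolution conventions.
import numpy as np
import cv2

w = np.arange(9, dtype=np.float32).reshape(3, 3)
assert np.array_equal(cv2.flip(w, -1), w[::-1, ::-1])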
input_size = train_set_x.get_value(borrow=True).shape
output_size = train_set_y.shape

layer_0_input = x.reshape((batch_size, 1, input_size[1], input_size[2],
                           input_size[3]))
final_output = y.reshape((batch_size, 1, output_size[1], output_size[2],
                          output_size[3]))

# n_in_maps  = number of input maps  (how many maps are in the input)
# n_out_maps = number of output maps (how many maps are in the output)
# ConvLayer signature for reference:
# def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
#              batch_size, activation, layer_name="Conv", rng=RandomState(1234),
#              borrow=True, W=None, b=None):
layer0 = ConvLayer(layer_0_input, nkerns[0], nkerns[1],
                   (filterSize[0, 0], filterSize[0, 1], filterSize[0, 2]),
                   (input_size[1], input_size[2], input_size[3]),
                   batch_size, T.tanh)

######## Pooling
layer1 = PoolLayer(layer0.output, (2, 2, 2))

######## Depooling
# T.nnet.abstract_conv.bilinear_upsampling() may do the upsampling too; here we
# zero-interleave: every pooled value lands on the even indices of a tensor
# twice the size, and the remaining positions stay zero.
shp_1 = layer1.output.shape
layer2_output = T.zeros((shp_1[0], shp_1[1], shp_1[2] * 2, shp_1[3] * 2,
                         shp_1[4] * 2), dtype=layer1.output.dtype)
layer2_output = T.set_subtensor(layer2_output[:, :, ::2, ::2, ::2],
                                layer1.output)

######## Zero padding a tensor (Theano upsampling)
zero_padding_1 = T.zeros((batch_size, nkerns[1], input_size[1], input_size[2],
                          input_size[3]), dtype=theano.config.floatX)
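# A minimal, self-contained sketch (toy shapes, names are ours) of the
# zero-interleave depooling used above, on a 3D tensor:
import numpy
import theano
import theano.tensor as T

a = T.tensor3('a')
up = T.zeros((a.shape[0], a.shape[1] * 2, a.shape[2] * 2), dtype=a.dtype)
up = T.set_subtensor(up[:, ::2, ::2], a)
f = theano.function([a], up)

v = numpy.arange(4, dtype=theano.config.floatX).reshape(1, 2, 2)
print f(v)
# [[[ 0.  0.  1.  0.]
#   [ 0.  0.  0.  0.]
#   [ 2.  0.  3.  0.]
#   [ 0.  0.  0.  0.]]]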
                                    kernel_size=7,
                                    use_divisor=use.norm_div).output
            #     out[i] = std_norm(out[i], axis=[-3, -2, -1])
            else:
                # out[i] = mean_sub(out[i], axis=[-3, -2, -1])
                # out[i] = std_norm(out[i], axis=[-4, -3, -2, -1])
                # out[i] = NormLayer(out[i], method="lcn",
                #                    use_divisor=use.norm_div).output
                # out[i] = std_norm(out[i], axis=[-3, -2, -1])
                out[i] = var_norm(out[i], axis=[-3, -2, -1])

            # convolutions
            # out[i] *= net.scaler[stage][i]
            layers.append(ConvLayer(out[i], **conv_args(stage, i)))
            out[i] = layers[-1].output

            # # ccn
            # if use.norm:
            #     out[i] = NormLayer(out[i], method="ccn",
            #         shape=(batch.micro, net.maps[stage + 1]) + conv_shapes[stage]).output

            # pooling, subsampling
            # pools = net.pools[stage]
            # pools = (1, pools[1], pools[2])
            # out[i] = PoolLayer(out[i], pools, method=net.pool_method).output
            out[i] = PoolLayer(out[i], net.pools[stage],
                               method=net.pool_method).output

            # if inspect:
            #     insp.append(T.cast(out[i].nonzero()[0].size
            #                        / T.cast(out[i].size, "float32"), "float32"))
def build():
    use.load = True  # we load the CNN parameters here

    x = ndtensor(len(tr.in_shape))(name='x')  # video input
    x_ = _shared(empty(tr.in_shape))

    conv_shapes = []
    for i in xrange(net.n_stages):
        k, p, v = array(net.kernels[i]), array(net.pools[i]), \
            array(tr.video_shapes[i])
        conv_s = tuple(v - k + 1)
        conv_shapes.append(conv_s)
        tr.video_shapes.append(tuple((v - k + 1) / p))
        print "stage", i
        print " conv", tr.video_shapes[i], "->", conv_s
        print " pool", conv_s, "->", tr.video_shapes[i + 1], "x", net.maps[i + 1]

    # number of inputs for MLP =
    #   (# maps last stage) * (# convnets) * (resulting video shape) + trajectory size
    n_in_MLP = net.maps[-1] * net.n_convnets * prod(tr.video_shapes[-1])
    print 'MLP:', n_in_MLP, "->", net.hidden, "->", net.n_class, ""

    if use.depth:
        if net.n_convnets == 2:
            out = [x[:, :, 0, :, :, :], x[:, :, 1, :, :, :]]  # 2 nets: body and hand

    # build 3D ConvNet
    layers = []  # all architecture layers
    insp = []
    for stage in xrange(net.n_stages):
        for i in xrange(len(out)):  # for body and hand
            # normalization
            if use.norm and stage == 0:
                gray_norm = NormLayer(out[i][:, 0:1], method="lcn",
                                      use_divisor=use.norm_div).output
                gray_norm = std_norm(gray_norm, axis=[-3, -2, -1])
                depth_norm = var_norm(out[i][:, 1:])
                out[i] = T.concatenate([gray_norm, depth_norm], axis=1)
            elif use.norm:
                out[i] = NormLayer(out[i], method="lcn",
                                   use_divisor=use.norm_div).output
                out[i] = std_norm(out[i], axis=[-3, -2, -1])

            # convolutions
            out[i] *= net.scaler[stage][i]
            layers.append(ConvLayer(out[i],
                                    **conv_args(stage, i, batch, net, use,
                                                tr.rng, tr.video_shapes)))
            out[i] = layers[-1].output
            out[i] = PoolLayer(out[i], net.pools[stage],
                               method=net.pool_method).output

            if tr.inspect:
                insp.append(T.mean(out[i]))

    # flatten all convnets outputs
    for i in xrange(len(out)):
        out[i] = std_norm(out[i], axis=[-3, -2, -1])
    out = [out[i].flatten(2) for i in range(len(out))]
    vid_ = T.concatenate(out, axis=1)

    # dropout
    if use.drop:
        drop.p_vid = shared(float32(drop.p_vid_val))
        drop.p_hidden = shared(float32(drop.p_hidden_val))
        drop.p_vid.set_value(float32(0.))     # don't use dropout when testing
        drop.p_hidden.set_value(float32(0.))  # don't use dropout when testing
        vid_ = DropoutLayer(vid_, rng=tr.rng, p=drop.p_vid).output

    # MLP
    # --------------------------------------------------------------------------
    # fusion
    if net.fusion == "early":
        out = vid_

    # hidden layer
    Wh, bh = load_params(use)  # This is test, wudi added this!
    layers.append(HiddenLayer(out, W=Wh, b=bh, n_in=n_in_MLP,
                              n_out=net.hidden, rng=tr.rng,
                              W_scale=net.W_scale[-2],
                              b_scale=net.b_scale[-2], activation=relu))
    out = layers[-1].output

    if tr.inspect:
        insp = T.stack(insp[0], insp[1], insp[2], insp[3], insp[4], insp[5],
                       T.mean(out))
    else:
        insp = T.stack(0, 0)

    if use.drop:
        out = DropoutLayer(out, rng=tr.rng, p=drop.p_hidden).output
    # maxout

    # softmax layer
    Ws, bs = load_params(use)  # This is test, wudi added this!
    layers.append(LogRegr(out, W=Ws, b=bs, rng=tr.rng, activation=lin,
                          n_in=net.hidden, W_scale=net.W_scale[-1],
                          b_scale=net.b_scale[-1], n_out=net.n_class))
    """
    layers[-1] : softmax layer
    layers[-2] : hidden layer (video if late fusion)
    layers[-3] : hidden layer (trajectory, only if late fusion)
    """
    # prediction
    y_pred = layers[-1].y_pred
    p_y_given_x = layers[-1].p_y_given_x

    ############################################################################
    print "\n%s\n\tcompiling\n%s" % (('-' * 30,) * 2)
    ############################################################################

    # compile functions
    # --------------------------------------------------------------------------
    print 'compiling test_model'
    eval_model = function([], [y_pred, p_y_given_x], givens={x: x_},
                          on_unused_input='ignore')

    return eval_model, x_
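# Hypothetical driver (the file name and array are ours) showing how the
# compiled function is fed: the clip is pushed into the shared buffer x_, and
# eval_model() reads it through givens={x: x_}, so it takes no arguments.
import numpy as np
from theano import config

eval_model, x_ = build()

video = np.load("sample_video.npy").astype(config.floatX)  # must match tr.in_shape
x_.set_value(video, borrow=True)

y_pred, p_y_given_x = eval_model()
print "predicted class:", y_pred[0]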
def __init__(self, x, use, lr, batch, net, reg, drop, mom, tr, res_dir,
             load_path=""):
    self.out = []
    self.layers = []
    self.insp_mean = []
    self.insp_std = []

    # log all hyper-parameter classes to the result directory
    for c in (use, lr, batch, net, reg, drop, mom, tr):
        write(c.__name__ + ":", res_dir)
        _s = c.__dict__
        del _s['__module__'], _s['__doc__']
        for key in _s.keys():
            val = str(_s[key])
            if val.startswith("<static"):
                val = str(_s[key].__func__.__name__)
            if val.startswith("<Cuda"):
                continue
            if val.startswith("<Tensor"):
                continue
            write("  " + key + ": " + val, res_dir)

    ############################################################################
    print "\n%s\n\tbuilding\n%s" % (('-' * 30,) * 2)
    ############################################################################

    # ConvNet
    # --------------------------------------------------------------------------
    # calculate resulting video shapes for all stages
    print net.n_stages
    conv_shapes = []
    for i in xrange(net.n_stages):
        k, p, v = array(net.kernels[i]), array(net.pools[i]), \
            array(tr.video_shapes[i])
        conv_s = tuple(v - k + 1)
        conv_shapes.append(conv_s)
        tr.video_shapes.append(tuple((v - k + 1) / p))
        print "stage", i
        if use.depth and i == 0:
            print " conv", tr.video_shapes[i], "x 2 ->", conv_s  # for body and hand
        else:
            print " conv", tr.video_shapes[i], "->", conv_s
        print " pool", conv_s, "->", tr.video_shapes[i + 1], "x", net.maps[i + 1]

    # number of inputs for MLP =
    #   (# maps last stage) * (# convnets) * (resulting video shape) + trajectory size
    n_in_MLP = net.maps[-1] * net.n_convnets * prod(tr.video_shapes[-1])
    print 'debug1'

    if use.depth:
        if net.n_convnets == 2:
            out = [x[:, :, 0, :, :, :], x[:, :, 1, :, :, :]]  # 2 nets: body and hand

    # build 3D ConvNet
    for stage in xrange(net.n_stages):
        for i in xrange(len(out)):  # for body and hand
            # normalization
            if use.norm and stage == 0:
                gray_norm = NormLayer(out[i][:, 0:1], method="lcn",
                                      use_divisor=use.norm_div).output
                gray_norm = std_norm(gray_norm, axis=[-3, -2, -1])
                depth_norm = var_norm(out[i][:, 1:])
                out[i] = T.concatenate([gray_norm, depth_norm], axis=1)
            elif use.norm:
                out[i] = NormLayer(out[i], method="lcn",
                                   use_divisor=use.norm_div).output
                out[i] = std_norm(out[i], axis=[-3, -2, -1])

            # convolutions
            out[i] *= net.scaler[stage][i]
            print 'debug2'
            self.layers.append(ConvLayer(out[i],
                                         **conv_args(stage, i, batch, net, use,
                                                     tr.rng, tr.video_shapes,
                                                     load_path)))
            out[i] = self.layers[-1].output
            out[i] = PoolLayer(out[i], net.pools[stage],
                               method=net.pool_method).output

            if tr.inspect:
                self.insp_mean.append(T.mean(out[i]))
                self.insp_std.append(T.std(out[i]))
    print 'debug2'

    # flatten all convnets outputs
    for i in xrange(len(out)):
        out[i] = std_norm(out[i], axis=[-3, -2, -1])
    out = [out[i].flatten(2) for i in range(len(out))]
    vid_ = T.concatenate(out, axis=1)
    print 'debug3'

    # dropout
    if use.drop:
        drop.p_vid = shared(float32(drop.p_vid_val))
        drop.p_hidden = shared(float32(drop.p_hidden_val))
        vid_ = DropoutLayer(vid_, rng=tr.rng, p=drop.p_vid).output

    # maxout
    if use.maxout:
        vid_ = maxout(vid_, (batch.micro, n_in_MLP))
        net.activation = lin
        n_in_MLP /= 2
        # net.hidden *= 2

    # MLP
    # --------------------------------------------------------------------------
    # fusion
    if net.fusion == "early":
        out = vid_

    # hidden layer
    if use.load:
        W, b = load_params(use, load_path)
        self.layers.append(HiddenLayer(out, n_in=n_in_MLP,
                                       n_out=net.hidden_vid, rng=tr.rng,
                                       W=W, b=b,
                                       W_scale=net.W_scale[-2],
                                       b_scale=net.b_scale[-2],
                                       activation=net.activation))
    else:
        self.layers.append(HiddenLayer(out, n_in=n_in_MLP,
                                       n_out=net.hidden_vid, rng=tr.rng,
                                       W_scale=net.W_scale[-2],
                                       b_scale=net.b_scale[-2],
                                       activation=net.activation))
    out = self.layers[-1].output

    # if tr.inspect:
    #     self.insp_mean = T.stack(self.insp_mean)
    #     self.insp_std = T.stack(self.insp_std)
    #     self.insp = T.stack(self.insp[0], self.insp[1], self.insp[2],
    #                         self.insp[3], self.insp[4], self.insp[5],
    #                         T.mean(out))
    # else:
    #     self.insp = T.stack(0, 0)
    # out = normalize(out)

    if use.drop:
        out = DropoutLayer(out, rng=tr.rng, p=drop.p_hidden).output

    # maxout
    if use.maxout:
        out = maxout(out, (batch.micro, net.hidden))
        net.hidden /= 2
    print 'debug3'

    # now assemble all the output
    self.out = out
    self.n_in_MLP = n_in_MLP
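# The maxout() helper is not shown in this excerpt. A plausible pairwise
# implementation, consistent with n_in_MLP and net.hidden being halved right
# after each call (our sketch, not necessarily the repo's version):
import theano.tensor as T

def maxout(z, shape):
    """Pairwise maxout: view the (batch, n) activations as (batch, n/2, 2)
    and keep the max of each adjacent pair, halving the feature dimension."""
    batch, n = shape
    return T.max(z.reshape((batch, n // 2, 2)), axis=2)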
def __init__(self, numpy_rng, batch_size, n_outs, conv_layer_configs,
             hidden_layer_configs, conv_activation=T.nnet.sigmoid,
             hidden_activation=T.nnet.sigmoid):
    self.layers = []
    self.params = []
    self.delta_params = []
    self.n_layers = 0
    self.type = None
    self.mlp_layer_start = 0

    # placeholders
    self.output = None
    self.features = None
    self.features_dim = None
    self.errors = None
    self.finetune_cost = None

    # allocate symbolic variables for the data
    self.x = tensor5('x')
    self.y = T.ivector('y')

    self.conv_layer_num = len(conv_layer_configs)  # number of convolution layers
    self.hidden_layer_num = len(hidden_layer_configs['hidden_layers'])
    self.mlp_layer_start = self.hidden_layer_num
    self.mlp_layers = []
    self.conv_layers = []
    self.pool_layers = []

    for i in xrange(self.conv_layer_num):
        # construct the convolution layer
        if i == 0:  # input layer
            input = self.x
            is_input_layer = True
        else:
            input = self.layers[-1].output  # output of previous layer
            is_input_layer = False
        config = conv_layer_configs[i]
        conv_layer = ConvLayer(input=input,
                               n_in_maps=config['n_in_maps'],
                               n_out_maps=config['n_out_maps'],
                               kernel_shape=config['kernel_shape'],
                               video_shape=config['video_shape'],
                               batch_size=batch_size,
                               numpy_rng=numpy_rng,
                               activation=conv_activation)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)

        pool_layer = PoolLayer(conv_layer.output, pool_shape=config['poolsize'])
        self.layers.append(pool_layer)
        self.pool_layers.append(pool_layer)

        if config['update'] == True:
            # only some convolution layers are considered for updating
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)

    hidden_layers = hidden_layer_configs['hidden_layers']
    # flattened dimension of the last convolution output
    self.conv_output_dim = config['n_out_maps'] * numpy.prod(config['output_shape'])
    self.features = self.conv_layers[-1].output.flatten(2)
    self.features_dim = self.conv_output_dim

    for i in xrange(self.hidden_layer_num):
        # construct the hidden layer
        if i == 0:  # first sigmoidal layer takes the flattened conv features
            input_size = self.conv_output_dim
            layer_input = self.features
        else:
            input_size = hidden_layers[i - 1]  # hidden units in previous layer
            layer_input = self.layers[-1].output
        sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                    n_in=input_size, n_out=hidden_layers[i],
                                    activation=hidden_activation)
        self.layers.append(sigmoid_layer)
        self.mlp_layers.append(sigmoid_layer)
        if config['update'] == True:
            # note: this still reads the last conv layer's config
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

    self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                       n_in=hidden_layers[-1], n_out=n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
    self.output = self.logLayer.prediction()
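# A hypothetical construction sketch: the class name CNN and all concrete
# numbers below are ours; the config keys mirror the ones read in __init__
# (n_in_maps, n_out_maps, kernel_shape, video_shape, poolsize, output_shape,
# update).
import numpy
import theano.tensor as T

conv_layer_configs = [
    {'n_in_maps': 1, 'n_out_maps': 10, 'kernel_shape': (5, 5, 5),
     'video_shape': (51, 61, 23), 'poolsize': (2, 2, 2),
     'output_shape': (23, 28, 9), 'update': True},
]
hidden_layer_configs = {'hidden_layers': [500, 500]}

cnn = CNN(numpy_rng=numpy.random.RandomState(1234), batch_size=200, n_outs=2,
          conv_layer_configs=conv_layer_configs,
          hidden_layer_configs=hidden_layer_configs,
          conv_activation=T.nnet.sigmoid, hidden_activation=T.nnet.sigmoid)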
index = T.lscalar()  # index to a [mini]batch

# input = (nImages, nChannel(nFeatureMaps), nDim1, nDim2, nDim3)
# layer1 (500, 5, 47, 56, 22)
# layer2 (500, 5, 10, 12, 5)
# layer3 (500, 3, 9, 11, 4)
# layer4 (500, 3, 5, 6, 2)
fMRI_shape = (51, 61, 23)
batch_size = 200

# 1st: convolution layer
layer1_input = x
layer1 = ConvLayer(layer1_input, 1, 10, (5, 5, 5), fMRI_shape,
                   batch_size, softplus)
# print(layer1.output.eval({x: xTrain[:50]}).shape)
# layer1.output.eval({x: xTrain[1]}).shape[3:]

# 2nd: pool layer
poolShape = (2, 2, 2)
layer2 = PoolLayer(layer1.output, poolShape)
# print(layer2.output.eval({x: xTrain[:50]}).shape)

# 3rd: convolution layer
layer3 = ConvLayer(layer2.output, 10, 10, (5, 5, 5), (24, 29, 10),
                   batch_size, softplus)
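# The stage shapes follow from valid convolution (out = in - kernel + 1) and
# border-keeping pooling (ceil division), which is what the (24, 29, 10) fed
# to layer3 above implies. A small helper (names are ours) to reproduce the
# arithmetic:
from math import ceil

def stage_shapes(video_shape, kernel_shape, pool_shape):
    conv = tuple(v - k + 1 for v, k in zip(video_shape, kernel_shape))
    pooled = tuple(int(ceil(c / float(p))) for c, p in zip(conv, pool_shape))
    return conv, pooled

print stage_shapes((51, 61, 23), (5, 5, 5), (2, 2, 2))
# -> ((47, 57, 19), (24, 29, 10)), the video_shape passed to layer3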
import scipy.io as sio  # added: needed for loadmat below
import theano
import theano.tensor as T

from convnet3d import ConvLayer  # assumed source of ConvLayer in this repo
from mlp import LogRegr, HiddenLayer, DropoutLayer
from activations import relu, tanh, sigmoid, softplus

dataReadyForCNN = sio.loadmat("DataReadyForCNN.mat")
xTrain = dataReadyForCNN["xTrain"].astype('float64')
# xTrain = np.random.rand(10, 1, 5, 6, 2).astype('float64')
# xTrain.dtype

dtensor5 = T.TensorType('float64', (False,) * 5)
x = dtensor5('x')  # the input data
yCond = T.ivector()

# input = (nImages, nChannel(nFeatureMaps), nDim1, nDim2, nDim3)
kernel_shape = (5, 6, 2)
fMRI_shape = (51, 61, 23)
n_in_maps = 1   # channels
n_out_maps = 5  # number of feature maps, aka the depth of the neurons
num_pic = 2592

layer1_input = x
print layer1_input.eval({x: xTrain}).shape

convLayer1 = ConvLayer(layer1_input, n_in_maps, n_out_maps, kernel_shape,
                       fMRI_shape, num_pic, tanh)

f = theano.function([x], 2 * x)
print convLayer1.output.eval({x: xTrain}).shape