# Shared imports assumed by the snippets below (collected from several
# BinaryNet/Lasagne training scripts). binary_net (and binary_net_ex), SFEW2,
# CifarReader and MnistReader are project-local modules.
from __future__ import print_function

import math

import numpy as np
import theano
import theano.tensor as T
import lasagne
import lasagne.layers as ll

import binary_net


def genLfc(input, num_outputs, learning_parameters):
    # Generate the lfc network topology matching the overlay for the Pynq board.
    # WARNING: if you change this file, the resultant weights will likely not
    # fit on the Pynq overlay.
    if num_outputs < 1 or num_outputs > 64:
        raise ValueError("num_outputs should be in the range of 1 to 64.")
    stochastic = False
    binary = True
    H = 1
    num_units = 1024
    n_hidden_layers = 3
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha
    dropout_in = learning_parameters.dropout_in
    dropout_hidden = learning_parameters.dropout_hidden

    mlp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)
    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
    for k in range(n_hidden_layers):
        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)
        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)
    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=num_outputs)
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
    return mlp
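# --- Usage sketch (illustrative, not from the original sources) ---
# A minimal way to instantiate genLfc; the LearningParameters class below is a
# hypothetical stand-in that only mirrors the attributes genLfc reads.

class LearningParameters(object):
    W_LR_scale = "Glorot"   # per-layer LR scaling, as in the training scripts
    epsilon = 1e-4          # BN epsilon
    alpha = .1              # BN moving-average factor
    dropout_in = .2
    dropout_hidden = .5

input_var = T.tensor4('inputs')  # matches the (None, 1, 28, 28) input layer
lfc_net = genLfc(input_var, num_outputs=10,
                 learning_parameters=LearningParameters())
lfc_out = lasagne.layers.get_output(lfc_net, deterministic=True)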
def genCnv(input, num_outputs, learning_parameters):
    # Generate the cnv network topology matching the overlay for the Pynq board.
    # WARNING: if you change this file, the resultant weights will likely not
    # fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # 64C3-64C3-P2
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(3, 3), pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(3, 3), pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 128C3-128C3-P2
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=128,
                                 filter_size=(3, 3), pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=128,
                                 filter_size=(3, 3), pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 256C3-256C3
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=256,
                                 filter_size=(3, 3), pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=256,
                                 filter_size=(3, 3), pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    # print(cnn.output_shape)

    # 512FP-512FP-(num_outputs)FP
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
def genCnv(input, num_outputs, learning_parameters):
    # Generate the cnv network topology matching the overlay for the Pynq board.
    # WARNING: if you change this file, the resultant weights will likely not
    # fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    # Encoder
    cnn = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # 1st layer
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(4, 4), pad='valid',
                                 stride=(2, 2), flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)
    print(cnn.output_shape)

    # 2nd layer
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(4, 4), pad='valid',
                                 stride=(2, 2), flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)
    print(cnn.output_shape)

    # 3rd layer
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(4, 4), pad='valid',
                                 stride=(1, 1), flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)
    print(cnn.output_shape)

    cnn = lasagne.layers.flatten(cnn)
    print(cnn.output_shape)

    # FC layer
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=256)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # Decoder
    cnn = lasagne.layers.ReshapeLayer(cnn, shape=(-1, 64, 2, 2))
    print(cnn.output_shape)

    # 1st deconv layer
    cnn = binary_net.Deconv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H,
        W_LR_scale=W_LR_scale, num_filters=64, filter_size=(4, 4),
        crop='valid', stride=(2, 2), flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 2nd deconv layer
    cnn = binary_net.Deconv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H,
        W_LR_scale=W_LR_scale, num_filters=64, filter_size=(4, 4),
        crop='valid', stride=(2, 2), flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 3rd deconv layer
    cnn = binary_net.Deconv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H,
        W_LR_scale=W_LR_scale, num_filters=1, filter_size=(4, 4),
        crop='valid', stride=(2, 2), flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    cnn = lasagne.layers.flatten(cnn)
    print(cnn.output_shape)

    # Last FC layer
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    print(cnn.output_shape)
    return cnn
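# --- Shape check (illustrative) ---
# The (-1, 64, 2, 2) reshape above follows from Lasagne's 'valid' convolution
# arithmetic, out = (in - filter) // stride + 1, applied to the 28x28 input:

def conv_out(size, filt, stride):
    # output size of a 'valid' convolution in Lasagne
    return (size - filt) // stride + 1

s = conv_out(28, 4, 2)  # 13 after the 1st encoder conv
s = conv_out(s, 4, 2)   # 5 after the 2nd
s = conv_out(s, 4, 1)   # 2 after the 3rd: 64 filters x 2 x 2 = 256 features,
                        # matching the 256-unit FC layer and the reshape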
def trial(N_HIDDEN_LAYERS, NUM_UNITS, OUTPUT_TYPE, MAIN_LOSS_TYPE, LAMBDA,
          FOLD, FINTUNE_SNAPSHOT, FINTUNE_SCALE):
    # BN parameters
    batch_size = 97
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    # NUM_UNITS = 25
    print("NUM_UNITS = " + str(NUM_UNITS))
    # N_HIDDEN_LAYERS = 1
    print("N_HIDDEN_LAYERS = " + str(N_HIDDEN_LAYERS))

    # Training parameters
    num_epochs = 1000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H, +H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we use the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    # LR_start = .003
    LR_start = 0.000003
    print("LR_start = " + str(LR_start))
    # LR_fin = 0.0000003
    LR_fin = LR_start
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # replace the dataset
    print('Loading SFEW2 dataset...')
    [train_x, train_y, val_x, val_y] = SFEW2.load_train_val()
    print(train_x.shape)
    print(train_y.shape)
    print(val_x.shape)
    print(val_y.shape)
    print('last training minibatch size: ' +
          str(train_x.shape[0] - train_x.shape[0] // batch_size * batch_size) +
          ' / ' + str(batch_size))
    print('last training minibatch size should not be too small (except 0); '
          'try decreasing the batch_size rather than adding more minibatches.')
    print('minibatch size: ' + str(batch_size))
    print('suggested minibatch size: ' + str(
        math.ceil(float(train_x.shape[0]) /
                  math.ceil(float(train_x.shape[0]) / 100))))

    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.matrix('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, train_x.shape[1]),
                                    input_var=input)
    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
    for k in range(N_HIDDEN_LAYERS):
        # pretrain-finetune
        if k == 0:
            # fixed num_units
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=1500)
            # scale down the LR of the transferred dense layer
            print('scale down the LR of the transferred dense layer from',
                  str(mlp.W_LR_scale))
            mlp.W_LR_scale *= np.float32(FINTUNE_SCALE)
            print('to', str(mlp.W_LR_scale))
        else:
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS)
        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

        # pretrain-finetune: only restore the first layer group
        if k == 0 and FINTUNE_SNAPSHOT != 0:
            print('Load ./W-%d.npz' % FINTUNE_SNAPSHOT)
            with np.load('./W-%d.npz' % FINTUNE_SNAPSHOT) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            param_values = param_values[0:6]
            lasagne.layers.set_all_param_values(mlp, param_values)

    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=7)
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    # network output: BN or SGN
    if OUTPUT_TYPE == 'C':
        pass
    elif OUTPUT_TYPE == 'D':
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    else:
        assert False

    # loss weight nodes
    SPARSITY = 0.9
    SPARSITY_MAP = (np.float32(train_x == -1)).mean(0)
    LOSS_WEIGHT_1 = 1. + input * (2. * SPARSITY - 1)
    LOSS_WEIGHT_1 /= 4 * SPARSITY * (1 - SPARSITY)  # fixed 1->-1:5, -1->1:5/9 weights
    LOSS_WEIGHT_2 = 1. + input * (2. * SPARSITY_MAP - 1)
    LOSS_WEIGHT_2 /= 4 * SPARSITY_MAP * (1 - SPARSITY_MAP)  # weights considering the element's prior probability

    # train loss nodes
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    if MAIN_LOSS_TYPE == 'SH':
        train_loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        train_loss = T.mean(
            T.sqr(T.maximum(0., 1. - target * train_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        train_loss = T.mean(
            T.sqr(T.maximum(0., 1. - target * train_output)) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        train_loss = T.mean(T.maximum(0., 1. - target * train_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        train_loss = T.mean(
            T.maximum(0., 1. - target * train_output) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        train_loss = T.mean(
            T.maximum(0., 1. - target * train_output) * LOSS_WEIGHT_2)
    else:
        assert False

    # + sparse penalty
    if LAMBDA > 0:
        train_pixel_wise_density = T.mean(
            T.reshape((train_output + 1.) / 2.,
                      [train_output.shape[0], train_output.shape[1] // 10, 10]),
            axis=2)
        train_penalty = LAMBDA * T.mean(
            T.sqr(train_pixel_wise_density - (1. - SPARSITY)))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty

    # acc
    train_acc = T.mean(T.eq(T.argmax(train_output, axis=1),
                            T.argmax(target, axis=1)),
                       dtype=theano.config.floatX)

    # grad nodes
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(train_loss, mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)
        # updates for the other parameters
        params = lasagne.layers.get_all_params(mlp, trainable=True,
                                               binary=False)
        updates.update(lasagne.updates.adam(loss_or_grads=train_loss,
                                            params=params, learning_rate=LR))
    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=train_loss, params=params,
                                       learning_rate=LR)

    # val loss nodes (must be created after the grad nodes)
    val_output = lasagne.layers.get_output(mlp, deterministic=True)
    if MAIN_LOSS_TYPE == 'SH':
        val_loss = T.mean(T.sqr(T.maximum(0., 1. - target * val_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        val_loss = T.mean(
            T.sqr(T.maximum(0., 1. - target * val_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        val_loss = T.mean(
            T.sqr(T.maximum(0., 1. - target * val_output)) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        val_loss = T.mean(T.maximum(0., 1. - target * val_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        val_loss = T.mean(
            T.maximum(0., 1. - target * val_output) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        val_loss = T.mean(
            T.maximum(0., 1. - target * val_output) * LOSS_WEIGHT_2)

    # + sparse penalty
    if LAMBDA > 0:
        val_pixel_wise_density = T.mean(
            T.reshape((val_output + 1.) / 2.,
                      [val_output.shape[0], val_output.shape[1] // 10, 10]),
            axis=2)
        val_penalty = LAMBDA * T.mean(
            T.sqr(val_pixel_wise_density - (1. - SPARSITY)))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty

    # acc
    val_acc = T.mean(T.eq(T.argmax(val_output, axis=1),
                          T.argmax(target, axis=1)),
                     dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [input, target, LR],
        [train_loss, train_penalty, train_acc, train_output],
        updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target],
                             [val_loss, val_penalty, val_acc, val_output])

    print('Training...')
    train_x = binary_net.MoveParameter(train_x)
    binary_net.train(train_fn, val_fn, batch_size, LR_start, LR_decay,
                     num_epochs, train_x, train_y, val_x, val_y)
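# --- Invocation sketch (hypothetical values, not from the original runs) ---
# OUTPUT_TYPE is 'C' (keep the BN output) or 'D' (add the binary-tanh output);
# MAIN_LOSS_TYPE is one of 'SH', 'W1SH', 'W2SH', 'H', 'W1H', 'W2H';
# FINTUNE_SNAPSHOT selects ./W-<id>.npz to restore (0 skips loading).
trial(N_HIDDEN_LAYERS=3, NUM_UNITS=1500, OUTPUT_TYPE='D',
      MAIN_LOSS_TYPE='SH', LAMBDA=0, FOLD=0,
      FINTUNE_SNAPSHOT=0,
      FINTUNE_SCALE=0.1)  # scales down the LR of the transferred first layer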
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    num_epochs = 150
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H, +H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we use the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    train_set_size = 40000
    shuffle_parts = 1
    print("shuffle_parts = " + str(shuffle_parts))
    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    print('Loading CIFAR-10 dataset...')
    cifar = CifarReader("./data/cifar-10-batches-py/")
    train_X, train_y = cifar.get_train_data(n_samples=train_set_size,
                                            noise=noise, alpha=nalpha)
    valid_X, valid_y = cifar.get_validation_data()
    test_X, test_y = cifar.get_test_data()
    print("train_set_size = " + str(train_y.shape[0]))
    print("validation_set_size = " + str(valid_y.shape[0]))
    print("test_set_size = " + str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = " + str(train_y.shape[0]), file=l)
        print("validation_set_size = " + str(valid_y.shape[0]), file=l)
        print("test_set_size = " + str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1, +1]
    # print("Inputs in the range [-1,+1]")
    train_X = np.reshape(np.subtract(np.multiply(2. / 255., train_X), 1.),
                         (-1, 3, 32, 32))
    valid_X = np.reshape(np.subtract(np.multiply(2. / 255., valid_X), 1.),
                         (-1, 3, 32, 32))
    test_X = np.reshape(np.subtract(np.multiply(2. / 255., test_X), 1.),
                        (-1, 3, 32, 32))

    # flatten targets
    train_y = np.hstack(train_y)
    valid_y = np.hstack(valid_y)
    test_y = np.hstack(test_y)

    # one-hot the targets
    train_y = np.float32(np.eye(10)[train_y])
    valid_y = np.float32(np.eye(10)[valid_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    valid_y = 2 * valid_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the CNN...')
    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)
    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_in)

    # 32C3-64C3-P2
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=32,
                                 filter_size=(3, 3), pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(3, 3), pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)

    # 128FP-10FP
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=128)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=10)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.softmax)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if binary:
        # W updates
        W = lasagne.layers.get_all_params(cnn, binary=True)
        W_grads = binary_net.compute_grads(loss, cnn)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, cnn)
        # updates for the other parameters
        params = lasagne.layers.get_all_params(cnn, trainable=True,
                                               binary=False)
        updates.update(lasagne.updates.adam(loss_or_grads=loss, params=params,
                                            learning_rate=LR))
    else:
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params,
                                       learning_rate=LR)

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    binary_net.train(train_fn, val_fn, cnn, batch_size, LR_start, LR_decay,
                     num_epochs, train_X, train_y, valid_X, valid_y,
                     test_X, test_y, shuffle_parts=shuffle_parts,
                     result_path=result_path)
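# --- Invocation sketch (illustrative; the noise/nalpha semantics come from
# the project-local CifarReader, which is an assumption here) ---
run(binary=True, noise=None, nalpha=0, result_path="./results/cifar10/")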
mlp = lasagne.layers.InputLayer(
    # shape=(None, 1, 28, 28),
    shape=(None, n_inputs_per_sample),
    input_var=input)
mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
for k in range(n_hidden_layers):
    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=n_neurons_per_hiddenlayer)
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
    mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)
# Prepare Theano variables for inputs and targets
input = T.tensor4('inputs')
target = T.matrix('targets')
LR = T.scalar('LR', dtype=theano.config.floatX)

mlp = lasagne.layers.InputLayer(shape=(None, 1, input_size, input_size),
                                input_var=input)
mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
for k in range(n_hidden_layers):
    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=num_units)
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
    mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)
mlp = binary_net.DenseLayer(mlp, binary=binary, stochastic=stochastic, H=H,
                            W_LR_scale=W_LR_scale,
def makeGenerator_encoder(self, layer_Z, layer_Y, layer_YGH):
    # (G2) concatenate noise and condition, Conditional-GAN style
    gen = ll.ConcatLayer([layer_Z, layer_YGH], axis=1)

    # (G3)
    # (G3-1) fully connected (w)
    out_G3_1 = None
    for k in range(self.NUM_HIDDEN_LAYERS):
        if self.IS_USE_B_FC:
            gen = binary_net.DenseLayer(
                gen,
                binary=True,
                stochastic=IS_STOCHASTIC,
                H=H,
                W_LR_scale=W_LR_scale,
                b=None,  # no bias
                nonlinearity=None,
                num_units=self.NUM_FC_UNITS)
        else:
            gen = ll.DenseLayer(gen, num_units=self.NUM_FC_UNITS,
                                nonlinearity=None)
        print('G3-1:gen.shape', gen.input_shape, gen.output_shape)
        if out_G3_1 is None:
            out_G3_1 = ll.get_output(gen)

        # (G3-2) batch norm
        if self.IS_USE_B_BNA_1:
            # this layer includes the activation
            gen = binary_net_ex.BatchNormLayer(gen, epsilon=EPSILON,
                                               alpha=ALPHA, H=1)
            print('G3-2:gen.shape', gen.input_shape, gen.output_shape)
        else:
            gen = ll.BatchNormLayer(gen, epsilon=EPSILON, alpha=ALPHA)
            print('G3-2:gen.shape', gen.input_shape, gen.output_shape)

        # (G3-3) activation: binary tanh
        gen = ll.NonlinearityLayer(gen,
                                   nonlinearity=binary_net.binary_tanh_unit)
        print('G3-3:gen.shape', gen.input_shape, gen.output_shape)
        out_G3_2 = ll.get_output(gen)
    # end for

    # (G4) concat with the label
    gen = ll.ConcatLayer([gen, layer_Y], axis=1)

    # (G5)
    # (G5-1) fully connected (w2)
    if self.IS_USE_B_FC:
        gen = binary_net.DenseLayer(
            gen,
            binary=True,
            stochastic=IS_STOCHASTIC,
            H=H,
            W_LR_scale=W_LR_scale,
            b=None,  # no bias
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=(self.NUM_GEN_FILTERS * 7 * 7))
    else:
        gen = ll.DenseLayer(gen, num_units=(self.NUM_GEN_FILTERS * 7 * 7),
                            nonlinearity=None)
    print('G5-1:gen.shape', gen.input_shape, gen.output_shape)  # (128, 3136)

    # (G5-2) batch norm
    if self.IS_USE_B_BNA_1:
        gen = binary_net_ex.BatchNormLayer(gen, epsilon=EPSILON, alpha=ALPHA,
                                           H=1)
    else:
        gen = ll.BatchNormLayer(gen, epsilon=EPSILON, alpha=ALPHA)

    # (G5-3) activation: binary tanh
    gen = ll.NonlinearityLayer(gen, nonlinearity=binary_net.binary_tanh_unit)
    print('G5-3:gen.shape', gen.input_shape, gen.output_shape)

    # (G6) reshape; shape element [0] means "use the size of the 0-th input
    # dimension"
    gen = ll.ReshapeLayer(gen,
                          shape=([0], self.NUM_GEN_FILTERS, 7, 7))  # TODO: constant var.
    return gen, out_G3_1, out_G3_2
def build_net(input, binary, stochastic=False, H=1.0, W_LR_scale="Glorot",
              activation=binary_net.binary_tanh_unit, epsilon=1e-4, alpha=.1,
              patch_size=32, channels=3, num_filters=256):
    cnn = lasagne.layers.InputLayer(
        shape=(None, channels, patch_size, patch_size), input_var=input)

    # eight identical binary 2x2 'valid' conv blocks (unrolled as #1..#8 in the
    # original source), with 2x2 max-pooling after blocks 4 and 6
    for i in range(1, 9):
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters,
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        print(cnn.output_shape)
        if i in (4, 6):
            cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
            print(cnn.output_shape)

    cnn = binary_net.DenseLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=2)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
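# --- Usage sketch (illustrative) ---
# build_net defaults to 32x32 3-channel patches with 256 filters per block;
# the two max-poolings bring the 2x2 'valid' convs down to a 2-unit output head.
patch_var = T.tensor4('patches')
patch_cnn = build_net(patch_var, binary=True)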
# mlp = lasagne.layers.InputLayer(
#     shape=(None, 1, 10, 10),
#     # shape=(None, 1, 28, 28),
#     input_var=input)
# mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
l1_in = lasagne.layers.InputLayer(shape=(None, 1, 10, 10), input_var=input)

# Jintao: tear up the layers to get binarized weights/outputs for testing.
l1_dl = binary_net.DenseLayer(l1_in, binary=binary, stochastic=stochastic,
                              H=H, W_LR_scale=W_LR_scale,
                              nonlinearity=lasagne.nonlinearities.identity,
                              num_units=num_units)
l1_bn = lasagne.layers.BatchNormLayer(l1_dl, epsilon=epsilon, alpha=alpha)
l1_nl = lasagne.layers.NonlinearityLayer(l1_bn, nonlinearity=activation)
l2_dl = binary_net.DenseLayer(l1_nl, binary=binary, stochastic=stochastic,
                              H=H,
def trial(N_HIDDEN_LAYERS, NUM_UNITS, OUTPUT_TYPE, MAIN_LOSS_TYPE, LAMBDA,
          FOLD):
    # BN parameters
    batch_size = 100
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    # NUM_UNITS = 25
    print("NUM_UNITS = " + str(NUM_UNITS))
    # N_HIDDEN_LAYERS = 1
    print("N_HIDDEN_LAYERS = " + str(N_HIDDEN_LAYERS))

    # Training parameters
    num_epochs = 1000000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H, +H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we use the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    # LR_start = .003
    LR_start = 0.000003
    print("LR_start = " + str(LR_start))
    # LR_fin = 0.0000003
    LR_fin = LR_start
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # replace the dataset
    print('Loading SFEW2 dataset...')
    [train_x] = SFEW2.load_lfw()
    assert train_x.shape[0] == 26404
    train_x = train_x[0:26400, :]
    [val_x, _, _, _] = SFEW2.load_train_val()
    print(train_x.shape)
    print(val_x.shape)
    print('last training minibatch size: ' +
          str(train_x.shape[0] - train_x.shape[0] // batch_size * batch_size) +
          ' / ' + str(batch_size))
    print('last training minibatch size should not be too small (except 0); '
          'try decreasing the batch_size rather than adding more minibatches.')
    print('minibatch size: ' + str(batch_size))
    print('suggested minibatch size: ' + str(
        math.ceil(float(train_x.shape[0]) /
                  math.ceil(float(train_x.shape[0]) / 100))))

    ###########################################################################
    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.matrix('inputs')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, train_x.shape[1]),
                                    input_var=input)
    mlp = lasagne.layers.DropoutLayer(
        mlp, p=0)  # train BAE-2: no dropout on input & BAE-1 layer
    for k in range(N_HIDDEN_LAYERS):
        if k == 0:
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS)
        elif k == 1:
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS * 2)
        else:
            assert False

        # if k == 0:
        #     print('scale down the LR of the transferred dense layer from',
        #           str(mlp.W_LR_scale))
        #     mlp.W_LR_scale = 0
        #     print('to', str(mlp.W_LR_scale))

        if k == 0:
            # BAE-1 encoder: BN
            mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon,
                                                alpha=alpha)
        elif k == 1:
            # BAE-2 encoder: do not use BN, to encourage sparsity
            pass
        else:
            # further layers use BN
            mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon,
                                                alpha=alpha)

        # midactivation is taken before the hard tanh; encoder and decoder
        # should not use BatchNorm; the "l1 reg" is applied on midactivation
        if k == 1:
            mlp_midactivation = mlp

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        if k == 0:
            mlp = lasagne.layers.DropoutLayer(
                mlp, p=0)  # train BAE-2: no dropout on input & BAE-1 layer
        else:
            mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

        # pretrain-finetune: only restore the first layer group
        if k == 0:
            print('Load ./W-1168.npz')
            with np.load('./W-1168.npz') as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            param_values = param_values[0:6]
            lasagne.layers.set_all_param_values(mlp, param_values)
            mlp_groundtruth = mlp

    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1500)
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    # network output: BN or SGN
    if OUTPUT_TYPE == 'C':
        pass
    elif OUTPUT_TYPE == 'D':
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    else:
        assert False

    '''
    # equal transform validation
    # 1 set the AE transform to I
    # 1 modify AE DenseLayer.get_output_for() to use W (0/1) instead of Wb (+1/-1)
    # 2 set the encoder's dropout to 0
    # 3 comment out the encoder's and decoder's BatchNormLayer, and modify
    #   set_all_param_values
    # you will then see train loss = 0
    pv = lasagne.layers.get_all_param_values(mlp)
    pv[2] = np.identity(1500, np.float64)
    pv[4] = np.identity(1500, np.float64)
    lasagne.layers.set_all_param_values(mlp, pv)
    '''

    '''
    # loss weight nodes
    SPARSITY = 0.9
    SPARSITY_MAP = (np.float32(train_x == -1)).mean(0)
    LOSS_WEIGHT_1 = 1. + input * (2. * SPARSITY - 1)
    LOSS_WEIGHT_1 /= 4 * SPARSITY * (1 - SPARSITY)  # fixed 1->-1:5, -1->1:5/9 weights
    LOSS_WEIGHT_2 = 1. + input * (2. * SPARSITY_MAP - 1)
    LOSS_WEIGHT_2 /= 4 * SPARSITY_MAP * (1 - SPARSITY_MAP)  # weights considering the element's prior probability
    '''

    # train loss nodes
    '''
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    if MAIN_LOSS_TYPE == 'SH':
        train_loss = T.mean(T.sqr(T.maximum(0., 1. - input * train_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        train_loss = T.mean(T.sqr(T.maximum(0., 1. - input * train_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        train_loss = T.mean(T.sqr(T.maximum(0., 1. - input * train_output)) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        train_loss = T.mean(T.maximum(0., 1. - input * train_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        train_loss = T.mean(T.maximum(0., 1. - input * train_output) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        train_loss = T.mean(T.maximum(0., 1. - input * train_output) * LOSS_WEIGHT_2)
    else:
        assert False
    '''
    [train_output_mlp_groundtruth, train_output_mlp_midactivation,
     train_output] = lasagne.layers.get_output(
         [mlp_groundtruth, mlp_midactivation, mlp], deterministic=False)
    train_loss = T.mean(
        T.maximum(0., 1. - train_output_mlp_groundtruth * train_output))

    # + sparse penalty
    '''
    if LAMBDA > 0:
        train_pixel_wise_density = T.mean(
            T.reshape((train_output + 1.) / 2.,
                      [train_output.shape[0], train_output.shape[1] // 10, 10]),
            axis=2)
        train_penalty = LAMBDA * T.mean(T.sqr(train_pixel_wise_density - (1. - SPARSITY)))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty
    '''
    if LAMBDA > 0:
        train_penalty = LAMBDA * T.mean(
            T.maximum(0., 1. + train_output_mlp_midactivation))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty

    # grad nodes
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(train_loss, mlp)
        # untrainable W1
        assert len(W) == 3
        assert len(W_grads) == 3
        W = W[1:len(W)]
        W_grads = W_grads[1:len(W_grads)]
        assert len(W) == 2
        assert len(W_grads) == 2
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)
        # updates for the other parameters
        params = lasagne.layers.get_all_params(mlp, trainable=True,
                                               binary=False)
        # untrainable b1, bn1
        assert len(params) == 7
        assert params[0].name == 'b'      # fixed
        assert params[1].name == 'beta'   # fixed
        assert params[2].name == 'gamma'  # fixed
        assert params[3].name == 'b'
        assert params[4].name == 'b'
        assert params[5].name == 'beta'
        assert params[6].name == 'gamma'
        params = params[3:len(params)]
        assert len(params) == 4
        updates.update(lasagne.updates.adam(loss_or_grads=train_loss,
                                            params=params, learning_rate=LR))
    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=train_loss, params=params,
                                       learning_rate=LR)

    ###########################################################################
    # val loss nodes (must be created after the grad nodes)
    '''
    val_output = lasagne.layers.get_output(mlp, deterministic=True)
    if MAIN_LOSS_TYPE == 'SH':
        val_loss = T.mean(T.sqr(T.maximum(0., 1. - input * val_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        val_loss = T.mean(T.sqr(T.maximum(0., 1. - input * val_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        val_loss = T.mean(T.sqr(T.maximum(0., 1. - input * val_output)) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        val_loss = T.mean(T.maximum(0., 1. - input * val_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        val_loss = T.mean(T.maximum(0., 1. - input * val_output) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        val_loss = T.mean(T.maximum(0., 1. - input * val_output) * LOSS_WEIGHT_2)
    '''
    [val_output_mlp_groundtruth, val_output_mlp_midactivation,
     val_output] = lasagne.layers.get_output(
         [mlp_groundtruth, mlp_midactivation, mlp], deterministic=True)
    val_loss = T.mean(
        T.maximum(0., 1. - val_output_mlp_groundtruth * val_output))

    # + sparse penalty
    '''
    if LAMBDA > 0:
        val_pixel_wise_density = T.mean(
            T.reshape((val_output + 1.) / 2.,
                      [val_output.shape[0], val_output.shape[1] // 10, 10]),
            axis=2)
        val_penalty = LAMBDA * T.mean(T.sqr(val_pixel_wise_density - (1. - SPARSITY)))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty
    '''
    if LAMBDA > 0:
        val_penalty = LAMBDA * T.mean(
            T.maximum(0., 1. + val_output_mlp_midactivation))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty

    ###########################################################################
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [input, LR],
        [train_loss, train_penalty, train_output_mlp_groundtruth,
         train_output_mlp_midactivation, train_output],
        updates=updates)

    ###########################################################################
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function(
        [input],
        [val_loss, val_penalty, val_output_mlp_groundtruth,
         val_output_mlp_midactivation, val_output])

    ###########################################################################
    print('Training...')
    train_x = binary_net.MoveParameter(train_x)
    binary_net.train(train_fn, val_fn, batch_size, LR_start, LR_decay,
                     num_epochs, train_x, val_x, mlp)

    print('Save W')
    np.savez('./W.npz', *lasagne.layers.get_all_param_values(
        mlp))  # W b BN BN BN BN W b BN BN BN BN
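# --- Worked example of the sparsity penalty (illustrative, numpy) ---
# LAMBDA * mean(max(0, 1 + midactivation)) charges nothing for pre-binarization
# activations at or below -1, so the BAE-2 code is pushed toward -1 (sparse):
mid = np.array([-3., -1., 0., 2.])
penalty_terms = np.maximum(0., 1. + mid)  # -> [0., 0., 1., 3.]
print(penalty_terms.mean())               # -> 1.0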
# mlp = lasagne.layers.InputLayer(
#     shape=(None, 1, 28, 28),
#     input_var=input)
mlp = lasagne.layers.InputLayer(shape=(None, ins), input_var=input)
mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
for k in range(n_hidden_layers):
    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=num_units)  # /(k+1): don't divide; kept for a sort of autoencoder
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
    mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)
def genCnv(input, num_outputs, learning_parameters):
    # Generate the cnv network topology matching the overlay for the Pynq board.
    # WARNING: if you change this file, the resultant weights will likely not
    # fit on the Pynq overlay.
    if num_outputs < 1 or num_outputs > 64:
        raise ValueError("num_outputs should be in the range of 1 to 64.")
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    cnn = lasagne.layers.InputLayer(shape=(None, 5, 64, 64), input_var=input)

    # conv + maxpool
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(3, 3), pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # -> 32x32
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv + maxpool
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=64,
                                 filter_size=(3, 3), pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # -> 16x16
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv + conv + maxpool
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=128,
                                 filter_size=(3, 3), pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=128,
                                 filter_size=(3, 3), pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # -> 8x8
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.6)

    # conv + maxpool
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=256,
                                 filter_size=(3, 3), pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # -> 4x4
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv + maxpool
    cnn = binary_net.Conv2DLayer(cnn, binary=binary, stochastic=stochastic,
                                 H=H, W_LR_scale=W_LR_scale, num_filters=256,
                                 filter_size=(3, 3), pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # -> 2x2
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.6)
    print(cnn.output_shape)

    # FC 1
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC 2
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # output
    cnn = binary_net.DenseLayer(cnn, binary=binary, stochastic=stochastic,
                                H=H, W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128  # default: 100
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    alpha = .1  # default: .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4  # default: 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    num_units = 300  # default: 4096
    print("num_units = " + str(num_units))
    n_hidden_layers = 1  # default: 3
    print("n_hidden_layers = " + str(n_hidden_layers))

    # Training parameters
    num_epochs = 500  # default: 1000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False  # default: False
    print("stochastic = " + str(stochastic))
    # (-H, +H) are the two binary values
    # H = "Glorot"
    H = 1.  # default: 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # default: "Glorot"
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005  # default: .003
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # default: 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    save_path = None  # default: "mnist_parameters.npz"
    print("save_path = " + str(save_path))

    # Load the dataset (https://github.com/mnielsen/neural-networks-and-deep-learning)
    print('Loading MNIST dataset...')
    mnist = MnistReader("./data/mnist.pkl.gz")

    shuffle_parts = 1  # default: 1
    print("shuffle_parts = " + str(shuffle_parts))
    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    train_set_size = 50000  # default: 50000
    train_X, train_y = mnist.get_train_data(n_samples=train_set_size,
                                            noise=noise, alpha=nalpha)
    validation_X, validation_y = mnist.get_validation_data()
    test_X, test_y = mnist.get_test_data()
    print("train_set_size = " + str(train_y.shape[0]))
    print("validation_set_size = " + str(validation_y.shape[0]))
    print("test_set_size = " + str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_units = " + str(num_units), file=l)
        print("n_hidden_layers = " + str(n_hidden_layers), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("save_path = " + str(save_path), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = " + str(train_y.shape[0]), file=l)
        print("validation_set_size = " + str(validation_y.shape[0]), file=l)
        print("test_set_size = " + str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1, +1]
    # print("Inputs in the range [-1,+1]")
    train_X = 2 * train_X.reshape(-1, 1, 28, 28) - 1.
    validation_X = 2 * validation_X.reshape(-1, 1, 28, 28) - 1.
    test_X = 2 * test_X.reshape(-1, 1, 28, 28) - 1.

    # flatten targets
    train_y = np.hstack(train_y)
    validation_y = np.hstack(validation_y)
    test_y = np.hstack(test_y)

    # one-hot the targets
    train_y = np.float32(np.eye(10)[train_y])
    validation_y = np.float32(np.eye(10)[validation_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    validation_y = 2 * validation_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)
    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)
    for k in range(n_hidden_layers):
        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)
        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)
    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=10)
    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    train_output = lasagne.layers.get_output(mlp, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(loss, mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)
        # updates for the other parameters
        params = lasagne.layers.get_all_params(mlp, trainable=True,
                                               binary=False)
        updates.update(lasagne.updates.adam(loss_or_grads=loss, params=params,
                                            learning_rate=LR))
    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params,
                                       learning_rate=LR)

    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    binary_net.train(train_fn, val_fn, mlp, batch_size, LR_start, LR_decay,
                     num_epochs, train_X, train_y, validation_X, validation_y,
                     test_X, test_y, save_path, shuffle_parts, result_path)
def buildCNN(networkType, dataType, input, epsilon, alpha, activation,
             binary, stochastic, H, W_LR_scale, oneHot=True):
    if oneHot:
        print("identity")
        denseOut = lasagne.nonlinearities.identity
    else:
        print("softmax")
        denseOut = lasagne.nonlinearities.softmax

    if dataType == 'TCDTIMIT':
        nbClasses = 39
        cnn = lasagne.layers.InputLayer(shape=(None, 1, 120, 120), input_var=input)
    elif dataType == 'cifar10':
        nbClasses = 10
        cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    if networkType == 'google':
        # conv 1
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=128, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=256, filter_size=(3, 3), stride=(2, 2), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 3
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 4
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 5
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # FC layer
        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=denseOut,  # TODO: was identity
            num_units=nbClasses)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    elif networkType == 'cifar10':
        # 128C3-128C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=128, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=128, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # 256C3-256C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=256, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=256, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # 512C3-512C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        # print(cnn.output_shape)

        # 1024FP-1024FP-nbClasses FP
        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=1024)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=1024)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=denseOut,
            num_units=nbClasses)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
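# For reference, a minimal sketch of how buildCNN might be driven. The
# hyperparameter values (epsilon, alpha, H, W_LR_scale) are placeholder
# assumptions, not values taken from this file.
import theano
import theano.tensor as T
import lasagne
import binary_net

input = T.tensor4('inputs')
cnn = buildCNN(networkType='cifar10', dataType='cifar10', input=input,
               epsilon=1e-4, alpha=.1,                  # assumed batch-norm settings
               activation=binary_net.binary_tanh_unit,
               binary=True, stochastic=False, H=1.,
               W_LR_scale="Glorot")                     # assumed, BinaryNet-style default
test_output = lasagne.layers.get_output(cnn, deterministic=True)
predict_fn = theano.function([input], T.argmax(test_output, axis=1))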
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha
    out_layers = []

    inp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # first conv
    cnn = binary_net.Conv2DLayer(
        inp, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=16, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 1x1 conv branch
    cnn_1x1 = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(1, 1), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn_1x1 = lasagne.layers.MaxPool2DLayer(cnn_1x1, pool_size=(2, 2))
    cnn_1x1 = lasagne.layers.BatchNormLayer(cnn_1x1, epsilon=epsilon, alpha=alpha)
    cnn_1x1 = lasagne.layers.NonlinearityLayer(cnn_1x1, nonlinearity=activation)
    out_layers.append(cnn_1x1)

    # 3x3 conv branch
    cnn_3x3 = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn_3x3 = lasagne.layers.MaxPool2DLayer(cnn_3x3, pool_size=(2, 2))
    cnn_3x3 = lasagne.layers.BatchNormLayer(cnn_3x3, epsilon=epsilon, alpha=alpha)
    cnn_3x3 = lasagne.layers.NonlinearityLayer(cnn_3x3, nonlinearity=activation)
    out_layers.append(cnn_3x3)

    # 5x5 conv branch
    cnn_5x5 = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(5, 5), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn_5x5 = lasagne.layers.MaxPool2DLayer(cnn_5x5, pool_size=(2, 2))
    cnn_5x5 = lasagne.layers.BatchNormLayer(cnn_5x5, epsilon=epsilon, alpha=alpha)
    cnn_5x5 = lasagne.layers.NonlinearityLayer(cnn_5x5, nonlinearity=activation)
    out_layers.append(cnn_5x5)

    cnn = lasagne.layers.concat(out_layers)

    # FC layer 1
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 2
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
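# A quick shape sanity check for the multi-branch variant above: after the
# shared 3x3 conv and 2x2 pool, each branch pools its 14x14 map down to 7x7,
# so the three 32-filter branches concatenate to (None, 96, 7, 7) before the
# dense layers. A sketch, using a stand-in learning_parameters object that is
# not part of this file:
import theano.tensor as T
import lasagne
from types import SimpleNamespace

params = SimpleNamespace(W_LR_scale="Glorot", epsilon=1e-4, alpha=.1)  # assumed values
input = T.tensor4('inputs')
net = genCnv(input, num_outputs=10, learning_parameters=params)
print(lasagne.layers.get_output_shape(net))  # expected: (None, 10)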
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    inp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # first conv
    cnn = binary_net.Conv2DLayer(
        inp, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    residual = cnn

    # conv 1 in Res block
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv 2 in Res block; its output is summed with the shortcut
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.ElemwiseSumLayer([residual, cnn], coeffs=1)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 1
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 2
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
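# The identity shortcut above only type-checks because both res-block convs
# keep pad='same' and 64 filters, so `residual` and `cnn` agree in shape,
# (None, 64, 14, 14), when they are summed. A minimal standalone sketch of
# that constraint (illustrative shapes, not part of this file):
import lasagne

a = lasagne.layers.InputLayer((None, 64, 14, 14))
b = lasagne.layers.InputLayer((None, 64, 14, 14))
# ElemwiseSumLayer requires all incoming shapes to match exactly.
s = lasagne.layers.ElemwiseSumLayer([a, b], coeffs=1)
print(s.output_shape)  # (None, 64, 14, 14)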