def lllistool(i, inp, func):
    if func == LSTM:
        NUMS[i + 1] *= 4
    sdim = DIMS[i]
    if func == SimpleRecurrent or func == LSTM:
        sdim = DIMS[i] + DIMS[i + 1]
    l = Linear(input_dim=DIMS[i], output_dim=DIMS[i + 1] * NUMS[i + 1],
               weights_init=IsotropicGaussian(std=sdim ** (-0.5)),
               biases_init=IsotropicGaussian(std=sdim ** (-0.5)),
               name='Lin{}'.format(i))
    l.initialize()
    if func == SimpleRecurrent:
        gong = func(dim=DIMS[i + 1], activation=Rectifier(),
                    weights_init=IsotropicGaussian(std=sdim ** (-0.5)))
        gong.initialize()
        ret = gong.apply(l.apply(inp))
    elif func == LSTM:
        gong = func(dim=DIMS[i + 1], activation=Tanh(),
                    weights_init=IsotropicGaussian(std=sdim ** (-0.5)))
        gong.initialize()
        ret, _ = gong.apply(
            l.apply(inp),
            T.zeros((inp.shape[1], DIMS[i + 1])),
            T.zeros((inp.shape[1], DIMS[i + 1])),
        )
    elif func == SequenceGenerator:
        gong = func(
            readout=None,
            transition=SimpleRecurrent(dim=100, activation=Rectifier(),
                                       weights_init=IsotropicGaussian(std=0.1)))
        ret = None
    elif func is None:
        ret = l.apply(inp)
    else:
        gong = func()
        ret = gong.apply(l.apply(inp))
    return ret

def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
    super(ConvPoolNlp, self).__init__(**kwargs)
    self.vector_size = vector_size
    self.n_hidden_layer = n_hidden_layer
    self.dwin = dwin
    self.n_out = n_out
    self.rectifier = Rectifier()
    """
    self.convolution = Convolutional(filter_size=(1, self.filter_size),
                                     num_filters=self.num_filter,
                                     num_channels=1,
                                     weights_init=IsotropicGaussian(0.01),
                                     use_bias=False)
    """
    # second dimension is of fixed size sum(vect_size) less the filter_size borders
    self.mlp = MLP(activations=[Rectifier()] * len(self.n_hidden_layer) + [Identity()],
                   dims=[self.n_out] + self.n_hidden_layer + [2],
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.))
    self.parameters = []
    self.children = []
    # self.children.append(self.lookup)
    # self.children.append(self.convolution)
    self.children.append(self.mlp)
    self.children.append(self.rectifier)

def create_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
    One layer convolution with different filter-sizes and maxpooling
    '''
    filter_width_list = [int(fw) for fw in config[pref + '_filterwidth'].split()]
    print filter_width_list
    num_filters = int(config[pref + '_num_filters'])
    # num_filters /= len(filter_width_list)
    totfilters = 0
    for i, fw in enumerate(filter_width_list):
        num_feature_map = input_len - fw + 1  # 39
        conv = Convolutional(image_size=(input_len, embedding_size),
                             filter_size=(fw, embedding_size),
                             num_filters=min(int(config[pref + '_maxfilter']),
                                             num_filters * fw),
                             num_channels=1)
        totfilters += conv.num_filters
        initialize2(conv, num_feature_map)
        conv.name = pref + 'conv_' + str(fw)
        convout = conv.apply(layer0_input)
        pool_layer = MaxPooling(pooling_size=(num_feature_map, 1))
        pool_layer.name = pref + 'pool_' + str(fw)
        act = Rectifier()
        act.name = pref + 'act_' + str(fw)
        outpool = act.apply(pool_layer.apply(convout)).flatten(2)
        if i == 0:
            outpools = outpool
        else:
            outpools = T.concatenate([outpools, outpool], axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len

def test_convolutional_sequence_tied_biases_pushed_if_explicitly_set():
    cnn = ConvolutionalSequence(
        sum([[Convolutional(filter_size=(1, 1), num_filters=1,
                            tied_biases=True), Rectifier()]
             for _ in range(3)], []),
        num_channels=1, image_size=(1, 1), tied_biases=False)
    cnn.allocate()
    # assert on all(...) rather than a (always truthy) non-empty list
    assert all(not child.tied_biases for child in cnn.children
               if isinstance(child, Convolutional))

    cnn = ConvolutionalSequence(
        sum([[Convolutional(filter_size=(1, 1), num_filters=1), Rectifier()]
             for _ in range(3)], []),
        num_channels=1, image_size=(1, 1), tied_biases=True)
    cnn.allocate()
    assert all(child.tied_biases for child in cnn.children
               if isinstance(child, Convolutional))

def __init__(self, dim, mini_dim, summary_dim, **kwargs):
    super(RNNwMini, self).__init__(**kwargs)
    self.dim = dim
    self.mini_dim = mini_dim
    self.summary_dim = summary_dim

    self.recurrent_layer = SimpleRecurrent(
        dim=self.summary_dim, activation=Rectifier(),
        name='recurrent_layer',
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0.0))
    self.mini_recurrent_layer = SimpleRecurrent(
        dim=self.mini_dim, activation=Rectifier(),
        name='mini_recurrent_layer',
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0.0))
    self.mini_to_main = Linear(self.dim + self.mini_dim, self.summary_dim,
                               name='mini_to_main',
                               weights_init=IsotropicGaussian(),
                               biases_init=Constant(0.0))

    self.children = [self.recurrent_layer,
                     self.mini_recurrent_layer,
                     self.mini_to_main]

def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim ** 2, hidden_dim, 2 * latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:, latent_dim:], z_param[:, :latent_dim]
    # pass the function's own `batch` argument (it was previously shadowed
    # by the global batch_size, leaving the parameter unused)
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim ** 2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost

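# The custom Sampling brick used above is not shown in this file. A minimal
# sketch of the reparameterization trick it presumably implements
# (z = mu + exp(log_std) * eps with eps ~ N(0, I)); the name `sample_z` and
# its signature are assumptions for illustration, not the actual brick:
from theano.sandbox.rng_mrg import MRG_RandomStreams

def sample_z(z_mean, z_log_std, batch, latent_dim, seed=1234):
    rng = MRG_RandomStreams(seed)
    eps = rng.normal(size=(batch, latent_dim))  # standard normal noise
    return z_mean + T.exp(z_log_std) * eps      # differentiable w.r.t. encoder
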
def create_model(self, x, y, input_dim, tol=10e-5):
    # Create the output of the MLP
    mlp = MLP([Rectifier(), Rectifier(), Logistic()],
              [input_dim, 100, 100, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    probs = mlp.apply(x)
    y = y.dimshuffle(0, 'x')
    # Create the if-else cost function
    true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
    true_n = (T.sum((1 - y) * (1 - probs)) + tol) * 1.0 / (T.sum(1 - y) + tol)
    # p = (T.sum(y) + tol) / (y.shape[0] + tol)
    theta = (1 - self.p) / self.p
    numerator = (1 + self.beta ** 2) * true_p
    denominator = self.beta ** 2 + theta + true_p - theta * true_n
    Fscore = numerator / denominator
    cost = -1 * Fscore
    cost.name = "cost"
    return mlp, cost, probs

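# Why the expression in create_model is a soft F-beta score: write recall
# tp = TP/P, specificity tn = TN/N, and theta = N/P = (1 - p)/p from the class
# prior p. Then
#     F_beta = (1 + beta^2) * TP / ((1 + beta^2) * TP + beta^2 * FN + FP).
# Substituting TP = tp * P, FN = (1 - tp) * P, FP = (1 - tn) * N and dividing
# numerator and denominator by P gives
#     F_beta = (1 + beta^2) * tp / (beta^2 + theta + tp - theta * tn),
# which is exactly numerator / denominator above; minimizing -F_beta therefore
# maximizes a differentiable surrogate of the F-score (tol guards the ratios
# against division by zero).
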
def build_mlp(features_cat, features_int, labels):
    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()
    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()
    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print cg.variables
    cg_dropout1 = apply_dropout(
        cg,
        [VariableFilter(roles=[OUTPUT])(cg.variables)[1],
         VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost

def create_lenet_5():
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #   feature_maps = [20, 50]
    #   mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))

    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    return convnet

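# A hedged usage sketch, not part of the original file (assumes the LeNet
# brick from the Blocks MNIST example and theano.tensor imported as T):
x = T.tensor4('features')      # MNIST-shaped input: (batch, 1, 28, 28)
convnet = create_lenet_5()
probs = convnet.apply(x)       # (batch, 10) softmax class probabilities
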
def __init__(self, config, **kwargs):
    super(Model, self).__init__(config, **kwargs)

    self.dest_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_dest] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_dest
             + [config.dim_output_dest],
        name='dest_mlp')
    self.time_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_time] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_time
             + [config.dim_output_time],
        name='time_mlp')

    self.dest_classes = theano.shared(
        numpy.array(config.dest_tgtcls, dtype=theano.config.floatX),
        name='dest_classes')
    self.time_classes = theano.shared(
        numpy.array(config.time_tgtcls, dtype=theano.config.floatX),
        name='time_classes')

    self.inputs.append('input_time')
    self.children.extend([self.dest_mlp, self.time_mlp])

def build_mlp(features_int, features_cat, labels, labels_mean):
    inputs = tensor.concatenate([features_int, features_cat], axis=1)
    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    # cg_dropout0 = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    cg_dropout1 = apply_dropout(
        cg,
        [VariableFilter(roles=[OUTPUT])(cg.variables)[1],
         VariableFilter(roles=[OUTPUT])(cg.variables)[3],
         VariableFilter(roles=[OUTPUT])(cg.variables)[5]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost  # cost, cg.parameters, cost

def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):
    features = tensor.concatenate([features_hascar,
                                   means['cp'][features_cp[:, 0]],
                                   means['dep'][features_cp[:, 1]]], axis=1)
    mlp = MLP(activations=[Rectifier(), Rectifier(), None],
              dims=[5, 50, 50, 1],
              weights_init=IsotropicGaussian(.1),
              biases_init=Constant(0),
              name='mlp')
    mlp.initialize()
    prediction = mlp.apply(features)
    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print input_var
    cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost

def test_defaults_sequence2():
    seq = DefaultsSequence(input_dim=(3, 4, 4), lists=[
        Convolutional(num_filters=10, stride=(2, 2), filter_size=(3, 3)),
        BatchNormalization(),
        Rectifier(),
        Flattener(),
        Linear(output_dim=10),
        BatchNormalization(),
        Rectifier(),
        Linear(output_dim=12),
        BatchNormalization(),
        Rectifier()
    ])
    seq.weights_init = Constant(1.0)
    seq.biases_init = Constant(0.0)
    seq.push_allocation_config()
    seq.push_initialization_config()
    seq.initialize()

    x = T.tensor4('input')
    y = seq.apply(x)
    func_ = theano.function([x], [y])

    x_val = np.ones((1, 3, 4, 4), dtype=theano.config.floatX)
    res = func_(x_val)[0]
    assert_allclose(res.shape, (1, 12))

def setup_ff_network(in_dim, out_dim, num_layers, num_neurons):
    """Setup a feedforward neural network.

    Parameters
    ----------
    in_dim : int
        input dimension of network
    out_dim : int
        output dimension of network
    num_layers : int
        number of hidden layers
    num_neurons : int
        number of neurons of each layer

    Returns
    -------
    net : object
        network structure
    """
    activations = [Rectifier()]
    dims = [in_dim]
    for i in xrange(num_layers):
        activations.append(Rectifier())
        dims.append(num_neurons)
    dims.append(out_dim)

    net = MLP(activations=activations, dims=dims,
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    return net

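# Usage sketch (the dimensions are illustrative assumptions, and T is
# theano.tensor): a 10 -> 64 -> 64 -> 2 network with a Rectifier after every
# layer, including the output; initialize() must be called before use.
net = setup_ff_network(in_dim=10, out_dim=2, num_layers=2, num_neurons=64)
net.initialize()
y_hat = net.apply(T.matrix('x'))
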
def __init__(self, image_dimension, **kwargs):
    layers = []

    #############################################
    # a first block with 2 convolutions of 32 (3, 3) filters
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())
    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 2nd block with 3 convolutions of 64 (3, 3) filters
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 3rd block with 4 convolutions of 128 (3, 3) filters
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    self.conv_sequence = ConvolutionalSequence(layers, 3,
                                               image_size=image_dimension)

    flattener = Flattener()
    # NOTE: Blocks' MLP expects len(dims) == len(activations) + 1, so the
    # flattened convolutional output dimension should be prepended to dims here.
    self.top_mlp = MLP(activations=[Rectifier(), Logistic()], dims=[500, 1])

    application_methods = [self.conv_sequence.apply, flattener.apply,
                           self.top_mlp.apply]
    super(VGGNet, self).__init__(application_methods,
                                 biases_init=Constant(0),
                                 weights_init=Uniform(width=.1), **kwargs)

def test_fully_layer():
    batch_size = 2
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))

    mlp = MLP(activations=[Rectifier().apply],
              dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))

    cg = ComputationGraph(cost)
    W = VariableFilter(roles=[WEIGHT])(cg.variables)
    B = VariableFilter(roles=[BIAS])(cg.variables)
    W = W[0]
    b = B[0]

    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    var_input = inputs_fully[0]
    var_output = outputs_fully[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])
    d_b = d_b.dimshuffle(('x', 0))
    d_p = T.concatenate([d_W, d_b], axis=0)

    x_value = 1e3 * np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_p],
                        allow_input_downcast=True,
                        on_unused_input='ignore')
    A, B, C = f(x_value, [5, 0])
    A = np.concatenate([A, np.ones((2, 1))], axis=1)
    print 'A', A.shape
    print 'B', B.shape
    print 'C', C.shape
    print lin.norm(C - np.dot(np.transpose(A), B), 'fro')
    return

def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar,
              means, labels):
    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate((features_nocar_cat, features_nocar_int),
                                       axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature : does the dude own a car
    prediction += tensor.addbroadcast(features_hascar, 1) * \
        mlp_car.apply(feature_car)

    prediction_loc, _, _, _ = \
        build_mlp_onlyloc(features_car_cat, features_car_int,
                          features_nocar_cat, features_nocar_int,
                          features_cp, features_hascar, means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print input_var
    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost

def apply_cnn(self, l_emb1, l_size1, l_emb2, l_size2, r_emb1, r_size1,
              r_emb2, r_size2, embedding_size, mycnf):
    assert l_size1 == r_size1
    assert l_size2 == r_size2
    assert l_size1 == l_size2  # was a tautological self-comparison
    max_len = l_size1
    fv_len = 0
    filter_sizes = mycnf['cnn_config']['filter_sizes']
    num_filters = mycnf['cnn_config']['num_filters']
    for i, fw in enumerate(filter_sizes):
        conv_left = ConvolutionalActivation(
            activation=Rectifier().apply,
            filter_size=(fw, embedding_size),
            num_filters=num_filters,
            num_channels=1,
            image_size=(max_len, embedding_size),
            name="conv" + str(fw) + l_emb1.name,
            seed=self.curSeed)
        conv_right = ConvolutionalActivation(
            activation=Rectifier().apply,
            filter_size=(fw, embedding_size),
            num_filters=num_filters,
            num_channels=1,
            image_size=(max_len, embedding_size),
            name="conv" + str(fw) + r_emb1.name,
            seed=self.curSeed)
        pooling = MaxPooling((max_len - fw + 1, 1), name="pool" + str(fw))
        initialize([conv_left, conv_right])

        l_convinp1 = l_emb1.flatten().reshape(
            (l_emb1.shape[0], 1, max_len, embedding_size))
        l_convinp2 = l_emb2.flatten().reshape(
            (l_emb2.shape[0], 1, max_len, embedding_size))
        l_pool1 = pooling.apply(conv_left.apply(l_convinp1)).flatten(2)
        l_pool2 = pooling.apply(conv_left.apply(l_convinp2)).flatten(2)

        r_convinp1 = r_emb1.flatten().reshape(
            (r_emb1.shape[0], 1, max_len, embedding_size))
        r_convinp2 = r_emb2.flatten().reshape(
            (r_emb2.shape[0], 1, max_len, embedding_size))
        r_pool1 = pooling.apply(conv_right.apply(r_convinp1)).flatten(2)
        r_pool2 = pooling.apply(conv_right.apply(r_convinp2)).flatten(2)

        onepools1 = T.concatenate([l_pool1, r_pool1], axis=1)
        onepools2 = T.concatenate([l_pool2, r_pool2], axis=1)
        fv_len += conv_left.num_filters * 2
        if i == 0:
            outpools1 = onepools1
            outpools2 = onepools2
        else:
            outpools1 = T.concatenate([outpools1, onepools1], axis=1)
            outpools2 = T.concatenate([outpools2, onepools2], axis=1)
    return outpools1, outpools2, fv_len

def generate_elementary_block3(self, index, to_index):
    number_of_channels = 512
    name_conv_0 = 'fconv7_' + str(index)
    name_relu_0 = 'relu7_' + str(index)
    name_conv_1 = 'fconv7_' + str(index) + 'to' + str(to_index) + '_step1'
    name_relu_1 = 'relu7_' + str(index) + 'to' + str(to_index) + '_step1'
    name_conv_2 = 'fconv7_' + str(index) + 'to' + str(to_index) + '_step2'
    name_conv_3 = 'fconv7_output_' + str(index)
    return [Convolutional(filter_size=(1, 1), num_filters=128,
                          border_mode=(0, 0), use_bias=True, tied_biases=True,
                          name=name_conv_0, biases_init=Constant(0.),
                          weights_init=IsotropicGaussian(0.01),
                          num_channels=number_of_channels),
            ParallelSum3(),
            Rectifier(name=name_relu_0),
            Convolutional(filter_size=(7, 7), num_filters=64,
                          border_mode=(3, 3), use_bias=True, tied_biases=True,
                          name=name_conv_1, biases_init=Constant(0.),
                          weights_init=IsotropicGaussian(0.01),
                          num_channels=128),
            Rectifier(name=name_relu_1),
            Convolutional(filter_size=(7, 7), num_filters=128,
                          border_mode=(3, 3), use_bias=True, tied_biases=True,
                          name=name_conv_2, biases_init=Constant(0.),
                          weights_init=IsotropicGaussian(0.01),
                          num_channels=64),
            Convolutional(filter_size=(1, 1), num_filters=1,
                          border_mode=(0, 0), use_bias=True, tied_biases=True,
                          name=name_conv_3, biases_init=Constant(0.),
                          weights_init=IsotropicGaussian(0.01),
                          num_channels=128)]

def test_convolutional_sequence_with_no_input_size():
    # suppose x is output by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 1)
    num_filters = 2
    num_channels = 1
    pooling_size = (1, 1)

    conv = Convolutional(filter_size, num_filters, tied_biases=False,
                         weights_init=Constant(1.), biases_init=Constant(1.))
    act = Rectifier()
    pool = MaxPooling(pooling_size)

    bad_seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                    tied_biases=False)
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         bad_seq.initialize)

    seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                tied_biases=True)
    try:
        seq.initialize()
        out = seq.apply(x)
    except TypeError:
        assert False, "This should have succeeded"

    assert out.ndim == 4

def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d',
                n_latent, hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output', input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')

    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(mlp1.apply(z)))  # reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz

def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    # top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
    #               [conv_out_dim, 1024, 10],
    #               weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()

    return cost

def test_convolutional_layer():
    x = tensor.tensor4('x')
    num_channels = 4
    batch_size = 5
    pooling_size = 3
    num_filters = 3
    filter_size = (3, 3)
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, filter_size, num_filters,
                              (pooling_size, pooling_size),
                              num_channels, image_size=(17, 13),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels, 17, 13),
                       dtype=theano.config.floatX)
    assert_allclose(func(x_val),
                    numpy.prod(filter_size) * num_channels *
                    numpy.ones((batch_size, num_filters, 5, 4)) + 5)

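# Sanity check on the expected constant: each valid position of an all-ones
# (3, 3) kernel over 4 all-ones channels sums prod(filter_size) * num_channels
# = 9 * 4 = 36 inputs, plus the bias of 5, giving 41 everywhere; the ReLU
# leaves positive values unchanged and max pooling over a constant feature map
# only changes the spatial shape, (17, 13) -> (15, 11) after the valid
# convolution and then to (5, 4) after 3x3 pooling.
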
def create_OLD_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
    One layer convolution with the same filtersize
    '''
    filter_width_list = [int(fw) for fw in config[pref + '_filterwidth'].split()]
    print filter_width_list
    num_filters = int(config[pref + '_num_filters'])
    totfilters = 0
    for i, fw in enumerate(filter_width_list):
        num_feature_map = input_len - fw + 1  # 39
        conv = Convolutional(filter_size=(fw, embedding_size),
                             num_filters=num_filters,
                             num_channels=1,
                             image_size=(input_len, embedding_size),
                             name="conv" + str(fw))
        pooling = MaxPooling((num_feature_map, 1), name="pool" + str(fw))
        initialize([conv])
        totfilters += num_filters
        outpool = Flattener(name="flat" + str(fw)).apply(
            Rectifier(name=pref + 'act_' + str(fw)).apply(
                pooling.apply(conv.apply(layer0_input))))
        if i == 0:
            outpools = outpool
        else:
            outpools = T.concatenate([outpools, outpool], axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len

def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, (3, 3), 5,
                              (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(activation, (2, 2), 4,
                                    weights_init=Constant(1.))
    seq = ConvolutionalSequence([conv, conv2], num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 3)) * (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val)

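# Sanity check on the expected constant (9 * 4 + 5) * 4 * 5 = 820: the first
# layer outputs 9 * 4 + 5 = 41 everywhere, as in test_convolutional_layer
# above; the bias-free (2, 2) convolution of the second layer then sums
# 2 * 2 * 5 = 20 of those values per output position, giving 41 * 20.
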
def create_cnn_general(embedded_x, mycnf, max_len, embedding_size,
                       inp_conv=False):
    fv_len = 0
    filter_sizes = mycnf['cnn_config']['filter_sizes']
    num_filters = mycnf['cnn_config']['num_filters']
    for i, fw in enumerate(filter_sizes):
        conv = ConvolutionalActivation(
            activation=Rectifier().apply,
            filter_size=(fw, embedding_size),
            num_filters=num_filters,
            num_channels=1,
            image_size=(max_len, embedding_size),
            name="conv" + str(fw) + embedded_x.name)
        pooling = MaxPooling((max_len - fw + 1, 1),
                             name="pool" + str(fw) + embedded_x.name)
        initialize([conv])
        if inp_conv:
            convinp = embedded_x
        else:
            convinp = embedded_x.flatten().reshape(
                (embedded_x.shape[0], 1, max_len, embedding_size))
        onepool = pooling.apply(conv.apply(convinp)).flatten(2)
        if i == 0:
            outpools = onepool
        else:
            outpools = T.concatenate([outpools, onepool], axis=1)
        fv_len += conv.num_filters
    return outpools, fv_len

def __init__(self, input_dim, dim, mlp_hidden_dims, batch_size, image_shape,
             patch_shape, activation=None, **kwargs):
    super(LSTMAttention, self).__init__(**kwargs)
    self.dim = dim
    self.image_shape = image_shape
    self.patch_shape = patch_shape
    self.batch_size = batch_size

    non_lins = [Rectifier()] * (len(mlp_hidden_dims) - 1) + [None]
    mlp_dims = [input_dim + dim] + mlp_hidden_dims
    mlp = MLP(non_lins, mlp_dims,
              weights_init=self.weights_init,
              biases_init=self.biases_init,
              name=self.name + '_mlp')

    hyperparameters = {}
    hyperparameters["cutoff"] = 3
    hyperparameters["batched_window"] = True
    cropper = LocallySoftRectangularCropper(
        patch_shape=patch_shape,
        hyperparameters=hyperparameters,
        kernel=Gaussian())

    if not activation:
        activation = Tanh()
    self.children = [activation, mlp, cropper]

def construct_mlp(name, hidden_dims, input_dim, initargs, batch_normalize,
                  activations=None):
    if not hidden_dims:
        return FeedforwardIdentity(dim=input_dim)

    if not activations:
        activations = [Rectifier() for dim in hidden_dims]
    elif not isinstance(activations, collections.Iterable):
        activations = [activations] * len(hidden_dims)
    assert len(activations) == len(hidden_dims)

    dims = [input_dim] + hidden_dims
    wrapped_activations = [
        NormalizedActivation(shape=[hidden_dim],
                             name="activation_%i" % i,
                             batch_normalize=batch_normalize,
                             activation=activation)
        for i, (hidden_dim, activation)
        in enumerate(zip(hidden_dims, activations))
    ]
    mlp = MLP(name=name,
              activations=wrapped_activations,
              dims=dims,
              **initargs)
    # biases are handled by our activation function
    for layer in mlp.linear_transformations:
        layer.use_bias = False
    return mlp

def create_yy_cnn(numConvLayer, conv_input, embedding_size, input_len,
                  config, pref):
    '''
    CNN with several layers of convolution, each with specific filter size.
    Maxpooling at the end.
    '''
    filter_width_list = [int(fw) for fw in config[pref + '_filterwidth'].split()]
    base_num_filters = int(config[pref + '_num_filters'])
    assert len(filter_width_list) == numConvLayer

    convs = []
    fmlist = []
    last_fm = input_len
    for i in range(numConvLayer):
        fw = filter_width_list[i]
        num_feature_map = last_fm - fw + 1  # 39
        conv = Convolutional(
            image_size=(last_fm, embedding_size),
            filter_size=(fw, embedding_size),
            num_filters=min(int(config[pref + '_maxfilter']),
                            base_num_filters * fw),
            num_channels=1
        )
        fmlist.append(num_feature_map)
        last_fm = num_feature_map
        embedding_size = conv.num_filters
        convs.append(conv)
    initialize(convs)

    for i, conv in enumerate(convs):
        conv.name = pref + '_conv' + str(i)
        conv_input = conv.apply(conv_input)
        conv_input = conv_input.flatten().reshape(
            (conv_input.shape[0], 1, fmlist[i], conv.num_filters))
    lastconv = conv
    lastconv_out = conv_input

    pool_layer = MaxPooling(pooling_size=(last_fm, 1))
    pool_layer.name = pref + '_pool_' + str(fw)
    act = Rectifier()
    act.name = 'act_' + str(fw)
    outpool = act.apply(pool_layer.apply(lastconv_out).flatten(2))
    return outpool, lastconv.num_filters

def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')
    x = m.mean() + x  # stupid mask not always needed...

    # embedding_size = 300
    # glove_version = "glove.6B.300d.txt"
    embedding_size = 50
    glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    conv1 = Conv1D(filter_length=5, num_filters=128, input_dim=embedding_size,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0))
    conv1.initialize()
    o = conv1.apply(x)
    o = Rectifier(name="conv1red").apply(o)
    o = MaxPooling1D(pooling_length=5
                     # , step=2
                     ).apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3,
                   name="conv2")
    conv2.initialize()
    o = conv2.apply(o)
    o = Rectifier(name="conv2rec").apply(o)

    conv3 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3,
                   name="conv3")
    conv3.initialize()
    o = conv3.apply(o)
    o = Rectifier(name="conv3rec").apply(o)

    fork = Fork(weights_init=IsotropicGaussian(0.02),
                biases_init=Constant(0.),
                input_dim=128,
                output_dims=[128] * 3,
                output_names=['inputs', 'reset_inputs', 'update_inputs'])
    fork.initialize()
    inputs, reset_inputs, update_inputs = fork.apply(o)

    out = o.mean(axis=1)

    # gru = GatedRecurrent(dim=128,
    #                      weights_init=IsotropicGaussian(0.02),
    #                      biases_init=IsotropicGaussian(0.0))
    # gru.initialize()
    # states = gru.apply(inputs=inputs, reset_inputs=reset_inputs,
    #                    update_inputs=update_inputs)
    # out = states[:, -1, :]

    hidden = Linear(input_dim=128,
                    output_dim=128,
                    weights_init=Uniform(std=0.01),
                    biases_init=Constant(0.))
    hidden.initialize()
    o = hidden.apply(out)
    o = Rectifier().apply(o)

    # hidden = Linear(input_dim=128, output_dim=128,
    #                 weights_init=IsotropicGaussian(std=0.02),
    #                 biases_init=Constant(0.), name="hiddenmap2")
    # hidden.initialize()
    # o = hidden.apply(o)
    # o = Rectifier(name="rec2").apply(o)

    score_layer = Linear(input_dim=128,
                         output_dim=1,
                         weights_init=IsotropicGaussian(std=wstd),
                         biases_init=Constant(0.),
                         name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # =================
    cg = ComputationGraph([cost])
    params = cg.parameters
    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=10),
            AdaM(),
            # AdaDelta(),
        ]))

    # ========
    print "setting up data"
    ports = {
        'gpu0_train': 5557,
        'gpu0_test': 5558,
        'gpu1_train': 5559,
        'gpu1_test': 5560,
    }
    batch_size = 16

    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set)
        n_train = dataset.num_examples
        stream = DataStream(
            dataset=dataset,
            iteration_scheme=ShuffledScheme(examples=n_train,
                                            batch_size=batch_size))
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(data_stream=glove, mask_sources=('features',))
        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    train_stream = ServerDataStream(('features', 'features_mask', 'targets'),
                                    port=train_port)
    test_stream = ServerDataStream(('features', 'features_mask', 'targets'),
                                   port=test_port)

    print "setting up model"
    n_examples = 25000

    # ======
    model = Model(cost)
    extensions = []
    extensions.append(EpochProgress(
        batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        data_stream=test_stream,
        prefix='test',
        after_epoch=True))
    extensions.append(Timing())
    extensions.append(Printing())
    # extensions.append(Plot("norms",
    #                        channels=[['train_lstm_norm', 'train_pre_norm']],
    #                        after_epoch=True))
    extensions.append(Plot(
        theano.config.device + "_result",
        channels=[['test_misclassification', 'train_misclassification']],
        after_epoch=True))

    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions)
    main_loop.run()

def main_run(_config, _log):
    from collections import namedtuple
    c = namedtuple("Config", _config.keys())(*_config.values())
    _log.info("Running with " + str(_config))

    import theano
    from theano import tensor as T
    import numpy as np

    from dataset import IMDBText, GloveTransformer
    from blocks.initialization import (Uniform, Constant, IsotropicGaussian,
                                       NdarrayInitialization, Identity,
                                       Orthogonal)
    from blocks.bricks.recurrent import LSTM, SimpleRecurrent, GatedRecurrent
    from blocks.bricks.parallel import Fork
    from blocks.bricks import Linear, Sigmoid, Tanh, Rectifier
    from blocks import bricks
    from blocks.extensions import Printing, Timing
    from blocks.extensions.monitoring import (DataStreamMonitoring,
                                              TrainingDataMonitoring)
    from blocks.extensions.plot import Plot
    from plot import PlotHistogram
    from blocks.algorithms import (GradientDescent, Adam, Scale, StepClipping,
                                   CompositeRule, AdaDelta)
    from blocks.graph import ComputationGraph, apply_dropout
    from blocks.main_loop import MainLoop
    from blocks.model import Model
    from cuboid.algorithms import AdaM, NAG
    from cuboid.extensions import EpochProgress
    from fuel.streams import DataStream, ServerDataStream
    from fuel.transformers import Padding
    from fuel.schemes import ShuffledScheme
    from Conv1D import Conv1D, MaxPooling1D
    from schemes import BatchwiseShuffledScheme
    from bricks import WeightedSigmoid, GatedRecurrentFull
    from multiprocessing import Process
    import fuel
    import logging
    from initialization import SumInitialization
    from transformers import DropSources

    global train_p
    global test_p

    x = T.tensor3("features")
    # m = T.matrix('features_mask')
    y = T.imatrix("targets")
    # x = x + m.mean() * 0

    dropout_variables = []
    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    # embedding_size = 50
    # glove_version = "vectors.6B.50d.txt"

    gloveMapping = Linear(
        input_dim=embedding_size,
        output_dim=c.rnn_input_dim,
        weights_init=Orthogonal(),
        # weights_init=IsotropicGaussian(c.wstd),
        biases_init=Constant(0.0),
        name="gloveMapping",
    )
    gloveMapping.initialize()
    o = gloveMapping.apply(x)
    o = Rectifier(name="gloveRec").apply(o)
    dropout_variables.append(o)

    summed_mapped_glove = o.sum(axis=1)  # take out the sequence
    glove_out = Linear(
        input_dim=c.rnn_input_dim,
        output_dim=1,  # was 1.0; output_dim must be an int
        weights_init=IsotropicGaussian(c.wstd),
        biases_init=Constant(0.0),
        name="mapping_to_output",
    )
    glove_out.initialize()
    deeply_sup_0 = glove_out.apply(summed_mapped_glove)
    deeply_sup_probs = Sigmoid(name="deeply_sup_softmax").apply(deeply_sup_0)

    input_dim = c.rnn_input_dim
    hidden_dim = c.rnn_dim

    gru = GatedRecurrentFull(
        hidden_dim=hidden_dim,
        activation=Tanh(),
        # activation=bricks.Identity(),
        gate_activation=Sigmoid(),
        state_to_state_init=SumInitialization(
            [Identity(1.0), IsotropicGaussian(c.wstd)]),
        state_to_reset_init=IsotropicGaussian(c.wstd),
        state_to_update_init=IsotropicGaussian(c.wstd),
        input_to_state_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            biases_init=Constant(0.0)),
        input_to_update_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            # biases_init=Constant(-2.0)),
            biases_init=Constant(-1.0)),
        input_to_reset_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            # biases_init=Constant(-3.0))
            biases_init=Constant(-2.0)),
    )
    gru.initialize()
    rnn_in = o.dimshuffle(1, 0, 2)
    # rnn_out = gru.apply(rnn_in, mask=m.T)
    rnn_out = gru.apply(rnn_in)

    state_to_state = gru.rnn.state_to_state
    state_to_state.name = "state_to_state"

    # o = rnn_out[-1, :, :]
    o = rnn_out[-1]
    # o = rnn_out[:, -1, :]
    # o = rnn_out.mean(axis=1)
    dropout_variables.append(o)

    score_layer = Linear(
        input_dim=hidden_dim,
        output_dim=1,
        weights_init=IsotropicGaussian(std=c.wstd),
        biases_init=Constant(0.0),
        name="linear2",
    )
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)
    # probs = deeply_sup_probs

    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    # cost_deeply_sup0 = -(y * T.log(deeply_sup_probs)
    #                      + (1 - y) * T.log(1 - deeply_sup_probs)).mean()
    # cost += cost_deeply_sup0 * c.deeply_factor
    cost.name = "cost"

    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = "misclassification"

    # =================
    cg = ComputationGraph([cost])
    cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters
    algorithm = GradientDescent(
        cost=cg.outputs[0],
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=4),
            Adam(learning_rate=0.002, beta1=0.1, beta2=0.001),
            # NAG(lr=0.1, momentum=0.9),
            # AdaDelta(),
        ]),
    )

    # ========
    print "setting up data"
    ports = {
        "gpu0_train": 5557,
        "gpu0_test": 5558,
        "cuda0_train": 5557,
        "cuda0_test": 5558,
        "opencl0:0_train": 5557,
        "opencl0:0_test": 5558,
        "gpu1_train": 5559,
        "gpu1_test": 5560,
    }
    # batch_size = 16
    # batch_size = 32
    batch_size = 40

    def start_server(port, which_set):
        fuel.server.logger.setLevel("WARN")
        dataset = IMDBText(which_set, sorted=True)
        n_train = dataset.num_examples
        # scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train,
                                         batch_size=batch_size)
        stream = DataStream(dataset=dataset, iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(data_stream=glove, mask_sources=("features",))
        padded = DropSources(padded, ["features_mask"])
        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + "_train"]
    train_p = Process(target=start_server, args=(train_port, "train"))
    train_p.start()

    test_port = ports[theano.config.device + "_test"]
    test_p = Process(target=start_server, args=(test_port, "test"))
    test_p.start()

    # train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port)
    # test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port)
    train_stream = ServerDataStream(("features", "targets"), port=train_port)
    test_stream = ServerDataStream(("features", "targets"), port=test_port)

    print "setting up model"
    n_examples = 25000
    print "Batches per epoch", n_examples // (batch_size + 1)
    batches_extensions = 100
    monitor_rate = 50

    # ======
    model = Model(cg.outputs[0])
    extensions = []
    extensions.append(EpochProgress(
        batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix="train",
        every_n_batches=monitor_rate))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        data_stream=test_stream,
        prefix="test",
        after_epoch=True,
        before_first_epoch=False))
    extensions.append(Timing())
    extensions.append(Printing())
    # extensions.append(PlotHistogram(
    #     channels=['train_state_to_state'],
    #     bins=50,
    #     every_n_batches=30))
    extensions.append(Plot(
        theano.config.device + "_result",
        channels=[["train_cost"], ["train_misclassification"]],
        every_n_batches=monitor_rate))

    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

class NoisyConvolutional2(Initializable, Feedforward, Random):
    """Convolutional transformation sent through a learned noisy channel.

    Applies the noise after the ReLU rather than before it.

    Parameters (same as Convolutional)
    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels',
                      'noise_batch_size'])
    def __init__(self, filter_size, num_filters, num_channels,
                 noise_batch_size, image_size=(None, None), step=(1, 1),
                 border_mode='valid', tied_biases=True,
                 prior_mean=0, prior_noise_level=0, **kwargs):
        self.convolution = Convolutional()
        self.rectifier = Rectifier()
        self.mask = Convolutional(name='mask')
        children = [self.convolution, self.rectifier, self.mask]
        kwargs.setdefault('children', []).extend(children)
        super(NoisyConvolutional2, self).__init__(**kwargs)
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.noise_batch_size = noise_batch_size
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases
        self.prior_mean = prior_mean
        self.prior_noise_level = prior_noise_level

    def _push_allocation_config(self):
        self.convolution.filter_size = self.filter_size
        self.convolution.num_filters = self.num_filters
        self.convolution.num_channels = self.num_channels
        # self.convolution.batch_size = self.batch_size
        self.convolution.image_size = self.image_size
        self.convolution.step = self.step
        self.convolution.border_mode = self.border_mode
        self.convolution.tied_biases = self.tied_biases
        self.mask.filter_size = (1, 1)
        self.mask.num_filters = self.num_filters
        self.mask.num_channels = self.num_filters
        # self.mask.batch_size = self.batch_size
        self.mask.image_size = self.convolution.get_dim('output')[1:]
        # self.mask.step = self.step
        # self.mask.border_mode = self.border_mode
        self.mask.tied_biases = self.tied_biases

    def _allocate(self):
        out_shape = self.convolution.get_dim('output')
        N = shared_floatx_zeros((self.noise_batch_size,) + out_shape, name='N')
        add_role(N, NOISE)
        self.parameters.append(N)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_, application_call):
        """Apply the linear transformation followed by masking with noise.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input
        """
        pre_noise = self.rectifier.apply(self.convolution.apply(input_))
        # noise_level = self.mask.apply(input_)
        noise_level = (self.prior_noise_level -
                       tensor.clip(self.mask.apply(pre_noise), -16, 16))
        noise_level = copy_and_tag_noise(
            noise_level, self, LOG_SIGMA, 'log_sigma')
        # Allow incomplete batches by just taking the noise that is needed
        noise = self.parameters[0][:noise_level.shape[0], :, :, :]
        # noise = self.theano_rng.normal(noise_level.shape)
        kl = (
            self.prior_noise_level - noise_level
            + 0.5 * (
                tensor.exp(2 * noise_level)
                + (pre_noise - self.prior_mean) ** 2
            ) / tensor.exp(2 * self.prior_noise_level)
            - 0.5
        )
        application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
        return pre_noise + tensor.exp(noise_level) * noise

    def get_dim(self, name):
        if name == 'input_':
            return self.convolution.get_dim(name)
        if name == 'output':
            return self.convolution.get_dim(name)
        if name == 'nits':
            return self.convolution.get_dim('output')
        return super(NoisyConvolutional2, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters

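# Note on the `kl` term in NoisyConvolutional2.apply: the output distribution
# is N(pre_noise, exp(2 * noise_level)) and the prior is
# N(prior_mean, exp(2 * prior_noise_level)). The closed-form Gaussian KL,
#     KL(q || p) = log(sigma_p / sigma_q)
#                  + (sigma_q^2 + (mu_q - mu_p)^2) / (2 * sigma_p^2) - 1/2,
# with log sigma_q = noise_level and log sigma_p = prior_noise_level, reduces
# to
#     prior_noise_level - noise_level
#     + 0.5 * (exp(2 * noise_level) + (pre_noise - prior_mean)^2)
#       / exp(2 * prior_noise_level) - 0.5,
# which is exactly the expression accumulated into the 'nits' auxiliary
# variable.
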
class ResidualConvolutional(Initializable):
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 batch_size=None, mid_noise=False, out_noise=False,
                 tied_noise=False, tied_sigma=False,
                 noise_rate=None, noise_batch_size=None,
                 prior_noise_level=None,
                 image_size=(None, None), step=(1, 1), **kwargs):
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.mid_noise = mid_noise
        self.noise_batch_size = noise_batch_size
        self.noise_rate = noise_rate
        self.step = step
        self.border_mode = 'half'
        self.tied_biases = True
        depth = 2

        self.b0 = SpatialBatchNormalization(name='b0')
        self.r0 = Rectifier(name='r0')
        self.n0 = (SpatialNoise(name='n0', noise_rate=self.noise_rate,
                                tied_noise=tied_noise, tied_sigma=tied_sigma,
                                prior_noise_level=prior_noise_level)
                   if mid_noise else None)
        self.c0 = Convolutional(name='c0')
        self.b1 = SpatialBatchNormalization(name='b1')
        self.r1 = Rectifier(name='r1')
        self.n1 = (SpatialNoise(name='n1', noise_rate=self.noise_rate,
                                tied_noise=tied_noise, tied_sigma=tied_sigma,
                                prior_noise_level=prior_noise_level)
                   if out_noise else None)
        self.c1 = Convolutional(name='c1')
        kwargs.setdefault('children', []).extend(
            [c for c in [self.c0, self.b0, self.r0, self.n0,
                         self.c1, self.b1, self.r1, self.n1]
             if c is not None])
        super(ResidualConvolutional, self).__init__(**kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return ((self.num_channels,) + self.image_size)
        if name == 'output':
            return self.c1.get_dim(name)
        # was ResidualConvolutionalUnit, which is undefined here
        return super(ResidualConvolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters

    def _push_allocation_config(self):
        self.b0.input_dim = self.get_dim('input_')
        self.b0.push_allocation_config()
        if self.r0:
            self.r0.push_allocation_config()
        if self.n0:
            self.n0.noise_batch_size = self.noise_batch_size
            self.n0.num_channels = self.num_channels
            self.n0.image_size = self.image_size
        self.c0.filter_size = self.filter_size
        self.c0.batch_size = self.batch_size
        self.c0.num_channels = self.num_channels
        self.c0.num_filters = self.num_filters
        self.c0.border_mode = self.border_mode
        self.c0.image_size = self.image_size
        self.c0.step = self.step
        self.c0.use_bias = False
        self.c0.push_allocation_config()
        c0_shape = self.c0.get_dim('output')

        self.b1.input_dim = c0_shape
        self.b1.push_allocation_config()
        self.r1.push_allocation_config()
        if self.n1:
            self.n1.noise_batch_size = self.noise_batch_size
            self.n1.num_channels = self.num_filters
            self.n1.image_size = c0_shape[1:]
        self.c1.filter_size = self.filter_size
        self.c1.batch_size = self.batch_size
        self.c1.num_channels = self.num_filters
        self.c1.num_filters = self.num_filters
        self.c1.border_mode = self.border_mode
        self.c1.image_size = c0_shape[1:]
        self.c1.step = (1, 1)
        self.c1.use_bias = False
        self.c1.push_allocation_config()

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        shortcut = input_
        # Batchnorm, then Relu, then Convolution
        first_conv = self.b0.apply(input_)
        first_conv = self.r0.apply(first_conv)
        if self.n0:
            first_conv = self.n0.apply(first_conv)
        first_conv = self.c0.apply(first_conv)
        # Batchnorm, then Relu, then Convolution (second time)
        second_conv = self.b1.apply(first_conv)
        second_conv = self.r1.apply(second_conv)
        if self.n1:
            second_conv = self.n1.apply(second_conv)
        # apply the second convolution (c1 was configured but never applied)
        residual = self.c1.apply(second_conv)

        # Apply stride and zero-padding to match shortcut to output
        if self.step and self.step != (1, 1):
            shortcut = shortcut[:, :, ::self.step[0], ::self.step[1]]
        if self.num_filters > self.num_channels:
            padshape = (residual.shape[0],
                        self.num_filters - self.num_channels,
                        residual.shape[2], residual.shape[3])
            shortcut = tensor.concatenate(
                [shortcut, tensor.zeros(padshape, dtype=residual.dtype)],
                axis=1)
        elif self.num_filters < self.num_channels:
            # slice down to num_filters channels so the shapes match
            # (slicing to num_channels, as before, was a no-op)
            shortcut = shortcut[:, :self.num_filters, :, :]

        response = shortcut + residual
        return response

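# A hedged usage sketch of the residual block (the shapes are illustrative
# assumptions, not from the original file): a stride-2 block that widens
# 16 -> 32 channels on a CIFAR-sized input.
block = ResidualConvolutional(filter_size=(3, 3), num_filters=32,
                              num_channels=16, image_size=(32, 32),
                              step=(2, 2),
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0))
block.initialize()
y = block.apply(T.tensor4('x'))  # shortcut is strided and zero-padded
                                 # to (batch, 32, 16, 16) to match the residual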