def create_models():
    ali = create_model_brick()
    x = tensor.tensor4('features')
    z = ali.theano_rng.normal(size=(x.shape[0], NLAT, 1, 1))

    def _create_model(with_dropout):
        cg = ComputationGraph(ali.compute_losses(x, z))
        if with_dropout:
            inputs = VariableFilter(
                bricks=[ali.discriminator.x_discriminator.layers[0],
                        ali.discriminator.z_discriminator.layers[0]],
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.2)
            inputs = VariableFilter(
                bricks=(ali.discriminator.x_discriminator.layers[2::3] +
                        ali.discriminator.z_discriminator.layers[2::2] +
                        ali.discriminator.joint_discriminator.layers[::2]),
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.5)
        return Model(cg.outputs)

    model = _create_model(with_dropout=False)
    with batch_normalization(ali):
        bn_model = _create_model(with_dropout=True)
    pop_updates = list(
        set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))
    bn_updates = [(p, m * 0.05 + p * 0.95) for p, m in pop_updates]

    return model, bn_model, bn_updates
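# Illustrative aside (not part of the original code): the `bn_updates` pairs
# above implement an exponential moving average of the batch-norm population
# statistics, p <- 0.95 * p + 0.05 * m, where p is the population estimate and
# m is the current minibatch statistic. A minimal NumPy sketch of the same rule:
import numpy as np

def update_population_statistic(p, m, alpha=0.05):
    """Blend the minibatch statistic m into the running estimate p."""
    return alpha * m + (1.0 - alpha) * p

p = np.zeros(16)                 # running population mean, e.g. one per channel
for _ in range(100):
    m = np.random.randn(16)      # stand-in for a minibatch statistic
    p = update_population_statistic(p, m)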
def create_models():
    gan = create_model_brick()
    x = tensor.matrix('features')
    z = gan.theano_rng.normal(size=(x.shape[0], NLAT))

    def _create_model(with_dropout):
        cg = ComputationGraph(gan.compute_losses(x, z))
        if with_dropout:
            inputs = VariableFilter(
                bricks=gan.discriminator.children[1:],
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.5)
            inputs = VariableFilter(
                bricks=[gan.discriminator],
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.2)
        return Model(cg.outputs)

    model = _create_model(with_dropout=False)
    with batch_normalization(gan):
        bn_model = _create_model(with_dropout=False)
    pop_updates = list(
        set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))
    bn_updates = [(p, m * 0.05 + p * 0.95) for p, m in pop_updates]

    return model, bn_model, bn_updates
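# Usage sketch (illustrative, not taken from this code base): the returned
# triple is typically consumed by building a training algorithm on the
# batch-normalized graph and attaching `bn_updates` so the population
# statistics stay current, mirroring `algorithm.add_updates(extra_updates)`
# in the training script further below. The assumption that `compute_losses`
# exposes the discriminator and generator losses as the two graph outputs is
# for illustration only; real GAN training would also give each loss only its
# own parameter subset rather than all of `bn_model.parameters`.
from blocks.algorithms import Adam, GradientDescent

model, bn_model, bn_updates = create_models()
discriminator_loss, generator_loss = bn_model.outputs  # assumed output order
algorithm = GradientDescent(cost=discriminator_loss,
                            parameters=bn_model.parameters,
                            step_rule=Adam())
algorithm.add_updates(bn_updates)  # keep population estimates current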
def create_models(self):
    gan = self.create_model_brick()
    x = tensor.matrix('features')

    # Draw one latent batch per packed sample from a mixture of Gaussians
    # arranged on a circle.
    zs = []
    for i in range(self._config["num_packing"]):
        z = circle_gaussian_mixture(num_modes=self._config["num_zmode"],
                                    num_samples=x.shape[0],
                                    dimension=self._config["num_zdim"],
                                    r=self._config["z_mode_r"],
                                    std=self._config["z_mode_std"])
        zs.append(z)

    def _create_model(with_dropout):
        cg = ComputationGraph(gan.compute_losses(x, zs))
        if with_dropout:
            inputs = VariableFilter(bricks=gan.discriminator.children[1:],
                                    roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.5)
            inputs = VariableFilter(bricks=[gan.discriminator],
                                    roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.2)
        return Model(cg.outputs)

    model = _create_model(with_dropout=False)
    with batch_normalization(gan):
        bn_model = _create_model(with_dropout=False)
    pop_updates = list(
        set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))

    # Merge duplicate population-statistic updates: updates that refer to the
    # same population variable (identified by auto_name) are summed and then
    # averaged by their multiplicity.
    names = []
    counts = []
    pop_update_merges = []
    pop_update_merges_finals = []
    for pop_update in pop_updates:
        merged = False
        for i in range(len(names)):
            if pop_update[0].auto_name == names[i]:
                counts[i] += 1
                pop_update_merges[i][1] += pop_update[1]
                merged = True
                break
        if not merged:
            names.append(pop_update[0].auto_name)
            counts.append(1)
            pop_update_merges.append([pop_update[0], pop_update[1]])
    for i in range(len(pop_update_merges)):
        pop_update_merges_finals.append(
            (pop_update_merges[i][0], pop_update_merges[i][1] / counts[i]))

    bn_updates = [(p, m * 0.05 + p * 0.95)
                  for p, m in pop_update_merges_finals]

    return model, bn_model, bn_updates
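# Aside (a sketch, not a drop-in replacement): the merge loop above can be
# written more compactly by grouping the (population, minibatch) pairs and
# averaging, assuming duplicate updates share the same population variable
# object (the loop above matches on auto_name instead).
from collections import OrderedDict

def merge_pop_updates(pop_updates):
    grouped = OrderedDict()
    for p, m in pop_updates:
        grouped.setdefault(p, []).append(m)
    return [(p, sum(ms) / len(ms)) for p, ms in grouped.items()]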
def main(mode, save_to, num_epochs,
         load_params=None, feature_maps=None, mlp_hiddens=None,
         conv_sizes=None, pool_sizes=None, stride=None, repeat_times=None,
         batch_size=None, num_batches=None, algo=None, test_set=None,
         valid_examples=None, dropout=None, max_norm=None, weight_decay=None,
         batch_norm=None):
    if feature_maps is None:
        feature_maps = [20, 50, 50]
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    if conv_sizes is None:
        conv_sizes = [5, 5, 5]
    if pool_sizes is None:
        pool_sizes = [2, 2, 2]
    if repeat_times is None:
        repeat_times = [1, 1, 1]
    if batch_size is None:
        batch_size = 500
    if valid_examples is None:
        valid_examples = 2500
    if stride is None:
        stride = 1
    if test_set is None:
        test_set = 'test'
    if algo is None:
        algo = 'rmsprop'
    if batch_norm is None:
        batch_norm = False

    image_size = (128, 128)
    output_size = 2

    if (len(feature_maps) != len(conv_sizes) or
            len(feature_maps) != len(pool_sizes) or
            len(feature_maps) != len(repeat_times)):
        raise ValueError("Inconsistent number of layers in feature_maps, "
                         "conv_sizes, pool_sizes and repeat_times")

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 3, image_size,
                    stride=stride,
                    filter_sizes=list(zip(conv_sizes, conv_sizes)),
                    feature_maps=feature_maps,
                    pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                    repeat_times=repeat_times,
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    batch_norm=batch_norm,
                    weights_init=Glorot(),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    single_x = tensor.tensor3('image_features')
    x = tensor.tensor4('image_features')
    single_y = tensor.lvector('targets')
    y = tensor.lmatrix('targets')

    # Training
    with batch_normalization(convnet):
        probs = convnet.apply(x)
    cost = (CategoricalCrossEntropy().apply(y.flatten(), probs)
            .copy(name='cost'))
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))

    cg = ComputationGraph([cost, error_rate])
    extra_updates = []

    if batch_norm:
        logger.debug("Apply batch norm")
        pop_updates = get_batch_normalization_updates(cg)
        # p stands for population mean
        # m stands for minibatch
        alpha = 0.005
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
        population_statistics = [p for p, m in extra_updates]
    if dropout:
        relu_outputs = VariableFilter(bricks=[Rectifier], roles=[OUTPUT])(cg)
        cg = apply_dropout(cg, relu_outputs, dropout)
    cost, error_rate = cg.outputs
    if weight_decay:
        logger.debug("Apply weight decay {}".format(weight_decay))
        cost += weight_decay * l2_norm(cg.parameters)
        cost.name = 'cost'

    # Validation
    valid_probs = convnet.apply_5windows(single_x)
    valid_cost = (CategoricalCrossEntropy().apply(single_y, valid_probs)
                  .copy(name='cost'))
    valid_error_rate = (MisclassificationRate().apply(
        single_y, valid_probs).copy(name='error_rate'))

    model = Model([cost, error_rate])

    if load_params:
        logger.info("Loaded params from {}".format(load_params))
        with open(load_params, 'r') as src:
            model.set_parameter_values(load_parameters(src))

    # Training stream with random cropping
    train = DogsVsCats(("train",),
                       subset=slice(None, 25000 - valid_examples, None))
    train_str = DataStream(
        train, iteration_scheme=ShuffledScheme(train.num_examples, batch_size))
    train_str = add_transformers(train_str, random_crop=True)

    # Validation stream without cropping
    valid = DogsVsCats(("train",),
                       subset=slice(25000 - valid_examples, None, None))
    valid_str = DataStream(
        valid, iteration_scheme=SequentialExampleScheme(valid.num_examples))
    valid_str = add_transformers(valid_str)

    if mode == 'train':
        directory, _ = os.path.split(sys.argv[0])
        env = dict(os.environ)
        env['THEANO_FLAGS'] = 'floatX=float32'
        port = numpy.random.randint(1025, 10000)
        server = subprocess.Popen(
            [directory + '/server.py', str(25000 - valid_examples),
             str(batch_size), str(port)],
            env=env, stderr=subprocess.STDOUT)
        train_str = ServerDataStream(
            ('image_features', 'targets'), produces_examples=False,
            port=port)

        save_to_base, save_to_extension = os.path.splitext(save_to)

        # Choose the optimization step rule
        if algo == 'rmsprop':
            step_rule = RMSProp(decay_rate=0.999, learning_rate=0.0003)
        elif algo == 'adam':
            step_rule = Adam()
        else:
            assert False
        if max_norm:
            conv_params = VariableFilter(bricks=[Convolutional],
                                         roles=[WEIGHT])(cg)
            linear_params = VariableFilter(bricks=[Linear],
                                           roles=[WEIGHT])(cg)
            step_rule = CompositeRule(
                [step_rule,
                 Restrict(VariableClipping(max_norm, axis=0), linear_params),
                 Restrict(VariableClipping(max_norm, axis=(1, 2, 3)),
                          conv_params)])

        algorithm = GradientDescent(
            cost=cost, parameters=model.parameters, step_rule=step_rule)
        algorithm.add_updates(extra_updates)

        # `Timing` extension reports time for reading data, aggregating a
        # batch and monitoring.
        extensions = [Timing(every_n_batches=100),
                      FinishAfter(after_n_epochs=num_epochs,
                                  after_n_batches=num_batches),
                      DataStreamMonitoring(
                          [valid_cost, valid_error_rate],
                          valid_str,
                          prefix="valid"),
                      TrainingDataMonitoring(
                          [cost, error_rate,
                           aggregation.mean(algorithm.total_gradient_norm)],
                          prefix="train",
                          after_epoch=True),
                      TrackTheBest("valid_error_rate"),
                      Checkpoint(save_to, save_separately=['log'],
                                 parameters=cg.parameters +
                                 (population_statistics if batch_norm else []),
                                 before_training=True, after_epoch=True)
                      .add_condition(
                          ['after_epoch'],
                          OnLogRecord("valid_error_rate_best_so_far"),
                          (save_to_base + '_best' + save_to_extension,)),
                      Printing(every_n_batches=100)]

        model = Model(cost)
        main_loop = MainLoop(
            algorithm,
            train_str,
            model=model,
            extensions=extensions)
        try:
            main_loop.run()
        finally:
            server.terminate()
    elif mode == 'test':
        classify = theano.function([single_x], valid_probs.argmax())

        test = DogsVsCats((test_set,))
        test_str = DataStream(
            test, iteration_scheme=SequentialExampleScheme(test.num_examples))
        test_str = add_transformers(test_str)

        correct = 0
        with open(save_to, 'w') as dst:
            print("id", "label", sep=',', file=dst)
            for index, example in enumerate(test_str.get_epoch_iterator()):
                image = example[0]
                prediction = classify(image)
                print(index + 1, prediction, sep=',', file=dst)
                if len(example) > 1 and prediction == example[1]:
                    correct += 1
        print(correct / float(test.num_examples))
    else:
        assert False
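# Illustrative entry point (the original script presumably parses these
# options from the command line; the file name and hyperparameter values
# below are arbitrary examples, not the authors' settings):
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main(mode='train', save_to='catsdogs.tar', num_epochs=50,
         batch_norm=True, dropout=0.5, weight_decay=1e-4)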