def get_streams(num_train_examples, batch_size, use_test=True):
    dataset = MNIST(("train",))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)
    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(
        DataStream.default_stream(
            dataset,
            iteration_scheme=ShuffledScheme(indices_train, batch_size)))

    valid_stream = None
    if len(indices_valid) != 0:
        valid_stream = Flatten(
            DataStream.default_stream(
                dataset,
                iteration_scheme=ShuffledScheme(indices_valid, batch_size)))

    test_stream = None
    if use_test:
        dataset = MNIST(("test",))
        ind = numpy.arange(dataset.num_examples)
        rng = numpy.random.RandomState(seed=1)
        rng.shuffle(ind)  # shuffle the test indices, not the train indices

        test_stream = Flatten(
            DataStream.default_stream(
                dataset,
                iteration_scheme=ShuffledScheme(ind, batch_size)))

    return train_stream, valid_stream, test_stream
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(
                DataStream.default_stream(
                    mnist_test,
                    iteration_scheme=SequentialScheme(
                        mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_final_cost',
                            'test_misclassificationrate_apply_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
def test_flatten_examples(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialExampleScheme(4)),
        which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2)
def test_flatten():
    stream = DataStream(
        IndexableDataset(
            OrderedDict([('features', numpy.ones((4, 2, 2))),
                         ('targets', numpy.array([0, 1, 0, 1]))])),
        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([0, 1])),
                  (numpy.ones((2, 4)), numpy.array([0, 1]))])
def test_flatten_batches(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2)),
        which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([[0], [1]])),
                  (numpy.ones((2, 4)), numpy.array([[0], [1]]))])
def get_streams(data_name, batch_size):
    if data_name == "mnist":
        map_fn = map_mnist
    elif data_name == "tfd":
        map_fn = map_tfd
    else:
        map_fn = None

    small_batch_size = max(1, batch_size // 10)

    # Our usual train/valid/test data streams...
    x_dim, data_train, data_valid, data_test = get_data(data_name)

    train_stream, valid_stream, test_stream = (
        Flatten(
            MapFeatures(
                DataStream(data,
                           iteration_scheme=ShuffledScheme(
                               data.num_examples, batch_size)),
                fn=map_fn),
            which_sources='features')
        for data, batch_size in ((data_train, batch_size),
                                 (data_valid, small_batch_size),
                                 (data_test, small_batch_size)))

    return x_dim, train_stream, valid_stream, test_stream
def get_stream(self, part, batch_size=None, max_length=None, seed=None,
               remove_keys=False, add_bos_=True,
               remove_n_identical_keys=True):
    dataset = self.get_dataset(part, max_length)
    if self._layout == 'lambada' and part == 'train':
        stream = DataStream(
            dataset,
            iteration_scheme=RandomSpanScheme(
                dataset.num_examples, max_length, seed))
        stream = Mapping(stream, listify)
    else:
        stream = dataset.get_example_stream()

    if add_bos_:
        stream = SourcewiseMapping(
            stream, functools.partial(add_bos, Vocabulary.BOS),
            which_sources=('words',))
    if max_length is not None:
        stream = SourcewiseMapping(
            stream, functools.partial(cut_if_too_long, max_length),
            which_sources=('words',))
    stream = SourcewiseMapping(stream, vectorize, which_sources=('words',))
    stream = SourcewiseMapping(stream, word_to_singleton_list,
                               which_sources=('keys',))
    stream = SourcewiseMapping(stream, vectorize, which_sources=('keys',))
    stream = Flatten(stream, which_sources=('keys',))

    if self._layout == 'dict':
        if remove_keys:
            stream = FilterSources(
                stream,
                [source for source in stream.sources if source != 'keys'])
        if remove_n_identical_keys:
            print "remove identical keys"
            stream = FilterSources(
                stream,
                [source for source in stream.sources
                 if source != 'n_identical_keys'])

    if not batch_size:
        return stream

    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size))
    stream = Padding(stream, mask_sources=('words',))
    # stream = Flatten(stream, which_sources=('n_identical_keys',))
    # if self._layout == 'dict':
    #     stream = FilterSources(stream, [source for source in stream.sources
    #                                     if source != 'keys_mask'])
    #     stream = FilterSources(stream, [source for source in stream.sources
    #                                     if source != 'n_identical_keys_mask'])
    return stream
def test_axis_labels_on_flatten_batches_with_none(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2),
                   axis_labels={'features': None,
                                'targets': ('batch', 'index')}),
        which_sources=('features',))
    assert_equal(wrapper.axis_labels,
                 {'features': None, 'targets': ('batch', 'index')})
def test_axis_labels_on_flatten_examples(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialExampleScheme(4),
                   axis_labels={'features': ('batch', 'width', 'height'),
                                'targets': ('batch', 'index')}),
        which_sources=('features',))
    assert_equal(wrapper.axis_labels,
                 {'features': ('feature',), 'targets': ('index',)})
def apply_transformers(data_stream):
    data_stream_ = Flatten(data_stream,
                           which_sources=['features_1', 'features_2'])
    data_stream_ = ScaleAndShift(data_stream_,
                                 which_sources=['features_1', 'features_2'],
                                 scale=2.0, shift=-1.0)
    return data_stream_
def get_mnist_streams(num_train_examples, batch_size):
    from fuel.datasets import MNIST
    dataset = MNIST(("train",))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)
    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(
        DataStream.default_stream(
            dataset,
            iteration_scheme=ShuffledScheme(indices_train, batch_size)),
        which_sources=('features',))
    valid_stream = Flatten(
        DataStream.default_stream(
            dataset,
            iteration_scheme=ShuffledScheme(indices_valid, batch_size)),
        which_sources=('features',))
    return train_stream, valid_stream
def get_mixed_streams(batch_size):
    from fuel.datasets import IterableDataset
    from fuel.transformers import Flatten

    data = numpy.load('data_train_100.npz')
    n = data['features_labeled'].shape[0]
    features_labeled = data['features_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    targets_labeled = data['targets_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    features_unlabeled = data['features_unlabeled'].reshape(
        (n // batch_size, batch_size, -1))
    dataset = IterableDataset({
        'features_labeled': features_labeled,
        'targets_labeled': targets_labeled,
        'features_unlabeled': features_unlabeled})
    train_stream = Flatten(DataStream(dataset),
                           which_sources=('targets_labeled',))

    data = numpy.load('data_test.npz')
    n = data['features_labeled'].shape[0]
    features_labeled = data['features_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    targets_labeled = data['targets_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    features_unlabeled = data['features_unlabeled'].reshape(
        (n // batch_size, batch_size, -1))
    dataset = IterableDataset({
        'features_labeled': features_labeled,
        'targets_labeled': targets_labeled,
        'features_unlabeled': features_unlabeled})
    test_stream = Flatten(DataStream(dataset),
                          which_sources=('targets_labeled',))

    return train_stream, test_stream
def get_stream(batch_size, source_window=4000, target_window=1000,
               num_examples=5000):
    from fuel.datasets.youtube_audio import YouTubeAudio
    data = YouTubeAudio('XqaJ2Ol5cC4')
    train_stream = data.get_example_stream()
    train_stream = ForceFloatX(train_stream)
    window_stream = Window(0, source_window, target_window,
                           overlapping=False, data_stream=train_stream)
    source_stream = FilterSources(window_stream, sources=('features',))
    feats_stream = Mapping(source_stream, mfcc)
    targets_stream = FilterSources(window_stream, sources=('targets',))
    targets_stream = Flatten(targets_stream)
    stream = Merge((feats_stream, targets_stream),
                   sources=('features', 'targets'))
    # Add a random scheme?
    it_scheme = ConstantScheme(batch_size, num_examples)
    batched_stream = Batch(stream, it_scheme, strictness=1)
    return batched_stream
def main(job_id, params):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(
        config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([AdaGrad(learning_rate=base_lr),
                                     VariableClipping(threshold=max_norm)])
    else:
        solver_type = CompositeRule([RMSProp(learning_rate=base_lr),
                                     VariableClipping(threshold=max_norm)])

    input_dim = {'l': 11427, 'r': 10519, 'b': 10519 + 11427}
    data_file = config.get('hyperparams', 'data_file')

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)
    else:
        train = H5PYDataset(data_file, which_set='train',
                            sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file, which_set='valid',
                            sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file, which_set='test',
                           sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')

    # Define a feed-forward net with an input, two hidden layers,
    # and a softmax output:
    model = MLP(activations=[Rectifier(name='h1'),
                             Rectifier(name='h2'),
                             Softmax(name='output')],
                dims=[input_dim[side], hidden_units, hidden_units, 2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [input for input in inputs
                      if input.name.startswith('linear_')]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]],
                                  input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(step_rule=solver_type,
                           params=dropout_graph.parameters,
                           cost=dropout_cost)
    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(
            dataset=train,
            iteration_scheme=ShuffledScheme(train.num_examples,
                                            batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring(
        [dropout_cost,
         aggregation.mean(error),
         aggregation.mean(algo.total_gradient_norm)],
        after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(
            dataset=valid,
            iteration_scheme=ShuffledScheme(valid.num_examples,
                                            batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(
        variables=[cost, error],
        data_stream=validation_stream,
        prefix='validation',
        after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(
        variables=[error],
        data_stream=test_stream,
        prefix='test',
        after_training=True)

    plotting = Plot('AdniNet_{}'.format(side),
                    channels=[['dropout_entropy', 'validation_entropy'],
                              ['error', 'validation_error']],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}net/{}'.format(side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started
    # to overfit
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.1,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[validation_monitor,
                                     training_monitor,
                                     plotting,
                                     FinishAfter(after_n_epochs=max_epoch),
                                     early_stopper,
                                     Printing(),
                                     ProgressBar(),
                                     checkpoint,
                                     test_monitor])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
logger = logging.getLogger(__name__)

FORMAT = '[%(asctime)s] %(name)s %(message)s'
DATEFMT = "%M:%D:%S"
logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.DEBUG)

inits = {
    'weights_init': IsotropicGaussian(0.01),
    'biases_init': Constant(0.)
}

batch_size = 100
data_train = MNIST(which_sets=['train'], sources=['features'])

train_stream = Flatten(
    DataStream.default_stream(
        data_train,
        iteration_scheme=SequentialScheme(
            data_train.num_examples, batch_size)))

features_size = 28 * 28 * 1

inputs = T.matrix('features')
test_data = {
    inputs: 255 * np.random.normal(
        size=(batch_size, 28 * 28)).astype('float32')
}

prior = Z_prior(dim=128)
gen = Generator(input_dim=128, dims=[128, 64, 64, features_size],
def _pokemon_dcgan(): inits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.) } batch_size = 20 data_train = PokemonGenYellowNormal(which_sets=['train'], sources=['features']) train_stream = Flatten(DataStream.default_stream( data_train, iteration_scheme=SequentialScheme( data_train.num_examples, batch_size))) features_size = 56 * 56 * 1 inputs = T.matrix('features') inputs = (inputs)/255. * 2. - 1. # rng = MRG_RandomStreams(123) # inputs = inputs * rng.binomial(size=inputs.shape, p=0.1) prior = Z_prior(dim=256) gen = Generator(input_dim=256, dims=[128, 64, 64, features_size], alpha=0.1, **inits) dis = Discriminator(dims=[features_size, 128, 64, 64], alpha=0.1, **inits) gan = GAN(dis=dis, gen=gen, prior=prior) gan.initialize() y_hat1, y_hat0, z = gan.apply(inputs) model = Model([y_hat0, y_hat1]) loss = WGANLoss() dis_obj, gen_obj = loss.apply(y_hat0, y_hat1) dis_obj.name = 'Discriminator loss' gen_obj.name = 'Generator loss' cg = ComputationGraph([gen_obj, dis_obj]) gen_filter = VariableFilter(roles=[PARAMETER], bricks=gen.linear_transformations) dis_filter = VariableFilter(roles=[PARAMETER], bricks=dis.linear_transformations) gen_params = gen_filter(cg.variables) dis_params = dis_filter(cg.variables) # Prepare the dropout _inputs = [] for brick_ in [gen]: _inputs.extend(VariableFilter(roles=[INPUT], bricks=brick_.linear_transformations)( cg.variables)) cg_dropout = apply_dropout(cg, _inputs, 0.02) gen_obj = cg_dropout.outputs[0] dis_obj = cg_dropout.outputs[1] gan.dis_params = dis_params gan.gen_params = gen_params algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj, model=gan, dis_iter=5, step_rule=RMSProp(learning_rate=1e-4), gen_consider_constant=z) neg_sample = gan.sampling(size=25) monitor = TrainingDataMonitoring(variables=[gen_obj, dis_obj], prefix="train", after_batch=True) subdir = './exp/' + 'pokemon' + "-" + time.strftime("%Y%m%d-%H%M%S") check_point = Checkpoint("{}/{}".format(subdir, 'pokemon'), every_n_epochs=100, save_separately=['log', 'model']) neg_sampling = GenerateNegtiveSample(neg_sample, img_size=(25, 56, 56), every_n_epochs=100) if not os.path.exists(subdir): os.makedirs(subdir) main_loop = MainLoop(algorithm=algo, model=model, data_stream=train_stream, extensions=[Printing(), ProgressBar(), monitor, check_point, neg_sampling]) main_loop.run()
#####################
mnist_train = MNIST(('train',))  # , sources=('features', 'targets'))
num_examples = 10  # mnist_train.num_examples

train_data_stream = Flatten(DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(num_examples, batch_size=batch_size)))
train_monitor_stream = Flatten(DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(num_examples, batch_size=batch_size)))

x = T.matrix('features')
y = T.matrix('targets')

epoch = train_data_stream.get_epoch_iterator()
for j, batch in enumerate(epoch):
    if j > 0:
        break

theano.config.compute_test_value = 'warn'
# ----------------------------------------------------------------------
logger.info("Loading dataset...")

x_dim, data_train, data_valid, data_test = datasets.get_data(args.data)

num_examples = data_test.num_examples

n_samples = (int(s) for s in args.nsamples.split(","))

dict_p = {}
dict_ps = {}
for K in n_samples:
    batch_size = max(args.max_batch // K, 1)

    stream = Flatten(
        DataStream(
            data_test,
            iteration_scheme=ShuffledScheme(num_examples, batch_size)),
        which_sources='features')

    log_p = np.asarray([])
    log_ps = np.asarray([])
    for batch in stream.get_epoch_iterator(as_dict=True):
        log_p_, log_ps_ = do_nll(batch['features'], K)
        log_p = np.concatenate((log_p, log_p_))
        log_ps = np.concatenate((log_ps, log_ps_))

    log_p_ = stats.sem(log_p)
    log_p = np.mean(log_p)
    log_ps_ = stats.sem(log_ps)
    log_ps = np.mean(log_ps)
def s(s):
    return Flatten(
        DataStream.default_stream(
            s, iteration_scheme=ShuffledScheme(s.num_examples,
                                               batch_size=256)))
inputs = VariableFilter([INPUT])(cost_graph.variables)
dropout_inputs = [input for input in inputs
                  if input.name.startswith('linear_')]
dropout_graph = apply_dropout(cost_graph, dropout_inputs, dropout_ratio)
dropout_cost = dropout_graph.outputs[0]
dropout_cost.name = 'dropout_entropy'

# Learning Algorithm:
algo = GradientDescent(
    step_rule=solver_type,
    params=dropout_graph.parameters,
    cost=dropout_cost)

# Data stream used for training model:
training_stream = Flatten(
    DataStream.default_stream(
        dataset=train,
        iteration_scheme=ShuffledScheme(
            train.num_examples,
            batch_size=train_batch)))

training_monitor = TrainingDataMonitoring([cost], after_batch=True)

# Use the 'valid' set for validation during training:
validation_stream = Flatten(
    DataStream.default_stream(
        dataset=valid,
        iteration_scheme=ShuffledScheme(
            valid.num_examples,
            batch_size=valid_batch)))

validation_monitor = DataStreamMonitoring(
    variables=[cost],
stream = DataStream.default_stream(
    train,
    iteration_scheme=ShuffledScheme(train.num_examples, 128))

# Enlarge images that are too small
downscale_stream = MinimumImageDimensions(
    stream, (64, 64), which_sources=('image_features',))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(
    downscale_stream, (32, 32), which_sources=('image_features',))

# We'll use a simple MLP, so we need to flatten the images
# from (channel, width, height) to simply (features,)
flattened_stream = Flatten(
    cropped_stream, which_sources=('image_features',))

# Create the Theano MLP
import theano
from theano import tensor
import numpy

X = tensor.matrix('image_features')
T = tensor.lmatrix('targets')

W = theano.shared(
    numpy.random.uniform(low=-0.01, high=0.01, size=(3072, 500)), 'W')
b = theano.shared(numpy.zeros(500))
V = theano.shared(
    numpy.random.uniform(low=-0.01, high=0.01, size=(500, 2)), 'V')
c = theano.shared(numpy.zeros(2))
def _pokemon_wgan_gp(): import os os.environ["FUEL_DATA_PATH"] = os.getcwd() + "/data/" batch_size = 20 data_train = PokemonGenYellowNormal(which_sets=['train'], sources=['features']) train_stream = Flatten(DataStream.default_stream( data_train, iteration_scheme=SequentialScheme( data_train.num_examples, batch_size))) features_size = 56 * 56 * 1 inits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.) } # print train_stream.get_epoch_iterator(as_dict=True).next() # raise inputs = T.matrix('features') inputs = ((inputs / 255.) * 2. - 1.) rng = MRG_RandomStreams(123) prior = Z_prior(dim=512) gen = Generator(input_dim=512, dims=[512, 512, 512, 512, features_size], alpha=0.1, **inits) dis = Discriminator(dims=[features_size, 512, 512 , 512, 512], alpha=0.1, **inits) gan = GAN(dis=dis, gen=gen, prior=prior) gan.initialize() # gradient penalty fake_samples, _ = gan.sampling(inputs.shape[0]) e = rng.uniform(size=(inputs.shape[0], 1)) mixed_input = (e * fake_samples) + (1 - e) * inputs output_d_mixed = gan._dis.apply(mixed_input) grad_mixed = T.grad(T.sum(output_d_mixed), mixed_input) norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=1)) grad_penalty = T.mean(T.square(norm_grad_mixed -1)) y_hat1, y_hat0, z = gan.apply(inputs) d_loss_real = y_hat1.mean() d_loss_fake = y_hat0.mean() d_loss = - d_loss_real + d_loss_fake + 10 * grad_penalty g_loss = - d_loss_fake dis_obj = d_loss gen_obj = g_loss model = Model([y_hat0, y_hat1]) em_loss = -d_loss_real + d_loss_fake em_loss.name = "Earth Move loss" dis_obj.name = 'Discriminator loss' gen_obj.name = 'Generator loss' cg = ComputationGraph([gen_obj, dis_obj]) gen_filter = VariableFilter(roles=[PARAMETER], bricks=gen.linear_transformations) dis_filter = VariableFilter(roles=[PARAMETER], bricks=dis.linear_transformations) gen_params = gen_filter(cg.variables) dis_params = dis_filter(cg.variables) # Prepare the dropout _inputs = [] for brick_ in [gen]: _inputs.extend(VariableFilter(roles=[INPUT], bricks=brick_.linear_transformations)(cg.variables)) cg_dropout = apply_dropout(cg, _inputs, 0.02) gen_obj = cg_dropout.outputs[0] dis_obj = cg_dropout.outputs[1] gan.dis_params = dis_params gan.gen_params = gen_params # gradient penalty algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj, model=gan, dis_iter=5, gradient_clip=None, step_rule=RMSProp(learning_rate=1e-4), gen_consider_constant=z) neg_sample = gan.sampling(size=25) from blocks.monitoring.aggregation import mean monitor = TrainingDataMonitoring(variables=[mean(gen_obj), mean(dis_obj), mean(em_loss)], prefix="train", after_batch=True) subdir = './exp/' + 'pokemon-wgan-gp' + "-" + time.strftime("%Y%m%d-%H%M%S") check_point = Checkpoint("{}/{}".format(subdir, 'CIFAR10'), every_n_epochs=100, save_separately=['log', 'model']) neg_sampling = GenerateNegtiveSample(neg_sample, img_size=(25, 56, 56), every_n_epochs=10) if not os.path.exists(subdir): os.makedirs(subdir) main_loop = MainLoop(algorithm=algo, model=model, data_stream=train_stream, extensions=[Printing(), ProgressBar(), monitor, check_point, neg_sampling]) main_loop.run()
def train(args, model_args): #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_' model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width = 32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width = 64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets=['train'], which_format="64", sources=('features', ), load_in_memory=False) dataset_test = CelebA(which_sets=['test'], which_format="64", sources=('features', ), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme=tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme=ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() if args.dataset != 'lsun' and args.dataset != 'celeba': train_stream = Flatten( DataStream.default_stream( dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples - (dataset_train.num_examples % args.batch_size), batch_size=args.batch_size))) else: train_stream = dataset_train test_stream = dataset_test print "Width", WIDTH, spatial_width shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. 
/ np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams ''' x = T.matrix('x', dtype='float32') temp = T.scalar('temp', dtype='float32') f=transition_operator(tparams, model_options, x, temp) for data in train_stream.get_epoch_iterator(): print data[0] a = f([data[0], 1.0, 1]) #ipdb.set_trace() ''' x, cost, start_temperature = build_model(tparams, model_options) inps = [x, start_temperature] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) #print 'Building f_cost...', #f_cost = theano.function(inps, cost) #print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' for param in tparams: print param print tparams[param].get_value().shape print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 1 print 'Number of steps....' print args.num_steps print "Number of metasteps...." print args.meta_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): if eidx % 20 == 0: params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(eidx) + '.npz') n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): if args.dataset == 'CIFAR10': if data[0].shape[0] == args.batch_size: data_use = (data[0].reshape(args.batch_size, 3 * 32 * 32), ) else: continue t0 = time.time() batch_index += 1 n_samples += len(data_use[0]) uidx += 1 if data_use[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() t1 = time.time() data_run = data_use[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): meta_cost.append(f_grad_shared(data_run, temperature_forward)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion( [data_run, temperature_forward, 1]) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) ud = time.time() - ud_start #gradient_updates_ = get_grads(data_use[0],args.temperature) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
t1 = time.time() #print time.time() - t1, "time to get grads" t1 = time.time() logger.log({ 'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost }) #'Norm_1': np.linalg.norm(gradient_updates_[0]), #'Norm_2': np.linalg.norm(gradient_updates_[1]), #'Norm_3': np.linalg.norm(gradient_updates_[2]), #'Norm_4': np.linalg.norm(gradient_updates_[3])}) #print time.time() - t1, "time to log" #print time.time() - t0, "total time in batch" t5 = time.time() if batch_index % 20 == 0: print batch_index, "cost", cost if batch_index % 200 == 0: count_sample += 1 temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) temperature_forward = args.temperature for num_step in range(args.num_steps * args.meta_steps): print "Forward temperature", temperature_forward if num_step == 0: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion( [data_use[0], temperature_forward, 1]) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step)) else: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion( [x_data, temperature_forward, 1]) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step)) temperature_forward = temperature_forward * args.temperature_factor x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' + str(batch_index)) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( [x_data, temperature, 0]) print 'On backward step number, using temperature', i, temperature reverse_time( scl, shft, x_data, model_dir + '/' + "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor if args.noise == "gaussian": x_sampled = np.random.normal( 0.5, 2.0, size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0) else: s = np.random.binomial(1, 0.5, INPUT_SIZE) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( [x_data, temperature, 0]) print 'On step number, using temperature', i, temperature reverse_time( scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ipdb.set_trace()
def main(name, dataset, epochs, batch_size, learning_rate, attention, n_iter, enc_dim, dec_dim, z_dim, oldmodel, live_plotting): image_size, channels, data_train, data_valid, data_test = datasets.get_data( dataset) train_stream = Flatten( DataStream.default_stream(data_train, iteration_scheme=SequentialScheme( data_train.num_examples, batch_size))) valid_stream = Flatten( DataStream.default_stream(data_valid, iteration_scheme=SequentialScheme( data_valid.num_examples, batch_size))) test_stream = Flatten( DataStream.default_stream(data_test, iteration_scheme=SequentialScheme( data_test.num_examples, batch_size))) if name is None: name = dataset img_height, img_width = image_size x_dim = channels * img_height * img_width rnninits = { #'weights_init': Orthogonal(), 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } inits = { #'weights_init': Orthogonal(), 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } # Configure attention mechanism if attention != "": read_N, write_N = attention.split(',') read_N = int(read_N) write_N = int(write_N) read_dim = 2 * channels * read_N**2 reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim, channels=channels, width=img_width, height=img_height, N=read_N, **inits) writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim, channels=channels, width=img_width, height=img_height, N=write_N, **inits) attention_tag = "r%d-w%d" % (read_N, write_N) else: read_dim = 2 * x_dim reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits) writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits) attention_tag = "full" #---------------------------------------------------------------------- if name is None: name = dataset # Learning rate def lr_tag(value): """ Convert a float into a short tag-usable string representation. 
E.g.: 0.1 -> 11 0.01 -> 12 0.001 -> 13 0.005 -> 53 """ exp = np.floor(np.log10(value)) leading = ("%e" % value)[0] return "%s%d" % (leading, -exp) lr_str = lr_tag(learning_rate) subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S") longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % ( dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str) pickle_file = subdir + "/" + longname + ".pkl" print("\nRunning experiment %s" % longname) print(" dataset: %s" % dataset) print(" subdirectory: %s" % subdir) print(" learning rate: %g" % learning_rate) print(" attention: %s" % attention) print(" n_iterations: %d" % n_iter) print(" encoder dimension: %d" % enc_dim) print(" z dimension: %d" % z_dim) print(" decoder dimension: %d" % dec_dim) print(" batch size: %d" % batch_size) print(" epochs: %d" % epochs) print() #---------------------------------------------------------------------- encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits) decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits) encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim], name="MLP_enc", **inits) decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim], name="MLP_dec", **inits) q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits) draw = DrawModel(n_iter, reader=reader, encoder_mlp=encoder_mlp, encoder_rnn=encoder_rnn, sampler=q_sampler, decoder_mlp=decoder_mlp, decoder_rnn=decoder_rnn, writer=writer) draw.initialize() #------------------------------------------------------------------------ x = tensor.matrix('features') x_recons, kl_terms = draw.reconstruct(x) recons_term = BinaryCrossEntropy().apply(x, x_recons) recons_term.name = "recons_term" cost = recons_term + kl_terms.sum(axis=0).mean() cost.name = "nll_bound" #------------------------------------------------------------ cg = ComputationGraph([cost]) params = VariableFilter(roles=[PARAMETER])(cg.variables) algorithm = GradientDescent( cost=cost, parameters=params, step_rule=CompositeRule([ StepClipping(10.), Adam(learning_rate), ]) #step_rule=RMSProp(learning_rate), #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95) ) #------------------------------------------------------------------------ # Setup monitors monitors = [cost] for t in range(n_iter): kl_term_t = kl_terms[t, :].mean() kl_term_t.name = "kl_term_%d" % t #x_recons_t = T.nnet.sigmoid(c[t,:,:]) #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t) #recons_term_t = recons_term_t.mean() #recons_term_t.name = "recons_term_%d" % t monitors += [kl_term_t] train_monitors = monitors[:] train_monitors += [aggregation.mean(algorithm.total_gradient_norm)] train_monitors += [aggregation.mean(algorithm.total_step_norm)] # Live plotting... 
plot_channels = [ ["train_nll_bound", "test_nll_bound"], ["train_kl_term_%d" % t for t in range(n_iter)], #["train_recons_term_%d" % t for t in range(n_iter)], ["train_total_gradient_norm", "train_total_step_norm"] ] #------------------------------------------------------------ if not os.path.exists(subdir): os.makedirs(subdir) plotting_extensions = [] if live_plotting: plotting_extensions = [Plot(name, channels=plot_channels)] main_loop = MainLoop( model=Model(cost), data_stream=train_stream, algorithm=algorithm, extensions=[ Timing(), FinishAfter(after_n_epochs=epochs), TrainingDataMonitoring( train_monitors, prefix="train", after_epoch=True), # DataStreamMonitoring( # monitors, # valid_stream, ## updates=scan_updates, # prefix="valid"), DataStreamMonitoring( monitors, test_stream, # updates=scan_updates, prefix="test"), #Checkpoint(name, before_training=False, after_epoch=True, save_separately=['log', 'model']), Checkpoint("{}/{}".format(subdir, name), save_main_loop=False, before_training=True, after_epoch=True, save_separately=['log', 'model']), SampleCheckpoint(image_size=image_size[0], channels=channels, save_subdir=subdir, before_training=True, after_epoch=True), ProgressBar(), Printing() ] + plotting_extensions) if oldmodel is not None: print("Initializing parameters with old model %s" % oldmodel) with open(oldmodel, "rb") as f: oldmodel = pickle.load(f) main_loop.model.set_parameter_values(oldmodel.get_param_values()) del oldmodel main_loop.run()
if resume:
    print "Restoring from previous breakpoint"
    extensions.extend([Load(path)])

return model, algorithm, extensions


if __name__ == '__main__':
    mnist = MNIST(("train",), sources=sources)
    mnist_test = MNIST(("test",), sources=sources)

    training_stream = Flatten(
        DataStream(
            mnist,
            iteration_scheme=ShuffledScheme(mnist.num_examples, batch_size)),
        which_sources=sources)
    # import ipdb; ipdb.set_trace()
    test_stream = Flatten(
        DataStream(
            mnist_test,
            iteration_scheme=ShuffledScheme(mnist_test.num_examples,
                                            batch_size)),
        which_sources=sources)

    print "Data loaded"

    if train:
        cost = create_network()
def main(name, model, epochs, batch_size, learning_rate, bokeh, layers, gamma, rectifier, predict, dropout, qlinear, sparse): runname = "vae%s-L%s%s%s%s-l%s-g%s-b%d" % (name, layers, 'r' if rectifier else '', 'd' if dropout else '', 'l' if qlinear else '', shnum(learning_rate), shnum(gamma), batch_size//100) if rectifier: activation = Rectifier() full_weights_init = Orthogonal() else: activation = Tanh() full_weights_init = Orthogonal() if sparse: runname += '-s%d'%sparse weights_init = Sparse(num_init=sparse, weights_init=full_weights_init) else: weights_init = full_weights_init layers = map(int,layers.split(',')) encoder_layers = layers[:-1] encoder_mlp = MLP([activation] * (len(encoder_layers)-1), encoder_layers, name="MLP_enc", biases_init=Constant(0.), weights_init=weights_init) enc_dim = encoder_layers[-1] z_dim = layers[-1] if qlinear: sampler = Qlinear(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init) else: sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init) decoder_layers = layers[:] ## includes z_dim as first layer decoder_layers.reverse() decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Logistic()], decoder_layers, name="MLP_dec", biases_init=Constant(0.), weights_init=weights_init) vae = VAEModel(encoder_mlp, sampler, decoder_mlp) vae.initialize() x = tensor.matrix('features')/256. x.tag.test_value = np.random.random((batch_size,layers[0])).astype(np.float32) if predict: mean_z, enc = vae.mean_z(x) # cg = ComputationGraph([mean_z, enc]) newmodel = Model([mean_z,enc]) else: x_recons, kl_terms = vae.reconstruct(x) recons_term = BinaryCrossEntropy().apply(x, x_recons) recons_term.name = "recons_term" cost = recons_term + kl_terms.mean() cg = ComputationGraph([cost]) if gamma > 0: weights = VariableFilter(roles=[WEIGHT])(cg.variables) cost += gamma * blocks.theano_expressions.l2_norm(weights) cost.name = "nll_bound" newmodel = Model(cost) if dropout: from blocks.roles import INPUT inputs = VariableFilter(roles=[INPUT])(cg.variables) # dropout_target = [v for k,v in newmodel.get_params().iteritems() # if k.find('MLP')>=0 and k.endswith('.W') and not k.endswith('MLP_enc/linear_0.W')] dropout_target = filter(lambda x: x.name.startswith('linear_'), inputs) cg = apply_dropout(cg, dropout_target, 0.5) target_cost = cg.outputs[0] else: target_cost = cost if name == 'mnist': if predict: train_ds = MNIST("train") else: train_ds = MNIST("train", sources=['features']) test_ds = MNIST("test") else: datasource_dir = os.path.join(fuel.config.data_path, name) datasource_fname = os.path.join(datasource_dir , name+'.hdf5') if predict: train_ds = H5PYDataset(datasource_fname, which_set='train') else: train_ds = H5PYDataset(datasource_fname, which_set='train', sources=['features']) test_ds = H5PYDataset(datasource_fname, which_set='test') train_s = Flatten(DataStream(train_ds, iteration_scheme=ShuffledScheme( train_ds.num_examples, batch_size))) test_s = Flatten(DataStream(test_ds, iteration_scheme=ShuffledScheme( test_ds.num_examples, batch_size))) if predict: from itertools import chain fprop = newmodel.get_theano_function() allpdata = None alledata = None f = train_s.sources.index('features') assert f == test_s.sources.index('features') sources = test_s.sources alllabels = dict((s,[]) for s in sources if s != 'features') for data in chain(train_s.get_epoch_iterator(), test_s.get_epoch_iterator()): for s,d in zip(sources,data): if s != 'features': alllabels[s].extend(list(d)) pdata, 
edata = fprop(data[f]) if allpdata is None: allpdata = pdata else: allpdata = np.vstack((allpdata, pdata)) if alledata is None: alledata = edata else: alledata = np.vstack((alledata, edata)) print 'Saving',allpdata.shape,'intermidiate layer, for all training and test examples, to',name+'_z.npy' np.save(name+'_z', allpdata) print 'Saving',alledata.shape,'last encoder layer to',name+'_e.npy' np.save(name+'_e', alledata) print 'Saving additional labels/targets:',','.join(alllabels.keys()), print ' of size',','.join(map(lambda x: str(len(x)),alllabels.values())), print 'to',name+'_labels.pkl' with open(name+'_labels.pkl','wb') as fp: pickle.dump(alllabels, fp, -1) else: cg = ComputationGraph([target_cost]) algorithm = GradientDescent( cost=target_cost, params=cg.parameters, step_rule=Adam(learning_rate) # Scale(learning_rate=learning_rate) ) extensions = [] if model: extensions.append(Load(model)) extensions += [Timing(), FinishAfter(after_n_epochs=epochs), DataStreamMonitoring( [cost, recons_term], test_s, prefix="test"), TrainingDataMonitoring( [cost, aggregation.mean(algorithm.total_gradient_norm)], prefix="train", after_epoch=True), Checkpoint(runname, every_n_epochs=10), Printing()] if bokeh: extensions.append(Plot( 'Auto', channels=[ ['test_recons_term','test_nll_bound','train_nll_bound' ], ['train_total_gradient_norm']])) main_loop = MainLoop( algorithm, train_s, model=newmodel, extensions=extensions) main_loop.run()
    width=img_width,
    N=N,
    n_iter=n_iter,
    sources=("features", "bbox_lefts", "bbox_tops",
             "bbox_widths", "bbox_heights"),
)

batch_size = 1000
num_examples = int(svhn.num_examples / batch_size) + 1
evaluation = True

# num_examples = 100
# batch_size = 1
# evaluation = False

svhn_stream = Flatten(
    DataStream.default_stream(
        svhn,
        iteration_scheme=SequentialScheme(svhn.num_examples, batch_size)))
svhn_stream.get_epoch_iterator()

x = T.fmatrix("features")
batch_size = T.iscalar("batch_size")

center_y, center_x, deltaY, deltaX = locator.find(x, batch_size)

do_sample = theano.function(
    [x, batch_size],
    outputs=[center_y, center_x, deltaY, deltaX],
    allow_input_downcast=True)

overlap = 0.0
distance = 0.0
        nonlinearity=None)
    '''
    return net['conv1_1']


if __name__ == '__main__':
    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features',))
    dataset_test = MNIST(['test'], sources=('features',))
    n_colors = 1
    spatial_width = 28

    train_stream = Flatten(
        DataStream.default_stream(
            dataset_train,
            iteration_scheme=ShuffledScheme(
                examples=(dataset_train.num_examples -
                          (dataset_train.num_examples % 32)),
                batch_size=32)))

    shp = next(train_stream.get_epoch_iterator())[0].shape

    input_ = T.tensor4('inputs_var')

    unet = buildUnet(1, dropout=True, input_var=input_, trainable=True)
    output = unet.get_output_for(input_)
    test_prediction = lasagne.layers.get_output(unet, deterministic=True)[0]
    # test_prediction_dimshuffle = test_prediction.dimshuffle((0, 2, 3, 1))
    pred_fcn_fn = theano.function([input_], test_prediction)

    for data in train_stream.get_epoch_iterator():
        data_use = (data[0].reshape(32, 1, 28, 28),)
        out_put = pred_fcn_fn(data_use[0])

    import ipdb
def train(args, model_args, lrate): model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir logger = mimir.Logger(filename=model_dir2 + '/' + model_id2 + 'log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() train_stream = Flatten( DataStream.default_stream(dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples, batch_size=args.batch_size))) shp = next(train_stream.get_epoch_iterator())[0].shape # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_ and os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) ''' x = T.matrix('x', dtype='float32') f=transition_operator(tparams, model_options, x, 1) for data in train_stream.get_epoch_iterator(): print data[0] a = f(data[0]) print a ipdb.set_trace() ''' x, cost = build_model(tparams, model_options) inps = [x] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) print 'Building f_cost...', f_cost = theano.function(inps, cost) print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 0 print 'Number of steps....' 
print args.num_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): batch_index += 1 n_samples += len(data[0]) uidx += 1 if data[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() cost = f_grad_shared(data[0]) f_update(lrate) ud = time.time() - ud_start if batch_index % 1 == 0: print 'Cost is this', cost count_sample += 1 from impainting import change_image, inpainting train_temp = data[0] print data[0].shape change_image(train_temp.reshape(args.batch_size, 1, 28, 28), 3) train_temp = train_temp.reshape(args.batch_size, 784) output = inpainting(train_temp) change_image(output.reshape(args.batch_size, 1, 28, 28), 1) reverse_time( scl, shft, output, model_dir + '/' + 'impainting_orig_' + 'epoch_' + str(count_sample) + '_batch_index_' + str(batch_index)) x_data = np.asarray(output).astype('float32') temperature = args.temperature * (args.temperature_factor **(args.num_steps - 1)) temperature = args.temperature #* (args.temperature_factor ** (args.num_steps -1 )) orig_impainted_data = np.asarray(data[0]).astype('float32') for i in range(args.num_steps + args.extra_steps + 5): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( x_data, temperature) print 'Impainting using temperature', i, temperature x_data = do_half_image(x_data, orig_impainted_data) reverse_time( scl, shft, x_data, model_dir + '/' + 'impainting_orig_' + 'epoch_' + str(count_sample) + '_batch_index_' + str(batch_index) + 'step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature = temperature #temperature /= args.temperature_factor ipdb.set_trace()
import SVRT_analysis_helper_functions

# Initialize vars and theano function
num_estimates = 20
num_training = 6  # per evaluation
num_testing = 1  # per evaluation
batch_size = (num_estimates * num_training +
              num_estimates * num_testing)  # get a bunch just to be sure
image_size, channels, data_train, data_valid, data_test = \
    datasets.get_data('sketch')
rows = 10
cols = 20
N_iter = 64

# Load images
train_stream = Flatten(
    DataStream.default_stream(
        data_train,
        iteration_scheme=SequentialScheme(data_train.num_examples,
                                          batch_size)))
train_batch = train_stream.get_epoch_iterator()
train_image = train_batch.data_stream.get_data()[0][:].reshape(
    batch_size, 32, 32)
train_labels = train_batch.data_stream.get_data()[1]

test_stream = Flatten(
    DataStream.default_stream(
        data_test,
        iteration_scheme=SequentialScheme(data_test.num_examples,
                                          batch_size)))
test_batch = test_stream.get_epoch_iterator()
test_image = test_batch.data_stream.get_data()[0][:].reshape(
    batch_size, 32, 32)
test_labels = test_batch.data_stream.get_data()[1]

# Load old model
# model_file = 'new_test-20160313-125114/new_test_model'
model_file = 'all_params-20160315-221022/all_params_model'
with open(model_file, "rb") as f:
    model = pickle.load(f)
draw = model.get_top_bricks()[0]
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()], 1)
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff**2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the ground truth should be greater than or equal to its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less than or equal to its actual score
        cost += tensor.maximum(
            0, score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]),
                            y.flatten()])**2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]),
                             scores.argmax(axis=1)])**2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (-scores[indices,
                        (scores + epsilon * target_scores).argmax(axis=1)] +
                scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)] -
                scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost_name)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate, momentum=momentum)
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=rule)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                 which_sources=('features',)),
                             prefix="test"),
        # CallbackExtension(
        #     lambda: rule.learning_rate.set_value(
        #         rule.learning_rate.get_value() * 0.9),
        #     after_epoch=True),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm),
             rule.learning_rate],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]
    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_cost', 'test_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                             which_sources=('features',)),
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {
        'cost': cost_name,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'train_cost': df.train_cost.iloc[-1],
        'test_cost': df.test_cost.iloc[-1],
        'best_test_cost': df.test_cost.min(),
        'train_error': df.train_error_rate.iloc[-1],
        'test_error': df.test_error_rate.iloc[-1],
        'best_test_error': df.test_error_rate.min()
    }
    res = {k: float(v) if isinstance(v, numpy.ndarray) else v
           for k, v in res.items()}
    json.dump(res, sys.stdout)
    sys.stdout.flush()
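# All of the costs above are built from score_diff = scores - target_scores,
# where target_scores is a one-hot encoding of the labels. A small numpy
# sketch (toy values, not taken from the script) of the 'mse' and 'svm'
# variants, useful for checking the indexing:
import numpy

scores = numpy.array([[2.0, 0.5, 1.0],
                      [0.1, 0.2, 3.0]])   # (batch, classes)
y = numpy.array([0, 2])                   # ground-truth labels
indices = numpy.arange(len(y))

target_scores = numpy.zeros_like(scores)
target_scores[indices, y] = 1             # one-hot targets
score_diff = scores - target_scores

mse_cost = (score_diff ** 2).mean()

# 'svm': most-violating class under a margin of 1 versus the true class
violators = (scores - 1 * target_scores).argmax(axis=1)
svm_cost = (scores[indices, violators] - scores[indices, y]).mean()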
def main(save_to, model, train, test, num_epochs, input_size=(150, 150),
         learning_rate=0.01, batch_size=50, num_batches=None,
         flatten_stream=False):
    """
    save_to : path where the trained model is saved
    model : model given as input; it must already be initialised
        (works with convnet and mlp)
    input_size : shape the input images are resized to (before flattening
        is applied, if flatten_stream is True)
    """
    if flatten_stream:
        x = tensor.matrix('image_features')
    else:
        x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    # Data augmentation
    # insert data augmentation here

    # Generating stream
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size))
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=ShuffledScheme(test.num_examples, batch_size))

    # Reshaping procedure
    # Add a crop option in scikitresize so that the image is not deformed
    # Resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size,
                                which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size,
                               which_sources=('image_features',))

    # Flattening the stream
    if flatten_stream is True:
        train_stream = Flatten(train_stream,
                               which_sources=('image_features',))
        test_stream = Flatten(test_stream,
                              which_sources=('image_features',))

    # Apply input to model
    probs = model.apply(x)

    # Defining cost and various indices to watch
    # print(probs)
    # cost = SquaredError().apply(y.flatten(), probs)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(
        name='cost')
    error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(
        name='error_rate')

    # Building Computation Graph
    cg = ComputationGraph([cost, error_rate])

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=learning_rate))

    # Defining extensions
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train", every_n_batches=5),
        DataStreamMonitoring([cost, error_rate], test_stream,
                             prefix="test", every_n_batches=25),
        Checkpoint(save_to),
        ProgressBar(),
        Printing(every_n_batches=5)]
    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.

    model = Model(cost)
    main_loop = MainLoop(algorithm, train_stream, model=model,
                         extensions=extensions)
    main_loop.run()
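# The "insert data augmentation here" placeholder above could be filled with
# one of Fuel's image transformers once train_stream has been created. A
# minimal sketch using Fuel's RandomFixedSizeCrop; the window size is an
# illustrative choice and assumes the raw channel-first 'image_features'
# are larger than the crop window:
from fuel.transformers.image import RandomFixedSizeCrop

# Randomly crop a fixed window from each training image; only the training
# stream is augmented so that evaluation stays deterministic.
train_stream = RandomFixedSizeCrop(train_stream,
                                   window_shape=(200, 200),
                                   which_sources=('image_features',))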
    from fuel.datasets import CIFAR10
    dataset_train = CIFAR10(['train'], sources=('features',))
    dataset_test = CIFAR10(['test'], sources=('features',))
    n_colors = 3
    spatial_width = 32
elif args.dataset == 'IMAGENET':
    from imagenet_data import IMAGENET
    spatial_width = 128
    dataset_train = IMAGENET(['train'], width=spatial_width)
    dataset_test = IMAGENET(['test'], width=spatial_width)
    n_colors = 3
else:
    raise ValueError("Unknown dataset %s." % args.dataset)

train_stream = Flatten(
    DataStream.default_stream(
        dataset_train,
        iteration_scheme=ShuffledScheme(
            examples=dataset_train.num_examples,
            batch_size=args.batch_size)))
test_stream = Flatten(
    DataStream.default_stream(
        dataset_test,
        iteration_scheme=ShuffledScheme(
            examples=dataset_test.num_examples,
            batch_size=args.batch_size)))

shp = next(train_stream.get_epoch_iterator())[0].shape

# make the training data 0 mean and variance 1
# TODO compute mean and variance on full dataset, not minibatch
Xbatch = next(train_stream.get_epoch_iterator())[0]
scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
shft = -np.mean(Xbatch * scl)
# scale is applied before shift
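# The TODO above notes that scl and shft are estimated from a single
# minibatch. A sketch of how the statistics could instead be accumulated
# over the whole training stream (np is numpy, as in the surrounding code):
n, s, ss = 0, 0., 0.
for batch in train_stream.get_epoch_iterator():
    X = batch[0]
    n += X.size
    s += X.sum()
    ss += (X ** 2).sum()
mean = s / n
std = np.sqrt(ss / n - mean ** 2)
scl = 1. / std          # scale is applied before shift, as above
shft = -mean * scl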
          dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()

# Calculate the loss function
x = T.matrix('features')
y = T.lmatrix('targets')

y_hat = mlp.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)

# load training data using Fuel
mnist_train = MNIST(("train",))
train_stream = Flatten(
    DataStream.default_stream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 128)))

# load testing data
mnist_test = MNIST(("test",))
test_stream = Flatten(
    DataStream.default_stream(
        dataset=mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 1024)))

# train the model
from blocks.model import Model
main_loop = MainLoop(
    model=Model(cost),
    data_stream=train_stream,
    algorithm=GradientDescent(
        cost=cost,
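# The snippet above is cut off inside the GradientDescent call. A sketch of
# how the remaining wiring typically looks in Blocks; the step rule, number
# of epochs and monitoring choices below are illustrative, not the original
# author's settings:
from blocks.algorithms import GradientDescent, Scale
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import DataStreamMonitoring
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.model import Model

algorithm = GradientDescent(cost=cost,
                            parameters=ComputationGraph(cost).parameters,
                            step_rule=Scale(learning_rate=0.1))
main_loop = MainLoop(
    algorithm=algorithm,
    data_stream=train_stream,
    model=Model(cost),
    extensions=[FinishAfter(after_n_epochs=5),
                DataStreamMonitoring([cost, error_rate], test_stream,
                                     prefix="test"),
                Printing()])
main_loop.run()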
# In[12]:

batch[1].shape


# ## Transformers

# In[13]:

from fuel.transformers import Flatten


# In[14]:

data_stream = Flatten(data_stream)


# In[15]:

epoch = data_stream.get_epoch_iterator()
batch = next(epoch)  # (ndarray, ndarray)


# In[16]:

batch[0].shape


# In[17]:
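# A short standalone sketch (not one of the notebook's cells above) of what
# Flatten does to batch shapes, using the same IndexableDataset pattern as
# the tests earlier in this collection; the 8-example, 1x28x28 array is a
# made-up stand-in for real image data.
from collections import OrderedDict
import numpy
from fuel.datasets import IndexableDataset
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream
from fuel.transformers import Flatten

dataset = IndexableDataset(OrderedDict([
    ('features', numpy.zeros((8, 1, 28, 28), dtype='float32')),
    ('targets', numpy.zeros((8, 1), dtype='int64'))]))
stream = DataStream(dataset, iteration_scheme=SequentialScheme(8, 4))
batch = next(stream.get_epoch_iterator())
batch[0].shape   # (4, 1, 28, 28)

flat_stream = Flatten(stream, which_sources=('features',))
batch = next(flat_stream.get_epoch_iterator())
batch[0].shape   # (4, 784)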