Example No. 1
def get_streams(num_train_examples, batch_size, use_test=True):
    dataset = MNIST(("train", ))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)

    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(
        DataStream.default_stream(dataset,
                                  iteration_scheme=ShuffledScheme(
                                      indices_train, batch_size)))

    valid_stream = None
    if len(indices_valid) != 0:
        valid_stream = Flatten(
            DataStream.default_stream(dataset,
                                      iteration_scheme=ShuffledScheme(
                                          indices_valid, batch_size)))

    test_stream = None
    if use_test:
        dataset = MNIST(("test", ))
        ind = numpy.arange(dataset.num_examples)
        rng = numpy.random.RandomState(seed=1)
        rng.shuffle(ind)

        test_stream = Flatten(
            DataStream.default_stream(dataset,
                                      iteration_scheme=ShuffledScheme(
                                          ind, batch_size)))

    return train_stream, valid_stream, test_stream
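
For context, a minimal usage sketch (hypothetical, assuming Fuel's MNIST data is available locally). Since no which_sources is given, Flatten here reshapes every source batch to two dimensions:

train_stream, valid_stream, test_stream = get_streams(
    num_train_examples=50000, batch_size=128)
features, targets = next(train_stream.get_epoch_iterator())
# MNIST 'features' batches of shape (128, 1, 28, 28) come out as (128, 784).
print(features.shape)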
Example No. 2
 def test_flatten_examples(self):
     wrapper = Flatten(DataStream(
         IndexableDataset(self.data),
         iteration_scheme=SequentialExampleScheme(4)),
                       which_sources=('features', ))
     assert_equal(list(wrapper.get_epoch_iterator()),
                  [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2)
Example No. 3
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example No. 4
 def test_flatten_batches(self):
     wrapper = Flatten(DataStream(IndexableDataset(self.data),
                                  iteration_scheme=SequentialScheme(4, 2)),
                       which_sources=('features', ))
     assert_equal(list(wrapper.get_epoch_iterator()),
                  [(numpy.ones((2, 4)), numpy.array([[0], [1]])),
                   (numpy.ones((2, 4)), numpy.array([[0], [1]]))])
Example No. 5
 def test_flatten_examples(self):
     wrapper = Flatten(
         DataStream(IndexableDataset(self.data),
                    iteration_scheme=SequentialExampleScheme(4)),
         which_sources=('features',))
     assert_equal(
         list(wrapper.get_epoch_iterator()),
         [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2)
Example No. 6
def test_flatten():
    stream = DataStream(IndexableDataset(
        OrderedDict([('features', numpy.ones((4, 2, 2))),
                     ('targets', numpy.array([0, 1, 0, 1]))])),
                        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features', ))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([0, 1])),
                  (numpy.ones((2, 4)), numpy.array([0, 1]))])
Example No. 7
 def test_flatten_batches(self):
     wrapper = Flatten(
         DataStream(IndexableDataset(self.data),
                    iteration_scheme=SequentialScheme(4, 2)),
         which_sources=('features',))
     assert_equal(
         list(wrapper.get_epoch_iterator()),
         [(numpy.ones((2, 4)), numpy.array([[0], [1]])),
          (numpy.ones((2, 4)), numpy.array([[0], [1]]))])
Example No. 8
def test_flatten():
    stream = DataStream(
        IndexableDataset(OrderedDict([("features", numpy.ones((4, 2, 2))), ("targets", numpy.array([0, 1, 0, 1]))])),
        iteration_scheme=SequentialScheme(4, 2),
    )
    wrapper = Flatten(stream, which_sources=("features",))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.ones((2, 4)), numpy.array([0, 1])), (numpy.ones((2, 4)), numpy.array([0, 1]))],
    )
Example No. 9
def test_flatten():
    stream = DataStream(
        IndexableDataset({'features': numpy.ones((4, 2, 2)),
                         'targets': numpy.array([0, 1, 0, 1])}),
        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features',))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.ones((2, 4)), numpy.array([0, 1])),
         (numpy.ones((2, 4)), numpy.array([0, 1]))])
Example No. 10
def get_streams(data_name, batch_size):

    if data_name == "mnist":
        map_fn = map_mnist
    elif data_name == "tfd":
        map_fn = map_tfd
    else:
        map_fn = None

    small_batch_size = max(1, batch_size // 10)

    # Our usual train/valid/test data streams...
    x_dim, data_train, data_valid, data_test = get_data(data_name)
    train_stream, valid_stream, test_stream = (Flatten(
        MapFeatures(DataStream(data,
                               iteration_scheme=ShuffledScheme(
                                   data.num_examples, batch_size)),
                    fn=map_fn),
        which_sources=('features',)) for data, batch_size in ((data_train,
                                                            batch_size),
                                                           (data_valid,
                                                            small_batch_size),
                                                           (data_test,
                                                            small_batch_size)))

    return x_dim, train_stream, valid_stream, test_stream
Example No. 11
File: data.py  Project: tombosc/cpae
    def get_stream(self,
                   part,
                   batch_size=None,
                   max_length=None,
                   seed=None,
                   remove_keys=False,
                   add_bos_=True,
                   remove_n_identical_keys=True):
        dataset = self.get_dataset(part, max_length)
        if self._layout == 'lambada' and part == 'train':
            stream = DataStream(dataset,
                                iteration_scheme=RandomSpanScheme(
                                    dataset.num_examples, max_length, seed))
            stream = Mapping(stream, listify)
        else:
            stream = dataset.get_example_stream()

        if add_bos_:
            stream = SourcewiseMapping(stream,
                                       functools.partial(
                                           add_bos, Vocabulary.BOS),
                                       which_sources=('words',))
        if max_length is not None:
            stream = SourcewiseMapping(stream,
                                       functools.partial(
                                           cut_if_too_long, max_length),
                                       which_sources=('words',))
        stream = SourcewiseMapping(stream, vectorize, which_sources=('words',))
        stream = SourcewiseMapping(stream,
                                   word_to_singleton_list,
                                   which_sources=('keys',))
        stream = SourcewiseMapping(stream, vectorize, which_sources=('keys',))
        stream = Flatten(stream, which_sources=('keys',))

        if self._layout == 'dict':
            if remove_keys:
                stream = FilterSources(
                    stream,
                    [source for source in stream.sources if source != 'keys'])
            if remove_n_identical_keys:
                print "remove identical keys"
                stream = FilterSources(stream, [
                    source for source in stream.sources
                    if source != 'n_identical_keys'
                ])
        if not batch_size:
            return stream

        stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size))

        stream = Padding(stream, mask_sources=('words',))
        #stream = Flatten(stream, which_sources=('n_identical_keys'))

        #if self._layout == 'dict':
        #    stream = FilterSources(stream, [source for source in stream.sources
        #                                    if source != 'keys_mask'])
        #    stream = FilterSources(stream, [source for source in stream.sources
        #                                    if source != 'n_identical_keys_mask'])
        return stream
Example No. 12
 def test_axis_labels_on_flatten_batches_with_none(self):
     wrapper = Flatten(
         DataStream(IndexableDataset(self.data),
                    iteration_scheme=SequentialScheme(4, 2),
                    axis_labels={'features': None,
                                 'targets': ('batch', 'index')}),
         which_sources=('features',))
     assert_equal(wrapper.axis_labels, {'features': None,
                                        'targets': ('batch', 'index')})
Example No. 13
 def test_axis_labels_on_flatten_examples(self):
     wrapper = Flatten(
         DataStream(IndexableDataset(self.data),
                    iteration_scheme=SequentialExampleScheme(4),
                    axis_labels={'features': ('batch', 'width', 'height'),
                                 'targets': ('batch', 'index')}),
         which_sources=('features',))
     assert_equal(wrapper.axis_labels, {'features': ('feature',),
                                        'targets': ('index',)})
Example No. 14
def apply_transformers(data_stream):

    data_stream_ = Flatten(data_stream,
                           which_sources=['features_1', 'features_2'])
    data_stream_ = ScaleAndShift(data_stream_,
                                 which_sources=['features_1', 'features_2'],
                                 scale=2.0,
                                 shift=-1.0)

    return data_stream_
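
A hypothetical usage sketch of apply_transformers; the dataset and scheme below are illustrative, not from the original project. Each listed source is flattened to (batch, features) and then mapped through x * 2.0 - 1.0:

import numpy
from fuel.datasets import IndexableDataset
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

dataset = IndexableDataset({'features_1': numpy.ones((4, 2, 2)),
                            'features_2': numpy.zeros((4, 2, 2))})
stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))
stream = apply_transformers(stream)
batch = next(stream.get_epoch_iterator(as_dict=True))
# batch['features_1'] is all 1.0 and batch['features_2'] is all -1.0,
# each with shape (2, 4).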
Example No. 15
def get_mnist_streams(num_train_examples, batch_size):
    from fuel.datasets import MNIST
    dataset = MNIST(("train", ))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)

    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(DataStream.default_stream(
        dataset, iteration_scheme=ShuffledScheme(indices_train, batch_size)),
                           which_sources=('features', ))

    valid_stream = Flatten(DataStream.default_stream(
        dataset, iteration_scheme=ShuffledScheme(indices_valid, batch_size)),
                           which_sources=('features', ))

    return train_stream, valid_stream
Example No. 16
def get_mixed_streams(batch_size):
    from fuel.datasets import IterableDataset
    from fuel.transformers import Flatten
    data = numpy.load('data_train_100.npz')
    n = data['features_labeled'].shape[0]
    features_labeled = data['features_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    targets_labeled = data['targets_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    features_unlabeled = data['features_unlabeled'].reshape(
        (n // batch_size, batch_size, -1))

    dataset = IterableDataset({
        'features_labeled': features_labeled,
        'targets_labeled': targets_labeled,
        'features_unlabeled': features_unlabeled
    })
    train_stream = Flatten(DataStream(dataset),
                           which_sources=('targets_labeled', ))

    data = numpy.load('data_test.npz')
    n = data['features_labeled'].shape[0]
    features_labeled = data['features_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    targets_labeled = data['targets_labeled'].reshape(
        (n // batch_size, batch_size, -1))
    features_unlabeled = data['features_unlabeled'].reshape(
        (n // batch_size, batch_size, -1))

    dataset = IterableDataset({
        'features_labeled': features_labeled,
        'targets_labeled': targets_labeled,
        'features_unlabeled': features_unlabeled
    })
    test_stream = Flatten(DataStream(dataset),
                          which_sources=('targets_labeled', ))

    return train_stream, test_stream
Example No. 17
def get_stream(batch_size,
               source_window=4000,
               target_window=1000,
               num_examples=5000):
    from fuel.datasets.youtube_audio import YouTubeAudio
    data = YouTubeAudio('XqaJ2Ol5cC4')
    train_stream = data.get_example_stream()
    train_stream = ForceFloatX(train_stream)
    window_stream = Window(0,
                           source_window,
                           target_window,
                           overlapping=False,
                           data_stream=train_stream)
    source_stream = FilterSources(window_stream, sources=('features', ))
    feats_stream = Mapping(source_stream, mfcc)
    targets_stream = FilterSources(window_stream, sources=('targets', ))
    targets_stream = Flatten(targets_stream)
    stream = Merge((feats_stream, targets_stream),
                   sources=('features', 'targets'))
    #Add a random Scheme?
    it_scheme = ConstantScheme(batch_size, num_examples)
    batched_stream = Batch(stream, it_scheme, strictness=1)
    return batched_stream
Example No. 18
def main(job_id, params):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))
    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(
        config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    input_dim = {'l': 11427, 'r': 10519, 'b': 10519 + 11427}

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file,
                            which_set='train',
                            sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file,
                            which_set='valid',
                            sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file,
                           which_set='test',
                           sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')

    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[input_dim[side], hidden_units, hidden_units, 2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]],
                                  input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(step_rule=solver_type,
                           params=dropout_graph.parameters,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot('AdniNet_{}'.format(side),
                    channels=[
                        ['dropout_entropy', 'validation_entropy'],
                        ['error', 'validation_error'],
                    ],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}net/{}'.format(side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started to overfit
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.1,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             early_stopper,
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
Example No. 19
    logger = logging.Logger(__name__)
    FORMAT = '[%(asctime)s] %(name)s %(message)s'
    DATEFMT = "%M:%D:%S"
    logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.DEBUG)

    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }

    batch_size = 100
    data_train = MNIST(which_sets=['train'], sources=['features'])

    train_stream = Flatten(
        DataStream.default_stream(data_train,
                                  iteration_scheme=SequentialScheme(
                                      data_train.num_examples, batch_size)))

    features_size = 28 * 28 * 1

    inputs = T.matrix('features')

    test_data = {
        inputs:
        255 * np.random.normal(size=(batch_size, 28 * 28)).astype('float32')
    }

    prior = Z_prior(dim=128)

    gen = Generator(input_dim=128,
                    dims=[128, 64, 64, features_size],
Example No. 20
def _pokemon_dcgan():
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }

    batch_size = 20
    data_train = PokemonGenYellowNormal(which_sets=['train'],
                                        sources=['features'])

    train_stream = Flatten(DataStream.default_stream(
        data_train, iteration_scheme=SequentialScheme(
            data_train.num_examples, batch_size)))

    features_size = 56 * 56 * 1

    inputs = T.matrix('features')

    inputs = inputs / 255. * 2. - 1.

# rng = MRG_RandomStreams(123)
# inputs = inputs * rng.binomial(size=inputs.shape, p=0.1)

    prior = Z_prior(dim=256)
    gen = Generator(input_dim=256, dims=[128, 64, 64, features_size],
                    alpha=0.1, **inits)

    dis = Discriminator(dims=[features_size, 128, 64, 64], alpha=0.1, **inits)

    gan = GAN(dis=dis, gen=gen, prior=prior)
    gan.initialize()

    y_hat1, y_hat0, z = gan.apply(inputs)
    model = Model([y_hat0, y_hat1])
    loss = WGANLoss()
    dis_obj, gen_obj = loss.apply(y_hat0, y_hat1)

    dis_obj.name = 'Discriminator loss'
    gen_obj.name = 'Generator loss'

    cg = ComputationGraph([gen_obj, dis_obj])

    gen_filter = VariableFilter(roles=[PARAMETER],
                                bricks=gen.linear_transformations)

    dis_filter = VariableFilter(roles=[PARAMETER],
                                bricks=dis.linear_transformations)

    gen_params = gen_filter(cg.variables)
    dis_params = dis_filter(cg.variables)

# Prepare the dropout
    _inputs = []
    for brick_ in [gen]:
        _inputs.extend(VariableFilter(roles=[INPUT],
                                      bricks=brick_.linear_transformations)(
                                          cg.variables))

    cg_dropout = apply_dropout(cg, _inputs, 0.02)

    gen_obj = cg_dropout.outputs[0]
    dis_obj = cg_dropout.outputs[1]

    gan.dis_params = dis_params
    gan.gen_params = gen_params

    algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj,
                            model=gan, dis_iter=5,
                            step_rule=RMSProp(learning_rate=1e-4),
                            gen_consider_constant=z)

    neg_sample = gan.sampling(size=25)

    monitor = TrainingDataMonitoring(variables=[gen_obj, dis_obj],
                                        prefix="train", after_batch=True)

    subdir = './exp/' + 'pokemon' + "-" + time.strftime("%Y%m%d-%H%M%S")

    check_point = Checkpoint("{}/{}".format(subdir, 'pokemon'),
                                every_n_epochs=100,
                                save_separately=['log', 'model'])

    neg_sampling = GenerateNegtiveSample(neg_sample, img_size=(25, 56, 56),
                                         every_n_epochs=100)

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(algorithm=algo, model=model,
                        data_stream=train_stream,
                        extensions=[Printing(), ProgressBar(), monitor,
                                    check_point, neg_sampling])

    main_loop.run()
Example No. 21
mnist_train = MNIST(('train',))  # , sources=('features', 'targets'))
num_examples = 10  # mnist_train.num_examples

train_data_stream = Flatten(DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(num_examples, batch_size=batch_size)))

train_monitor_stream = Flatten(DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(num_examples, batch_size=batch_size)))


x = T.matrix('features')
y = T.matrix('targets')

epoch = train_data_stream.get_epoch_iterator()
for j, batch in enumerate(epoch):
    if j > 0:
        break
    theano.config.compute_test_value = 'warn'
Example No. 22
    #----------------------------------------------------------------------
    logger.info("Loading dataset...")

    x_dim, data_train, data_valid, data_test = datasets.get_data(args.data)

    num_examples = data_test.num_examples
    n_samples = (int(s) for s in args.nsamples.split(","))

    dict_p = {}
    dict_ps = {}
    
    for K in n_samples:
        batch_size = max(args.max_batch // K, 1)
        stream = Flatten(DataStream(data_test,
                                    iteration_scheme=ShuffledScheme(
                                        num_examples, batch_size)),
                         which_sources=('features',))

        log_p = np.asarray([])
        log_ps = np.asarray([])
        for batch in stream.get_epoch_iterator(as_dict=True):
            log_p_, log_ps_ = do_nll(batch['features'], K)
    
            log_p = np.concatenate((log_p, log_p_))
            log_ps = np.concatenate((log_ps, log_ps_))
    
        log_p_ = stats.sem(log_p)
        log_p = np.mean(log_p)
        log_ps_ = stats.sem(log_ps)
        log_ps = np.mean(log_ps)
Example No. 23
 def s(s):
     return Flatten(
         DataStream.default_stream(s,
                                   iteration_scheme=ShuffledScheme(
                                       s.num_examples, batch_size=256)))
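
A hypothetical call to the helper above, assuming a Fuel dataset such as MNIST:

train_stream = s(MNIST(("train",)))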
Example No. 24
inputs = VariableFilter([INPUT])(cost_graph.variables)
dropout_inputs = [input for input in inputs if input.name.startswith('linear_')]
dropout_graph = apply_dropout(cost_graph, dropout_inputs, dropout_ratio)
dropout_cost = dropout_graph.outputs[0]
dropout_cost.name = 'dropout_entropy'

# Learning Algorithm:
algo = GradientDescent(
    step_rule=solver_type,
    params=dropout_graph.parameters,
    cost=dropout_cost)

# Data stream used for training model:
training_stream = Flatten(
    DataStream.default_stream(
        dataset=train,
        iteration_scheme=ShuffledScheme(
            train.num_examples,
            batch_size=train_batch)))

training_monitor = TrainingDataMonitoring([cost], after_batch=True)

# Use the 'valid' set for validation during training:
validation_stream = Flatten(
    DataStream.default_stream(
        dataset=valid,
        iteration_scheme=ShuffledScheme(
            valid.num_examples,
            batch_size=valid_batch)))

validation_monitor = DataStreamMonitoring(
    variables=[cost],
Example No. 25
stream = DataStream.default_stream(train,
                                   iteration_scheme=ShuffledScheme(
                                       train.num_examples, 128))

# Enlarge images that are too small
downscale_stream = MinimumImageDimensions(stream, (64, 64),
                                          which_sources=('image_features', ))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(downscale_stream, (32, 32),
                                     which_sources=('image_features', ))

# We'll use a simple MLP, so we need to flatten the images
# from (channel, width, height) to simply (features,)
flattened_stream = Flatten(cropped_stream, which_sources=('image_features', ))
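# Added note (not in the original script): after the (32 x 32) crop and
# Flatten, each 'image_features' batch has shape (128, 3072), i.e.
# 3 channels * 32 * 32, which is why W below maps 3072 -> 500.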

# Create the Theano MLP
import theano
from theano import tensor
import numpy

X = tensor.matrix('image_features')
T = tensor.lmatrix('targets')

W = theano.shared(numpy.random.uniform(low=-0.01, high=0.01, size=(3072, 500)),
                  'W')
b = theano.shared(numpy.zeros(500))
V = theano.shared(numpy.random.uniform(low=-0.01, high=0.01, size=(500, 2)),
                  'V')
c = theano.shared(numpy.zeros(2))
Example No. 26
def _pokemon_wgan_gp():
    import os
    os.environ["FUEL_DATA_PATH"] = os.getcwd() + "/data/"
    batch_size = 20
    data_train = PokemonGenYellowNormal(which_sets=['train'],
                                        sources=['features'])

    train_stream = Flatten(DataStream.default_stream(
        data_train, iteration_scheme=SequentialScheme(
            data_train.num_examples, batch_size)))

    features_size = 56 * 56 * 1

    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }

    # print train_stream.get_epoch_iterator(as_dict=True).next()
    # raise

    inputs = T.matrix('features')
    inputs = ((inputs / 255.) * 2. - 1.)

    rng = MRG_RandomStreams(123)

    prior = Z_prior(dim=512)
    gen = Generator(input_dim=512, dims=[512, 512, 512, 512,
                                         features_size],
                    alpha=0.1, **inits)

    dis = Discriminator(dims=[features_size, 512, 512, 512, 512],
                        alpha=0.1, **inits)

    gan = GAN(dis=dis, gen=gen, prior=prior)
    gan.initialize()

    # gradient penalty
    fake_samples, _ = gan.sampling(inputs.shape[0])
    e = rng.uniform(size=(inputs.shape[0], 1))

    mixed_input = (e * fake_samples) + (1 - e) * inputs

    output_d_mixed = gan._dis.apply(mixed_input)

    grad_mixed = T.grad(T.sum(output_d_mixed), mixed_input)

    norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=1))
    grad_penalty = T.mean(T.square(norm_grad_mixed - 1))
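    # Added note (not in the original script): this is the WGAN-GP gradient
    # penalty, E[(||grad D(x_mixed)||_2 - 1)^2], computed on random
    # interpolations of real and fake samples and weighted by 10 in d_loss.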

    y_hat1, y_hat0, z = gan.apply(inputs)

    d_loss_real = y_hat1.mean()
    d_loss_fake = y_hat0.mean()
    d_loss = -d_loss_real + d_loss_fake + 10 * grad_penalty
    g_loss = -d_loss_fake


    dis_obj = d_loss
    gen_obj = g_loss

    model = Model([y_hat0, y_hat1])

    em_loss = -d_loss_real + d_loss_fake

    em_loss.name = "Earth Move loss"
    dis_obj.name = 'Discriminator loss'
    gen_obj.name = 'Generator loss'

    cg = ComputationGraph([gen_obj, dis_obj])

    gen_filter = VariableFilter(roles=[PARAMETER],
                                bricks=gen.linear_transformations)

    dis_filter = VariableFilter(roles=[PARAMETER],
                                bricks=dis.linear_transformations)

    gen_params = gen_filter(cg.variables)
    dis_params = dis_filter(cg.variables)

# Prepare the dropout
    _inputs = []
    for brick_ in [gen]:
        _inputs.extend(VariableFilter(roles=[INPUT],
                    bricks=brick_.linear_transformations)(cg.variables))

    cg_dropout = apply_dropout(cg, _inputs, 0.02)

    gen_obj = cg_dropout.outputs[0]
    dis_obj = cg_dropout.outputs[1]

    gan.dis_params = dis_params
    gan.gen_params = gen_params

    # gradient penalty

    algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj,
                              model=gan, dis_iter=5, gradient_clip=None,
                              step_rule=RMSProp(learning_rate=1e-4),
                              gen_consider_constant=z)

    neg_sample = gan.sampling(size=25)

    from blocks.monitoring.aggregation import mean

    monitor = TrainingDataMonitoring(variables=[mean(gen_obj), mean(dis_obj),
                                                mean(em_loss)],
                                     prefix="train", after_batch=True)

    subdir = './exp/' + 'pokemon-wgan-gp' + "-" + time.strftime("%Y%m%d-%H%M%S")

    check_point = Checkpoint("{}/{}".format(subdir, 'CIFAR10'),
                                every_n_epochs=100,
                                save_separately=['log', 'model'])

    neg_sampling = GenerateNegtiveSample(neg_sample,
                                         img_size=(25, 56, 56),
                                         every_n_epochs=10)

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(algorithm=algo, model=model,
                         data_stream=train_stream,
                         extensions=[Printing(), ProgressBar(), monitor,
                                     check_point, neg_sampling])

    main_loop.run()
Example No. 27
def train(args, model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=dataset_train.num_examples -
                    (dataset_train.num_examples % args.batch_size),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)
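    # Added note (not in the original script): scl is 1/std and shft is
    # -mean * scl, so ScaleAndShift's x * scl + shft would standardize the
    # batch; scl and shft are also passed to reverse_time() when plotting.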

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    '''
    x = T.matrix('x', dtype='float32')
    temp  = T.scalar('temp', dtype='float32')
    f=transition_operator(tparams, model_options, x, temp)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f([data[0], 1.0, 1])
        #ipdb.set_trace()
    '''
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    for param in tparams:
        print param
        print tparams[param].get_value().shape

    print 'Buiding Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():

            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32), )
                else:
                    continue
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        [data_run, temperature_forward, 1])
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)

            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)

            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            t1 = time.time()
            #print time.time() - t1, "time to get grads"
            t1 = time.time()
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            #'Norm_1': np.linalg.norm(gradient_updates_[0]),
            #'Norm_2': np.linalg.norm(gradient_updates_[1]),
            #'Norm_3': np.linalg.norm(gradient_updates_[2]),
            #'Norm_4': np.linalg.norm(gradient_updates_[3])})
            #print time.time() - t1, "time to log"

            #print time.time() - t0, "total time in batch"
            t5 = time.time()

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            if batch_index % 200 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature

                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [data_use[0], temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/' + "batch_" +
                            str(batch_index) + '_corrupted' + 'epoch_' +
                            str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [x_data, temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/batch_' + str(batch_index) +
                            '_corrupted' + '_epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor

                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH,
                                              WIDTH)
                plot_images(
                    x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) +
                    '_batch_index_' + str(batch_index))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' + "batch_" +
                        str(batch_index) + '_samples_backward_' + 'epoch_' +
                        str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    s = np.random.binomial(1, 0.5, INPUT_SIZE)

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/batch_index_' +
                        str(batch_index) + '_inference_' + 'epoch_' +
                        str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

    ipdb.set_trace()
Example No. 28
def main(name, dataset, epochs, batch_size, learning_rate, attention, n_iter,
         enc_dim, dec_dim, z_dim, oldmodel, live_plotting):

    image_size, channels, data_train, data_valid, data_test = datasets.get_data(
        dataset)

    train_stream = Flatten(
        DataStream.default_stream(data_train,
                                  iteration_scheme=SequentialScheme(
                                      data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream.default_stream(data_valid,
                                  iteration_scheme=SequentialScheme(
                                      data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream.default_stream(data_test,
                                  iteration_scheme=SequentialScheme(
                                      data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = channels * img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # Configure attention mechanism
    if attention != "":
        read_N, write_N = attention.split(',')

        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * channels * read_N**2

        reader = AttentionReader(x_dim=x_dim,
                                 dec_dim=dec_dim,
                                 channels=channels,
                                 width=img_width,
                                 height=img_height,
                                 N=read_N,
                                 **inits)
        writer = AttentionWriter(input_dim=dec_dim,
                                 output_dim=x_dim,
                                 channels=channels,
                                 width=img_width,
                                 height=img_height,
                                 N=write_N,
                                 **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------

    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("               dataset: %s" % dataset)
    print("          subdirectory: %s" % subdir)
    print("         learning rate: %g" % learning_rate)
    print("             attention: %s" % attention)
    print("          n_iterations: %d" % n_iter)
    print("     encoder dimension: %d" % enc_dim)
    print("           z dimension: %d" % z_dim)
    print("     decoder dimension: %d" % dec_dim)
    print("            batch size: %d" % batch_size)
    print("                epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc",
                      **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec",
                      **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    x_recons, kl_terms = draw.reconstruct(x)

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        parameters=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]
    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    plotting_extensions = []
    if live_plotting:
        plotting_extensions = [Plot(name, channels=plot_channels)]

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(
                train_monitors, prefix="train", after_epoch=True),
            #            DataStreamMonitoring(
            #                monitors,
            #                valid_stream,
            ##                updates=scan_updates,
            #                prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                #                updates=scan_updates,
                prefix="test"),
            #Checkpoint(name, before_training=False, after_epoch=True, save_separately=['log', 'model']),
            Checkpoint("{}/{}".format(subdir, name),
                       save_main_loop=False,
                       before_training=True,
                       after_epoch=True,
                       save_separately=['log', 'model']),
            SampleCheckpoint(image_size=image_size[0],
                             channels=channels,
                             save_subdir=subdir,
                             before_training=True,
                             after_epoch=True),
            ProgressBar(),
            Printing()
        ] + plotting_extensions)

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_parameter_values(oldmodel.get_parameter_values())
        del oldmodel

    main_loop.run()
Example no. 29
0
    #----------------------------------------------------------------------
    logger.info("Loading dataset...")

    x_dim, data_train, data_valid, data_test = datasets.get_data(args.data)

    num_examples = data_test.num_examples
    n_samples = [int(s) for s in args.nsamples.split(",")]

    dict_p = {}
    dict_ps = {}

    for K in n_samples:
        batch_size = max(args.max_batch // K, 1)
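        # shrink the batch as K grows, so the K importance samples drawn per
        # example (inside do_nll, presumably) stay within the max_batch budget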
        stream = Flatten(DataStream(data_test,
                                    iteration_scheme=ShuffledScheme(
                                        num_examples, batch_size)),
                         which_sources=('features',))

        log_p = np.asarray([])
        log_ps = np.asarray([])
        for batch in stream.get_epoch_iterator(as_dict=True):
            log_p_, log_ps_ = do_nll(batch['features'], K)

            log_p = np.concatenate((log_p, log_p_))
            log_ps = np.concatenate((log_ps, log_ps_))

        log_p_ = stats.sem(log_p)
        log_p = np.mean(log_p)
        log_ps_ = stats.sem(log_ps)
        log_ps = np.mean(log_ps)
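        # (stats.sem is taken just above, before np.mean rebinds the arrays.)
        # The example is truncated here; presumably the per-K statistics are
        # stored in dict_p / dict_ps (declared above) and reported, e.g.
        # (an assumed sketch, not part of the original source):
        #     dict_p[K] = (log_p, log_p_)    # mean and standard error
        #     dict_ps[K] = (log_ps, log_ps_)
        #     print("K=%d: log p >= %.2f +- %.2f" % (K, log_p, log_p_))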
Example no. 30
0
    if resume:
        print("Restoring from previous checkpoint")
        extensions.extend([
            Load(path)
        ])
    return model, algorithm, extensions


if __name__ == '__main__':
    mnist = MNIST(("train",), sources=sources)
    mnist_test = MNIST(("test",), sources=sources)
    training_stream = Flatten(
        DataStream(
            mnist,
            iteration_scheme=ShuffledScheme(mnist.num_examples, batch_size)
        ),
        which_sources=sources
    )
    # import ipdb; ipdb.set_trace()
    test_stream = Flatten(
        DataStream(
            mnist_test,
            iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size)
        ),
        which_sources=sources
    )
    "Print data loaded"

    if train:
        cost = create_network()
Example no. 31
0
File: VAE.py Project: udibr/VAE
def main(name, model, epochs, batch_size, learning_rate, bokeh, layers, gamma,
         rectifier, predict, dropout, qlinear, sparse):
    runname = "vae%s-L%s%s%s%s-l%s-g%s-b%d" % (
        name, layers,
        'r' if rectifier else '',
        'd' if dropout else '',
        'l' if qlinear else '',
        shnum(learning_rate), shnum(gamma), batch_size // 100)
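    # shnum is presumably a short-number formatter defined elsewhere in
    # VAE.py, analogous to the lr_tag helper used earlier in this document.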
    if rectifier:
        activation = Rectifier()
        full_weights_init = Orthogonal()
    else:
        activation = Tanh()
        full_weights_init = Orthogonal()

    if sparse:
        runname += '-s%d'%sparse
        weights_init = Sparse(num_init=sparse, weights_init=full_weights_init)
    else:
        weights_init = full_weights_init

    layers = [int(d) for d in layers.split(',')]

    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers)-1),
              encoder_layers,
              name="MLP_enc", biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    if qlinear:
        sampler = Qlinear(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    else:
        sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)

    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Logistic()],
              decoder_layers,
              name="MLP_dec", biases_init=Constant(0.), weights_init=weights_init)


    vae = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae.initialize()

    x = tensor.matrix('features')/256.
    x.tag.test_value = np.random.random((batch_size,layers[0])).astype(np.float32)

    if predict:
        mean_z, enc = vae.mean_z(x)
        # cg = ComputationGraph([mean_z, enc])
        newmodel = Model([mean_z,enc])
    else:
        x_recons, kl_terms = vae.reconstruct(x)
        recons_term = BinaryCrossEntropy().apply(x, x_recons)
        recons_term.name = "recons_term"

        cost = recons_term + kl_terms.mean()
        cg = ComputationGraph([cost])

        if gamma > 0:
            weights = VariableFilter(roles=[WEIGHT])(cg.variables)
            cost += gamma * blocks.theano_expressions.l2_norm(weights)

        cost.name = "nll_bound"
        newmodel = Model(cost)

        if dropout:
            from blocks.roles import INPUT
            inputs = VariableFilter(roles=[INPUT])(cg.variables)
            # dropout_target = [v for k,v in newmodel.get_params().iteritems()
            #            if k.find('MLP')>=0 and k.endswith('.W') and not k.endswith('MLP_enc/linear_0.W')]
            dropout_target = [v for v in inputs if v.name.startswith('linear_')]
            cg = apply_dropout(cg, dropout_target, 0.5)
            target_cost = cg.outputs[0]
        else:
            target_cost = cost

    if name == 'mnist':
        if predict:
            train_ds = MNIST("train")
        else:
            train_ds = MNIST("train", sources=['features'])
        test_ds = MNIST("test")
    else:
        datasource_dir = os.path.join(fuel.config.data_path, name)
        datasource_fname = os.path.join(datasource_dir , name+'.hdf5')
        if predict:
            train_ds = H5PYDataset(datasource_fname, which_set='train')
        else:
            train_ds = H5PYDataset(datasource_fname, which_set='train', sources=['features'])
        test_ds = H5PYDataset(datasource_fname, which_set='test')
    train_s = Flatten(DataStream(train_ds,
                 iteration_scheme=ShuffledScheme(
                     train_ds.num_examples, batch_size)))
    test_s = Flatten(DataStream(test_ds,
                 iteration_scheme=ShuffledScheme(
                     test_ds.num_examples, batch_size)))

    if predict:
        from itertools import chain
        fprop = newmodel.get_theano_function()
        allpdata = None
        alledata = None
        f = train_s.sources.index('features')
        assert f == test_s.sources.index('features')
        sources = test_s.sources
        alllabels = dict((s,[]) for s in sources if s != 'features')
        for data in chain(train_s.get_epoch_iterator(), test_s.get_epoch_iterator()):
            for s,d in zip(sources,data):
                if s != 'features':
                    alllabels[s].extend(list(d))

            pdata, edata = fprop(data[f])
            if allpdata is None:
                allpdata = pdata
            else:
                allpdata = np.vstack((allpdata, pdata))
            if alledata is None:
                alledata = edata
            else:
                alledata = np.vstack((alledata, edata))
        print('Saving', allpdata.shape, 'intermediate layer, for all training and test examples, to', name + '_z.npy')
        np.save(name + '_z', allpdata)
        print('Saving', alledata.shape, 'last encoder layer to', name + '_e.npy')
        np.save(name + '_e', alledata)
        print('Saving additional labels/targets:', ','.join(alllabels.keys()),
              'of size', ','.join(str(len(v)) for v in alllabels.values()),
              'to', name + '_labels.pkl')
        with open(name+'_labels.pkl','wb') as fp:
            pickle.dump(alllabels, fp, -1)
    else:
        cg = ComputationGraph([target_cost])
        algorithm = GradientDescent(
            cost=target_cost, parameters=cg.parameters,
            step_rule=Adam(learning_rate)  # Scale(learning_rate=learning_rate)
        )
        extensions = []
        if model:
            extensions.append(Load(model))

        extensions += [Timing(),
                      FinishAfter(after_n_epochs=epochs),
                      DataStreamMonitoring(
                          [cost, recons_term],
                          test_s,
                          prefix="test"),
                      TrainingDataMonitoring(
                          [cost,
                           aggregation.mean(algorithm.total_gradient_norm)],
                          prefix="train",
                          after_epoch=True),
                      Checkpoint(runname, every_n_epochs=10),
                      Printing()]

        if bokeh:
            extensions.append(Plot(
                'Auto',
                channels=[
                    ['test_recons_term','test_nll_bound','train_nll_bound'
                     ],
                    ['train_total_gradient_norm']]))

        main_loop = MainLoop(
            algorithm,
            train_s,
            model=newmodel,
            extensions=extensions)

        main_loop.run()
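How the arrays saved by the predict branch might be consumed later (a sketch,
not part of VAE.py; the file names follow the np.save/pickle.dump calls above):

# z = np.load(name + '_z.npy')      # mean_z for every train+test example
# e = np.load(name + '_e.npy')      # last encoder-layer activations
# with open(name + '_labels.pkl', 'rb') as fp:
#     labels = pickle.load(fp)      # dict: source name -> list of labels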
Example no. 32
0
        width=img_width,
        N=N,
        n_iter=n_iter,
        sources=("features", "bbox_lefts", "bbox_tops", "bbox_widths", "bbox_heights"),
    )

    batch_size = 1000
    num_examples = int(svhn.num_examples / batch_size) + 1
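    # note: despite its name, this is the number of batches, not of examples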
    evaluation = True

    # num_examples = 100
    # batch_size = 1
    # evaluation = False

    svhn_stream = Flatten(
        DataStream.default_stream(svhn, iteration_scheme=SequentialScheme(svhn.num_examples, batch_size))
    )
    svhn_stream.get_epoch_iterator()

    x = T.fmatrix("features")
    batch_size = T.iscalar("batch_size")
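    # rebinds batch_size from the integer above to a symbolic scalar input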

    center_y, center_x, deltaY, deltaX = locator.find(x, batch_size)

    do_sample = theano.function(
        [x, batch_size], outputs=[center_y, center_x, deltaY, deltaX], allow_input_downcast=True
    )

    overlap = 0.0
    distance = 0.0
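The evaluation loop is truncated in this example; a sketch of how do_sample
might be driven over svhn_stream to fill in overlap and distance (the IoU and
centre-distance bookkeeping is an assumption, not part of the original):

# for batch in svhn_stream.get_epoch_iterator(as_dict=True):
#     feats = batch["features"]
#     cy, cx, dy, dx = do_sample(feats, feats.shape[0])
#     # compare (cy, cx, dy, dx) with the bbox_* sources, accumulating
#     # intersection-over-union into overlap and centre offsets into distance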
Example no. 33
0
                                                 nonlinearity=None)

    '''
    return net['conv1_1']


if __name__ == '__main__':

    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features', ))
    dataset_test = MNIST(['test'], sources=('features', ))
    n_colors = 1
    spatial_width = 28
    train_stream = Flatten(
        DataStream.default_stream(dataset_train,
                                  iteration_scheme=ShuffledScheme(
                                      examples=dataset_train.num_examples -
                                      (dataset_train.num_examples % 32),
                                      batch_size=32)))
    shp = next(train_stream.get_epoch_iterator())[0].shape

    input_ = T.tensor4('inputs_var')
    unet = buildUnet(1, dropout=True, input_var=input_, trainable=True)
    output = unet.get_output_for(input_)
    test_prediction = lasagne.layers.get_output(unet, deterministic=True)[0]
    #test_prediction_dimshuffle = test_prediction.dimshuffle((0, 2, 3, 1))
    pred_fcn_fn = theano.function([input_], test_prediction)

    for data in train_stream.get_epoch_iterator():
        data_use = (data[0].reshape(32, 1, 28, 28), )
        out_put = pred_fcn_fn(data_use[0])
        import ipdb
Example no. 34
0
def train(args, model_args, lrate):

    model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print(model_dir)
    logger = mimir.Logger(filename=model_dir2 + '/' + model_id2 +
                          'log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print("Resuming training from " + args.resume_file)
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print('loading MNIST')
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == 'Spiral':
        print('loading SPIRAL')
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = Flatten(
        DataStream.default_stream(dataset_train,
                                  iteration_scheme=ShuffledScheme(
                                      examples=dataset_train.num_examples,
                                      batch_size=args.batch_size)))

    shp = next(train_stream.get_epoch_iterator())[0].shape
    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)
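    # One way to address the TODO above: accumulate the statistics over a full
    # epoch instead of one minibatch (a sketch, assuming equal batch sizes):
    #     mean_acc = sq_acc = n_batches = 0.
    #     for (xb,) in train_stream.get_epoch_iterator():
    #         mean_acc += xb.mean(); sq_acc += (xb ** 2).mean(); n_batches += 1
    #     mu = mean_acc / n_batches
    #     scl = 1. / np.sqrt(sq_acc / n_batches - mu ** 2)
    #     shft = -mu * scl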

    print('Building model')
    params = init_params(model_options)
    if args.reload_ and os.path.exists(args.saveto_filename):
        print('Reloading parameters')
        print(args.saveto_filename)
        params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    '''
    x = T.matrix('x', dtype='float32')
    f=transition_operator(tparams, model_options, x, 1)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f(data[0])
        print a
        ipdb.set_trace()
    '''
    x, cost = build_model(tparams, model_options)
    inps = [x]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    print('Building f_cost...', end=' ')
    f_cost = theano.function(inps, cost)
    print('Done')
    print(tparams)
    grads = T.grad(cost, wrt=itemlist(tparams))

    get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])
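    # the switch above zeroes NaN gradient entries so a single bad batch
    # cannot derail the update (get_grads was compiled from the raw grads)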

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print('Building optimizers...', end=' ')
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print('Done')

    print('Building sampler...')
    f_sample = sample(tparams, model_options)
    print('Done')

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print('Number of steps:', args.num_steps)
    count_sample = 1
    for eidx in range(max_epochs):
        n_samples = 0
        print('Starting next epoch', eidx)
        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            if data[0] is None:
                print('No data')
                uidx -= 1
                continue
            ud_start = time.time()
            cost = f_grad_shared(data[0])
            f_update(lrate)
            ud = time.time() - ud_start

            if batch_index % 1 == 0:  # modulo 1: log on every batch
                print('Cost:', cost)
                count_sample += 1

                from impainting import change_image, inpainting
                train_temp = data[0]
                print(data[0].shape)
                change_image(train_temp.reshape(args.batch_size, 1, 28, 28), 3)
                train_temp = train_temp.reshape(args.batch_size, 784)
                output = inpainting(train_temp)
                change_image(output.reshape(args.batch_size, 1, 28, 28), 1)

                reverse_time(
                    scl, shft, output,
                    model_dir + '/' + 'impainting_orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                x_data = np.asarray(output).astype('float32')
                # the annealed schedule is disabled; temperature stays fixed
                temperature = args.temperature  # * (args.temperature_factor ** (args.num_steps - 1))
                orig_impainted_data = np.asarray(data[0]).astype('float32')

                for i in range(args.num_steps + args.extra_steps + 5):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        x_data, temperature)
                    print('Inpainting using temperature', i, temperature)
                    x_data = do_half_image(x_data, orig_impainted_data)
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' +
                        'impainting_orig_' + 'epoch_' + str(count_sample) +
                        '_batch_index_' + str(batch_index) + 'step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    # both branches of the original if/else left temperature
                    # unchanged; the intended annealing step remains disabled:
                    # temperature /= args.temperature_factor
    ipdb.set_trace()
Example no. 35
0
import SVRT_analysis_helper_functions 

#Initialize vars and theano function
num_estimates = 20
num_training = 6 #per evaluation
num_testing = 1 #per evaluation
batch_size = num_estimates * num_training + num_estimates * num_testing #get a bunch just to be sure

image_size, channels, data_train, data_valid, data_test = datasets.get_data('sketch')
rows = 10
cols = 20
N_iter = 64

#Load images
train_stream = Flatten(DataStream.default_stream(data_train, iteration_scheme=SequentialScheme(data_train.num_examples, batch_size)))
train_batch = next(train_stream.get_epoch_iterator())  # one (images, labels) batch
train_image = train_batch[0].reshape(batch_size, 32, 32)
train_labels = train_batch[1]
test_stream = Flatten(DataStream.default_stream(data_test, iteration_scheme=SequentialScheme(data_test.num_examples, batch_size)))
test_batch = next(test_stream.get_epoch_iterator())
test_image = test_batch[0].reshape(batch_size, 32, 32)
test_labels = test_batch[1]


#Load old model
#model_file = 'new_test-20160313-125114/new_test_model'
model_file = 'all_params-20160315-221022/all_params_model'
with open(model_file,"rb") as f:
    model = pickle.load(f)
draw = model.get_top_bricks()[0]
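A plausible next step (an assumption: DrawModel exposes a Theano sample
method, as in the draw codebase this example builds on):

# import theano
# import theano.tensor as T
# n = T.iscalar('n_samples')
# do_sample = theano.function([n], draw.sample(n), allow_input_downcast=True)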
Example no. 36
0
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()], 1)
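    # target_scores is a one-hot encoding of y built with set_subtensor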
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff**2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the groundtruth should be greater or equal than its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less or equal than its actual score
        cost += tensor.maximum(0, score_diff[indices,
                                             scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]), y.flatten()])**2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]),
                             scores.argmax(axis=1)])**2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (-scores[indices,
                        (scores + epsilon * target_scores).argmax(axis=1)] +
                scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)] -
                scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost_name)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'
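    # The branches above implement, in order: softmax cross-entropy, mean
    # squared error, a perceptron loss, the TLE variants built on score_diff,
    # direct loss minimisation via an epsilon-perturbed argmax, and a
    # multiclass hinge (SVM) loss.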

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate, momentum=momentum)
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=rule)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        # CallbackExtension(
        #    lambda: rule.learning_rate.set_value(rule.learning_rate.get_value() * 0.9),
        #    after_epoch=True),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm), rule.learning_rate
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_cost', 'test_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {
        'cost': cost_name,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'train_cost': df.train_cost.iloc[-1],
        'test_cost': df.test_cost.iloc[-1],
        'best_test_cost': df.test_cost.min(),
        'train_error': df.train_error_rate.iloc[-1],
        'test_error': df.test_error_rate.iloc[-1],
        'best_test_error': df.test_error_rate.min()
    }
    res = {
        k: float(v) if isinstance(v, numpy.ndarray) else v
        for k, v in res.items()
    }
    json.dump(res, sys.stdout)
    sys.stdout.flush()
Example no. 37
0
def main(save_to, model, train, test, num_epochs, input_size=(150, 150),
         learning_rate=0.01, batch_size=50, num_batches=None,
         flatten_stream=False):
    """
    save_to : where to save the trained model
    model : an already-initialised model (works with convnet and mlp)
    input_size : the square shape images are resized to (before flattening,
        if flatten_stream is True)
    """
    if flatten_stream:
        x = tensor.matrix('image_features')
    else:
        x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    #Data augmentation
    #insert data augmentation here 
    
    #Generating stream
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size)
    )

    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=ShuffledScheme(test.num_examples, batch_size)
    )
    
    
    #Reshaping procedure
    #Add a crop option in scikitresize so that the image is not deformed
    
    #Resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size, which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size, which_sources=('image_features',))
    
    #Flattening the stream
    if flatten_stream is True:
        train_stream = Flatten(train_stream, which_sources=('image_features',))
        test_stream = Flatten(test_stream, which_sources=('image_features',))
    
    # Apply input to model
    probs = model.apply(x)
    
    #Defining cost and various indices to watch
    #print(probs)
    #cost = SquaredError().apply(y.flatten(),probs)

    cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')
    error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(
            name='error_rate')

    #Building Computation Graph
    cg = ComputationGraph([cost, error_rate])

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=learning_rate))
    
    #Defining extensions
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train", every_n_batches=5),
                  DataStreamMonitoring([cost, error_rate], test_stream,
                                       prefix="test", every_n_batches=25),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing(every_n_batches=5)]

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.


    model = Model(cost)
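    # wraps the cost graph for the MainLoop; note this rebinding shadows the
    # model brick passed in as an argument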

    main_loop = MainLoop(
        algorithm,
        train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()
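A hypothetical invocation (the dataset and model construction are assumptions
for illustration; any Fuel image dataset exposing image_features works):

# from fuel.datasets.dogs_vs_cats import DogsVsCats
# train = DogsVsCats(('train',), subset=slice(0, 20000))
# test = DogsVsCats(('test',), subset=slice(20000, 25000))
# main('catsdogs_model', my_initialised_convnet, train, test, num_epochs=5)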
Example no. 38
0
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == 'IMAGENET':
        from imagenet_data import IMAGENET
        spatial_width = 128
        dataset_train = IMAGENET(['train'], width=spatial_width)
        dataset_test = IMAGENET(['test'], width=spatial_width)
        n_colors = 3
    else:
        raise ValueError("Unknown dataset %s."%args.dataset)

    train_stream = Flatten(DataStream.default_stream(dataset_train,
                              iteration_scheme=ShuffledScheme(
                                  examples=dataset_train.num_examples,
                                  batch_size=args.batch_size)))
    test_stream = Flatten(DataStream.default_stream(dataset_test,
                              iteration_scheme=ShuffledScheme(
                                  examples=dataset_test.num_examples,
                                  batch_size=args.batch_size)))

    shp = next(train_stream.get_epoch_iterator())[0].shape

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch*scl)
    # scale is applied before shift
Example no. 39
0
          dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()

# Calculate the loss function
x = T.matrix('features')
y = T.lmatrix('targets')
y_hat = mlp.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)

# load training data using Fuel
mnist_train = MNIST("train")
train_stream = Flatten(
    DataStream.default_stream(dataset=mnist_train,
                              iteration_scheme=SequentialScheme(
                                  mnist_train.num_examples, 128)))

# load testing data
mnist_test = MNIST("test")
test_stream = Flatten(
    DataStream.default_stream(dataset=mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 1024)))

# train the model
from blocks.model import Model
main_loop = MainLoop(model=Model(cost),
                     data_stream=train_stream,
                     algorithm=GradientDescent(
                         cost=cost,
# In[12]:

batch[1].shape


# ## Transformers

# In[13]:

from fuel.transformers import Flatten


# In[14]:

data_stream = Flatten(data_stream)


# In[15]:

epoch = data_stream.get_epoch_iterator()
batch = next(epoch)  # (ndarray, ndarray)


# In[16]:

batch[0].shape
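# For MNIST this flattens each (1, 28, 28) image into a 784-vector, so the
# shape above is now (batch_size, 784).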


# In[17]: