Example No. 1
def test_apply_batch_normalization_nested():
    x = tensor.matrix()
    eps = 1e-8
    batch_dims = (3, 9)
    bn = BatchNormalization(input_dim=5, epsilon=eps)
    mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5],
              weights_init=Constant(0.4), biases_init=Constant(1))
    mlp.initialize()
    y = mlp.apply(x)
    cg = apply_batch_normalization(ComputationGraph([y]))
    y_bn = cg.outputs[0]
    rng = numpy.random.RandomState((2016, 1, 18))
    x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX)
    y_ = y_bn.eval({x: x_})
    W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s)
                            .get_value(borrow=True)), ['W', 'b'])
    z_ = numpy.dot(x_, W_) + b_
    y_expected = numpy.tanh((z_ - z_.mean(axis=0)) /
                            numpy.sqrt(z_.var(axis=0) + eps))
    assert_allclose(y_, y_expected, rtol=1e-3)
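The snippets on this page omit their imports. A minimal set that should make this test runnable, assuming the Blocks and Theano packages (module paths may vary slightly between Blocks versions), is:

import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from blocks.bricks import MLP, Tanh, Sequence, BatchNormalization
from blocks.graph import ComputationGraph, apply_batch_normalization
from blocks.initialization import Constant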
Example No. 2
def create_training_computation_graphs():
    x = tensor.tensor4('features')
    y = tensor.imatrix('targets')

    convnet, mlp = create_model_bricks()
    y_hat = mlp.apply(convnet.apply(x).flatten(ndim=2))
    cost = BinaryCrossEntropy().apply(y, y_hat)
    accuracy = 1 - tensor.neq(y > 0.5, y_hat > 0.5).mean()
    cg = ComputationGraph([cost, accuracy])

    # Create a graph which uses batch statistics for batch normalization
    # as well as dropout on selected variables
    bn_cg = apply_batch_normalization(cg)
    bricks_to_drop = ([convnet.layers[i] for i in (5, 11, 17)] +
                      [mlp.application_methods[1].brick])
    variables_to_drop = VariableFilter(
        roles=[OUTPUT], bricks=bricks_to_drop)(bn_cg.variables)
    bn_dropout_cg = apply_dropout(bn_cg, variables_to_drop, 0.5)

    return cg, bn_dropout_cg
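A sketch of how the two graphs returned above might be consumed, following the pattern used in later examples on this page: train on the batch-normalized/dropout graph and keep running averages of the population statistics for the inference graph. The Adam step rule and the alpha value are illustrative assumptions, not part of the original snippet.

from blocks.algorithms import Adam, GradientDescent
from blocks.graph import get_batch_normalization_updates

cg, bn_dropout_cg = create_training_computation_graphs()
cost = bn_dropout_cg.outputs[0]  # the BinaryCrossEntropy cost with BN and dropout applied

algorithm = GradientDescent(cost=cost,
                            parameters=bn_dropout_cg.parameters,
                            step_rule=Adam(0.001))

# Running-average updates for the population statistics used at inference time.
alpha = 0.05  # illustrative smoothing factor
pop_updates = get_batch_normalization_updates(bn_dropout_cg)
algorithm.add_updates([(p, m * alpha + p * (1 - alpha))
                       for p, m in pop_updates])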
Example No. 3
def main(num_epochs,
         feature_maps=None,
         mlp_hiddens=None,
         conv_sizes=None,
         pool_sizes=None,
         batch_size=500,
         num_batches=None):

    ############# Architecture #############
    if feature_maps is None:
        feature_maps = [20, 50]
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    if conv_sizes is None:
        conv_sizes = [5, 5]
    if pool_sizes is None:
        pool_sizes = [2, 2]
    image_size = (32, 32)
    batch_size = 50
    output_size = 2
    learningRate = 0.1
    num_epochs = 10
    num_batches = None
    delta = 0.01
    drop_prob = 0.5
    weight_noise = 0.75

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations,
                    3,
                    image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))

    # We push initialization config to set different initialization schemes
    # for convolutional layers.

    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info(
        "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    probs = (convnet.apply(x)).copy(name='probs')

    # Computation graph of the cost, used for applying dropout and noise
    cg_probs = ComputationGraph([probs])
    inputs = VariableFilter(roles=[INPUT])(cg_probs.variables)
    weights = VariableFilter(roles=[FILTER, WEIGHT])(cg_probs.variables)

    ############# Regularization #############
    #regularization = 0
    logger.info('Applying regularization')
    regularization = delta * sum([(W**2).mean() for W in weights])
    probs.name = "reg_probs"

    ############# Gaussian Noise #############

    logger.info('Applying Gaussian noise')
    cg_train = apply_noise(cg_probs, weights, weight_noise)
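    # NOTE: cg_train is not used again below; as written, the noised graph is
    # discarded and dropout/batch normalization are applied to cg_probs instead.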

    ############# Dropout #############

    logger.info('Applying dropout')
    cg_probs = apply_dropout(cg_probs, inputs, drop_prob)
    dropped_out = VariableFilter(roles=[DROPOUT])(cg_probs.variables)
    inputs_referenced = [var.tag.replacement_of for var in dropped_out]
    assert set(inputs) == set(inputs_referenced)  # every dropped-out variable maps back to an input

    ############# Batch normalization #############

    # Rebuild probs from the dropout graph and add the regularization term:
    probs = cg_probs.outputs[0] + regularization
    cost = (CategoricalCrossEntropy().apply(y.flatten(),
                                            probs).copy(name='cost'))
    error_rate = (MisclassificationRate().apply(y.flatten(),
                                                probs).copy(name='error_rate'))
    cg = ComputationGraph([probs, cost, error_rate])
    cg = apply_batch_normalization(cg)

    ########### Loading images #####################

    from fuel.datasets.dogs_vs_cats import DogsVsCats
    from fuel.streams import DataStream, ServerDataStream
    from fuel.schemes import ShuffledScheme
    from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, Random2DRotation
    from fuel.transformers import Flatten, Cast, ScaleAndShift

    def create_data(data):
        stream = DataStream(data,
                            iteration_scheme=ShuffledScheme(
                                data.num_examples, batch_size))
        stream_downscale = MinimumImageDimensions(
            stream, image_size, which_sources=('image_features', ))
        stream_rotate = Random2DRotation(stream_downscale,
                                         which_sources=('image_features', ))
        stream_max = ScikitResize(stream_rotate,
                                  image_size,
                                  which_sources=('image_features', ))
        stream_scale = ScaleAndShift(stream_max,
                                     1. / 255,
                                     0,
                                     which_sources=('image_features', ))
        stream_cast = Cast(stream_scale,
                           dtype='float32',
                           which_sources=('image_features', ))
        #stream_flat = Flatten(stream_scale, which_sources=('image_features',))

        return stream_cast

    stream_data_train = create_data(
        DogsVsCats(('train', ), subset=slice(0, 20)))
    stream_data_test = create_data(
        DogsVsCats(('train', ), subset=slice(20, 30)))

    # Train with simple SGD
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=learningRate))
    #algorithm = GradientDescent(cost=cost, parameters=cg.parameters,step_rule=Adam(0.001))
    #algorithm.add_updates(extra_updates)

    # `Timing` extension reports time for reading data, aggregating a batch and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = []
    extensions.append(Timing())
    extensions.append(
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches))
    extensions.append(
        DataStreamMonitoring([cost, error_rate],
                             stream_data_test,
                             prefix="valid"))
    extensions.append(
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True))
    #extensions.append(Checkpoint(save_to))
    extensions.append(ProgressBar())
    extensions.append(Printing())

    logger.info("Building the model")
    model = Model(cost)

    main_loop = MainLoop(algorithm,
                         stream_data_train,
                         model=model,
                         extensions=extensions)

    main_loop.run()
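ScikitResize, used in create_data above, is not a stock Fuel transformer. A rough sketch of what it presumably does, written against Fuel's SourcewiseTransformer API and scikit-image's resize (an assumption, not the author's implementation):

import numpy
from fuel.transformers import SourcewiseTransformer
from skimage.transform import resize


class ScikitResize(SourcewiseTransformer):
    """Resize channel-first images to a fixed (height, width) with scikit-image."""

    def __init__(self, data_stream, image_size, **kwargs):
        self.image_size = image_size
        kwargs.setdefault('produces_examples', data_stream.produces_examples)
        super(ScikitResize, self).__init__(data_stream, **kwargs)

    def transform_source_example(self, example, source_name):
        # example: (channels, height, width); resize each channel separately.
        channels = [resize(channel, self.image_size, preserve_range=True)
                    for channel in example]
        return numpy.asarray(channels, dtype=example.dtype)

    def transform_source_batch(self, batch, source_name):
        return numpy.asarray([self.transform_source_example(example, source_name)
                              for example in batch])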
Example No. 4
def run(model_name, port_train, port_valid):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = (100, 100)

    if running_on_laptop:
        host_plot = 'http://*****:*****@ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss'], ['error', 'valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('/tmp/train_bn2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions,
                         model=model)
    main_loop.run()
Example No. 5
cost, all_parameters = build_model(images, labels)


# LEARN WEIGHTS

# In[3]:

train_stream = ServerDataStream(('driver_id', 'images', 'labels'), False, hwm=10)
valid_stream = ServerDataStream(('driver_id', 'images', 'labels'), False, hwm=10, port=5558)


# In[5]:
alpha = 0.1

cg    = ComputationGraph(cost)
cg_bn = apply_batch_normalization(cg)

inputs = VariableFilter(roles=[INPUT])(cg_bn.variables)
print inputs

cg_dropout = apply_dropout(cg_bn, [inputs[11], inputs[0]], .5)

cost_bn = cg_bn.outputs[0]
cost_dropout = cg_dropout.outputs[0]
model = Model(cost)
    

print 'Optimizing parameters :'
print all_parameters

for parameters in all_parameters:
Example No. 6
def main(port_data):
    mlp_hiddens = [500]
    filter_sizes = [(3, 3), (3, 3)]
    feature_maps = [20, 20]
    pooling_sizes = [(3, 3), (2, 2)]
    save_to = "DvC.pkl"
    image_size = (128, 128)
    output_size = 2
    learningRate = 0.1
    num_epochs = 300
    num_batches = None
    if socket.gethostname() == 'tim-X550JX':
        host_plot = 'http://*****:*****@ %s' %
             ('CNN ', datetime.datetime.now(), socket.gethostname()),
             channels=[['train_error_rate', 'valid_error_rate'],
                       ['train_total_gradient_norm']],
             after_epoch=True,
             server_url=host_plot))

    model = Model(cost)

    main_loop = MainLoop(algorithm,
                         stream_data_train,
                         model=model,
                         extensions=extensions)

    main_loop.run()
Example No. 7
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Run the example.

    Parameters
    ----------
    num_epochs : int, optional
        Number of epochs for which to train.

    batch_normalized : bool, optional
        Batch-normalize the training graph. Defaults to `True`.

    alpha : float, optional
        Weight given to a new sample when updating the running
        averages of the population statistics (the existing average
        receives a weight of 1 - alpha).

    """
    if batch_normalized:
        # Add an extra keyword argument that only BatchNormalizedMLP takes,
        # in order to speed things up at the cost of a bit of extra memory.
        mlp_class = BatchNormalizedMLP
        extra_kwargs = {'conserve_memory': False}
    else:
        mlp_class = MLP
        extra_kwargs = {}
    mlp = mlp_class([Logistic(), Logistic(),
                     Logistic(), Softmax()], [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.),
                    **extra_kwargs)
    mlp.initialize()

    # Generate a dataset with 3 spiral arms, using 8000 examples for
    # training and 2000 for testing.
    dataset = Spiral(num_examples=10000,
                     classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_stream = DataStream(dataset,
                              iteration_scheme=ShuffledScheme(examples=8000,
                                                              batch_size=20))
    test_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 examples=list(range(8000, 10000)),
                                 batch_size=2000))

    # Build a cost graph; this contains BatchNormalization bricks that will
    # by default run in inference mode.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # The default name for this is annoyingly long
    original_cg = ComputationGraph([cost, misclass])

    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Add updates for population parameters
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
    else:
        cg = original_cg
        extra_updates = []

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=train_stream,
        # Use the original cost and misclass variables so
        # that we monitor the (original) inference-mode graph.
        extensions=[
            DataStreamMonitoring([cost, misclass],
                                 train_stream,
                                 prefix='train'),
            DataStreamMonitoring([cost, misclass], test_stream, prefix='test'),
            Printing(),
            FinishAfter(after_n_epochs=num_epochs)
        ])
    main_loop.run()
    return main_loop
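As an aside on the alpha parameter documented above (not part of the original example): with the rule used in extra_updates, population = alpha * batch_stat + (1 - alpha) * population, so the population estimate moves a fraction alpha of the way toward each new batch statistic.

# Illustration: starting from 0.0, after three batches whose statistic is 1.0
# the running estimate with alpha = 0.1 is 0.271.
alpha = 0.1
population = 0.0
for batch_stat in [1.0, 1.0, 1.0]:
    population = alpha * batch_stat + (1 - alpha) * population
print(round(population, 3))  # 0.271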
Example No. 8
# print texture_image_nn_input
print texture_image_nn_input.shape

f_features_gram = theano.function(
        inputs=[X],
        outputs=[gram_matrix(f) for f in texture_features(X)]
)
target_image_features = f_features_gram(texture_image_nn_input)
# print target_image_features
print [t.shape for t in target_image_features]

from blocks.graph import ComputationGraph, apply_batch_normalization, get_batch_normalization_updates

cg = ComputationGraph(generated_image_graph)
cg_bn = apply_batch_normalization(cg)
pop_updates = get_batch_normalization_updates(cg_bn)

text_generated = texture_features(cg.outputs[0])
gram_generated = [gram_matrix(f) for f in text_generated]

loss = 0
for i in range(len(target_image_features)):
    N = text_generated[i].shape[1]
    M = text_generated[i].shape[2]*text_generated[i].shape[3]
    loss += 1./ (4 * 16 * N ** 2 * M ** 2) * ((gram_generated[i]
        - tensor.addbroadcast(theano.shared(target_image_features[i]), 0)) ** 2).sum()
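
gram_matrix and texture_features are not defined in the snippet above. For reference, a common Theano formulation of the Gram matrix used in texture/style losses, consistent with the N (channels) and M (spatial) factors in the loss above, is sketched below; this is an assumption about the author's helper, not their code.

from theano import tensor


def gram_matrix(features):
    # features: (batch, channels, height, width) feature maps
    flat = features.flatten(ndim=3)  # (batch, channels, height * width)
    return tensor.batched_dot(flat, flat.dimshuffle(0, 2, 1))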

Example No. 9
def main(config, use_bokeh=False):

    tr_stream = get_tr_stream(**config)
    # dev_stream = get_dev_stream(**config)
    # Create Theano variables
    logger.info('Creating theano variables')
    source_image = tensor.ftensor4('image')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.ftensor4('input')
    sampling_output = tensor.lmatrix('output')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    cnn_encoder = CNNEncoder(config['batch_norm'])
    image_embedding = cnn_encoder.conv_sequence.apply(source_image)
    if config['use_rnn']:
        encoder = BidirectionalEncoder(config['enc_embed'],
                                       config['enc_nhids'])
        encoder_inputs = image_embedding.dimshuffle(2, 3, 0, 1)
        encoded_images, _ = theano.map(encoder.apply,
                                       sequences=encoder_inputs,
                                       name='parallel_encoders')
    else:
        encoded_images = image_embedding.dimshuffle(2, 3, 0, 1)
    encoded_shape = encoded_images.shape
    annotation_vector = encoded_images.reshape(
        (-1, encoded_shape[2], encoded_shape[3]))
    annotation_vector_mask = tensor.ones(annotation_vector.shape[:2])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['dec_nhids'], config['enc_nhids'] * 2)

    cost = decoder.cost(annotation_vector, annotation_vector_mask,
                        target_sentence, target_sentence_mask)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Initialize model
    logger.info('Initializing model')
    cnn_encoder.conv_sequence.weights_init = IsotropicGaussian(
        config['weight_scale'])
    cnn_encoder.conv_sequence.biases_init = Constant(0)
    if config['use_rnn']:
        encoder.weights_init = IsotropicGaussian(config['weight_scale'])
        encoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        encoder.initialize()
    decoder.weights_init = IsotropicGaussian(config['weight_scale'])
    decoder.biases_init = Constant(0)
    decoder.push_initialization_config()
    decoder.transition.weights_init = Orthogonal()
    decoder.initialize()
    cnn_encoder.conv_sequence.push_initialization_config()
    cnn_encoder.conv_sequence.initialize()

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [
            x for x in cg.intermediary_variables
            if x.name == 'maxout_apply_output'
        ]
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        cnn_params = Selector(
            cnn_encoder.conv_sequence).get_parameters().values()
        enc_params = []
        if config['use_rnn']:
            enc_params += Selector(encoder.fwd_fork).get_parameters().values()
            enc_params += Selector(encoder.back_fork).get_parameters().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_parameters().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_parameters().values()
        dec_params += Selector(
            decoder.transition.initial_transformer).get_parameters().values()
        cg = apply_noise(cg, cnn_params + enc_params + dec_params,
                         config['weight_noise_ff'])

    # Apply batch normalization
    if config['batch_norm']:
        logger.info('Applying batch normalization')
        cg = apply_batch_normalization(cg)
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * 0.05 + p * (1 - 0.05))
                         for p, m in pop_updates]
    else:
        extra_updates = []

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    if config['use_rnn']:
        enc_dec_param_dict = merge(
            Selector(cnn_encoder.conv_sequence).get_parameters(),
            Selector(encoder).get_parameters(),
            Selector(decoder).get_parameters())
    else:
        enc_dec_param_dict = merge(
            Selector(cnn_encoder.conv_sequence).get_parameters(),
            Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'], every_n_batches=config['save_freq'])
    ]

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_image_embedding = cnn_encoder.conv_sequence.apply(
            sampling_input)
        if config['use_rnn']:
            sampling_encoder_inputs = sampling_image_embedding.dimshuffle(
                2, 3, 0, 1)
            sampling_encoded_images, _ = theano.map(
                encoder.apply,
                sequences=sampling_encoder_inputs,
                name='parallel_encoders_inf')
        else:
            sampling_encoded_images = sampling_image_embedding.dimshuffle(
                2, 3, 0, 1)
        sampling_encoded_shape = sampling_encoded_images.shape
        sampling_annotation_vector = sampling_encoded_images.reshape(
            (-1, sampling_encoded_shape[2], sampling_encoded_shape[3]))
        sampling_annotation_vector_mask = tensor.ones(
            sampling_annotation_vector.shape[:2])
        generated = decoder.generate(sampling_annotation_vector)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model,
                    data_stream=tr_stream,
                    hook_samples=config['hook_samples'],
                    every_n_batches=config['sampling_freq'],
                    trg_vocab=config['trg_vocab']))
    # Add early stopping based on bleu
    if 'bleu_script' in config:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input,
                          samples=samples,
                          config=config,
                          model=search_model,
                          data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Cs-En', channels=[['decoder_cost_cost']], after_batch=True))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=CompositeRule([
                                    StepClipping(config['step_clipping']),
                                    eval(config['step_rule'])()
                                ]))
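    # NOTE: the batch-normalization population-statistic updates collected in
    # extra_updates above take effect only if the following line is uncommented.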
    # algorithm.add_updates(extra_updates)

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
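The config dictionary this main expects is not shown. Purely for orientation, the keys it reads are listed below with placeholder values (illustrative only; get_tr_stream and get_dev_stream will need their own keys on top of these):

config = {
    # model
    'batch_norm': True,
    'use_rnn': True,
    'enc_embed': 620, 'enc_nhids': 1000,
    'dec_embed': 620, 'dec_nhids': 1000,
    'trg_vocab_size': 30000,
    'weight_scale': 0.01,
    # regularization
    'dropout': 0.5,
    'weight_noise_ff': 0.0,
    # optimization
    'step_rule': 'AdaDelta',
    'step_clipping': 1.0,
    'finish_after': 1000000,
    # checkpointing and reloading
    'saveto': 'model',
    'save_freq': 500,
    'reload': False,
    # sampling and BLEU validation
    'hook_samples': 2,
    'sampling_freq': 13,
    'trg_vocab': 'vocab.trg.pkl',
    'bleu_script': None,  # note: the BLEU validator relies on dev_stream, which is commented out above
    'normalized_bleu': True,
    'bleu_val_freq': 5000,
}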