Exemplo n.º 1
0
def main():
  x = tensor.matrix("features")
  input_to_hidden1 = get_typical_layer(x, 784, 500)
  #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
  hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

  latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
  #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
  hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
  hidden2_to_output.name = "last_before_output"

  from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
  from blocks.graph import ComputationGraph
  from blocks.algorithms import Adam, GradientDescent, Scale
  from blocks.roles import WEIGHT

  cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
  cg = ComputationGraph(cost)
  weights = VariableFilter(roles=[WEIGHT]) (cg.variables)
#  cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
#  cost.name = "regularized error"
  gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

  from blocks.main_loop import MainLoop
  from blocks.extensions import FinishAfter, Printing, ProgressBar
  from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
  monitor = TrainingDataMonitoring([cost], after_epoch=True)
  main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd, extensions=[monitor, FinishAfter(after_n_epochs=5),  ProgressBar(), Printing()])

  main_loop.run()
  showcase(cg, "last_before_output")
Exemplo n.º 2
0
 def __init__(self, params, feature_source, input_dim):
     super(MaxoutMLP, self).__init__(params)
     self.x = tensor.matrix(feature_source, dtype='float32')
     self.y = tensor.matrix('genres', dtype='int32')
     mlp = MLPGenreClassifier(input_dim, self.params['n_classes'],
                              self.params['hidden_size'],
                              self.params['init_ranges'])
     mlp.initialize()
     with batch_normalization(mlp):
         self.y_hat = mlp.apply(self.x)
     self.cost = BinaryCrossEntropy().apply(self.y, self.y_hat)
Exemplo n.º 3
0
 def __init__(self, params):
     super(MoETrainer, self).__init__(params)
     self.x_v = tensor.matrix('vgg_features', dtype='float32')
     self.x_t = tensor.matrix('features', dtype='float32')
     self.y = tensor.matrix('genres', dtype='int32')
     model = MoEClassifier(params['visual_dim'], params['textual_dim'],
                           params['n_classes'], params['hidden_size'],
                           params['init_ranges'])
     model.initialize()
     with batch_normalization(model):
         self.y_hat = model.apply(self.x_v, self.x_t)
     self.cost = BinaryCrossEntropy().apply(self.y, self.y_hat)
Exemplo n.º 4
0
 def __init__(self, params):
     super(ConcatenateTrainer, self).__init__(params)
     x_v = tensor.matrix('vgg_features', dtype='float32')
     x_t = tensor.matrix('features', dtype='float32')
     self.x = tensor.concatenate([x_v, x_t], axis=1)
     self.y = tensor.matrix('genres', dtype='int32')
     input_dim = params['visual_dim'] + params['textual_dim']
     mlp = MLPGenreClassifier(input_dim, self.params['n_classes'],
                              self.params['hidden_size'],
                              self.params['init_ranges'])
     mlp.initialize()
     with batch_normalization(mlp):
         self.y_hat = mlp.apply(self.x)
     self.cost = BinaryCrossEntropy().apply(self.y, self.y_hat)
Exemplo n.º 5
0
def create_training_computation_graphs():
    x = tensor.tensor4('features')
    y = tensor.imatrix('targets')

    convnet, mlp = create_model_bricks()
    y_hat = mlp.apply(convnet.apply(x).flatten(ndim=2))
    cost = BinaryCrossEntropy().apply(y, y_hat)
    accuracy = 1 - tensor.neq(y > 0.5, y_hat > 0.5).mean()
    cg = ComputationGraph([cost, accuracy])

    # Create a graph which uses batch statistics for batch normalization
    # as well as dropout on selected variables
    bn_cg = apply_batch_normalization(cg)
    bricks_to_drop = ([convnet.layers[i] for i in (5, 11, 17)] +
                      [mlp.application_methods[1].brick])
    variables_to_drop = VariableFilter(
        roles=[OUTPUT], bricks=bricks_to_drop)(bn_cg.variables)
    bn_dropout_cg = apply_dropout(bn_cg, variables_to_drop, 0.5)

    return cg, bn_dropout_cg
Exemplo n.º 6
0
def build_autoencoder(features, labels_num, labels_cat):

    mlp_bottom = MLP(activations=[
        Rectifier(),
        Rectifier(),
        Rectifier(),
        Rectifier(),
        Rectifier()
    ],
                     dims=[24033, 5000, 1000, 100, 1000, 5000],
                     weights_init=IsotropicGaussian(),
                     biases_init=Constant(1))
    mlp_bottom.initialize()

    mlp_top = build_top_mlp()
    mlp_top.push_initialization_config()
    mlp_top.initialize()

    # a = mlp_bottom.apply(features)
    # b = mlp_top.apply(a)

    # Construct feedforward sequence
    ss_seq = Sequence([mlp_bottom.apply, mlp_top.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    [outputs_numerical, outputs_categorical] = ss_seq.apply(features)

    cost = SquaredError().apply(
        labels_num, outputs_numerical) + BinaryCrossEntropy().apply(
            labels_cat, outputs_categorical)

    cg = ComputationGraph(cost)

    #cg_dropout0   = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    #cg_dropout1   = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    #cost_dropout1 = cg_dropout1.outputs[0]

    return cost, cg.parameters
Exemplo n.º 7
0
def main(name, dataset, epochs, batch_size, learning_rate, attention, n_iter,
         enc_dim, dec_dim, z_dim, oldmodel):

    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(data_train.num_examples,
                                                     batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(data_valid.num_examples,
                                                     batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(data_test.num_examples,
                                                     batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    if attention != "":
        read_N, write_N = attention.split(',')

        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N**2

        reader = AttentionReader(x_dim=x_dim,
                                 dec_dim=dec_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=read_N,
                                 **inits)
        writer = AttentionWriter(input_dim=dec_dim,
                                 output_dim=x_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=write_N,
                                 **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------

    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("               dataset: %s" % dataset)
    print("          subdirectory: %s" % subdir)
    print("         learning rate: %g" % learning_rate)
    print("             attention: %s" % attention)
    print("          n_iterations: %d" % n_iter)
    print("     encoder dimension: %d" % enc_dim)
    print("           z dimension: %d" % z_dim)
    print("     decoder dimension: %d" % dec_dim)
    print("            batch size: %d" % batch_size)
    print("                epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc",
                      **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec",
                      **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    #x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    #x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    #x_recons = x_recons[-1,:,:]

    #samples = draw.sample(100)
    #x_recons = samples[-1, :, :]
    #x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    #algorithm.add_updates(scan_updates)

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]
    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(train_monitors,
                                   prefix="train",
                                   after_epoch=True),
            #            DataStreamMonitoring(
            #                monitors,
            #                valid_stream,
            ##                updates=scan_updates,
            #                prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                #                updates=scan_updates,
                prefix="test"),
            Checkpoint(name,
                       before_training=False,
                       after_epoch=True,
                       save_separately=['log', 'model']),
            #Checkpoint(image_size=image_size, save_subdir=subdir, path=pickle_file, before_training=False, after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()
Exemplo n.º 8
0
def main(name, epochs, batch_size, learning_rate, attention, n_iter, enc_dim,
         dec_dim, z_dim):

    if name is None:
        tag = "watt" if attention else "woatt"
        name = "%s-t%d-enc%d-dec%d-z%d" % (tag, n_iter, enc_dim, dec_dim,
                                           z_dim)

    print("\nRunning experiment %s" % name)
    print("         learning rate: %5.3f" % learning_rate)
    print("             attention: %s" % attention)
    print("          n_iterations: %d" % n_iter)
    print("     encoder dimension: %d" % enc_dim)
    print("           z dimension: %d" % z_dim)
    print("     decoder dimension: %d" % dec_dim)
    print()

    #------------------------------------------------------------------------

    x_dim = 28 * 28
    img_height, img_width = (28, 28)

    rnninits = {
        'weights_init': Orthogonal(),
        #'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': Orthogonal(),
        #'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    prior_mu = T.zeros([z_dim])
    prior_log_sigma = T.zeros([z_dim])

    if attention:
        read_N = 4
        write_N = 6
        read_dim = 2 * read_N**2

        reader = AttentionReader(x_dim=x_dim,
                                 dec_dim=dec_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=read_N,
                                 **inits)
        writer = AttentionWriter(input_dim=dec_dim,
                                 output_dim=x_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=read_N,
                                 **inits)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

    encoder = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Tanh()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc",
                      **inits)
    decoder_mlp = MLP([Tanh()], [z_dim, 4 * dec_dim], name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    for brick in [
            reader, writer, encoder, decoder, encoder_mlp, decoder_mlp,
            q_sampler
    ]:
        brick.allocate()
        brick.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    # This is one iteration
    def one_iteration(c, h_enc, c_enc, z_mean, z_log_sigma, z, h_dec, c_dec,
                      x):
        x_hat = x - T.nnet.sigmoid(c)
        r = reader.apply(x, x_hat, h_dec)
        i_enc = encoder_mlp.apply(T.concatenate([r, h_dec], axis=1))
        h_enc, c_enc = encoder.apply(states=h_enc,
                                     cells=c_enc,
                                     inputs=i_enc,
                                     iterate=False)
        z_mean, z_log_sigma, z = q_sampler.apply(h_enc)
        i_dec = decoder_mlp.apply(z)
        h_dec, c_dec = decoder.apply(states=h_dec,
                                     cells=c_dec,
                                     inputs=i_dec,
                                     iterate=False)
        c = c + writer.apply(h_dec)
        return c, h_enc, c_enc, z_mean, z_log_sigma, z, h_dec, c_dec

    outputs_info = [
        T.zeros([batch_size, x_dim]),  # c
        T.zeros([batch_size, enc_dim]),  # h_enc
        T.zeros([batch_size, enc_dim]),  # c_enc
        T.zeros([batch_size, z_dim]),  # z_mean
        T.zeros([batch_size, z_dim]),  # z_log_sigma
        T.zeros([batch_size, z_dim]),  # z
        T.zeros([batch_size, dec_dim]),  # h_dec
        T.zeros([batch_size, dec_dim]),  # c_dec
    ]

    outputs, scan_updates = theano.scan(fn=one_iteration,
                                        sequences=[],
                                        outputs_info=outputs_info,
                                        non_sequences=[x],
                                        n_steps=n_iter)

    c, h_enc, c_enc, z_mean, z_log_sigma, z, h_dec, c_dec = outputs

    kl_terms = (prior_log_sigma - z_log_sigma + 0.5 *
                (tensor.exp(2 * z_log_sigma) +
                 (z_mean - prior_mu)**2) / tensor.exp(2 * prior_log_sigma) -
                0.5).sum(axis=-1)

    x_recons = T.nnet.sigmoid(c[-1, :, :])
    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            #StepClipping(3.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    algorithm.add_updates(scan_updates)

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        x_recons_t = T.nnet.sigmoid(c[t, :, :])
        recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        recons_term_t = recons_term_t.mean()
        recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t, recons_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [["train_nll_bound", "test_nll_bound"],
                     ["train_kl_term_%d" % t for t in range(n_iter)],
                     ["train_recons_term_%d" % t for t in range(n_iter)],
                     ["train_total_gradient_norm", "train_total_step_norm"]]

    #------------------------------------------------------------

    mnist_train = BinarizedMNIST("train", sources=['features'])
    mnist_test = BinarizedMNIST("test", sources=['features'])
    #mnist_train = MNIST("train", binary=True, sources=['features'])
    #mnist_test = MNIST("test", binary=True, sources=['features'])

    main_loop = MainLoop(
        model=None,
        data_stream=ForceFloatX(
            DataStream(mnist_train,
                       iteration_scheme=SequentialScheme(
                           mnist_train.num_examples, batch_size))),
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            DataStreamMonitoring(
                monitors,
                ForceFloatX(
                    DataStream(mnist_test,
                               iteration_scheme=SequentialScheme(
                                   mnist_test.num_examples, batch_size))),
                updates=scan_updates,
                prefix="test"),
            TrainingDataMonitoring(train_monitors,
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(name + ".pkl"),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
Exemplo n.º 9
0
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(
        generate_data(max_seq_length, batch_size, num_batches))
    dataset_test = IterableDataset(
        generate_data(max_seq_length, batch_size, 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * ltsm_dim, see
    # LSTM layer documentation for the explanation
    x_to_h = Linear(1,
                    lstm_dim * 4,
                    name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim,
                name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim,
                    1,
                    name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test,
                                        prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost],
                                           prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm,
                         stream_train,
                         extensions=[
                             test_monitor, train_monitor,
                             FinishAfter(after_n_epochs=num_epochs),
                             Printing(),
                             ProgressBar()
                         ])
    main_loop.run()

    print('Learned weights:')
    for layer in (x_to_h, lstm, h_to_o):
        print("Layer '%s':" % layer.name)
        for param in layer.parameters:
            print(param.name, ': ', param.get_value())
        print()

    return main_loop
Exemplo n.º 10
0
                #4096,
                128,
                #4096,
                ],
            weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
            biases_init=Constant(W_b))

# Don't forget to initialize params:
autoencoder.initialize()

# Reconstruction layer:
x_hat = Linear(input_dim=128, output_dim=input_dim[side]).apply(autoencoder.apply(x))

# Define a cost function to optimize, and a classification error rate.
# Also apply the outputs from the net and corresponding targets:
cost = BinaryCrossEntropy().apply(x, x_hat)

# This is the model: before applying dropout
autoencoder = Model(cost)

# Need to define the computation graph for the cost func:
cost_graph = ComputationGraph([cost])

# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

# Add some regularization to this model:
cost += weight_decay * l2_norm(W)
cost.name = 'entropy'

# computational graph with l2 reg
Exemplo n.º 11
0
from hinton import hinton

x = tensor.matrix('x')
y = tensor.lmatrix('y')

mlp = MLP(activations=[Logistic(), Softmax()],
          dims=[117, 55, 2],
          weights_init=IsotropicGaussian(),
          biases_init=Constant(0.01))

mlp.initialize()

y_hat = mlp.apply(x)

cost = BinaryCrossEntropy().apply(y, y_hat)

cg = ComputationGraph(cost)

W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + 0.001 * abs(W1).sum() + 0.001 * abs(W2).sum()
cost.name = 'cost'

error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = 'error_rate'

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))

train_set = H5PYDataset('mushrooms.hdf5', which_sets=('train', ))
Exemplo n.º 12
0
def main(name, epochs, batch_size, learning_rate):
    if name is None:
        name = "att-rw"

    print("\nRunning experiment %s" % name)
    print("         learning rate: %5.3f" % learning_rate)
    print()

    #------------------------------------------------------------------------

    img_height, img_width = 28, 28

    read_N = 12
    write_N = 14

    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    x_dim = img_height * img_width

    reader = ZoomableAttentionWindow(img_height, img_width, read_N)
    writer = ZoomableAttentionWindow(img_height, img_width, write_N)

    # Parameterize the attention reader and writer
    mlpr = MLP(activations=[Tanh(), Identity()],
               dims=[x_dim, 50, 5],
               name="RMLP",
               **inits)
    mlpw = MLP(activations=[Tanh(), Identity()],
               dims=[x_dim, 50, 5],
               name="WMLP",
               **inits)

    # MLP between the reader and writer
    mlp = MLP(activations=[Tanh(), Identity()],
              dims=[read_N**2, 300, write_N**2],
              name="MLP",
              **inits)

    for brick in [mlpr, mlpw, mlp]:
        brick.allocate()
        brick.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    hr = mlpr.apply(x)
    hw = mlpw.apply(x)

    center_y, center_x, delta, sigma, gamma = reader.nn2att(hr)
    r = reader.read(x, center_y, center_x, delta, sigma)

    h = mlp.apply(r)

    center_y, center_x, delta, sigma, gamma = writer.nn2att(hw)
    c = writer.write(h, center_y, center_x, delta, sigma) / gamma
    x_recons = T.nnet.sigmoid(c)

    cost = BinaryCrossEntropy().apply(x, x_recons)
    cost.name = "cost"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            RemoveNotFinite(),
            Adam(learning_rate),
            StepClipping(3.),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    #for v in [center_y, center_x, log_delta, log_sigma, log_gamma]:
    #    v_mean = v.mean()
    #    v_mean.name = v.name
    #    monitors += [v_mean]
    #    monitors += [aggregation.mean(v)]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["cost"],
    ]

    #------------------------------------------------------------

    mnist_train = BinarizedMNIST("train", sources=['features'])
    mnist_test = BinarizedMNIST("test", sources=['features'])
    #mnist_train = MNIST("train", binary=True, sources=['features'])
    #mnist_test = MNIST("test", binary=True, sources=['features'])

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=ForceFloatX(
            DataStream(mnist_train,
                       iteration_scheme=SequentialScheme(
                           mnist_train.num_examples, batch_size))),
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            DataStreamMonitoring(
                monitors,
                ForceFloatX(
                    DataStream(mnist_test,
                               iteration_scheme=SequentialScheme(
                                   mnist_test.num_examples, batch_size))),
                prefix="test"),
            TrainingDataMonitoring(train_monitors,
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(name + ".pkl"),
            #Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
Exemplo n.º 13
0
def create_network(inputs=None, batch=batch_size):
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    x = x / 255. if dataset != 'binarized_mnist' else x

    # GatedPixelCNN
    gated = GatedPixelCNN(name='gated_layer_0',
                          filter_size=7,
                          image_size=(img_dim, img_dim),
                          num_filters=h * n_channel,
                          num_channels=n_channel,
                          batch_size=batch,
                          weights_init=IsotropicGaussian(std=0.02, mean=0),
                          biases_init=Constant(0.02),
                          res=False)
    gated.initialize()
    x_v, x_h = gated.apply(x, x)

    for i in range(n_layer):
        gated = GatedPixelCNN(name='gated_layer_{}'.format(i + 1),
                              filter_size=3,
                              image_size=(img_dim, img_dim),
                              num_channels=h * n_channel,
                              batch_size=batch,
                              weights_init=IsotropicGaussian(std=0.02, mean=0),
                              biases_init=Constant(0.02),
                              res=True)
        gated.initialize()
        x_v, x_h = gated.apply(x_v, x_h)

    conv_list = []
    conv_list.extend([
        Rectifier(),
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask_type='B',
                            name='1x1_conv_1')
    ])
    #conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
    conv_list.extend([
        Rectifier(),
        ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')
    ])

    sequence = ConvolutionalSequence(conv_list,
                                     num_channels=h * n_channel,
                                     batch_size=batch,
                                     image_size=(img_dim, img_dim),
                                     border_mode='half',
                                     weights_init=IsotropicGaussian(std=0.02,
                                                                    mean=0),
                                     biases_init=Constant(0.02),
                                     tied_biases=False)
    sequence.initialize()
    x = sequence.apply(x_h)
    if MODE == '256ary':
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) +
                          (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
Exemplo n.º 14
0
def create_network(inputs=None, batch=batch_size):
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    x = x / 255. if dataset != 'binarized_mnist' else x

    # PixelCNN architecture
    conv_list = [
        ConvolutionalNoFlip(*first_layer, mask='A', name='0'),
        Rectifier()
    ]
    for i in range(n_layer):
        conv_list.extend([
            ConvolutionalNoFlip(*second_layer, mask='B', name=str(i + 1)),
            Rectifier()
        ])

    conv_list.extend([
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask='B',
                            name=str(n_layer + 1)),
        Rectifier()
    ])
    conv_list.extend([
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask='B',
                            name=str(n_layer + 2)),
        Rectifier()
    ])
    conv_list.extend(
        [ConvolutionalNoFlip(*third_layer, mask='B', name=str(n_layer + 3))])

    sequence = ConvolutionalSequence(conv_list,
                                     num_channels=n_channel,
                                     batch_size=batch,
                                     image_size=(img_dim, img_dim),
                                     border_mode='half',
                                     weights_init=IsotropicGaussian(std=0.05,
                                                                    mean=0),
                                     biases_init=Constant(0.02),
                                     tied_biases=False)
    sequence.initialize()
    x = sequence.apply(x)
    if MODE == '256ary':
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) +
                          (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
Exemplo n.º 15
0
    def train(self):

        x = self.sharedBatch['x']
        x.name = 'x_myinput'
        x_mask = self.sharedBatch['x_mask']
        x_mask.name = 'x_mask_myinput'
        y = self.sharedBatch['y']
        y.name = 'y_myinput'

        if self.usePro:
            proportion = self.sharedBatch['pro']
            proportion.name = 'pro'

        # we need to provide data for the LSTM layer of size 4 * ltsm_dim, see
        # LSTM layer documentation for the explanation
        x_to_h = Linear(self.input_dimx1,
                        self.dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        lstm = LSTM(self.dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
        h_to_o = Linear(self.dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        h, c = lstm.apply(x_transform, mask=x_mask)

        # only values of hidden units of the last timeframe are used for
        # the classification
        y_hat = h_to_o.apply(h[-1])
        y_hat = Logistic().apply(y_hat)

        if self.usePro:
            cost = BinaryCrossEntropyProp().apply(y, y_hat, proportion)
        else:
            cost = BinaryCrossEntropy().apply(y, y_hat)

        cost.name = 'cost'

        lstm.initialize()
        x_to_h.initialize()
        h_to_o.initialize()

        self.f = theano.function(inputs=[], outputs=y_hat)
        self.lastH = theano.function(inputs=[], outputs=h[-1])
        self.cg = ComputationGraph(cost)
        m = Model(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=self.cg.parameters,
                                    step_rule=RMSProp(learning_rate=0.01),
                                    on_unused_sources='ignore')
        valid_monitor = DataStreamMonitoringShared(
            variables=[cost],
            data_stream=self.stream_valid_int,
            prefix="valid",
            sharedBatch=self.sharedBatch,
            sharedData=self.sharedData)
        train_monitor = TrainingDataMonitoring(variables=[cost],
                                               prefix="train",
                                               after_epoch=True)

        sharedVarMonitor = SwitchSharedReferences(self.sharedBatch,
                                                  self.sharedData)
        tBest = self.track_best('valid_cost', self.cg)
        self.tracker = tBest[0]
        extensions = [sharedVarMonitor, valid_monitor] + tBest

        if self.debug:
            extensions.append(Printing())

        self.algorithm = algorithm
        self.extensions = extensions
        self.model = m
        self.mainloop = MainLoop(self.algorithm,
                                 self.stream_train_int,
                                 extensions=self.extensions,
                                 model=self.model)
        self.main_loop(True)
Exemplo n.º 16
0
def main(name, epochs, batch_size, learning_rate, 
         attention, n_iter, enc_dim, dec_dim, z_dim):

     # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e"%value)[0]
        return "%s%d" % (leading, -exp)

    if name is None:
        tag = "watt" if attention else "woatt"
        lr_str = lr_tag(learning_rate)
        name = "%s-t%d-enc%d-dec%d-z%d-lr%s" % (tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)

    print("\nRunning experiment %s" % name)
    print("         learning rate: %5.3f" % learning_rate) 
    print("             attention: %s" % attention)
    print("          n_iterations: %d" % n_iter)
    print("     encoder dimension: %d" % enc_dim)
    print("           z dimension: %d" % z_dim)
    print("     decoder dimension: %d" % dec_dim)
    print()


    #------------------------------------------------------------------------

    x_dim = 28*28
    img_height, img_width = (28, 28)
    
    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    
    if attention:
        read_N = 4
        write_N = 7
        read_dim = 2*read_N**2

        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 width=img_width, height=img_height,
                                 N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 width=img_width, height=img_height,
                                 N=read_N, **inits)
    else:
        read_dim = 2*x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Tanh()], [(read_dim+dec_dim), 4*enc_dim], name="MLP_enc", **inits)
    decoder_mlp = MLP([Tanh()], [             z_dim, 4*dec_dim], name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(
                n_iter, 
                reader=reader,
                encoder_mlp=encoder_mlp,
                encoder_rnn=encoder_rnn,
                sampler=q_sampler,
                decoder_mlp=decoder_mlp,
                decoder_rnn=decoder_rnn,
                writer=writer)
    draw.initialize()


    #------------------------------------------------------------------------
    x = tensor.matrix('features')
    
    #x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    #x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    #x_recons = x_recons[-1,:,:]

    #samples = draw.sample(100) 
    #x_recons = samples[-1, :, :]
    #x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost, 
        params=params,
        step_rule=CompositeRule([
            StepClipping(3.), 
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    #algorithm.add_updates(scan_updates)


    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    """
    for t in range(n_iter):
        kl_term_t = kl_terms[t,:].mean()
        kl_term_t.name = "kl_term_%d" % t

        x_recons_t = T.nnet.sigmoid(c[t,:,:])
        recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        recons_term_t = recons_term_t.mean()
        recons_term_t.name = "recons_term_%d" % t

        monitors +=[kl_term_t, recons_term_t]
    """
    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]
    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        ["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------

    mnist_train = BinarizedMNIST("train", sources=['features'])
    mnist_test = BinarizedMNIST("test", sources=['features'])

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=ForceFloatX(DataStream(mnist_train,
                        iteration_scheme=SequentialScheme(
                        mnist_train.num_examples, batch_size))),
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            DataStreamMonitoring(
                monitors,
                ForceFloatX(DataStream(mnist_test,
                    iteration_scheme=SequentialScheme(
                    mnist_test.num_examples, batch_size))),
##                updates=scan_updates, 
                prefix="test"),
            TrainingDataMonitoring(
                train_monitors, 
                prefix="train",
                after_every_epoch=True),
            SerializeMainLoop(name+".pkl"),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()])
    main_loop.run()
Exemplo n.º 17
0
    def train(self):

        #print(self.sharedBatch.keys())
        x = self.sharedBatch['x']
        x.name = 'x_myinput'
        xmask = self.sharedBatch['xmask']
        xmask.name = 'xmask_myinput'
        xmini = self.sharedBatch['xmini']
        xmini.name = 'xmini_myinput'
        xmini_mask = self.sharedBatch['xmini_mask']
        xmini_mask.name = 'xmini_mask_myinput'
        y = self.sharedBatch['y']
        y.name = 'y_myinput'
        xattr = self.sharedBatch['xattr']
        xattr.name = 'xattr_myinput'

        # we need to provide data for the LSTM layer of size 4 * ltsm_dim, see
        # LSTM layer documentation for the explanation
        x_to_h = Linear(self.input_dimx,
                        self.dim,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        xmini_to_h = Linear(self.input_dimxmini,
                            self.mini_dim * 4,
                            name='xmini_to_h',
                            weights_init=IsotropicGaussian(),
                            biases_init=Constant(0.0))

        comb_to_h = Linear(self.input_dimxattr + self.summary_dim,
                           self.input_dimxattr + self.summary_dim,
                           name='comb_to_h',
                           weights_init=IsotropicGaussian(),
                           biases_init=Constant(0.0))

        lstmwmini = LSTMwMini(dim=self.dim,
                              mini_dim=self.mini_dim,
                              summary_dim=self.summary_dim)

        mlp = MLP(
            activations=[Rectifier()],
            dims=[self.summary_dim + self.input_dimxattr, self.summary_dim],
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))

        h_to_o = Linear(self.summary_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        xmini_transform = xmini_to_h.apply(xmini)

        h, c = lstmwmini.apply(x=x_transform,
                               xmini=xmini_transform,
                               xmask=xmask,
                               xmini_mask=xmini_mask)

        attr_and_rnn = T.concatenate([xattr, h[-1]], axis=1)

        #self.f = theano.function(inputs = [], outputs=attr_and_rnn)
        #print(self.summary_dim)
        #print(self.input_dimx)
        #print("self.f === ")
        #print(self.f())
        #print(self.f().shape)
        #print("====")
        comb_transform = comb_to_h.apply(attr_and_rnn)
        mlp_transform = mlp.apply(comb_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        #y_hat = h_to_o.apply(h[-1])
        y_hat = h_to_o.apply(mlp_transform)
        y_hat = Logistic().apply(y_hat)

        cost = BinaryCrossEntropy().apply(y, y_hat)
        cost.name = 'cost'

        lstmwmini.initialize()
        x_to_h.initialize()
        xmini_to_h.initialize()
        comb_to_h.initialize()
        mlp.initialize()
        h_to_o.initialize()

        self.f = theano.function(inputs=[], outputs=y_hat)

        #print("self.f === ")
        #print(self.f())
        #print(self.f().shape)
        #print("====")

        self.cg = ComputationGraph(cost)
        m = Model(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=self.cg.parameters,
                                    step_rule=RMSProp(learning_rate=0.01),
                                    on_unused_sources='ignore')
        valid_monitor = DataStreamMonitoringShared(
            variables=[cost],
            data_stream=self.stream_valid_int,
            prefix="valid",
            sharedBatch=self.sharedBatch,
            sharedData=self.sharedData)
        train_monitor = TrainingDataMonitoring(variables=[cost],
                                               prefix="train",
                                               after_epoch=True)

        sharedVarMonitor = SwitchSharedReferences(self.sharedBatch,
                                                  self.sharedData)
        tBest = self.track_best('valid_cost', self.cg)
        self.tracker = tBest[0]
        extensions = [sharedVarMonitor, valid_monitor] + tBest

        if self.debug:
            extensions.append(Printing())

        self.algorithm = algorithm
        self.extensions = extensions
        self.model = m
        self.mainloop = MainLoop(self.algorithm,
                                 self.stream_train_int,
                                 extensions=self.extensions,
                                 model=self.model)
        self.main_loop(True)