Example #1

Both examples below assume module-level imports of theano (with theano.tensor as T), blocks, itertools, and the project-local modules dataset, extensions, initialization, and bricks.
def main():
    # shape (batch, time, pitch)
    xs = T.tensor3("features")
    # shape (batch, time)
    mask = T.matrix("features_mask")

    theano.config.compute_test_value = "warn"
    test_batch = next(dataset.get_stream("train", max_examples=11,
                                         piano_roll=True).get_epoch_iterator(as_dict=True))
    xs.tag.test_value = test_batch["features"][:11]
    mask.tag.test_value = test_batch["features_mask"][:11]

    # mask doesn't have a pitch axis; make it broadcast
    mask = T.shape_padright(mask)

    # move time axis in front of batch axis
    xs = xs.dimshuffle(1, 0, 2)
    mask = mask.dimshuffle(1, 0, 2)

    input_dim = 128
    intermediate_dim = 32

    x_to_y = blocks.bricks.MLP(
        name="x_to_y",
        dims=[input_dim, intermediate_dim, input_dim],
        activations=[blocks.bricks.Rectifier(), blocks.bricks.Rectifier()],
        weights_init=blocks.initialization.Orthogonal(),
        biases_init=blocks.initialization.Constant(0))

    x_to_y.initialize()

    def stepfn(x):
        y = x_to_y.apply(x)
        return y

    def predict(xs):
        ys, _ = theano.scan(
            stepfn,
            sequences=[xs],
            outputs_info=[None])
        return ys

    def generate(xs):
        # run the model over the given frames; this MLP has no recurrent
        # state, so only the last prediction ys[-1] is used as a seed below
        ys, _ = theano.scan(
            stepfn,
            sequences=[xs],
            outputs_info=[None])
        # let the model extrapolate based on its own predictions
        ys, _ = theano.scan(
            stepfn,
            n_steps=128,
            outputs_info=[ys[-1]])
        return ys
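
    # Roughly, the two scans above unroll to the following loop (a sketch,
    # assuming xs has shape (time, batch, pitch) after the dimshuffle):
    #
    #   for x in xs:             # conditioning pass over the given frames
    #       y = stepfn(x)
    #   ys = []
    #   for _ in range(128):     # free-running pass: feed predictions back in
    #       y = stepfn(y)
    #       ys.append(y)
    #
    # outputs_info=[None] means nothing is fed back; outputs_info=[ys[-1]]
    # seeds the second scan so each step consumes the previous prediction.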

    ys = predict(xs)
    errors = (ys[:-1] - xs[1:])**2 * mask[1:]
    # normalize by the number of masked-in prediction steps (T - 1 per sequence)
    cost = (errors.sum(axis=0) / mask[1:].sum(axis=0)).mean()
    cost.name = "cost"
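
    # At this point (time-first layout) ys and xs are (time, batch, pitch) and
    # mask is (time, batch, 1): ys[t] is the prediction for frame t+1, so the
    # cost above is a masked next-step mean squared error.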

    graph = blocks.graph.ComputationGraph(cost)
    model = blocks.model.Model(cost)
    algorithm = blocks.algorithms.GradientDescent(
        cost=cost,
        parameters=graph.parameters,
        step_rule=blocks.algorithms.Adam())

    step_channels = []
    for key, parameter in model.get_parameter_dict().items():
        step_channels.append(algorithm.steps[parameter].norm(2)
                             .copy(name="step_norm:%s" % key))
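    # (These step-norm channels track the L2 size of each parameter's Adam
    # update; a sudden jump here is usually the first sign of training
    # instability.)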

    monitors = [
        blocks.extensions.monitoring.DataStreamMonitoring(
            graph.outputs + (step_channels if which_set == "train" else []),
            data_stream=dataset.get_stream(which_set, max_examples=100),
            prefix=which_set,
            after_epoch=True)
        for which_set in "train test".split()]
    main_loop = blocks.main_loop.MainLoop(
        data_stream=dataset.get_stream("train"),
        model=model, algorithm=algorithm,
        extensions=(monitors + [
            blocks.extensions.FinishAfter(after_n_epochs=100),
            blocks.extensions.ProgressBar(),
            blocks.extensions.Printing(),
            extensions.Generate(
                path="samples_{epoch}.npz",
                generate_fn=theano.function([xs], generate(xs)),
                input_dim=input_dim,
                every_n_epochs=1),
            blocks.extensions.saveload.Checkpoint(
                path="checkpoint.pkl",
                after_epoch=True,
                on_interrupt=True)]))
    main_loop.run()
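
A minimal NumPy sketch of the masked next-step cost computed above, with toy shapes (all names here are illustrative, not part of the example's codebase):

import numpy as np

time, batch, pitch = 5, 2, 4
ys = np.random.rand(time, batch, pitch)    # model predictions
xs = np.random.rand(time, batch, pitch)    # ground-truth piano roll
mask = np.ones((time, batch, 1))           # 1 where a frame is valid
mask[3:, 1] = 0                            # second sequence ends early

# predict frame t from frame t-1, count only masked-in steps
errors = (ys[:-1] - xs[1:])**2 * mask[1:]
cost = (errors.sum(axis=0) / mask[1:].sum(axis=0)).mean()
print(cost)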
Example #2
def main():
    # shape (batch, time, pitch)
    xs_down = T.tensor3("features")
    # shape (batch, time)
    mask = T.matrix("features_mask")

    theano.config.compute_test_value = "warn"
    test_batch = next(dataset.get_stream("train", max_examples=11).get_epoch_iterator(as_dict=True))
    xs_down.tag.test_value = test_batch["features"][:11]
    mask.tag.test_value = test_batch["features_mask"][:11]

    # mask doesn't have a pitch axis; make it broadcast
    mask = T.shape_padright(mask)

    # truncate sequence for debugging
    xs_down = xs_down[:, :30]
    mask = mask[:, :30]

    pitch_dim = 128

    def make_convnet(name, dims):
        dims = list(dims)
        filter_size = (9, 9)
        return blocks.bricks.conv.ConvolutionalSequence(
            name=name,
            layers=list(itertools.chain.from_iterable(
                # theano conv2d for now takes only border modes "full" or
                # "valid"; we use full and then remove the excess padding with
                # the Unpad brick. the result is like "same" convolution.
                (blocks.bricks.conv.ConvolutionalActivation(
                    name="conv_%i" % i,
                    activation=blocks.bricks.Rectifier().apply,
                    filter_size=filter_size,
                    num_filters=dim,
                    border_mode="full"),
                 bricks.Unpad(
                    name="unpad_%i" % i,
                    filter_size=filter_size,
                    num_channels=dim))
                for i, dim in enumerate(dims[1:]))),
            num_channels=dims[0],
            image_size=(None, None),
            tied_biases=True,
            weights_init=initialization.ConvolutionalInitialization(
                blocks.initialization.Orthogonal()),
            biases_init=initialization.Constant(0))
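
    # With a (9, 9) filter, "full" convolution grows each spatial dimension by
    # filter_size - 1 = 8; the Unpad brick presumably strips the extra
    # (filter_size - 1) / 2 = 4 rows/columns from each border, so the output
    # matches the input size, i.e. "same" convolution.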

    # one convnet to 2d convolve the piano rolls, another to be its inverse
    convnet_dims = [1, 16]
    lower_dim = convnet_dims[0] * pitch_dim
    upper_dim = convnet_dims[-1] * pitch_dim
    convnet_up = make_convnet("up", convnet_dims)
    convnet_down = make_convnet("down", reversed(convnet_dims))

    convnet_up.initialize()
    convnet_down.initialize()

    def convapply(xs, indim, outdim, convnet):
        # reinstitute channel axis
        xs = xs.reshape((xs.shape[0], xs.shape[1], indim, pitch_dim)).dimshuffle(0, 2, 1, 3)
        xs = convnet.apply(xs)
        # move channel axis after time and lump it in with the pitch axis
        xs = xs.dimshuffle(0, 2, 1, 3).reshape((xs.shape[0], xs.shape[2], outdim * pitch_dim))
        return xs

    def convup(xs):
        return convapply(xs, convnet_dims[0], convnet_dims[-1], convnet_up)

    def convdown(xs):
        return convapply(xs, convnet_dims[-1], convnet_dims[0], convnet_down)
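
    # Shape round trip through convapply (batch-first throughout):
    #   (batch, time, indim * pitch)
    #     -> reshape    (batch, time, indim, pitch)
    #     -> dimshuffle (batch, indim, time, pitch)   # conv2d expects channels on axis 1
    #     -> convnet    (batch, outdim, time, pitch)  # "same" conv preserves time/pitch
    #     -> dimshuffle (batch, time, outdim, pitch)
    #     -> reshape    (batch, time, outdim * pitch)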

    intermediate_dim = 256
    recurrent_dim = 256

    x_to_h = blocks.bricks.MLP(
        name="x_to_h",
        dims=[upper_dim, intermediate_dim, 4*recurrent_dim],
        activations=[blocks.bricks.Rectifier(), blocks.bricks.Identity()],
        weights_init=blocks.initialization.Orthogonal(),
        biases_init=blocks.initialization.Constant(0))
    lstm = blocks.bricks.recurrent.LSTM(
        dim=recurrent_dim,
        weights_init=initialization.GlorotInitialization(),
        biases_init=blocks.initialization.Constant(0))
    h_to_y = blocks.bricks.MLP(
        name="h_to_y",
        dims=[recurrent_dim, intermediate_dim, upper_dim],
        activations=[blocks.bricks.Rectifier(), blocks.bricks.Rectifier()],
        weights_init=blocks.initialization.Orthogonal(),
        biases_init=blocks.initialization.Constant(0))

    x_to_h.initialize()
    lstm.initialize()
    h_to_y.initialize()

    initialization.lstm_identity_initialize(lstm)
    initialization.lstm_bias_initialize(lstm, x_to_h.linear_transformations[-1].b)

    def stepfn(x, h, c):
        u = x_to_h.apply(x)
        h, c = lstm.apply(
            inputs=u, states=h, cells=c,
            iterate=False)
        y = h_to_y.apply(h)
        return y, h, c

    def predict(xs):
        [xs] = swap_tb(xs)
        [ys, hs, cs], _ = theano.scan(
            stepfn,
            sequences=[xs],
            outputs_info=[None] + lstm.initial_states(xs.shape[1]))
        [ys, hs, cs] = swap_tb(ys, hs, cs)
        return ys, hs, cs

    def generate(xs):
        [xs] = swap_tb(xs)
        # initialize hidden state based on xs
        [ys, hs, cs], _ = theano.scan(
            stepfn,
            sequences=[xs],
            outputs_info=[None] + lstm.initial_states(xs.shape[1]))
        # let the model extrapolate based on its own predictions
        [ys, _, _], _ = theano.scan(
            stepfn,
            n_steps=128,
            outputs_info=[ys[-1], hs[-1], cs[-1]])
        [ys] = swap_tb(ys)
        return ys
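
    # Same two-scan pattern as in Example #1, except the LSTM's hidden state h
    # and cell state c are threaded through: the first scan warms them up on
    # real frames, the second starts from hs[-1], cs[-1] and feeds each
    # prediction back in as the next input.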

    xs_up = convup(xs_down)

    # never backprop through targets
    ys_down = theano.gradient.disconnected_grad(xs_down)
    ys_up = theano.gradient.disconnected_grad(xs_up)
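    # disconnected_grad blocks backpropagation into its argument: the costs
    # below train through their inputs (yhats_up, convdown(ys_up)) but receive
    # no gradient through the targets ys_down and ys_up.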

    yhats_up, hs, cs = predict(xs_up)
    prediction_errors = (yhats_up[:, :-1] - ys_up[:, 1:])**2 * mask[:, 1:]
    # normalize by the number of masked-in prediction steps (T - 1 per sequence)
    prediction_cost = (prediction_errors.sum(axis=1) / mask[:, 1:].sum(axis=1)).mean()
    prediction_cost.name = "prediction_cost"

    # train convdown to reconstruct training examples while keeping convup
    # fixed; the reconstruction covers every timestep, so use the full mask
    # (mask[:, 1:] would be one step short and fail to broadcast)
    reconstruction_cost = ((convdown(ys_up) - ys_down)**2 * mask).mean()
    reconstruction_cost.name = "reconstruction_cost"

    cost = prediction_cost + reconstruction_cost
    cost.name = "cost"

    graph = blocks.graph.ComputationGraph(cost)
    model = blocks.model.Model(cost)
    algorithm = blocks.algorithms.GradientDescent(
        cost=cost,
        parameters=graph.parameters,
        step_rule=blocks.algorithms.Adam())

    step_channels = []
    for key, parameter in model.get_parameter_dict().items():
        step_channels.extend([algorithm.steps[parameter].norm(2)
                              .copy(name="step_norm:%s" % key),
                              algorithm.gradients[parameter].norm(2)
                              .copy(name="gradient_norm:%s" % key)])
    step_channels.extend([algorithm.total_step_norm.copy(name="total_step_norm"),
                          algorithm.total_gradient_norm.copy(name="total_gradient_norm")])

    activations = [
        hs.mean().copy(name="states.mean"),
        cs.mean().copy(name="cells.mean")]

    monitors = []
    monitors.append(blocks.extensions.monitoring.TrainingDataMonitoring(
        step_channels,
        prefix="iteration"))
    monitors.extend(
        blocks.extensions.monitoring.DataStreamMonitoring(
            graph.outputs + activations,
            data_stream=dataset.get_stream(which_set, max_examples=100),
            prefix=which_set,
            after_epoch=True)
        for which_set in "train test".split())

    main_loop = blocks.main_loop.MainLoop(
        data_stream=dataset.get_stream("train"),
        model=model, algorithm=algorithm,
        extensions=(monitors + [
            blocks.extensions.FinishAfter(after_n_epochs=100),
            blocks.extensions.ProgressBar(),
            blocks.extensions.Printing(),
            extensions.Generate(
                path="samples_{epoch}.npz",
                generate_fn=theano.function([xs_down], convdown(generate(convup(xs_down)))),
                pitch_dim=pitch_dim,
                every_n_epochs=1)]))
            #blocks.extensions.saveload.Checkpoint(
            #    path="checkpoint.pkl",
            #    after_epoch=True,
            #    on_interrupt=True)]))
    main_loop.run()
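
A quick sanity check of the "full convolution + Unpad = same convolution" trick used in make_convnet, in plain NumPy/SciPy (this snippet is illustrative and not part of the original code):

import numpy as np
from scipy.signal import convolve2d

x = np.random.rand(12, 12)
k = np.random.rand(9, 9)

full = convolve2d(x, k, mode="full")   # shape (20, 20): each side grows by 8
crop = full[4:-4, 4:-4]                # drop (filter_size - 1) / 2 per border
same = convolve2d(x, k, mode="same")   # shape (12, 12)

assert np.allclose(crop, same)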