def initialize_rnn(rnn, args):
    # Don't initialize as Orthogonal if we are about to load new parameters
    if args.load_path is not None:
        rnn.weights_init = initialization.Constant(0)
    else:
        rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()
Example #2
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    # Embedding lookup. An LSTM consumes inputs of size 4 * hidden_dim
    # (one slab per gate); a SimpleRecurrent consumes hidden_dim.
    embedding_dim = hidden_dim * 4 if mode == "lstm" else hidden_dim
    W = LookupTable(
        name="W1",
        dim=embedding_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)
    if mode == "lstm":
        # LSTM.apply returns (states, cells); keep only the hidden states
        hiddens = hiddens[0]
    activations2 = S.apply(hiddens)
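    # extra_ndim=1 folds the leading time axis into the batch axis, so the
    # softmax and the cross-entropy are computed over the vocabulary dimension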
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
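
A minimal usage sketch of create_rnn (the dimensions are illustrative, and it assumes the imports and the initLayers helper used above are in scope):

cg, layers, y_hat, cost = create_rnn(hidden_dim=128, vocab_dim=64, mode="rnn")
x, y = layers[0], layers[5]
predict = theano.function([x], y_hat)
compute_cost = theano.function([x, y], cost)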
Example #3
    def __init__(self, input_dim, output_dim, hidden_size, init_ranges,
                 **kwargs):
        linear1 = LinearMaxout(input_dim=input_dim,
                               output_dim=hidden_size,
                               num_pieces=2,
                               name='linear1')
        linear2 = LinearMaxout(input_dim=hidden_size,
                               output_dim=hidden_size,
                               num_pieces=2,
                               name='linear2')
        linear3 = Linear(input_dim=hidden_size, output_dim=output_dim)
        logistic = Logistic()
        bricks = [
            linear1,
            BatchNormalization(input_dim=hidden_size, name='bn2'), linear2,
            BatchNormalization(input_dim=hidden_size, name='bnl'), linear3,
            logistic
        ]
        for init_range, b in zip(init_ranges, (linear1, linear2, linear3)):
            b.biases_init = initialization.Constant(0)
            b.weights_init = initialization.Uniform(width=init_range)

        kwargs.setdefault('use_bias', False)
        super(ConcatenateClassifier, self).__init__([b.apply for b in bricks],
                                                    **kwargs)
Example #4
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim,
                output_dims=[dim, dim * 2],
                name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim,
                         weights_init=initialization.Identity(),
                         biases_init=Constant(0))

    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=dim,
                     output_dim=dim,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
Example #5
def example():
    """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3,
                          activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)

    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))
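    # With identity recurrent weights and an Identity activation the update is
    # h_t = h_{t-1} + x_t, so for all-ones input the printed states are
    # [[1, 1, 1]], [[2, 2, 2]], [[3, 3, 3]]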

    doubler = Linear(input_dim=3,
                     output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))

    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))
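    # The doubler scales each input by 2 before the same recurrence, so the
    # states grow twice as fast: [[2, 2, 2]], [[4, 4, 4]], [[6, 6, 6]]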

    #Initial State
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)

    f = theano.function([x, h0], h)
    print(
        f(np.ones((3, 1, 3), dtype=theano.config.floatX),
          np.ones((1, 3), dtype=theano.config.floatX)))
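    # Starting from h0 = ones instead of zeros shifts every state up by one:
    # [[2, 2, 2]], [[3, 3, 3]], [[4, 4, 4]]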
Example #6
def initialize_lasthid(last_hid, matrixfile=None, max_dim=None):
    rng = numpy.random.RandomState(42)
    w = 0.08
    myarray = rng.uniform(-w,
                          +w,
                          size=(last_hid.input_dim, last_hid.output_dim))
    print(myarray.shape)
    if matrixfile:
        typematrix = numpy.load(matrixfile)
        if max_dim is None:
            max_dim = len(typematrix)
        print(typematrix.shape)
        myarray[0:max_dim, :] = typematrix[0:max_dim, :]
        print(myarray)
    last_hid.weights_init = initialization.Constant(myarray)
    last_hid.biases_init = initialization.Constant(0)
    last_hid.initialize()
Example #7
def initialize2(brick, num_feature_maps):
    fan_in = numpy.prod(brick.filter_size)
    fan_out = numpy.prod(
        brick.filter_size) * brick.num_filters / num_feature_maps
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    brick.weights_init = initialization.Uniform(width=W_bound)
    brick.biases_init = initialization.Constant(0)
    brick.initialize()
def initialize(to_init, width):
    """
    Initialize weights according to Xavier Parameter Initialization
    :param to_init the block to initialize
    :param width width of uniform distribution
    """
    to_init.weights_init = initialization.Uniform(width=width)
    to_init.biases_init = initialization.Constant(0)
    to_init.initialize()
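
As a quick arithmetic check of the W_bound formula in initialize2 above (the filter shape and counts below are made up for illustration):

import numpy

filter_size = (5, 5)      # hypothetical 5x5 filters
num_filters = 32          # hypothetical number of output filters
num_feature_maps = 16     # hypothetical number of input feature maps
fan_in = numpy.prod(filter_size)                                      # 25
fan_out = numpy.prod(filter_size) * num_filters / num_feature_maps    # 50.0
print(numpy.sqrt(6. / (fan_in + fan_out)))                            # ~0.283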
Example #9
    def __init__(self, visual_dim, textual_dim, output_dim, hidden_size,
                 init_ranges, **kwargs):
        (visual_range, textual_range, linear_range_1, linear_range_2,
         linear_range_3) = init_ranges
        visual_layer = FeedforwardSequence([
            BatchNormalization(input_dim=visual_dim).apply,
            LinearMaxout(
                input_dim=visual_dim,
                output_dim=hidden_size,
                weights_init=initialization.Uniform(width=visual_range),
                use_bias=False,
                biases_init=initialization.Constant(0),
                num_pieces=2).apply
        ],
                                           name='visual_layer')
        textual_layer = FeedforwardSequence([
            BatchNormalization(input_dim=textual_dim).apply,
            LinearMaxout(
                input_dim=textual_dim,
                output_dim=hidden_size,
                weights_init=initialization.Uniform(width=textual_range),
                biases_init=initialization.Constant(0),
                use_bias=False,
                num_pieces=2).apply
        ],
                                            name='textual_layer')
        logistic_mlp = MLPGenreClassifier(
            hidden_size, output_dim, hidden_size,
            [linear_range_1, linear_range_2, linear_range_3])
        # logistic_mlp = Sequence([
        #   BatchNormalization(input_dim=hidden_size, name='bn1').apply,
        #   Linear(hidden_size, output_dim, name='linear_output', use_bias=False,
        #          weights_init=initialization.Uniform(width=linear_range_1)).apply,
        #   Logistic().apply
        #], name='logistic_mlp')

        children = [visual_layer, textual_layer, logistic_mlp]
        kwargs.setdefault('use_bias', False)
        kwargs.setdefault('children', children)
        super(LinearSumClassifier, self).__init__(**kwargs)
def get_presoft(h, args):
    output_size = get_output_size(args.dataset)
    # If args.skip_connections: dim = args.layers * args.state_dim
    # else: dim = args.state_dim
    use_all_states = args.skip_connections or args.skip_output or (
        args.rnn_type in ["clockwork", "soft"])
    output_layer = Linear(
        input_dim=use_all_states * args.layers * args.state_dim +
        (1 - use_all_states) * args.state_dim,
        output_dim=output_size,
        name="output_layer")

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()
    presoft = output_layer.apply(h)
    if not has_indices(args.dataset):
        presoft = Tanh().apply(presoft)
    presoft.name = 'presoft'
    return presoft
Example #11
def build_fork_lookup(vocab_size, args):
    x = tensor.lmatrix('features')
    virtual_dim = 6
    time_length = 5
    mini_batch_size = 2
    skip_connections = True
    layers = 3

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    print(output_names)
    print(output_dims)
    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names,
                input_dim=time_length,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    # Return list of 3D Tensor, one for each layer
    # (Batch X Time X embedding_dim)
    pre_rnn = fork.apply(x)
    fork.initialize()

    f = theano.function([x], pre_rnn)
    return f
Example #12
def initialize_inout(brick, fan_in, fan_out, seed=1):
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
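    # Blocks' Uniform(width=w) samples from [mean - w/2, mean + w/2], so using
    # 2 * W_bound below yields the classic Glorot interval [-W_bound, W_bound]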
    brick.weights_init = initialization.Uniform(width=2 * W_bound)
    brick.biases_init = initialization.Constant(0)
    brick.initialize()
Example #13
def initialize(to_init, rndstd=0.01):
    for bricks in to_init:
        bricks.weights_init = initialization.Uniform(width=0.08)
        bricks.biases_init = initialization.Constant(0)
        bricks.initialize()
Example #14
def build_model_hard(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names,
                input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())]
    for i in range(layers - 1):
        mlp = MLP(activations=[Logistic()],
                  dims=[2 * state_dim, 1],
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            HardGatedRecurrent(dim=state_dim, mlp=mlp, activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # dim = layers * state_dim
    output_layer = Linear(input_dim=layers * state_dim,
                          output_dim=vocab_size,
                          name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                       name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have correctly:
    # h = [state_1, state_2, state_3 ...]

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
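    # Dividing by log(2) converts the cross-entropy from nats to bits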
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
Example #15
from blocks.bricks.parallel import Fork

transition = SimpleRecurrent2(dim=dimension, activation=Identity())

readout = Readout(
    readout_dim=dimension,
    source_names=transition.apply.states + ["feedback"],
    name="readout")

generator = SequenceGenerator(
    readout=readout,
    transition=transition,
    fork=Fork(['inputs'], prototype=Identity()),
    weights_init=initialization.Identity(1.),
    biases_init=initialization.Constant(0.),
    name="generator")

generator.push_initialization_config()
#generator.fork.weights_init = initialization.Identity(1.)
generator.transition.transition.weights_init = initialization.Identity(2.)
generator.initialize()

results = generator.generate(n_steps=n_steps,
                             batch_size=2, iterate=True,
                             return_initial_states=True)

results_cg = ComputationGraph(results)
results_tf = results_cg.get_theano_function()

generated_sequence_t = results_tf()[1]
def get_prernn(args):

    # time x batch
    x_mask = tensor.fmatrix('mask')

    # Compute the state dim
    if args.rnn_type == 'lstm':
        state_dim = 4 * args.state_dim
    else:
        state_dim = args.state_dim

    # Prepare the arguments for the fork
    output_names = []
    output_dims = []
    for d in range(args.layers):
        if d > 0:
            suffix = RECURRENTSTACK_SEPARATOR + str(d)
        else:
            suffix = ''
        if d == 0 or args.skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    # Prepare the brick to be forked (LookupTable or Linear)
    # Check if the dataset provides indices (in the case of a
    # fixed vocabulary, x is 2D tensor) or if it gives raw values
    # (x is 3D tensor)
    if has_indices(args.dataset):
        features = args.mini_batch_size
        x = tensor.lmatrix('features')
        vocab_size = get_output_size(args.dataset)
        lookup = LookupTable(length=vocab_size, dim=state_dim)
        lookup.weights_init = initialization.IsotropicGaussian(0.1)
        lookup.biases_init = initialization.Constant(0)
        forked = FeedforwardSequence([lookup.apply])
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x, dtype=floatX)

    else:
        x = tensor.tensor3('features', dtype=floatX)
        if args.used_inputs is not None:
            x = tensor.set_subtensor(
                x[args.used_inputs:, :, :],
                tensor.zeros_like(x[args.used_inputs:, :, :], dtype=floatX))
        features = get_output_size(args.dataset)
        forked = Linear(input_dim=features, output_dim=state_dim)
        forked.weights_init = initialization.IsotropicGaussian(0.1)
        forked.biases_init = initialization.Constant(0)

        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x[:, :, 0], dtype=floatX)

    # Define the fork
    fork = Fork(output_names=output_names,
                input_dim=features,
                output_dims=output_dims,
                prototype=forked)
    fork.initialize()

    # Apply the fork
    prernn = fork.apply(x)

    # Give a name to the input of each layer
    if args.skip_connections:
        for t in range(len(prernn)):
            prernn[t].name = "pre_rnn_" + str(t)
    else:
        prernn.name = "pre_rnn"

    return prernn, x_mask
Example #17
readout = Readout(
    readout_dim=dimension,
    source_names=['states', 'feedback'],
    emitter=TrivialEmitter2(readout_dim=dimension),
    feedback_brick=TrivialFeedback(output_dim=dimension),
    # merge=Merge(),
    post_merge=Identity(),
    merged_dim=dimension,
    name="readout")

generator = SequenceGenerator(
    readout=readout,
    transition=transition,
    fork=Fork(['inputs'], prototype=Identity()),
    weights_init=initialization.Identity(1.),
    biases_init=initialization.Constant(0.),
    name="generator")

generator.push_initialization_config()
generator.transition.transition.weights_init = initialization.Identity(2.)
generator.initialize()

results = generator.generate(n_steps=n_steps,
                             batch_size=1, iterate=True,
                             return_initial_states=True)

results_cg = ComputationGraph(results)
results_tf = results_cg.get_theano_function()

generated_sequence_t = results_tf()[1]
generated_sequence_t.shape = (n_steps + 1, dimension)
Example #18
def build_fork_lookup(vocab_size, time_length, args):
    x = tensor.lmatrix('features')
    virtual_dim = 6
    state_dim = 6
    skip_connections = False
    layers = 1

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=time_length,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    # Note that this ordering of the periods makes faster modules flow into
    # slower ones, which is the opposite of the original paper
    transitions = [ClockworkBase(dim=state_dim, activation=Tanh(),
                                 period=2 ** i) for i in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # Return list of 3D Tensor, one for each layer
    # (Batch X Time X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give time as the first index for each element in the list:
    # (Time X Batch X embedding_dim)
    if layers > 1 and skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t] = pre_rnn[t].dimshuffle(1, 0, 2)
    else:
        pre_rnn = pre_rnn.dimshuffle(1, 0, 2)

    f_pre_rnn = theano.function([x], pre_rnn)

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            if skip_connections:
                kwargs['inputs' + suffix] = pre_rnn[d]
            else:
                kwargs['inputs' + suffix] = pre_rnn

    print(kwargs)
    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    f_h = theano.function([x], h)
    return f_pre_rnn, f_h
Example #19
def build_model_soft(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [SimpleRecurrent(dim=args.state_dim, activation=Tanh())]

    # Build the MLP
    dims = [2 * args.state_dim]
    activations = []
    for i in range(args.mlp_layers):
        activations.append(Rectifier())
        dims.append(args.state_dim)

    # Activation of the last layer of the MLP
    if args.mlp_activation == "logistic":
        activations.append(Logistic())
    elif args.mlp_activation == "rectifier":
        activations.append(Rectifier())
    elif args.mlp_activation == "hard_logistic":
        activations.append(HardLogistic())
    else:
        assert False

    # Output of MLP has dimension 1
    dims.append(1)

    for i in range(args.layers - 1):
        mlp = MLP(activations=activations, dims=dims,
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            SoftGatedRecurrent(dim=args.state_dim,
                               mlp=mlp,
                               activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # Now we have:
    # h = [state, state_1, gate_value_1, state_2, gate_value_2, state_3, ...]

    # Extract gate_values
    gate_values = h[2::2]
    new_h = [h[0]]
    new_h.extend(h[1::2])
    h = new_h

    # Now we have:
    # h = [state, state_1, state_2, ...]
    # gate_values = [gate_value_1, gate_value_2, gate_value_3]

    for i, gate_value in enumerate(gate_values):
        gate_value.name = "gate_value_" + str(i)

    # Save all the last states
    last_states = {}
    hidden_states = []
    for d in range(args.layers):
        h[d] = h[d] * x_mask
        last_states[d] = h[d][-1, :, :]
        h[d].name = "hidden_state_" + str(d)
        hidden_states.append(h[d])

    # Concatenate all the states
    if args.layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state_all"

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, cross_entropy = get_costs(presoft, args)

    return cost, cross_entropy, updates, gate_values, hidden_states
Example #20
def build_model(args, dtype=floatX):
    logger.info('Building model ...')

    # Variables of the model
    # the rubik's cube stickers
    x = tensor.bmatrix("x")

    # the action taken
    action = tensor.bmatrix("action")

    # y is the reward (Batch,)
    y = tensor.fvector("y")

    #####
    # LookupTable
    #####
    lookup_x = LookupTable(length=6, dim=args.embed_dim)
    lookup_action = LookupTable(length=6 + args.cube_size + 3,
                                dim=args.embed_dim)

    lookup_x.name = "lookup_x"
    lookup_x.weights_init = initialization.IsotropicGaussian(0.1)
    lookup_x.biases_init = initialization.Constant(0)
    lookup_action.name = "lookup_action"
    lookup_action.weights_init = initialization.IsotropicGaussian(0.1)
    lookup_action.biases_init = initialization.Constant(0)
    lookup_x.initialize()
    lookup_action.initialize()

    x_embeded = lookup_x.apply(x)
    action_embeded = lookup_action.apply(action)

    #####
    # MLP
    #####
    # Make x_embeded and action_embeded 2D
    x_embeded = x_embeded.reshape(
        (x_embeded.shape[0], x_embeded.shape[1] * x_embeded.shape[2]))
    action_embeded = action_embeded.reshape(
        (action_embeded.shape[0],
         action_embeded.shape[1] * action_embeded.shape[2]))

    # Concatenate inputs :
    mlp_input = tensor.concatenate((x_embeded, action_embeded), axis=1)

    # Bricks
    l = args.layers
    activations = []
    # first layer dimension
    dims = [args.embed_dim * (6 * (args.cube_size**2) + 3)]

    # every hidden layer dimension and activation function
    for _ in range(l):
        activations.append(Rectifier())
        dims.append(args.units_per_layer)
    # last layer dimension
    dims[-1] = 1

    mlp = MLP(activations=activations, dims=dims)

    y_hat = mlp.apply(mlp_input)

    cost = SquaredError().apply(y.dimshuffle(0, "x"), y_hat)
    cost.name = "mean_squared_error"

    # Initialization
    mlp.weights_init = initialization.IsotropicGaussian(0.1)
    mlp.biases_init = initialization.Constant(0)
    mlp.initialize()

    # Q function
    # Check if the parameters in this function will change through
    # the updates of the gradient descent
    Q = theano.function(inputs=[x, action],
                        outputs=y_hat,
                        allow_input_downcast=True)

    # Cost, gradient and learning rate
    lr = tensor.scalar('lr')
    params = ComputationGraph(cost).parameters
    gradients = tensor.grad(cost, params)
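    # Plain SGD update rule: p <- p - lr * grad(cost, p)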
    updates = OrderedDict((p, p - lr * g) for p, g in zip(params, gradients))

    # Function to call to perform a gradient descent step on (y - Q)^2
    gradient_descent_step = theano.function([x, action, y, lr],
                                            cost,
                                            updates=updates,
                                            allow_input_downcast=True)

    # Load the good parameters
    if args.load_path is not None:
        param_values = load_parameter_values(args.load_path)
        model = Model(cost)
        model.set_parameter_values(param_values)

    return Q, gradient_descent_step, params
Example #21
from datasets import parrot_stream
from model import Parrot
from utils import train_parse

args = train_parse()

exp_name = args.experiment_name
save_dir = args.save_dir

print "Saving config ..."
with open(os.path.join(save_dir, 'config', exp_name + '.pkl'), 'w') as f:
    cPickle.dump(args, f)
print "Finished saving."

w_init = initialization.IsotropicGaussian(0.01)
b_init = initialization.Constant(0.)

train_stream = parrot_stream(
    args.dataset, args.use_speaker, ('train',), args.batch_size,
    noise_level=args.feedback_noise_level, labels_type=args.labels_type,
    seq_size=args.seq_size, raw_data=args.raw_output)

if args.feedback_noise_level is None:
    val_noise_level = None
else:
    val_noise_level = 0.

valid_stream = parrot_stream(
    args.dataset, args.use_speaker, ('valid',), args.batch_size,
    noise_level=val_noise_level, labels_type=args.labels_type,
    seq_size=args.seq_size, raw_data=args.raw_output)
Example #22
def build_model_lstm(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    virtual_dim = 4 * state_dim

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    # Make sure time_length is what we need
    fork = Fork(output_names=output_names,
                input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [
        LSTM(dim=state_dim, activation=Tanh()) for _ in range(layers)
    ]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(input_dim=skip_connections * layers * state_dim +
                          (1 - skip_connections) * state_dim,
                          output_dim=vocab_size,
                          name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    init_cells = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                       name='state0_%d' % d)
        init_cells[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                      name='cell0_%d' % d)
        kwargs['states' + suffix] = init_states[d]
        kwargs['cells' + suffix] = init_cells[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # h = [state, cell, in, forget, out, state_1,
    #        cell_1, in_1, forget_1, out_1 ...]

    last_states = {}
    last_cells = {}
    for d in range(layers):
        last_states[d] = h[5 * d][-1, :, :]
        last_cells[d] = h[5 * d + 1][-1, :, :]

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))
        updates.append((init_cells[d], last_cells[d]))

    # h = [state, cell, in, forget, out, state_1,
    #        cell_1, in_1, forget_1, out_1 ...]

    # Extract the values
    in_gates = h[2::5]
    forget_gates = h[3::5]
    out_gates = h[4::5]

    gate_values = {
        "in_gates": in_gates,
        "forget_gates": forget_gates,
        "out_gates": out_gates
    }

    h = h[::5]

    # Now we have correctly:
    # h = [state, state_1, state_2 ...] if layers > 1
    # h = [state] if layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    if layers > 1:
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        h = h[0]
    h.name = "hidden_state"

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    # Don't initialize as Orthogonal if we are about to load new parameters
    if args.load_path is not None:
        rnn.weights_init = initialization.Constant(0)
    else:
        rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates, gate_values
Example #23
from fuel.datasets import IterableDataset

from blocks.filter import VariableFilter
from blocks.roles import PARAMETER

from collections import OrderedDict

N_CLASSES = len(MORSE_CHR)

x = T.ftensor3('x')

input_layer = br.MLP(activations=[br.Rectifier()] * 2,
                     dims=[CHUNK, 128, 128],
                     name='input_layer',
                     weights_init=blinit.Orthogonal(0.9),
                     biases_init=blinit.Constant(0.0))
input_layer_app = input_layer.apply(x)
input_layer.initialize()

recurrent_layer = brrec.SimpleRecurrent(dim=128,
                                        activation=br.Rectifier(),
                                        name='recurrent_layer',
                                        weights_init=blinit.Orthogonal(0.01),
                                        biases_init=blinit.Constant(0.0))
state = T.fmatrix('state')
recurrent_layer_app = recurrent_layer.apply(input_layer_app,
                                            state,
                                            iterate=False)
recurrent_layer.initialize()

output_layer = br.MLP(activations=[br.Rectifier()] * 1 + [None],
Example #24
def initialize_identity(to_init):
    for bricks in to_init:
        bricks.weights_init = initialization.Identity()
        bricks.biases_init = initialization.Constant(0)
        bricks.initialize()