Example #1
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        x_to_h = Linear(input_dim,
                        input_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        lstm = LSTM(input_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
        h_to_o = Linear(input_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        h, c = lstm.apply(x_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        probs = h_to_o.apply(h[-1])
        return probs
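
A note on the h[-1] slice above: Blocks recurrent bricks return time-major
outputs, so indexing the first axis selects a timestep. A toy numpy check of
the shapes (sizes hypothetical):

import numpy as np

h = np.zeros((20, 32, 16))     # time x batch x hidden, as LSTM.apply returns
last = h[-1]                   # hidden state at the final timestep
assert last.shape == (32, 16)  # one vector per batch element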
Example #2
def rnn_layer(in_size, dim, x, h, n, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * n,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        else:
            rnn_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * n,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=(in_size + dim * 2
                                       if n > 1 else in_size + dim),
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        else:
            rnn_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        else:
            rnn_input = h[n - 1]
            linear = Linear(input_dim=dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
    rnn = SimpleRecurrent(dim=dim,
                          activation=Tanh(),
                          name=layer_models[n] + str(n) + '-')
    initialize([linear, rnn])
    if layer_models[n] == 'rnn':
        return rnn.apply(linear.apply(rnn_input))
    elif layer_models[n] == 'mt_rnn':
        return rnn.apply(linear.apply(rnn_input),
                         time_scale=layer_resolutions[n],
                         time_offset=layer_execution_time_offset[n])
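
A quick shape check for the 'all-previous' wiring above, with hypothetical
sizes, confirming the in_size + dim * n input dimension passed to Linear:

import numpy as np

in_size, dim, n = 8, 16, 3                        # hypothetical sizes
x = np.zeros((20, 32, in_size))                   # time x batch x features
h = [np.zeros((20, 32, dim)) for _ in range(n)]   # states of layers 0..n-1
rnn_input = np.concatenate([x] + h, axis=2)
assert rnn_input.shape[-1] == in_size + dim * n   # matches Linear's input_dim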
Example #3
class AttentionWriter(Initializable):
    def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels*width*height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.z_trafo = Linear(
                name=self.name+'_ztrafo',
                input_dim=input_dim, output_dim=5, 
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.w_trafo = Linear(
                name=self.name+'_wtrafo',
                input_dim=input_dim, output_dim=channels*N*N, 
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=['h'], outputs=['c_update'])
    def apply(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1./gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update

    @application(inputs=['h'], outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1./gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta

    @application(inputs=['x', 'h'],
                 outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_circular(self, x, h):
        #w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1./gamma * self.zoomer.write(x, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
Example #4
class embeddingLayer:
    def __init__(self, word_dim, visual_dim, joint_dim):
        self.word_embed = Linear(word_dim,
                                 joint_dim,
                                 name='word_to_joint',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
        self.visual_embed = Linear(visual_dim,
                                   joint_dim,
                                   name='visual_to_joint',
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0))
        self.word_embed.initialize()
        self.visual_embed.initialize()

    # words: batch_size x q x word_dim
    # video: batch_size x video_length x visual_dim
    def apply(self, words, video, u1, u2):
        w = self.word_embed.apply(words)
        v = self.visual_embed.apply(video)
        w = T.tanh(w)
        v = T.tanh(v)
        u = T.concatenate([u1, u2], axis=1)
        u = self.word_embed.apply(u)
        return w, v, u

    def apply_sentence(self, words, u1, u2):
        w = self.word_embed.apply(words)
        w = T.tanh(w)
        u = T.concatenate([u1, u2], axis=1)
        u = self.word_embed.apply(u)
        return w, u
Example #5
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(),
                   dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #6
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        x = tensor.tensor3('x', dtype=floatX)
        y = tensor.tensor3('y', dtype=floatX)

        x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size, output_dim=4 * hidden_size,
                           weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm = LSTM(dim=hidden_size, name="lstm", weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size, output_dim=output_size,
                                weights_init=IsotropicGaussian(), biases_init=Constant(0))

        x_transform = x_to_lstm.apply(x)
        h, c = lstm.apply(x_transform)

        y_hat = lstm_to_output.apply(h)
        y_hat = Logistic(name="y_hat").apply(y_hat)

        self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat)

        x_to_lstm.initialize()
        lstm.initialize()
        lstm_to_output.initialize()

        self.computation_graph = ComputationGraph(self.cost)
Example #7
def build_model(args):
    x = tensor.tensor3('features', dtype=floatX)
    y = tensor.tensor3('targets', dtype=floatX)

    linear = Linear(input_dim=1, output_dim=4 * args.units)
    rnn = LSTM(dim=args.units, activation=Tanh())
    linear2 = Linear(input_dim=args.units, output_dim=1)

    prediction = Tanh().apply(linear2.apply(rnn.apply(linear.apply(x))))

    prediction = prediction[:-1, :, :]

    # SquaredError does not work on 3D tensor
    y = y.reshape((y.shape[0] * y.shape[1], y.shape[2]))
    prediction = prediction.reshape((prediction.shape[0] * prediction.shape[1],
                                     prediction.shape[2]))

    cost = SquaredError().apply(y, prediction)

    # Initialization
    linear.weights_init = IsotropicGaussian(0.1)
    linear2.weights_init = IsotropicGaussian(0.1)
    linear.biases_init = Constant(0)
    linear2.biases_init = Constant(0)
    rnn.weights_init = Orthogonal()

    return cost
Example #8
def lstm_layer(in_size, dim, x, h, n, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * n,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        else:
            lstm_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * (n + 1),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=(in_size + dim * 2
                                       if n > 1 else in_size + dim),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        else:
            lstm_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        else:
            lstm_input = h[n - 1]
            # linear = LN_LSTM(input_dim=dim, output_dim=dim * 4, name='linear' + str(n) + '-' )
            linear = Linear(input_dim=dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
    lstm = LN_LSTM(dim=dim, name=layer_models[network_mode][n] + str(n) + '-')
    initialize([linear, lstm])
    if layer_models[network_mode][n] == 'lstm':
        return lstm.apply(linear.apply(lstm_input))
        # return lstm.apply(linear.apply(lstm_input), mask=x_mask)
    elif layer_models[network_mode][n] == 'mt_lstm':
        return lstm.apply(linear.apply(lstm_input),
                          time_scale=layer_resolutions[n],
                          time_offset=layer_execution_time_offset[n])
Example #9
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation (and the sketch after
    # this example)
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print('Learned weights:')
    for layer in (x_to_h, lstm, h_to_o):
        print("Layer '%s':" % layer.name)
        for param in layer.parameters:
            print(param.name, ':', param.get_value())
        print()
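
Why x_to_h outputs 4 * lstm_dim: Blocks' LSTM brick expects its input already
transformed into one slab per gate (input, forget, cell candidate, output). A
rough sketch of the slicing the brick performs internally (layout assumed):

import numpy as np

lstm_dim = 3
pre = np.arange(4 * lstm_dim)     # one row of x_to_h's output
slabs = pre.reshape(4, lstm_dim)  # four dim-sized slabs, one per gate
assert slabs.shape == (4, lstm_dim)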
Example #10
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #11
 def lllistool(i, inp, func):
     if func == LSTM:
         NUMS[i+1] *= 4
     sdim = DIMS[i]
     if func == SimpleRecurrent or func == LSTM:
         sdim = DIMS[i] + DIMS[i+1]
     l = Linear(input_dim=DIMS[i], output_dim=DIMS[i+1] * NUMS[i+1], 
                weights_init=IsotropicGaussian(std=sdim**(-0.5)), 
                biases_init=IsotropicGaussian(std=sdim**(-0.5)),
                name='Lin{}'.format(i))
     l.initialize()
     if func == SimpleRecurrent:
         gong = func(dim=DIMS[i+1], activation=Rectifier(), weights_init=IsotropicGaussian(std=sdim**(-0.5)))
         gong.initialize()
         ret = gong.apply(l.apply(inp))
     elif func == LSTM:
         gong = func(dim=DIMS[i+1], activation=Tanh(), weights_init=IsotropicGaussian(std=sdim**(-0.5)))
         gong.initialize()
         print(inp)
         ret, _ = gong.apply(
             l.apply(inp), 
             T.zeros((inp.shape[1], DIMS[i+1])),
             T.zeros((inp.shape[1], DIMS[i+1])),
         )
     elif func == SequenceGenerator:
         gong = func(
             readout=None, 
             transition=SimpleRecurrent(dim=100, activation=Rectifier(), weights_init=IsotropicGaussian(std=0.1)))
         ret = None
     elif func is None:
         ret = l.apply(inp)
     else:
         gong = func()
         ret = gong.apply(l.apply(inp))
     return ret
Example #12
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1,
                    output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim,
                activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim,
                        output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
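
The dimshuffle(1, 0, 2) above converts batch-major data into the time-major
layout that Blocks recurrent bricks expect. A numpy analogue (toy shapes):

import numpy as np

btf = np.zeros((32, 20, 5))      # batch x time x features
tbf = btf.transpose(1, 0, 2)     # numpy analogue of dimshuffle(1, 0, 2)
assert tbf.shape == (20, 32, 5)  # time x batch x features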
Example #13
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim, activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example #14
class AttentionWriter(Initializable):
    def __init__(self, input_dim, output_dim, channels, width, height, N,
                 **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels * width * height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.z_trafo = Linear(name=self.name + '_ztrafo',
                              input_dim=input_dim,
                              output_dim=5,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.w_trafo = Linear(name=self.name + '_wtrafo',
                              input_dim=input_dim,
                              output_dim=channels * N * N,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=['h'], outputs=['c_update'])
    def apply(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1. / gamma * self.zoomer.write(w, center_y, center_x, delta,
                                                  sigma)

        return c_update

    @application(inputs=['h'],
                 outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1. / gamma * self.zoomer.write(w, center_y, center_x, delta,
                                                  sigma)

        return c_update, center_y, center_x, delta
Example #15
class MyRecurrent(Brick):
    def __init__(self,
                 recurrent,
                 dims,
                 activations=[Identity(), Identity()],
                 **kwargs):
        super(MyRecurrent, self).__init__(**kwargs)
        self.dims = dims
        self.recurrent = recurrent
        self.activations = activations
        if isinstance(self.recurrent,
                      (SimpleRecurrent, SimpleRecurrentBatchNorm)):
            output_dim = dims[1]
        elif isinstance(self.recurrent, (LSTM, LSTMBatchNorm)):
            output_dim = 4 * dims[1]
        else:
            raise NotImplementedError
        self.input_trans = Linear(name='input_trans',
                                  input_dim=dims[0],
                                  output_dim=output_dim,
                                  weights_init=NormalizedInitialization(),
                                  biases_init=Constant(0))
        self.output_trans = Linear(name='output_trans',
                                   input_dim=dims[1],
                                   output_dim=dims[2],
                                   weights_init=NormalizedInitialization(),
                                   biases_init=Constant(0))
        self.children = (
            [self.input_trans, self.recurrent, self.output_trans] +
            self.activations)

    def _initialize(self):
        self.input_trans.initialize()
        self.output_trans.initialize()
        #self.recurrent.initialize()

    @application
    def apply(self, input_, input_mask=None, *args, **kwargs):
        input_recurrent = self.input_trans.apply(input_)
        try:
            input_recurrent = self.activations[0].apply(input_recurrent,
                                                        input_mask=input_mask)
        except TypeError:
            input_recurrent = self.activations[0].apply(input_recurrent)
        output_recurrent = self.recurrent.apply(inputs=input_recurrent,
                                                mask=input_mask)
        if isinstance(self.recurrent, (LSTM, LSTMBatchNorm)):
            output_recurrent = output_recurrent[0]
        output = self.output_trans.apply(output_recurrent)
        try:
            output = self.activations[1].apply(output, input_mask=input_mask)
        except TypeError:
            output = self.activations[1].apply(output)
        return output
Example #16
def MDN_output_layer(x, h, y, in_size, out_size, hidden_size, pred):
    if connect_h_to_o:
        hiddens = T.concatenate([hidden for hidden in h], axis=2)
        hidden_out_size = hidden_size * len(h)
    else:
        hiddens = h[-1]
        hidden_out_size = hidden_size

    mu_linear = Linear(name='mu_linear' + str(pred),
                       input_dim=hidden_out_size,
                       output_dim=out_size * components_size[network_mode])
    sigma_linear = Linear(name='sigma_linear' + str(pred),
                          input_dim=hidden_out_size,
                          output_dim=components_size[network_mode])
    mixing_linear = Linear(name='mixing_linear' + str(pred),
                           input_dim=hidden_out_size,
                           output_dim=components_size[network_mode])
    initialize([mu_linear, sigma_linear, mixing_linear])

    mu = mu_linear.apply(hiddens)
    mu = mu.reshape(
        (mu.shape[0], mu.shape[1], out_size, components_size[network_mode]))

    sigma_orig = sigma_linear.apply(hiddens)
    sigma = T.nnet.softplus(sigma_orig)

    mixing_orig = mixing_linear.apply(hiddens)
    e_x = T.exp(mixing_orig - mixing_orig.max(axis=2, keepdims=True))
    mixing = e_x / e_x.sum(axis=2, keepdims=True)

    exponent = -0.5 * T.inv(sigma) * T.sum(
        (y.dimshuffle(0, 1, 2, 'x') - mu)**2, axis=2)
    normalizer = (2 * np.pi * sigma)
    exponent = exponent + T.log(mixing) - (out_size * .5) * T.log(normalizer)

    # LogSumExp(x)
    max_exponent = T.max(exponent, axis=2, keepdims=True)
    mod_exponent = exponent - max_exponent
    gauss_mix = T.sum(T.exp(mod_exponent), axis=2, keepdims=True)
    log_gauss = T.log(gauss_mix) + max_exponent
    cost = -T.mean(log_gauss)

    srng = RandomStreams(seed=seed)
    mixing = mixing_orig * (1 + sampling_bias)
    sigma = T.nnet.softplus(sigma_orig - sampling_bias)
    e_x = T.exp(mixing - mixing.max(axis=2, keepdims=True))
    mixing = e_x / e_x.sum(axis=2, keepdims=True)
    component = srng.multinomial(pvals=mixing)
    component_mean = T.sum(mu * component.dimshuffle(0, 1, 'x', 2), axis=3)
    component_std = T.sum(sigma * component, axis=2, keepdims=True)
    linear_output = srng.normal(avg=component_mean, std=component_std)
    linear_output.name = 'linear_output'

    return linear_output, cost
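
The LogSumExp block above subtracts the per-row maximum before exponentiating;
without it the mixture log-likelihood underflows for very negative exponents.
A minimal numpy demonstration:

import numpy as np

ex = np.array([-1000.0, -1001.0])
naive = np.log(np.sum(np.exp(ex)))           # -inf: exp underflows to zero
m = ex.max()
stable = m + np.log(np.sum(np.exp(ex - m)))  # about -999.687, finite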
Example #17
class AttentionWriter(Initializable):
    def __init__(self, input_dim, output_dim, width, height, N, **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == width * height

        self.zoomer = ZoomableAttentionWindow(height, width, N)
        self.z_trafo = Linear(
            name=self.name + "_ztrafo",
            input_dim=input_dim,
            output_dim=5,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )

        self.w_trafo = Linear(
            name=self.name + "_wtrafo",
            input_dim=input_dim,
            output_dim=N * N,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=["h"], outputs=["c_update"])
    def apply(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1.0 / gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update

    @application(inputs=["h"], outputs=["c_update", "center_y", "center_x", "delta"])
    def apply_detailed(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1.0 / gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
Example #18
def test_linear_nan_allocation():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.params[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.params[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2)
Example #19
def test_linear_nan_allocation():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.parameters[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.parameters[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2)
Example #20
class Highway(Initializable, Feedforward):
    """ Implements highway networks outlined in [1]

    y = H(x, W_H) * T(x, W_T) + x * (1 - T(x, W_T))

    Highway networks have the same input dimension and output dimension

    Parameters
    ----------
    input_dim: int
        number of input/output dimensions for the network
    output_activation: Activation
        activation function applied to x and the hidden weights
    transform_activation: Activation
        activation function applied to x and the transform weights
    [1] http://arxiv.org/pdf/1505.00387v1.pdf
    """
    @lazy(allocation=['input_dim'])
    def __init__(self,
                 input_dim,
                 output_activation=None,
                 transform_activation=None,
                 **kwargs):
        super(Highway, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = input_dim

        if output_activation is None:
            output_activation = Rectifier()

        if transform_activation is None:
            transform_activation = Logistic()

        self._linear_h = Linear(name="linear_h",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._linear_t = Linear(name="linear_t",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._output_activation = output_activation
        self._transform_activation = transform_activation
        self.children = [
            self._linear_h, self._linear_t, self._output_activation,
            self._transform_activation
        ]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        h = self._output_activation.apply(self._linear_h.apply(input_))
        t = self._transform_activation.apply(self._linear_t.apply(input_))

        return h * t + input_ * (1 - t)
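
A minimal usage sketch for the Highway brick (dimensions and initializers are
assumptions, not part of the original): since input and output dims match,
the blocks can be stacked freely.

from theano import tensor

x = tensor.matrix('x')             # batch x 128
highway = Highway(input_dim=128)
highway.weights_init = IsotropicGaussian(0.01)
highway.biases_init = Constant(0)
highway.initialize()
y = highway.apply(highway.apply(x))  # stacking preserves the shape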
Example #21
class questionEncoder:
    def __init__(self, word_dim, hidden_dim):
        self.forward_lstm = LSTM(hidden_dim,
                                name='question_forward_lstm',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0))
        self.backward_lstm = LSTM(hidden_dim,
                                 name='question_backward_lstm',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
        self.x_to_h_forward = Linear(word_dim,
                                     hidden_dim * 4,
                                     name='word_x_to_h_forward',
                                     weights_init=IsotropicGaussian(0.01),
                                     biases_init=Constant(0))
        self.x_to_h_backward = Linear(word_dim,
                                      hidden_dim * 4,
                                      name='word_x_to_h_backward',
                                      weights_init=IsotropicGaussian(0.01),
                                      biases_init=Constant(0))

        self.forward_lstm.initialize()
        self.backward_lstm.initialize()
        self.x_to_h_forward.initialize()
        self.x_to_h_backward.initialize()

    # variable question length
    # words: batch_size x q x word_dim
    # words_reverse: be the reverse sentence of words
    #                padding with 0 to max length q
    # mask: batch_size 
    def apply(self, words, words_reverse, mask_, batch_size):
        mask = mask_.flatten()
        # batch_size x q x hidden_dim
        Wx = self.x_to_h_forward.apply(words)
        Wx_r = self.x_to_h_backward.apply(words_reverse)
        # q x batch_size x hidden_dim
        Wx = Wx.swapaxes(0, 1)
        Wx_r = Wx_r.swapaxes(0, 1)
        # q x batch_size x hidden_dim
        hf, cf = self.forward_lstm.apply(Wx)
        hb, cb = self.backward_lstm.apply(Wx_r)
        for i in range(batch_size):
            # set_subtensor returns a new variable; it must be reassigned
            hb = T.set_subtensor(hb[0:mask[i] + 1, i, :],
                                 hb[0:mask[i] + 1, i, :][::-1])

        # q x batch_size x (2 x hidden_dim)
        h = T.concatenate([hf, hb], axis=2)
        # batch_size x hidden_dim
        y_q = hf[mask, range(batch_size), :]
        y_1 = hb[0, range(batch_size), :]
        
        return h.swapaxes(0, 1), y_q, y_1
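
Note on the set_subtensor fix above: Theano's set_subtensor is functional and
returns a new variable instead of mutating its argument, so the result must be
reassigned. A tiny sketch:

import theano.tensor as T

v = T.vector('v')
w = T.set_subtensor(v[:2], v[:2][::-1])  # w carries the swap; v is unchanged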
Example #22
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder], name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
Example #23
class MyRecurrent(Brick):
    def __init__(self, recurrent, dims,
                 activations=[Identity(), Identity()], **kwargs):
        super(MyRecurrent, self).__init__(**kwargs)
        self.dims = dims
        self.recurrent = recurrent
        self.activations = activations
        if isinstance(self.recurrent, (SimpleRecurrent, SimpleRecurrentBatchNorm)):
            output_dim = dims[1]
        elif isinstance(self.recurrent, (LSTM, LSTMBatchNorm)):
            output_dim = 4*dims[1]
        else:
            raise NotImplementedError
        self.input_trans = Linear(name='input_trans',
                                  input_dim=dims[0],
                                  output_dim=output_dim,
                                  weights_init=NormalizedInitialization(),
                                  biases_init=Constant(0))
        self.output_trans = Linear(name='output_trans',
                                   input_dim=dims[1],
                                   output_dim=dims[2],
                                   weights_init=NormalizedInitialization(),
                                   biases_init=Constant(0))
        self.children = ([self.input_trans, self.recurrent, self.output_trans]
                         + self.activations)
        
    def _initialize(self):
        self.input_trans.initialize()
        self.output_trans.initialize()
        #self.recurrent.initialize()

    @application
    def apply(self, input_, input_mask=None, *args, **kwargs):
        input_recurrent = self.input_trans.apply(input_)
        try:
            input_recurrent = self.activations[0].apply(input_recurrent, input_mask=input_mask)
        except TypeError:
            input_recurrent = self.activations[0].apply(input_recurrent)
        output_recurrent = self.recurrent.apply(inputs=input_recurrent,
                                                mask=input_mask)
        if isinstance(self.recurrent, (LSTM, LSTMBatchNorm)):
            output_recurrent = output_recurrent[0]
        output = self.output_trans.apply(output_recurrent)
        try:
            output = self.activations[1].apply(output, input_mask=input_mask)
        except TypeError:
            output = self.activations[1].apply(output)
        return output
Example #24
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim, output_dims=[dim, dim * 2], name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim, weights_init=initialization.Identity(),
                         biases_init=Constant(0))

    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) 

    doubler = Linear(
                 input_dim=dim, output_dim=dim, weights_init=initialization.Identity(2),
                 biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin,gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) 
Example #25
class BernoulliLayer(Initializable, ProbabilisticLayer):
    @lazy
    def __init__(self, dim_X, dim_Y, **kwargs):
        super(BernoulliLayer, self).__init__(**kwargs)
        self.dim_X = dim_X
        self.dim_Y = dim_Y

        self.linear_transform = Linear(name=self.name + '_linear',
                                       input_dim=dim_Y,
                                       output_dim=dim_X,
                                       weights_init=self.weights_init,
                                       biases_init=self.biases_init,
                                       use_bias=self.use_bias)

        self.children = [self.linear_transform]

    @application(inputs=['Y'], outputs=['X_expected'])
    def sample_expected(self, Y):
        return tensor.nnet.sigmoid(self.linear_transform.apply(Y))

    @application(inputs=['Y'], outputs=['X', 'log_prob'])
    def sample(self, Y):
        prob_X = self.sample_expected(Y)
        U = self.theano_rng.uniform(size=prob_X.shape, nstreams=N_STREAMS)
        X = tensor.cast(U <= prob_X, floatX)
        return X, self.log_prob(X, Y)

    @application(inputs=['X', 'Y'], outputs=['log_prob'])
    def log_prob(self, X, Y):
        prob_X = self.sample_expected(Y)
        log_prob = X * tensor.log(prob_X) + (1. - X) * tensor.log(1 - prob_X)
        return log_prob.sum(axis=1)
Example #26
def nn_fprop(x, y, recurrent_in_size, out_size, hidden_size,
             num_recurrent_layers, train_flag):
    if task_ID_type == 'feedforward':
        x, recurrent_in_size = task_ID_layers(x, recurrent_in_size)
    recurrent_input = x
    cells = []
    h = []
    if dropout > 0:
        recurrent_input = Dropout(name='dropout_recurrent_in',
                                  train_flag=train_flag).apply(recurrent_input)
    if linear_before_recurrent_size > 0:
        linear = Linear(input_dim=2,
                        output_dim=linear_before_recurrent_size,
                        name='linear_befor_recurrent')
        initialize([linear])
        recurrent_input = linear.apply(recurrent_input[:, :, -2:])
        recurrent_in_size = linear_before_recurrent_size
    if single_dim_out:
        recurrent_input = T.extra_ops.repeat(recurrent_input, out_size, axis=0)
    p_components_size = components_size
    for i in range(num_recurrent_layers):
        model = layer_models[i]
        h, cells = add_layer(model,
                             i,
                             recurrent_in_size,
                             hidden_size,
                             recurrent_input,
                             h,
                             cells,
                             train_flag,
                             first_layer=(i == 0))
    return output_layer(recurrent_input, h, y, recurrent_in_size, out_size,
                        hidden_size, p_components_size) + (cells, )
Example #27
def example():
    """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3, activation=Identity(), weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)

    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX))) 

    doubler = Linear(
                 input_dim=3, output_dim=3, weights_init=initialization.Identity(2),
                 biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))

    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX))) 

    #Initial State
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)

    f = theano.function([x, h0], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX))) 
Example #28
class LinearActivation(Initializable, Feedforward):
    """Base class that adds documentation and has all the logic."""
    @lazy(allocation=['input_dim', 'output_dim'])
    def __init__(self, input_dim, output_dim, activation, **kwargs):
        super(LinearActivation, self).__init__(**kwargs)
        self.linear = Linear()
        self.activation = activation
        self.children = [self.linear,
                         self.activation]

        self.input_dim = input_dim
        self.output_dim = output_dim

    @property
    def input_dim(self):
        return self.linear.input_dim

    @input_dim.setter
    def input_dim(self, value):
        self.linear.input_dim = value

    @property
    def output_dim(self):
        return self.linear.output_dim

    @output_dim.setter
    def output_dim(self, value):
        self.linear.output_dim = value

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        pre_activation = self.linear.apply(input_)
        output = self.activation.apply(pre_activation)
        return output
Example #29
class Encoder(Initializable):
    def __init__(self,
                 dimension,
                 input_size,
                 rnn_type=None,
                 embed_input=False,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        if rnn_type is None:
            rnn_type = SimpleRecurrent
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        encoder = Bidirectional(rnn_type(dim=dimension, activation=Tanh()))
        fork = Fork([
            name for name in encoder.prototype.apply.sequences
            if name != 'mask'
        ])
        fork.input_dim = dimension
        fork.output_dims = [dimension for _ in fork.input_names]

        self.fork = fork
        self.encoder = encoder
        self.children = [fork, encoder, self.embedder]

    @application
    def apply(self, input_, input_mask):
        input_ = self.embedder.apply(input_)
        return self.encoder.apply(**dict_union(
            self.fork.apply(input_, as_dict=True), mask=input_mask))
Example #30
def rnn_layer(in_dim, h, h_dim, n):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim,
                    name='linear' + str(n) + h.name)
    rnn = SimpleRecurrent(dim=h_dim, name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h))
Example #31
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim,
                output_dims=[dim, dim * 2],
                name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim,
                         weights_init=initialization.Identity(),
                         biases_init=Constant(0))

    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=dim,
                     output_dim=dim,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
Example #32
def bilstm_layer(in_dim, inp, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + pref
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
Example #33
class BernoulliLayer(Initializable, ProbabilisticLayer):
    @lazy
    def __init__(self, dim_X, dim_Y, **kwargs):
        super(BernoulliLayer, self).__init__(**kwargs)
        self.dim_X = dim_X
        self.dim_Y = dim_Y

        self.linear_transform = Linear(
                name=self.name + '_linear', input_dim=dim_Y,
                output_dim=dim_X, weights_init=self.weights_init,
                biases_init=self.biases_init, use_bias=self.use_bias)

        self.children = [self.linear_transform]

    @application(inputs=['Y'], outputs=['X_expected'])
    def sample_expected(self, Y):
        return tensor.nnet.sigmoid(self.linear_transform.apply(Y))

    @application(inputs=['Y'], outputs=['X', 'log_prob'])
    def sample(self, Y):
        prob_X = self.sample_expected(Y)
        U = self.theano_rng.uniform(size=prob_X.shape, nstreams=N_STREAMS)
        X = tensor.cast(U <= prob_X, floatX)
        return X, self.log_prob(X, Y)

    @application(inputs=['X', 'Y'], outputs=['log_prob'])
    def log_prob(self, X, Y):
        prob_X = self.sample_expected(Y)
        log_prob = X*tensor.log(prob_X) + (1.-X)*tensor.log(1-prob_X)
        return log_prob.sum(axis=1)
Example #34
class Representer(Initializable):
    def __init__(self, representation_mlp, **kwargs):
        super(Representer, self).__init__(name="representer", **kwargs)

        self.representation_mlp = representation_mlp
        self.r_trafo = Linear(name=representation_mlp.name + '_trafo',
                              input_dim=representation_mlp.output_dim,
                              output_dim=representation_mlp.output_dim,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.children = [self.representation_mlp, self.r_trafo]

    def get_dim(self, name):
        if name == 'input':
            return self.representation_mlp.input_dim
        elif name == 'output':
            return self.representation_mlp.output_dim
        else:
            raise ValueError

    @application(inputs=['r'], outputs=['l_repr'])
    def apply(self, r):

        i_repr = self.representation_mlp.apply(r)
        l_repr = self.r_trafo.apply(i_repr)

        return l_repr
Example #35
def example():
    """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3,
                          activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)

    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=3,
                     output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))

    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    #Initial State
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)

    f = theano.function([x, h0], h)
    print(
        f(np.ones((3, 1, 3), dtype=theano.config.floatX),
          np.ones((1, 3), dtype=theano.config.floatX)))
Example #36
class Locater(Initializable):
    def __init__(self, location_mlp, **kwargs):
        super(Locater, self).__init__(name="locater", **kwargs)

        self.location_mlp = location_mlp

        self.l_trafo = Linear(name=location_mlp.name + '_trafo',
                              input_dim=location_mlp.output_dim,
                              output_dim=location_mlp.output_dim,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.children = [self.location_mlp, self.l_trafo]

    def get_dim(self, name):
        if name == 'input':
            return self.location_mlp.input_dim
        elif name == 'output':
            return self.location_mlp.output_dim
        else:
            raise ValueError

    @application(inputs=['l'], outputs=['l_loc'])
    def apply(self, l):

        i_loc = self.location_mlp.apply(l)
        l_loc = self.l_trafo.apply(i_loc)

        return l_loc
Example #37
class Locator(Initializable):
    def __init__(self, input_dim, n_spatial_dims, area_transform,
                 weights_init, biases_init, location_std, scale_std, **kwargs):
        super(Locator, self).__init__(**kwargs)

        self.n_spatial_dims = n_spatial_dims
        self.area_transform = area_transform

        self.locationscale = Linear(
            input_dim=area_transform.brick.output_dim,
            output_dim=2*n_spatial_dims,
            # these are huge reductions in dimensionality, so use
            # normalized initialization to avoid huge values.
            weights_init=NormalizedInitialization(IsotropicGaussian(std=1e-3)),
            biases_init=Constant(0),
            name="locationscale")

        self.T_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(12345)
        self.location_std = location_std
        self.scale_std = scale_std

        self.children = [self.area_transform.brick, self.locationscale]

    @application(inputs=['h'], outputs=['location', 'scale'])
    def apply(self, h):
        area = self.area_transform(h)
        locationscale = self.locationscale.apply(area)
        location, scale = (locationscale[:, :self.n_spatial_dims],
                           locationscale[:, self.n_spatial_dims:])
        location += self.T_rng.normal(location.shape, std=self.location_std)
        scale += self.T_rng.normal(scale.shape, std=self.scale_std)
        return location, scale
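
The comment inside locationscale motivates the normalized initialization: with
fan-in f, a plain Gaussian dot product grows like std * sqrt(f), so scaling
std by 1 / sqrt(f) keeps pre-activations O(1). A rough numpy check (sizes
hypothetical):

import numpy as np

fan_in = 1024
rng = np.random.RandomState(0)
x = rng.randn(fan_in)
w_plain = rng.randn(fan_in)                   # unscaled Gaussian weights
w_norm = rng.randn(fan_in) / np.sqrt(fan_in)  # normalized scaling
print(abs(w_plain @ x), abs(w_norm @ x))      # O(sqrt(fan_in)) vs O(1)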
Example #38
class ShallowEnergyComputer(Initializable, Feedforward):
    """A simple energy computer: first tanh, then weighted sum."""
    @lazy()
    def __init__(self, **kwargs):
        super(ShallowEnergyComputer, self).__init__(**kwargs)
        self.tanh = Tanh()
        self.linear = Linear(use_bias=False)
        self.children = [self.tanh, self.linear]

    @application
    def apply(self, *args):
        output = args
        output = self.tanh.apply(*pack(output))
        output = self.linear.apply(*pack(output))
        return output

    @property
    def input_dim(self):
        return self.children[1].input_dim

    @input_dim.setter
    def input_dim(self, value):
        self.children[1].input_dim = value

    @property
    def output_dim(self):
        return self.children[1].output_dim

    @output_dim.setter
    def output_dim(self, value):
        self.children[1].output_dim = value
Example #39
def softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size,
                  boosting):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=vocab_size)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()

    #y_hat = softmax.apply(linear_output, extra_ndim=1)
    #y_hat.name = 'y_hat'
    cost_a = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1)
    #produces correct average
    cost_a = cost_a * y_mask

    if boosting:
        #boosting step, must divide by length here
        lensMat = T.tile(lens, (y.shape[0], 1))
        cost_a = cost_a / lensMat

    #only count cost of correctly masked entries
    cost = cost_a.sum() / y_mask.sum()

    cost.name = 'cost'

    return (linear_output, cost)
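
The masked average above counts only non-padded tokens. A numpy illustration
of the same computation (toy values):

import numpy as np

cost_a = np.array([[1.0, 2.0],
                   [3.0, 4.0]])  # per-token cross-entropies
y_mask = np.array([[1.0, 1.0],
                   [1.0, 0.0]])  # last token of sequence 2 is padding
cost = (cost_a * y_mask).sum() / y_mask.sum()  # 2.0; padding excluded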
Example #40
class Locator(Initializable):
    def __init__(self, input_dim, n_spatial_dims, area_transform, weights_init,
                 biases_init, location_std, scale_std, **kwargs):
        super(Locator, self).__init__(**kwargs)

        self.n_spatial_dims = n_spatial_dims
        self.area_transform = area_transform

        self.locationscale = Linear(
            input_dim=area_transform.brick.output_dim,
            output_dim=2 * n_spatial_dims,
            # these are huge reductions in dimensionality, so use
            # normalized initialization to avoid huge values.
            weights_init=NormalizedInitialization(IsotropicGaussian(std=1e-3)),
            biases_init=Constant(0),
            name="locationscale")

        self.T_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(12345)
        self.location_std = location_std
        self.scale_std = scale_std

        self.children = [self.area_transform.brick, self.locationscale]

    @application(inputs=['h'], outputs=['location', 'scale'])
    def apply(self, h):
        area = self.area_transform(h)
        locationscale = self.locationscale.apply(area)
        location, scale = (locationscale[:, :self.n_spatial_dims],
                           locationscale[:, self.n_spatial_dims:])
        location += self.T_rng.normal(location.shape, std=self.location_std)
        scale += self.T_rng.normal(scale.shape, std=self.scale_std)
        return location, scale
Example #41
def lstm_layer(in_dim, h, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim * 4,
                    name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    initialize([linear, lstm])
    return lstm.apply(linear.apply(h))[0]
Example #42
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        print('using ranking loss function!')
    else:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    misclassify_rate = MultiMisclassificationRate().apply(
        y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
Example #43
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(
            mlp1.apply(z)))  # reconstructed target
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
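Averaging over z_list makes this a Monte Carlo estimate of the expected reconstruction term E_q(z|x)[log p(y|z)]: each latent sample is decoded separately and the per-sample predictions and cross-entropies are averaged. With a single sample the loop reduces to the usual VAE decoder pass.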
Example #44
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4,
                    name='linear' + str(n) + inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
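A note on dimensions: Blocks' Bidirectional wrapper runs the prototype LSTM over the sequence in both directions and concatenates the two results along the feature axis, so the hidden states returned here have dimensionality 2 * h_dim, and any layer stacked on top should size its input_dim accordingly.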
Example #45
class Embedder(Initializable):
    """
    Linear Embedding Brick
    Parameters
    ----------
    dim_in: :class:`int`
        Dimensionality of the input
    dim_out: :class:`int`
        Dimensionality of the output
    output_type: :class:`str`
        fc for fully connected. conv for convolutional
    """
    def __init__(self, dim_in, dim_out, output_type='fc', **kwargs):

        self.dim_in = dim_in
        self.dim_out = dim_out
        self.output_type = output_type
        self.linear = Linear(dim_in, dim_out, name='embed_layer')
        children = [self.linear]
        kwargs.setdefault('children', []).extend(children)
        super(Embedder, self).__init__(**kwargs)

    @application(inputs=['y'], outputs=['outputs'])
    def apply(self, y):
        embedding = self.linear.apply(y)
        if self.output_type == 'fc':
            return embedding
        if self.output_type == 'conv':
            return embedding.reshape((-1, embedding.shape[-1], 1, 1))

    def get_dim(self, name):
        if self.output_type == 'fc':
            return self.linear.get_dim(name)
        if self.output_type == 'conv':
            return (self.linear.get_dim(name), 1, 1)
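A hypothetical usage sketch of Embedder (dims and initializers chosen for illustration, not taken from the source repo):

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

y = tensor.matrix('y')                    # (batch, dim_in)
embedder = Embedder(dim_in=10, dim_out=128, output_type='conv',
                    weights_init=IsotropicGaussian(0.02),
                    biases_init=Constant(0))
embedder.initialize()
embedding = embedder.apply(y)             # reshaped to (batch, 128, 1, 1) for conv layers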
Example #46
class Qsampler(Qlinear, Random):
    """
    brick to handle the intermediate layer of an Autoencoder.
    The intermidate layer predict the mean and std of each dimension
    of the intermediate layer and then sample from a normal distribution.
    """
    # Special brick to handle Variatonal Autoencoder statistical sampling
    def __init__(self, input_dim, output_dim, **kwargs):
        super(Qsampler, self).__init__(input_dim, output_dim, **kwargs)

        self.prior_mean = 0.
        self.prior_log_sigma = 0.

        self.log_sigma_transform = Linear(
                name=self.name+'_log_sigma',
                input_dim=input_dim, output_dim=output_dim,
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.children.append(self.log_sigma_transform)

    @application(inputs=['x'], outputs=['z', 'kl_term'])
    def sample(self, x):
        """Return a samples and the corresponding KL term

        Parameters
        ----------
        x :

        Returns
        -------
        z : tensor.matrix
            Samples drawn from Q(z|x)
        kl : tensor.vector
            KL(Q(z|x) || P_z)

        """
        mean = self.mean_transform.apply(x)
        log_sigma = self.log_sigma_transform.apply(x)

        batch_size = x.shape[0]
        dim_z = self.get_dim('output')

        # Sample from a zero-mean, unit-std Gaussian
        u = self.theano_rng.normal(
                    size=(batch_size, dim_z),
                    avg=0., std=1.)
        z = mean + tensor.exp(log_sigma) * u

        # Calculate KL
        kl = (
            self.prior_log_sigma - log_sigma
            + 0.5 * (
                tensor.exp(2 * log_sigma) + (mean - self.prior_mean) ** 2
                ) / tensor.exp(2 * self.prior_log_sigma)
            - 0.5
        ).sum(axis=-1)

        return z, kl
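The KL expression matches the closed form for two diagonal Gaussians, KL(N(mu, s^2) || N(mu_p, s_p^2)) = log(s_p) - log(s) + (s^2 + (mu - mu_p)^2) / (2 s_p^2) - 1/2, summed over latent dimensions. A scalar NumPy sanity check (values are arbitrary):

import numpy as np

mean, log_sigma = 0.3, -0.2
prior_mean, prior_log_sigma = 0.0, 0.0
kl = (prior_log_sigma - log_sigma
      + 0.5 * (np.exp(2 * log_sigma) + (mean - prior_mean) ** 2)
      / np.exp(2 * prior_log_sigma)
      - 0.5)
# kl >= 0, with equality only when mean and log_sigma match the prior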
Example #47
def test_variable_filter_applications_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    VariableFilter(applications=brick1.apply)(cg.variables)
Example #48
def test_variable_filter_applications_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    VariableFilter(applications=brick1.apply)(cg.variables)
Example #49
def decoder_network(latent_sample, latent_dim=J):
  # bernoulli case
  hidden2 = get_typical_layer(latent_sample, latent_dim, 500, Logistic())
  hidden2_to_output = Linear(name="last", input_dim=500, output_dim=784)
  hidden2_to_output.weights_init = IsotropicGaussian(0.01)
  hidden2_to_output.biases_init = Constant(0)
  hidden2_to_output.initialize()
  return Logistic().apply(hidden2_to_output.apply(hidden2))
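The 784-dimensional Logistic output is consistent with per-pixel Bernoulli means for flattened 28x28 (MNIST-style) images, which is what the "# bernoulli case" comment suggests.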
Example #50
class Encoder(Initializable):

    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
              input_dim=image_feature_dim
            , output_dim=embedding_dim
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="image_embedding"
            )

        self.to_inputs = Linear(
              input_dim=embedding_dim
            , output_dim=embedding_dim*4 # gate_inputs = vstack(input, forget, cell, hidden)
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="to_inputs"
            )

        # The LSTM dim does not have to equal embedding_dim; this choice is arbitrary
        self.transition = LSTM(
            dim=embedding_dim, name="transition")

        self.children = [ self.image_embedding
                        , self.to_inputs
                        , self.transition
                        ]

    @application(inputs=['image_vects', 'word_vects'], outputs=['image_embedding', 'sentence_embedding'])   
    def apply(self, image_vects, word_vects):

        image_embedding = self.image_embedding.apply(image_vects)

        # inputs = word_vects
        inputs = self.to_inputs.apply(word_vects)
        inputs = inputs.dimshuffle(1, 0, 2)
        hidden, cells = self.transition.apply(inputs=inputs, mask=None)

        # the last hidden state represents the accumulation of all the words (i.e. the sentence)
        # grab all batches, grab the last value representing accumulation of the sequence, grab all features
        sentence_embedding = hidden[-1]
        # sentence_embedding = inputs.mean(axis=0)
        return image_embedding, sentence_embedding
Example #51
class Encoder(Initializable):

    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
              input_dim=image_feature_dim
            , output_dim=embedding_dim
            , name="image_embedding"
            )

        self.to_inputs = Linear(
              input_dim=embedding_dim
            , output_dim=embedding_dim*4 # times 4 because the gate inputs stack (input, forget, cell, hidden)
            , name="to_inputs"
            )

        self.transition = LSTM(
            dim=embedding_dim, name="transition")

        self.children = [ self.image_embedding
                        , self.to_inputs
                        , self.transition
                        ]

    @application(
          inputs=['image_vects', 'word_vects']
        , outputs=['image_embedding', 'sentence_embedding']
        )   
    def apply(self, image_vects, word_vects):

        image_embedding = self.image_embedding.apply(image_vects)

        inputs = self.to_inputs.apply(word_vects)
        
        # shuffle dimensions to correspond to (sequence, batch, features)
        inputs = inputs.dimshuffle(1, 0, 2)
        
        hidden, cells = self.transition.apply(inputs=inputs, mask=None)

        # last hidden state represents the accumulation of word embeddings 
        # (i.e. the sentence embedding)
        sentence_embedding = hidden[-1]

        return image_embedding, sentence_embedding
Example #52
def test_linear():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8)))

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    use_bias=False)
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(y.eval({x: x_val}), x_val.dot(2 * numpy.ones((16, 8))))
Example #53
def create_rnn(hidden_dim, vocab_dim,mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    # character embedding lookup; the LSTM path needs 4x the hidden dim
    # because Blocks' LSTM stacks the (input, forget, cell, output) gate inputs
    W = LookupTable(
        name = "W1",
        dim = hidden_dim * 4 if mode == "lstm" else hidden_dim,
        length = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(
            hidden_dim, 
            name = 'H',
            weights_init = initialization.IsotropicGaussian(0.01),
            biases_init = initialization.Constant(0.0)
        )
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name = "H",
            dim = hidden_dim,
            activation = Tanh(),
            weights_init = initialization.IsotropicGaussian(0.01)
        )
    # hidden-to-vocabulary projection
    S = Linear(
        name = "W2",
        input_dim = hidden_dim,
        output_dim = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )

    A = NDimensionalSoftmax(
        name = "softmax"
    )

    initLayers([W,H,S])
    activations = W.apply(x)
    # LSTM.apply returns (states, cells); keep only the hidden states
    hiddens = H.apply(activations)[0] if mode == "lstm" else H.apply(activations)
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)

    layers = (x, W, H, S, A, y)

    return  cg, layers, y_hat, cost
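A hypothetical driver for create_rnn, assuming initLayers initializes each brick (the dims are illustrative):

import theano

cg, layers, y_hat, cost = create_rnn(hidden_dim=100, vocab_dim=50, mode="rnn")
x, W, H, S, A, y = layers
f_cost = theano.function([x, y], cost)  # mean cross-entropy for a batch of sequences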
Example #54
def test_variable_filter_roles_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    # testing role error
    VariableFilter(roles=PARAMETER)(cg.variables)
Example #55
def MSEloss_layer(h, y, frame_length, hidden_size):
    hidden_to_output = Linear(name="hidden_to_output", input_dim=hidden_size, output_dim=frame_length)
    initialize([hidden_to_output])
    y_hat = hidden_to_output.apply(h)
    y_hat.name = "y_hat"
    cost = squared_error(y_hat, y).mean()
    cost.name = "cost"
    return y_hat, cost
Example #56
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Sigmoid(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.params[0]]
    bias = [brick1.b, brick2.params[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by brick instance
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex='W_no.?m')
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)

    # Testing filtering by application
    appli_filter_list = VariableFilter(applications=[brick1.apply])
    assert variables == appli_filter_list(cg.variables)
Example #57
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim, output_dim=input_dim * 4, name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")

    testing_init(linear)
    #linear.initialize()
    default_init(lstm)

    h = linear.apply(input_var)
    return lstm.apply(h)
Example #58
class Highway(Initializable, Feedforward):
    """ Implements highway networks outlined in [1]

    y = H(x,WH)T(x,WT) + x(1-T(x,WT))

    Highway networks have the same input dimension and output dimension

    Parameters
    ----------
    input_dim: int
        number of input/output dimensions for the network
    output_activation: Activation
        activation function applied to x and the hidden weights
    transform_activation: Activation
        activation function applied to x and the transform weights
    [1] http://arxiv.org/pdf/1505.00387v1.pdf
    """

    @lazy(allocation=['input_dim'])
    def __init__(self, input_dim, output_activation=None, transform_activation=None, **kwargs):
        super(Highway, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = input_dim

        if output_activation is None:
            output_activation = Rectifier()

        if transform_activation is None:
            transform_activation = Logistic()

        self._linear_h = Linear(name="linear_h", input_dim=input_dim, output_dim=input_dim)
        self._linear_t = Linear(name="linear_t", input_dim=input_dim, output_dim=input_dim)
        self._output_activation = output_activation
        self._transform_activation = transform_activation
        self.children = [self._linear_h, self._linear_t, self._output_activation, self._transform_activation]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        h = self._output_activation.apply(self._linear_h.apply(input_))
        t = self._transform_activation.apply(self._linear_t.apply(input_))

        return h * t + input_ * (1 - t)
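A minimal usage sketch, assuming Blocks' Initializable machinery pushes the initializers down to the two Linear children (dims are illustrative):

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.matrix('x')
highway = Highway(input_dim=64,
                  weights_init=IsotropicGaussian(0.01),
                  biases_init=Constant(0))
highway.initialize()
y = highway.apply(x)  # same shape as the input: (batch, 64)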