Example No. 1
class TestBidirectional(unittest.TestCase):
    def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        self.bidir.children[0].params[0].set_value(
            self.simple.params[0].get_value())
        self.bidir.children[1].params[0].set_value(
            self.simple.params[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=floatX)
        self.mask_val[12:24, 3] = 0

    def test(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        calc_bidir = theano.function([x, mask],
                                     [self.bidir.apply(x, mask=mask)])
        calc_simple = theano.function([x, mask],
                                      [self.simple.apply(x, mask=mask)])
        h_bidir = calc_bidir(self.x_val, self.mask_val)[0]
        h_simple = calc_simple(self.x_val, self.mask_val)[0]
        h_simple_rev = calc_simple(self.x_val[::-1], self.mask_val[::-1])[0]

        assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
        assert_allclose(h_simple_rev, h_bidir[::-1, ...,  3:], rtol=1e-04)
Example No. 2
    def __init__(
            self,
            encoder_type,
            num_characters,
            input_dim,
            encoder_dim,
            **kwargs):
        assert encoder_type in [None, 'bidirectional']
        self.encoder_type = encoder_type
        super(Encoder, self).__init__(**kwargs)

        self.children = []

        if encoder_type in ['lookup', 'bidirectional']:
            self.embed_label = LookupTable(
                num_characters,
                input_dim,
                name='embed_label')
            self.children += [
                self.embed_label]
        else:
            # If there is no encoder.
            assert num_characters == input_dim

        if encoder_type == 'bidirectional':
            transition = RecurrentWithFork(
                GatedRecurrent(dim=encoder_dim).apply,
                input_dim, name='encoder_transition')
            self.encoder = Bidirectional(transition, name='encoder')
            self.children.append(self.encoder)
Example No. 3
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n) + inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
Example No. 4
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_depth)],
            skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(
            RecurrentWithFork(
                DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
                self.embedding_dim, name='src_word_with_fork'),
            name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to an energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]
Example No. 5
class TestBidirectional(unittest.TestCase):
    def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=Recurrent(
                                       dim=3, activation=Tanh()))
        self.simple = Recurrent(dim=3, weights_init=Orthogonal(),
                                activation=Tanh())
        self.bidir.initialize()
        self.simple.initialize()
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=floatX)
                      * self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=floatX)
        self.mask_val[12:24, 3] = 0

    def test(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        calc_bidir = theano.function([x, mask],
                                     [self.bidir.apply(x, mask=mask)])
        calc_simple = theano.function([x, mask],
                                      [self.simple.apply(x, mask=mask)])
        h_bidir = calc_bidir(self.x_val, self.mask_val)[0]
        h_simple = calc_simple(self.x_val, self.mask_val)[0]
        h_simple_rev = calc_simple(self.x_val[::-1], self.mask_val[::-1])[0]

        assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
        assert_allclose(h_simple_rev, h_bidir[::-1, ...,  3:], rtol=1e-04)
Example No. 6
def bilstm_layer(in_dim, inp, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + pref
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
Example No. 7
def example5():
    """Bidir + simplereccurent. Adaptation from a unittest in blocks """
    
    bidir = Bidirectional(weights_init=Orthogonal(),
                               prototype=SimpleRecurrent(
                                   dim=3, activation=Tanh()))
    
    simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                  activation=Tanh(), seed=1)
    
    bidir.allocate()
    simple.initialize()
    
    bidir.children[0].parameters[0].set_value(
        
        simple.parameters[0].get_value())
    
    bidir.children[1].parameters[0].set_value(        
        simple.parameters[0].get_value())
    
    #Initialize theano variables and functions
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')
 
    calc_bidir = theano.function([x, mask],
                                 [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask],
                                  [simple.apply(x, mask=mask)])
 

    #Testing time
 
    x_val = 0.1 * np.asarray(
        list(itertools.permutations(range(4))),
        dtype=theano.config.floatX)
        
    x_val = (np.ones((24, 4, 3), dtype=theano.config.floatX) *
                  x_val[..., None])
                  
    mask_val = np.ones((24, 4), dtype=theano.config.floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]
    

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
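
The docstring above notes that this example was adapted from a Blocks unit test. A hedged follow-up (mirroring the assertions in Examples No. 1 and No. 5, and assuming numpy.testing is available) would replace the prints with a check that the two halves of the bidirectional output match the forward and reversed runs:

    # Sketch only: the first `dim` features of the bidirectional output come
    # from the forward pass, the last `dim` from the backward pass (reversed).
    from numpy.testing import assert_allclose

    assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
    assert_allclose(h_simple_rev, h_bidir[::-1, ..., 3:], rtol=1e-04)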
Example No. 8
    def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=Recurrent(
                                       dim=3, activation=Tanh()))
        self.simple = Recurrent(dim=3, weights_init=Orthogonal(),
                                activation=Tanh())
        self.bidir.initialize()
        self.simple.initialize()
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=floatX)
                      * self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=floatX)
        self.mask_val[12:24, 3] = 0
Example No. 9
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [dimension for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator]
Example No. 10
	def __init__(self, dimen, vocab_size, **kwargs): #{
		# Register the brick with the Brick machinery so that allocation
		# bookkeeping (e.g. "allocated") is set up properly.
		super(MorphGen, self).__init__(**kwargs)

		# The encoder 
		encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh()))

		# Fork builds one linear transform per sequence input of the encoder's
		# transition (everything except the mask), sized to match the encoder.
		fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask'])
		fork.input_dim = dimen
		fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]

		lookup = LookupTable(vocab_size, dimen)

		transition = SimpleRecurrent(dim=dimen, activation=Tanh(), name="transition")

		atten = SequenceContentAttention(state_names=transition.apply.states,attended_dim=2*dimen, match_dim=dimen, name="attention")

		readout = Readout(
			readout_dim=vocab_size,
			source_names=[transition.apply.states[0],
			atten.take_glimpses.outputs[0]],
			emitter=SoftmaxEmitter(name="emitter"),
			feedback_brick=LookupFeedback(vocab_size, dimen),
			name="readout");

		generator = SequenceGenerator(readout=readout, transition=transition, attention=atten,name="generator")
	
		self.lookup = lookup
		self.fork = fork
		self.encoder = encoder
		self.generator = generator
		self.children = [lookup, fork, encoder, generator]
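
As a footnote to the Fork comment above: the list comprehension simply collects the per-step inputs declared by the wrapped transition. A minimal hedged illustration (assuming Blocks' SimpleRecurrent, whose apply declares 'inputs' and 'mask' as sequences) is:

# Illustrative only: inspect which sequence names the Fork will receive.
from blocks.bricks import Tanh
from blocks.bricks.recurrent import Bidirectional, SimpleRecurrent

encoder = Bidirectional(SimpleRecurrent(dim=3, activation=Tanh()))
print(encoder.prototype.apply.sequences)                              # e.g. ['inputs', 'mask']
print([n for n in encoder.prototype.apply.sequences if n != 'mask'])  # ['inputs']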
Example No. 11
    def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        self.bidir.children[0].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.bidir.children[1].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=theano.config.floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=theano.config.floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0
Example No. 12
def example5():
    """Bidir + simplereccurent. Adaptation from a unittest in blocks """

    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=SimpleRecurrent(dim=3, activation=Tanh()))

    simple = SimpleRecurrent(dim=3,
                             weights_init=Orthogonal(),
                             activation=Tanh(),
                             seed=1)

    bidir.allocate()
    simple.initialize()

    bidir.children[0].parameters[0].set_value(simple.parameters[0].get_value())

    bidir.children[1].parameters[0].set_value(simple.parameters[0].get_value())

    #Initialize theano variables and functions
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')

    calc_bidir = theano.function([x, mask], [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask], [simple.apply(x, mask=mask)])

    #Testing time

    x_val = 0.1 * np.asarray(list(itertools.permutations(range(4))),
                             dtype=theano.config.floatX)

    x_val = (np.ones(
        (24, 4, 3), dtype=theano.config.floatX) * x_val[..., None])

    mask_val = np.ones((24, 4), dtype=theano.config.floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
Example No. 13
    def __init__(self, enc_transition, dims, dim_input, subsample, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.subsample = subsample

        for layer_num, (dim_under, dim) in enumerate(
                zip([dim_input] + list(2 * numpy.array(dims)), dims)):
            bidir = Bidirectional(
                RecurrentWithFork(
                    enc_transition(dim=dim, activation=Tanh()).apply,
                    dim_under, name='with_fork'),
                name='bidir{}'.format(layer_num))
            self.children.append(bidir)
Example No. 14
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(LSTMEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, 4 * dimension))
        encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.fork]
Example No. 15
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(SimpleEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.transform = MLP([Tanh()], [dimension, dimension])
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, dimension))
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.transform, self.fork]
Example No. 16
    def __init__(self, enc_transition, dims, dim_input, subsample, bidir,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.subsample = subsample

        dims_under = [dim_input] + list(
            (2 if bidir else 1) * numpy.array(dims))
        for layer_num, (dim_under, dim) in enumerate(zip(dims_under, dims)):
            layer = RecurrentWithFork(enc_transition(dim=dim,
                                                     activation=Tanh()).apply,
                                      dim_under,
                                      name='with_fork{}'.format(layer_num))
            if bidir:
                layer = Bidirectional(layer, name='bidir{}'.format(layer_num))
            self.children.append(layer)
        self.dim_encoded = (2 if bidir else 1) * dims[-1]
Example No. 17
class Encoder(Initializable):
    def __init__(
            self,
            encoder_type,
            num_characters,
            input_dim,
            encoder_dim,
            **kwargs):
        assert encoder_type in [None, 'bidirectional']
        self.encoder_type = encoder_type
        super(Encoder, self).__init__(**kwargs)

        self.children = []

        if encoder_type in ['lookup', 'bidirectional']:
            self.embed_label = LookupTable(
                num_characters,
                input_dim,
                name='embed_label')
            self.children += [
                self.embed_label]
        else:
            # If there is no encoder.
            assert num_characters == input_dim

        if encoder_type == 'bidirectional':
            transition = RecurrentWithFork(
                GatedRecurrent(dim=encoder_dim).apply,
                input_dim, name='encoder_transition')
            self.encoder = Bidirectional(transition, name='encoder')
            self.children.append(self.encoder)

    @application
    def apply(self, x, x_mask=None):
        if self.encoder_type is None:
            return x

        if self.encoder_type in ['lookup', 'bidirectional']:
            embed_x = self.embed_label.apply(x)

        if self.encoder_type == 'lookup':
            encoded_x = embed_x

        if self.encoder_type == 'bidirectional':
            encoded_x = self.encoder.apply(embed_x, x_mask)

        return encoded_x
Example No. 18
    def __init__(self, transition, dim_input, dims_hidden, rec_weights_init,
                 ff_weights_init, biases_init, **kwargs):
        super(DeepBidirectional, self).__init__(**kwargs)

        for layer_num, (input_dim, hidden_dim) in enumerate(
                zip([dim_input] + list(2 * np.array(dims_hidden)),
                    dims_hidden)):
            bidir = Bidirectional(
                RecurrentWithFork(
                    transition=transition(dim=hidden_dim, activation=Tanh()),
                    input_dim=input_dim,
                    hidden_dim=hidden_dim,
                    rec_weights_init=rec_weights_init,
                    ff_weights_init=ff_weights_init,
                    biases_init=biases_init,
                    name='with_fork'),
                name='bidir{}'.format(layer_num))
            self.children.append(bidir)
Example No. 19
    def setUp(self):
        prototype = SimpleRecurrent(dim=3, activation=Tanh())
        self.layers = [
            Bidirectional(weights_init=Orthogonal(), prototype=prototype)
            for _ in range(3)
        ]
        self.stack = RecurrentStack(self.layers)
        for fork in self.stack.forks:
            fork.weights_init = Identity(1)
            fork.biases_init = Constant(0)
        self.stack.initialize()

        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))), dtype=theano.config.floatX)
        self.x_val = (numpy.ones(
            (24, 4, 3), dtype=theano.config.floatX) * self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0
Example No. 20
    def __init__(self, src_vocab_size, embedding_dim, dgru_state_dim,
                 state_dim, src_dgru_depth, bidir_encoder_depth, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.state_dim = state_dim
        self.dgru_state_dim = dgru_state_dim
        self.decimator = Decimator(src_vocab_size, embedding_dim,
                                   dgru_state_dim, src_dgru_depth)
        self.bidir = Bidirectional(
            RecurrentWithFork(
                GatedRecurrent(activation=Tanh(), dim=state_dim),
                dgru_state_dim, name='with_fork'),
            name='bidir0')

        self.children = [self.decimator, self.bidir]
        for layer_n in range(1, bidir_encoder_depth):
            self.children.append(copy.deepcopy(self.bidir))
            for child in self.children[-1].children:
                child.input_dim = 2 * state_dim
            self.children[-1].name = 'bidir{}'.format(layer_n)
Example No. 21
    def __init__(self,
                 dimension,
                 input_size,
                 rnn_type=None,
                 embed_input=False,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        if rnn_type is None:
            rnn_type = SimpleRecurrent
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        encoder = Bidirectional(rnn_type(dim=dimension, activation=Tanh()))
        fork = Fork([
            name for name in encoder.prototype.apply.sequences
            if name != 'mask'
        ])
        fork.input_dim = dimension
        fork.output_dims = [dimension for _ in fork.input_names]

        self.fork = fork
        self.encoder = encoder
        self.children = [fork, encoder, self.embedder]
Example No. 22
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')
    y = tensor.imatrix('targets')

    x_int = x.astype(dtype='int32').T - 2
    train_dataset = IMDB()
    idx_sort = numpy.argsort(
        [len(s) for s in
         train_dataset.indexables[
             train_dataset.sources.index('features')]]
    )
    n_voc = len(train_dataset.dict.keys())
    for idx in xrange(len(train_dataset.sources)):
        train_dataset.indexables[idx] = train_dataset.indexables[idx][idx_sort]

    n_h = 10
    linear_embedding = LookupTable(
        length=n_voc,
        dim=4 * n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
    rnn = Bidirectional(LSTM(
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    ))
    rnn.initialize()
    score_layer = Linear(
        input_dim=2*n_h,
        output_dim=1,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = linear_embedding.apply(x_int) * tensor.shape_padright(m.T)
    rnn_out = rnn.apply(embedding)
    rnn_out_mean_pooled = tensor.mean(rnn_out[0], axis=0)

    probs = Sigmoid().apply(
        score_layer.apply(rnn_out_mean_pooled))

    cost = - (y * tensor.log(probs)
              + (1 - y) * tensor.log(1 - probs)
              ).mean()
    cost.name = 'cost'

    misclassification = (y * (probs < 0.5)
                         + (1 - y) * (probs > 0.5)
                         ).mean()
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule(
            components=[StepClipping(threshold=10.),
                        Adam()
                        ]
        )
    )

    n_train = int(numpy.floor(.8 * train_dataset.num_examples))
    n_valid = int(numpy.floor(.1 * train_dataset.num_examples))
    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(100),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )
    valid_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(100, 110),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )
    test_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(110, 120),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        test_data_stream,
        prefix='test'))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        valid_data_stream,
        prefix='valid'))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification',
                   'valid_cost', 'valid_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('IMDB classification example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
Example No. 23
class Decimator(Initializable):
    """Source word encoder, mapping a charater-level word to a vector.
        This encoder is able to learn the morphology.
        For compatibility with previous version, we call it Decimator.
    """
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_depth)],
            skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(
            RecurrentWithFork(
                DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
                self.embedding_dim, name='src_word_with_fork'),
            name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to an energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [
            self.dgru.get_dim(name) for name in self.gru_fork.output_names
        ]

    @application(inputs=['char_seq', 'sample_matrix', 'char_aux'],
                 outputs=['representation', 'weight'])
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time as first dimension
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), {'mask': char_aux}))
        wgru_out = tensor.exp(
            self.wl.apply(self.bidir_w.apply(embeddings, char_aux)))

        if self.dgru_depth > 1:
            gru_out = gru_out[-1]

        gru_out = tensor.addbroadcast(wgru_out, 2) * gru_out
        sampled_representation = tensor.tanh(
            tensor.batched_dot(sample_matrix, gru_out.dimshuffle([1, 0, 2])))
        return sampled_representation.dimshuffle([1, 0, 2]), wgru_out

    def get_dim(self, name):
        if name == 'output':
            return self.dgru_state_dim
        return super(Decimator, self).get_dim(name)
Example No. 24
so let's think about sizes of the arrays...
"""

x = tensor.matrix('tokens', dtype="int32")

x_mask = tensor.matrix('tokens_mask', dtype=floatX)
#rnn.apply(inputs=input_to_hidden.apply(x), mask=x_mask)

lookup = LookupTable(vocab_size, embedding_dim)

x_extra = tensor.tensor3('extras', dtype=floatX)

rnn = Bidirectional(
    SimpleRecurrent(
        dim=hidden_dim,
        activation=Tanh(),
        weights_init=IsotropicGaussian(0.01),
        biases_init=Constant(0),
    ), )

### Will need to reshape the rnn outputs to produce suitable input here...
gather = Linear(name='hidden_to_output',
                input_dim=hidden_dim * 2,
                output_dim=labels_size,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))

p_labels = Softmax()

## Let's initialize the variables
lookup.allocate()
Example No. 25
Comments in google-groups:blocks indicate that a reshaping has to be done,
so let's think about sizes of the arrays...
"""

x = tensor.matrix('tokens', dtype="int32")

x_mask = tensor.matrix('tokens_mask', dtype=floatX)
#rnn.apply(inputs=input_to_hidden.apply(x), mask=x_mask)

lookup = LookupTable(vocab_size, embedding_dim)

x_extra = tensor.tensor3('extras', dtype=floatX)

rnn = Bidirectional(
  SimpleRecurrent(dim=hidden_dim, activation=Tanh(),
    weights_init=IsotropicGaussian(0.01),
    biases_init=Constant(0),
  ),
)

### Will need to reshape the rnn outputs to produce suitable input here...
gather = Linear(name='hidden_to_output', 
  input_dim=hidden_dim*2, output_dim=labels_size,
  weights_init=IsotropicGaussian(0.01),
  biases_init=Constant(0)
)

p_labels = Softmax()



## Let's initialize the variables
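
The docstring and the "### Will need to reshape..." comment above reason about array sizes but stop before the wiring. Below is a hedged sketch of that reshape, modelled on Example No. 26 and reusing this snippet's names; the input_to_hidden projection (hinted at in the commented-out line) and the (seq_len, batch, 2 * hidden_dim) output shape are assumptions, not part of the original:

# Illustrative sketch only: flatten (time, batch) so that Linear/Softmax see
# a 2-D array, then restore the sequence layout afterwards.
projected = input_to_hidden.apply(lookup.apply(x))   # hypothetical Linear(embedding_dim, hidden_dim)
rnn_out = rnn.apply(projected, mask=x_mask)          # assumed (seq_len, batch, 2 * hidden_dim)
seq_len, batch_size = rnn_out.shape[0], rnn_out.shape[1]
flat = gather.apply(rnn_out.reshape((seq_len * batch_size, hidden_dim * 2)))
label_probs = p_labels.apply(flat).reshape((seq_len, batch_size, labels_size))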
Example No. 26
def main(model_path, recurrent_type):
    dataset_options = dict(dictionary=char2code, level="character",
                           preprocess=_lower)
    dataset = OneBillionWord("training", [99], **dataset_options)
    data_stream = dataset.get_example_stream()
    data_stream = Filter(data_stream, _filter_long)
    data_stream = Mapping(data_stream, _make_target,
                          add_sources=('target',))
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(100))
    data_stream = Padding(data_stream)
    data_stream = Mapping(data_stream, _transpose)

    features = tensor.lmatrix('features')
    features_mask = tensor.matrix('features_mask')
    target = tensor.lmatrix('target')
    target_mask = tensor.matrix('target_mask')

    dim = 100
    lookup = LookupTable(len(all_chars), dim,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0.))

    if recurrent_type == 'lstm':
        rnn = LSTM(dim / 4, Tanh(),
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.))
    elif recurrent_type == 'simple':
        rnn = SimpleRecurrent(dim, Tanh())
        rnn = Bidirectional(rnn,
                            weights_init=IsotropicGaussian(0.01),
                            biases_init=Constant(0.))
    else:
        raise ValueError('Unknown RNN type')
    rnn.initialize()
    lookup.initialize()
    y_hat = rnn.apply(lookup.apply(features), mask=features_mask)

    print len(all_chars)
    linear = Linear(2 * dim, len(all_chars),
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0.))
    linear.initialize()
    y_hat = linear.apply(y_hat)
    seq_length = y_hat.shape[0]
    batch_size = y_hat.shape[1]
    y_hat = Softmax().apply(
        y_hat.reshape((seq_length * batch_size, -1))).reshape(y_hat.shape)
    cost = CategoricalCrossEntropy().apply(
        target.flatten(),
        y_hat.reshape((-1, len(all_chars)))) * seq_length * batch_size
    cost.name = 'cost'
    cost_per_character = cost / features_mask.sum()
    cost_per_character.name = 'cost_per_character'

    cg = ComputationGraph([cost, cost_per_character])
    model = Model(cost)
    algorithm = GradientDescent(step_rule=Adam(), cost=cost,
                                params=cg.parameters)

    train_monitor = TrainingDataMonitoring(
        [cost, cost_per_character], prefix='train',
        after_batch=True)
    extensions = [train_monitor, Printing(every_n_batches=40),
                  Dump(model_path, every_n_batches=200),
                  #Checkpoint('rnn.pkl', every_n_batches=200)
                  ]
    main_loop = MainLoop(model=model, algorithm=algorithm,
                         data_stream=data_stream, extensions=extensions)
    main_loop.run()
Example No. 27
def main(mode, save_path, num_batches, from_dump):
    if mode == "train":
        # Experiment configuration
        dimension = 100
        readout_dimension = len(char2code)

        # Data processing pipeline
        data_stream = DataStreamMapping(
            mapping=lambda data: tuple(array.T for array in data),
            data_stream=PaddingDataStream(
                BatchDataStream(
                    iteration_scheme=ConstantScheme(10),
                    data_stream=DataStreamMapping(
                        mapping=reverse_words,
                        add_sources=("targets", ),
                        data_stream=DataStreamFilter(
                            predicate=lambda data: len(data[0]) <= 100,
                            data_stream=OneBillionWord(
                                "training", [99],
                                char2code,
                                level="character",
                                preprocess=str.lower).get_default_stream())))))

        # Build the model
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")

        encoder = Bidirectional(GatedRecurrent(dim=dimension,
                                               activation=Tanh()),
                                weights_init=Orthogonal())
        encoder.initialize()
        fork = Fork([
            name
            for name in encoder.prototype.apply.sequences if name != 'mask'
        ],
                    weights_init=IsotropicGaussian(0.1),
                    biases_init=Constant(0))
        fork.input_dim = dimension
        fork.fork_dims = {name: dimension for name in fork.fork_names}
        fork.initialize()
        lookup = LookupTable(readout_dimension,
                             dimension,
                             weights_init=IsotropicGaussian(0.1))
        lookup.initialize()
        transition = Transition(activation=Tanh(),
                                dim=dimension,
                                attended_dim=2 * dimension,
                                name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            match_dim=dimension,
            name="attention")
        readout = LinearReadout(readout_dim=readout_dimension,
                                source_names=["states"],
                                emitter=SoftmaxEmitter(name="emitter"),
                                feedbacker=LookupFeedback(
                                    readout_dimension, dimension),
                                name="readout")
        generator = SequenceGenerator(readout=readout,
                                      transition=transition,
                                      attention=attention,
                                      weights_init=IsotropicGaussian(0.1),
                                      biases_init=Constant(0),
                                      name="generator")
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        bricks = [encoder, fork, lookup, generator]

        # Give an idea of what's going on
        params = Selector(bricks).get_params()
        logger.info("Parameters:\n" +
                    pprint.pformat([(key, value.get_value().shape)
                                    for key, value in params.items()],
                                   width=120))

        # Build the cost computation graph
        batch_cost = generator.cost(
            targets,
            targets_mask,
            attended=encoder.apply(**dict_union(fork.apply(
                lookup.lookup(chars), return_dict=True),
                                                mask=chars_mask)),
            attended_mask=chars_mask).sum()
        batch_size = named_copy(chars.shape[1], "batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Fetch variables useful for debugging
        max_length = named_copy(chars.shape[0], "max_length")
        cost_per_character = named_copy(
            aggregation.mean(batch_cost, batch_size * max_length),
            "character_log_likelihood")
        cg = ComputationGraph(cost)
        energies = unpack(VariableFilter(application=readout.readout,
                                         name="output")(cg.variables),
                          singleton=True)
        min_energy = named_copy(energies.min(), "min_energy")
        max_energy = named_copy(energies.max(), "max_energy")
        (activations, ) = VariableFilter(
            application=generator.transition.apply,
            name="states")(cg.variables)
        mean_activation = named_copy(activations.mean(), "mean_activation")

        # Define the training algorithm.
        algorithm = GradientDescent(cost=cost,
                                    step_rule=CompositeRule([
                                        GradientClipping(10.0),
                                        SteepestDescent(0.01)
                                    ]))

        observables = [
            cost, min_energy, max_energy, mean_activation, batch_size,
            max_length, cost_per_character, algorithm.total_step_norm,
            algorithm.total_gradient_norm
        ]
        for name, param in params.items():
            observables.append(named_copy(param.norm(2), name + "_norm"))
            observables.append(
                named_copy(algorithm.gradients[param].norm(2),
                           name + "_grad_norm"))

        main_loop = MainLoop(
            model=bricks,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=([LoadFromDump(from_dump)] if from_dump else []) + [
                Timing(),
                TrainingDataMonitoring(observables, after_every_batch=True),
                TrainingDataMonitoring(
                    observables, prefix="average", every_n_batches=10),
                FinishAfter(after_n_batches=num_batches).add_condition(
                    "after_batch", lambda log: math.isnan(
                        log.current_row.total_gradient_norm)),
                Plot(os.path.basename(save_path),
                     [["average_" + cost.name],
                      ["average_" + cost_per_character.name]],
                     every_n_batches=10),
                SerializeMainLoop(save_path,
                                  every_n_batches=500,
                                  save_separately=["model", "log"]),
                Printing(every_n_batches=1)
            ])
        main_loop.run()
    elif mode == "test":
        with open(save_path, "rb") as source:
            encoder, fork, lookup, generator = dill.load(source)
        logger.info("Model is loaded")
        chars = tensor.lmatrix("features")
        generated = generator.generate(
            n_steps=3 * chars.shape[0],
            batch_size=chars.shape[1],
            attended=encoder.apply(**dict_union(
                fork.apply(lookup.lookup(chars), return_dict=True))),
            attended_mask=tensor.ones(chars.shape))
        sample_function = ComputationGraph(generated).get_theano_function()
        logging.info("Sampling function is compiled")

        while True:
            # Python 2-3 compatibility
            line = input("Enter a sentence\n")
            batch_size = int(input("Enter a number of samples\n"))
            encoded_input = [
                char2code.get(char, char2code["<UNK>"])
                for char in line.lower().strip()
            ]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input, ))[0]
            print("Target: ", target)
            states, samples, glimpses, weights, costs = sample_function(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size,
                             axis=1))

            messages = []
            for i in range(samples.shape[1]):
                sample = list(samples[:, i])
                try:
                    true_length = sample.index(char2code['</S>']) + 1
                except ValueError:
                    true_length = len(sample)
                sample = sample[:true_length]
                cost = costs[:true_length, i].sum()
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=lambda tuple_: -tuple_[0])
            for _, message in messages:
                print(message)
Example No. 28
def main(config): 
	vocab_src, _ = text_to_dict([config['train_src'],
		config['dev_src'], config['test_src']])
	vocab_tgt, cabvo = text_to_dict([config['train_tgt'],
		config['dev_tgt']])

	# Create Theano variables
	logger.info('Creating theano variables')
	source_sentence = tensor.lmatrix('source')
	source_sentence_mask = tensor.matrix('source_mask')
	target_sentence = tensor.lmatrix('target')
	target_sentence_mask = tensor.matrix('target_mask')
	source_sentence.tag.test_value = [[13, 20, 0, 20, 0, 20, 0],
										[1, 4, 8, 4, 8, 4, 8],]
	source_sentence_mask.tag.test_value = [[0, 1, 0, 1, 0, 1, 0],
											[1, 0, 1, 0, 1, 0, 1],]
	target_sentence.tag.test_value = [[0,1,1,5],
										[2,0,1,0],]
	target_sentence_mask.tag.test_value = [[0,1,1,0],
											[1,1,1,0],]


	logger.info('Building RNN encoder-decoder')
	### Building Encoder 
	embedder = LookupTable(
		length=len(vocab_src), 
		dim=config['embed_src'], 
		weights_init=IsotropicGaussian(),
		biases_init=Constant(0.0), 
		name='embedder')
	transformer = Linear(
		config['embed_src'], 
		config['hidden_src']*4, 
		weights_init=IsotropicGaussian(),
		biases_init=Constant(0.0), 
		name='transformer')

	lstminit = np.asarray([0.0,]*config['hidden_src']+[0.0,]*config['hidden_src']+[1.0,]*config['hidden_src']+[0.0,]*config['hidden_src'])
	encoder = Bidirectional(
		LSTM(
			dim=config['hidden_src'], 
			weights_init=IsotropicGaussian(0.01),
			biases_init=Constant(lstminit)),
		name='encoderBiLSTM'
		)
	encoder.prototype.weights_init = Orthogonal()
	
	### Building Decoder 
	lstminit = np.asarray([0.0,]*config['hidden_tgt']+[0.0,]*config['hidden_tgt']+[1.0,]*config['hidden_tgt']+[0.0,]*config['hidden_tgt'])
	transition = LSTM2GO(
		attended_dim=config['hidden_tgt'], 
		dim=config['hidden_tgt'], 
		weights_init=IsotropicGaussian(0.01),
		biases_init=Constant(lstminit), 
		name='decoderLSTM')

	attention = SequenceContentAttention( 
		state_names=transition.apply.states, # default activation is Tanh
		state_dims=[config['hidden_tgt']],
		attended_dim=config['hidden_src']*2,
		match_dim=config['hidden_tgt'], 
		name="attention")

	readout = Readout(
		source_names=['states', 
			'feedback', 
			attention.take_glimpses.outputs[0]],
		readout_dim=len(vocab_tgt),
		emitter = SoftmaxEmitter(
			name='emitter'), 
		feedback_brick = LookupFeedback(
			num_outputs=len(vocab_tgt), 
			feedback_dim=config['embed_tgt'], 
			name='feedback'), 
		post_merge=InitializableFeedforwardSequence([
			Bias(dim=config['hidden_tgt'], 
				name='softmax_bias').apply,
			Linear(input_dim=config['hidden_tgt'], 
				output_dim=config['embed_tgt'],
				use_bias=False, 
				name='softmax0').apply,
			Linear(input_dim=config['embed_tgt'], 
				name='softmax1').apply]),
		merged_dim=config['hidden_tgt'])

	decoder = SequenceGenerator(
		readout=readout, 
		transition=transition, 
		attention=attention, 
		weights_init=IsotropicGaussian(0.01), 
		biases_init=Constant(0),
		name="generator",
		fork=Fork(
			[name for name in transition.apply.sequences if name != 'mask'], 
			prototype=Linear()),
		add_contexts=True)
	decoder.transition.weights_init = Orthogonal()

	#printchildren(encoder, 1)
	# Initialize model
	logger.info('Initializing model')
	embedder.initialize()
	transformer.initialize()
	encoder.initialize()
	decoder.initialize()
	
	# Apply model 
	embedded = embedder.apply(source_sentence)
	transformed = transformer.apply(embedded)
	encoded = encoder.apply(transformed)[0]
	generated = decoder.generate(
		n_steps=2*source_sentence.shape[1], 
		batch_size=source_sentence.shape[0], 
		attended = encoded.dimshuffle(1,0,2), 
		attended_mask=tensor.ones(source_sentence.shape).T
		)
	print 'Generated: ', generated
	# generator_generate_outputs
	#samples = generated[1] # For GRU 
	samples = generated[2] # For LSTM
	samples.name = 'samples'
	#samples_cost = generated[4] # For GRU 
	samples_cost = generated[5] # For LSTM
	samples_cost.name = 'sampling_cost'
	cost = decoder.cost(
		mask = target_sentence_mask.T, 
		outputs = target_sentence.T, 
		attended = encoded.dimshuffle(1,0,2), 
		attended_mask = source_sentence_mask.T)
	cost.name = 'target_cost'
	cost.tag.aggregation_scheme = TakeLast(cost)
	model = Model(cost)
	
	logger.info('Creating computational graph')
	cg = ComputationGraph(cost)
	
	# apply dropout for regularization
	if config['dropout'] < 1.0: # dropout is applied to the output of maxout in ghog
		logger.info('Applying dropout')
		dropout_inputs = [x for x in cg.intermediary_variables if x.name == 'maxout_apply_output']
		cg = apply_dropout(cg, dropout_inputs, config['dropout'])

	######## 
	# Print shapes
	shapes = [param.get_value().shape for param in cg.parameters]
	logger.info("Parameter shapes: ")
	for shape, count in Counter(shapes).most_common():
		logger.info('	{:15}: {}'.format(shape, count))
	logger.info("Total number of parameters: {}".format(len(shapes)))

	printchildren(embedder, 1)
	printchildren(transformer, 1)
	printchildren(encoder, 1)
	printchildren(decoder, 1)
	# Print parameter names
	# enc_dec_param_dict = merge(Selector(embedder).get_parameters(), Selector(encoder).get_parameters(), Selector(decoder).get_parameters())
	# enc_dec_param_dict = merge(Selector(decoder).get_parameters())
	# logger.info("Parameter names: ")
	# for name, value in enc_dec_param_dict.items():
	# 	logger.info('	{:15}: {}'.format(value.get_value().shape, name))
	# logger.info("Total number of parameters: {}".format(len(enc_dec_param_dict)))
	##########

	# Training data 
	train_stream = get_train_stream(config, 
		[config['train_src'],], [config['train_tgt'],], 
		vocab_src, vocab_tgt)
	dev_stream = get_dev_stream(
		[config['dev_src'],], [config['dev_tgt'],], 
		vocab_src, vocab_tgt)
	test_stream = get_test_stream([config['test_src'],], vocab_src)

	# Set extensions
	logger.info("Initializing extensions")
	extensions = [
		FinishAfter(after_n_batches=config['finish_after']),
		ProgressBar(),
		TrainingDataMonitoring([cost], 
			prefix="tra", 
			after_batch=True),
		DataStreamMonitoring(variables=[cost], 
			data_stream=dev_stream, 
			prefix="dev", 
			after_batch=True), 
		Sampler(
			model=Model(samples), 
			data_stream=dev_stream,
			vocab=cabvo,
			saveto=config['saveto']+'dev',
			every_n_batches=config['save_freq']), 
		Sampler(
			model=Model(samples), 
			data_stream=test_stream,
			vocab=cabvo,
			saveto=config['saveto']+'test',
			after_n_batches=1, 
			on_resumption=True,
			before_training=True), 
		Plotter(saveto=config['saveto'], after_batch=True),
		Printing(after_batch=True),
		Checkpoint(
			path=config['saveto'], 
			parameters = cg.parameters,
			save_main_loop=False,
			every_n_batches=config['save_freq'])]
	if BOKEH_AVAILABLE: 
		extensions.append(Plot('Training cost', channels=[['target_cost']], after_batch=True))
	if config['reload']: 
		extensions.append(Load(path=config['saveto'], 
			load_iteration_state=False, 
			load_log=False))
	else: 
		with open(config['saveto']+'.txt', 'w') as f: 
			pass 

	# Set up training algorithm
	logger.info("Initializing training algorithm")
	algorithm = GradientDescent(cost=cost, 
		parameters=cg.parameters,
		step_rule=CompositeRule([StepClipping(config['step_clipping']), 
			eval(config['step_rule'])()])
    )

	# Initialize main loop
	logger.info("Initializing main loop")
	main_loop = MainLoop(
		model=model,
		algorithm=algorithm,
		data_stream=train_stream,
		extensions=extensions)
	main_loop.run()