Example #1
class DataBlock(object):
    def __init__(self, char_to_idx, device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.char_idx = Connector(Matrix.empty(1, 1, 'int', device_id))
        self.char_to_idx = char_to_idx
        self.char = None

    def fprop(self):
        char_npa = np.zeros((1, 1), np.int32, 'F')
        char_npa[0][0] = self.char_to_idx[self.char] if self.char in self.char_to_idx else self.char_to_idx['<unk>']
        self.char_idx.assign_npa(self.context, char_npa)
        self.char_idx.fprop()
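A minimal driver sketch for this block, assuming a toy vocabulary and the usual quagga setup already in scope; only the .char / fprop() handshake mirrors the example:

# Hypothetical driver: push characters through the block one at a time.
char_to_idx = {'<unk>': 0, 'h': 1, 'i': 2}   # assumed toy vocabulary
block = DataBlock(char_to_idx, device_id=0)
for ch in 'hi!':                             # '!' is out of vocabulary, maps to '<unk>'
    block.char = ch                          # fprop() reads the current character from .char
    block.fprop()                            # uploads the index into the GPU-side connector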
Example #2
class DataBlock(object):
    def __init__(self, word_to_idx, device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.word_idx = Connector(Matrix.empty(1, 1, 'int', device_id))
        self.word_to_idx = word_to_idx
        self.word = None

    def fprop(self):
        word_npa = np.zeros((1, 1), np.int32, 'F')
        word_npa[0][0] = self.word_to_idx[self.word] if self.word in self.word_to_idx else self.word_to_idx['<UNK>']
        self.word_idx.assign_npa(self.context, word_npa)
        self.word_idx.fprop()
Example #3
class DataBlock(object):
    def __init__(self, data, char_to_idx, batch_size, x_device_id,
                 y_device_id):
        self.data = HomogeneousDataIterator(data, char_to_idx, batch_size,
                                            True, True)
        self.data_iterator = iter(self.data)
        self.x_context = Context(x_device_id)
        self.y_context = Context(y_device_id)
        max_len = 0
        for sub_line in data:
            cur_len = len(sub_line)
            if cur_len > max_len:
                max_len = cur_len
        print(max_len)
        self.x = Connector(
            Matrix.empty(batch_size, max_len - 1, 'int', x_device_id))
        self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
        self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)],
                      self.x.ncols)
        self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
        self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float',
                                  x_device_id)
        self.mask = List(
            [Connector(self._mask[:, i]) for i in xrange(max_len - 1)],
            self.x.ncols)
        self.blocking_contexts = None

    def fprop(self):
        self.x_context.wait(*self.blocking_contexts)
        self.y_context.wait(*self.blocking_contexts)
        data = next(self.data_iterator)
        lengths_npa = np.array([[len(e) - 1] for e in data],
                               np.int32,
                               order='F')
        x_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            x_npa[k, :len(e) - 1] = e[:-1]
        self.x.assign_npa(self.x_context, x_npa)
        y_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            y_npa[k, :len(e) - 1] = e[1:]
        self._y.assign_npa(self.y_context, y_npa)
        for e in self.y:
            e.last_modification_context = self.y_context
        self.lengths.assign_npa(self.x_context, lengths_npa)
        self._mask.mask_column_numbers_row_wise(self.x_context, self.lengths)
        for e in self.mask:
            e.last_modification_context = self.x_context
        self.x.fprop()
        self.y.fprop()
        self.mask.fprop()
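The mask built by mask_column_numbers_row_wise appears to flag, per row, which columns still hold real tokens (1.0) versus padding (0.0), based on the lengths matrix. A NumPy-only sketch of that behaviour, under that assumption:

import numpy as np

def column_mask(lengths, ncols):
    # 1.0 while the column index is below the row's length, 0.0 afterwards.
    cols = np.arange(ncols)[np.newaxis, :]        # shape (1, ncols)
    return (cols < lengths.reshape(-1, 1)).astype(np.float32)

lengths = np.array([[3], [1]], np.int32)          # two sequences of length 3 and 1
print(column_mask(lengths, 4))
# [[1. 1. 1. 0.]
#  [1. 0. 0. 0.]]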
Example #4
class DataBlock(object):
    def __init__(self, char_to_idx, device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.char_idx = Connector(Matrix.empty(1, 1, 'int', device_id))
        self.char_to_idx = char_to_idx
        self.char = None

    def fprop(self):
        char_npa = np.zeros((1, 1), np.int32, 'F')
        char_npa[0][0] = (self.char_to_idx[self.char]
                          if self.char in self.char_to_idx
                          else self.char_to_idx['<unk>'])
        self.char_idx.assign_npa(self.context, char_npa)
        self.char_idx.fprop()
Example #5
class DataBlock(object):
    def __init__(self, word_to_idx, device_id):
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.word_idx = Connector(Matrix.empty(1, 1, 'int', device_id))
        self.word_to_idx = word_to_idx
        self.word = None

    def fprop(self):
        word_npa = np.zeros((1, 1), np.int32, 'F')
        word_npa[0][0] = (self.word_to_idx[self.word]
                          if self.word in self.word_to_idx
                          else self.word_to_idx['<UNK>'])
        self.word_idx.assign_npa(self.context, word_npa)
        self.word_idx.fprop()
Example #6
class DataBlock(object):
    def __init__(self, data, char_to_idx, batch_size, x_device_id, y_device_id):
        self.data = HomogeneousDataIterator(data, char_to_idx, batch_size, True, True)
        self.data_iterator = iter(self.data)
        self.x_context = Context(x_device_id)
        self.y_context = Context(y_device_id)
        max_len = 0
        for sub_line in data:
            cur_len = len(sub_line)
            if cur_len > max_len:
                max_len = cur_len
        print(max_len)
        self.x = Connector(Matrix.empty(batch_size, max_len - 1, 'int', x_device_id))
        self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
        self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)], self.x.ncols)
        self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
        self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float', x_device_id)
        self.mask = List([Connector(self._mask[:, i]) for i in xrange(max_len - 1)], self.x.ncols)
        self.blocking_contexts = None

    def fprop(self):
        self.x_context.wait(*self.blocking_contexts)
        self.y_context.wait(*self.blocking_contexts)
        data = next(self.data_iterator)
        lengths_npa = np.array([[len(e) - 1] for e in data], np.int32, order='F')
        x_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            x_npa[k, :len(e) - 1] = e[:-1]
        self.x.assign_npa(self.x_context, x_npa)
        y_npa = np.zeros((len(data), int(np.max(lengths_npa))), np.int32, 'F')
        for k, e in enumerate(data):
            y_npa[k, :len(e) - 1] = e[1:]
        self._y.assign_npa(self.y_context, y_npa)
        for e in self.y:
            e.last_modification_context = self.y_context
        self.lengths.assign_npa(self.x_context, lengths_npa)
        self._mask.mask_column_numbers_row_wise(self.x_context, self.lengths)
        for e in self.mask:
            e.last_modification_context = self.x_context
        self.x.fprop()
        self.y.fprop()
        self.mask.fprop()
Example #7
class DataBlock(object):
    def __init__(self, train_data, valid_data, batch_size, word_dropout_prob, device_id):
        self.train_data = HomogeneousDataIterator(train_data, batch_size, randomize=True, infinite=True)
        self.valid_data = HomogeneousDataIterator(valid_data, batch_size)
        self.train_data_iterator = iter(self.train_data)
        self.valid_data_iterator = iter(self.valid_data)
        self.word_keep_prob = 1.0 - word_dropout_prob
        self.rnd = RandomState(47571)
        self.unk_idx = word_to_idx['<UNK>']

        self.context = Context(device_id)
        c = Counter([len(line) for line in chain(train_data, valid_data)])
        print(c.most_common())
        max_len = max([len(line) for line in chain(train_data, valid_data)])

        self.enc_x = Connector(Matrix.empty(batch_size, max_len, 'int', device_id))
        self.enc_lengths = Matrix.empty(self.enc_x.nrows, 1, 'int', device_id)
        self._enc_mask = Matrix.empty(self.enc_x.nrows, self.enc_x.ncols, 'float', device_id)
        self.enc_mask = List([Connector(self._enc_mask[:, i]) for i in xrange(max_len)], self.enc_x.ncols)

        self.dec_x = Connector(Matrix.empty(batch_size, max_len + 1, 'int', device_id))
        self._dec_y = Matrix.empty(batch_size, max_len + 1, 'int', device_id)
        self.dec_y = List([Connector(self._dec_y[:, i]) for i in xrange(max_len + 1)], self._dec_y.ncols)
        self.dec_lengths = Matrix.empty(self.dec_x.nrows, 1, 'int', device_id)
        self._dec_mask = Matrix.empty(self.dec_x.nrows, self.dec_x.ncols, 'float', device_id)
        self.dec_mask = List([Connector(self._dec_mask[:, i]) for i in xrange(max_len + 1)], self.dec_x.ncols)

        self.blocking_contexts = None
        self.training_mode = True

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False

    def fprop(self):
        if self.training_mode:
            data = next(self.train_data_iterator)
        else:
            try:
                data = next(self.valid_data_iterator)
            except StopIteration as e:
                self.valid_data_iterator = iter(self.valid_data)
                raise e
        lengths_npa = np.array([[len(e)] for e in data], np.int32, order='F')
        max_len = int(np.max(lengths_npa))

        self.enc_lengths.assign_npa(self.context, lengths_npa)
        self._enc_mask.mask_column_numbers_row_wise(self.context, self.enc_lengths)
        for e in self.enc_mask:
            e.last_modification_context = self.context

        lengths_npa += 1
        self.dec_lengths.assign_npa(self.context, lengths_npa)
        self._dec_mask.mask_column_numbers_row_wise(self.context, self.dec_lengths)
        for e in self.dec_mask:
            e.last_modification_context = self.context

        enc_x_npa = np.zeros((len(data), max_len), np.int32, 'F')
        dec_x_npa = np.zeros((len(data), max_len + 1), np.int32, 'F')
        dec_y_npa = np.zeros((len(data), max_len + 1), np.int32, 'F')
        for k, e in enumerate(data):
            enc_x_npa[k, :len(e)] = e
            if self.training_mode:
                new_e = [_ if self.rnd.rand() < self.word_keep_prob else self.unk_idx for _ in e]
            else:
                new_e = e
            dec_x_npa[k, :len(e) + 1] = [word_to_idx['<<S>>']] + new_e
            dec_y_npa[k, :len(e) + 1] = e + [word_to_idx['<<S>>']]
        self.enc_x.assign_npa(self.context, enc_x_npa)
        self.dec_x.assign_npa(self.context, dec_x_npa)
        self._dec_y.assign_npa(self.context, dec_y_npa)
        for e in self.dec_y:
            e.last_modification_context = self.context

        self.enc_mask.fprop()
        self.dec_mask.fprop()
        self.enc_x.fprop()
        self.dec_x.fprop()
        self.dec_y.fprop()
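Note that word_to_idx is read from the enclosing module here rather than passed to __init__. The decoder input is the sentence prefixed with a '<<S>>' marker and, in training mode, with words randomly replaced by '<UNK>'; the target is the original sentence followed by the same marker. A small sketch of that shift with hypothetical token ids:

from numpy.random import RandomState

rng = RandomState(47571)
S, UNK = 1, 0                         # assumed ids for '<<S>>' and '<UNK>'
sentence = [5, 7, 9]                  # one tokenized sentence
word_keep_prob = 0.75
corrupted = [t if rng.rand() < word_keep_prob else UNK for t in sentence]
dec_x = [S] + corrupted               # decoder input: start marker + (possibly corrupted) words
dec_y = sentence + [S]                # decoder target: original words + end marker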
Example #8
    def test_theano_grad(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(128)
            input_dim, hidden_dim, class_num = self.rng.random_integers(1500,
                                                                        size=3)

            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(class_num,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))
            lr_W = self.get_orthogonal_matrix(hidden_dim, class_num)
            lr_b = self.rng.rand(1, class_num).astype(dtype=np.float32)
            device_id = 0

            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for learn_initial_states in [False, True]:
                        # quagga model
                        context = Context()
                        qx = List([
                            Connector(Matrix.from_npa(e), device_id) for e in x
                        ])
                        qtrue_labels = List([
                            Connector(Matrix.from_npa(e)) for e in true_labels
                        ], qx.length)
                        qmask = Matrix.empty(batch_size, qx.length, 'float')
                        qmask_list = [
                            Connector(qmask[:, i]) for i in xrange(qmask.ncols)
                        ]
                        qmask = Connector(qmask)
                        qh_0 = Connector(
                            Matrix.from_npa(h_0),
                            device_id if learn_initial_states else None)
                        qc_0 = Connector(
                            Matrix.from_npa(c_0),
                            device_id if learn_initial_states else None)
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qR = Connector(Matrix.from_npa(R), device_id)
                        qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                        qlr_b = Connector(Matrix.from_npa(lr_b), device_id)
                        lstm = SequencerBlock(
                            block_class=LstmBlock,
                            params=[qW, qR],
                            sequences=[
                                qx,
                                qmask_list if with_mask else [None] * len(qx)
                            ],
                            output_names=['h'],
                            prev_names=['c', 'h'],
                            paddings=[qc_0, qh_0],
                            reverse=reverse)
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qlr_W, qlr_b],
                                                       sequences=[lstm.h],
                                                       output_names=['output'])
                        seq_sce_block = SequencerBlock(
                            block_class=SoftmaxCeBlock,
                            params=[],
                            sequences=[
                                seq_dot_block.output, qtrue_labels,
                                qmask_list if with_mask else [None] * len(qx)
                            ])
                        qx.length = sequence_len
                        for e in qx:
                            e.fprop()
                        for e in qtrue_labels:
                            e.fprop()
                        qmask.assign_npa(context, mask)
                        qmask.fprop()
                        qlr_W.fprop()
                        qlr_b.fprop()
                        qh_0.fprop()
                        qc_0.fprop()
                        qW.fprop()
                        qR.fprop()
                        lstm.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        lstm.bprop()
                        quagga_grads = [
                            qlr_b.backward_matrix.to_host(),
                            qlr_W.backward_matrix.to_host(),
                            qW.backward_matrix.to_host(),
                            qR.backward_matrix.to_host()
                        ]
                        if learn_initial_states:
                            quagga_grads.append(qc_0.backward_matrix.to_host())
                            quagga_grads.append(qh_0.backward_matrix.to_host())
                        quagga_grads.append(
                            [e.backward_matrix.to_host() for e in qx])
                        del qx
                        del qlr_b
                        del qlr_W
                        del qW
                        del qR
                        del qmask
                        del lstm
                        del seq_dot_block
                        del seq_sce_block

                        # theano model
                        th_x = T.ftensor3()
                        th_true_labels = T.imatrix()
                        th_mask = T.fmatrix()
                        lstm_layer = LstmLayer(W, R, c_0, h_0, reverse=reverse)
                        th_h = lstm_layer.get_output_expr(
                            th_x, th_mask if with_mask else None)
                        seq_softmax_layer = SequentialSoftmaxLayer(
                            lr_W, lr_b, reverse)
                        loss = seq_softmax_layer.get_loss(
                            th_h, th_true_labels,
                            th_mask if with_mask else None)
                        wrt = [
                            seq_softmax_layer.b, seq_softmax_layer.W,
                            lstm_layer.W, lstm_layer.R
                        ]
                        if learn_initial_states:
                            wrt.append(lstm_layer.c0)
                            wrt.append(lstm_layer.h0)
                        wrt.append(th_x)
                        grads = T.grad(loss, wrt)
                        if with_mask:
                            get_theano_grads = theano.function(
                                [th_x, th_true_labels, th_mask], grads)
                            theano_grads = get_theano_grads(
                                np.dstack(x[:sequence_len]),
                                np.hstack(true_labels[:sequence_len]),
                                mask[:, :sequence_len])
                        else:
                            get_theano_grads = theano.function(
                                [th_x, th_true_labels], grads)
                            theano_grads = get_theano_grads(
                                np.dstack(x[:sequence_len]),
                                np.hstack(true_labels[:sequence_len]))

                        for quagga_grad, theano_grad in izip(
                                quagga_grads[:-1], theano_grads[:-1]):
                            r.append(
                                np.allclose(quagga_grad,
                                            theano_grad,
                                            atol=1e-6))
                        for i in xrange(theano_grads[-1].shape[-1]):
                            if not np.allclose(quagga_grads[-1][i],
                                               theano_grads[-1][..., i],
                                               atol=1e-6):
                                r.append(False)
                                break
                        else:
                            r.append(True)

        self.assertEqual(sum(r), len(r))
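get_orthogonal_matrix is not shown in this snippet; a common way to build such an initializer, and the fused z/i/f/o weight layout used above, might look like this (the helper name and toy shapes are illustrative only):

import numpy as np

def orthogonal(nrows, ncols, rng):
    # QR decomposition of a Gaussian matrix yields orthonormal columns,
    # a standard initialization for recurrent weights.
    a = rng.randn(max(nrows, ncols), min(nrows, ncols))
    q, _ = np.linalg.qr(a)
    q = q if nrows >= ncols else q.T  # orient to the requested shape
    return q.astype(np.float32)

rng = np.random.RandomState(42)
input_dim, hidden_dim = 4, 3
W = np.hstack([orthogonal(input_dim, hidden_dim, rng) for _ in 'zifo'])  # fused z, i, f, o gates
print(W.shape)                        # (4, 12)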
Example #9
    def test_theano_fprop(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))

            for reverse in [False, True]:
                for with_mask in [False, True]:
                    context = Context()
                    qx = List([Connector(Matrix.from_npa(e)) for e in x])
                    qmask = Connector(
                        Matrix.empty(batch_size, len(qx), 'float'))
                    qh_0 = Connector(Matrix.from_npa(h_0))
                    qc_0 = Connector(Matrix.from_npa(c_0))
                    qW = Connector(Matrix.from_npa(W))
                    qR = Connector(Matrix.from_npa(R))
                    lstm = SequencerBlock(block_class=LstmBlock,
                                          params=[qW, qR],
                                          sequences=[qx] +
                                          ([qmask] if with_mask else []),
                                          output_names=['h'],
                                          prev_names=['c', 'h'],
                                          paddings=[qc_0, qh_0],
                                          reverse=reverse)

                    qx.length = sequence_len
                    for e in qx:
                        e.fprop()
                    qmask.assign_npa(context, mask)
                    qmask.fprop()
                    qh_0.fprop()
                    qc_0.fprop()
                    qW.fprop()
                    qR.fprop()
                    lstm.fprop()
                    q_h = lstm.h.to_host()

                    th_x = T.ftensor3()
                    lstm_layer = LstmLayer(W, R, c_0, h_0, reverse)
                    if with_mask:
                        th_mask = T.fmatrix()
                        get_th_h = theano.function([th_x, th_mask],
                                                   lstm_layer.get_output_expr(
                                                       th_x, th_mask))
                        th_h = get_th_h(np.dstack(x[:sequence_len]),
                                        mask[:, :sequence_len])
                    else:
                        get_th_h = theano.function(
                            [th_x], lstm_layer.get_output_expr(th_x))
                        th_h = get_th_h(np.dstack(x[:sequence_len]))

                    for i in xrange(th_h.shape[0]):
                        if not np.allclose(q_h[i], th_h[i]):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
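Both tests feed the Theano model a time-major stack of the per-step inputs, which is what np.dstack produces. A small sketch of the resulting layout with toy sizes:

import numpy as np

# A list of T arrays of shape (batch, dim), stacked along a new third axis,
# becomes one (batch, dim, T) array with time running along the last axis.
x = [np.full((2, 3), t, np.float32) for t in range(4)]
packed = np.dstack(x)
print(packed.shape)                   # (2, 3, 4)
print(packed[0, 0])                   # [0. 1. 2. 3.]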