Example #2
File: test_ctc.py  Project: okuta/chainer
 def test_no_backprop_mode(self):
     xs_data = numpy.random.uniform(-1, 1, (4, 2, 3)).astype(numpy.float32)
     t_data = numpy.array([[0, 1], [1, 0]]).astype(numpy.int32)
     with chainer.no_backprop_mode():
         x = [chainer.Variable(x_data) for x_data in xs_data]
         t = chainer.Variable(t_data)
         functions.connectionist_temporal_classification(x, t, 2)
Example #3
def forward_one_sample(model, wavfile, label, SIL_idx, useGPU):
    try:
        samplerate, wavdata = wav.read(wavfile)
    except IOError:
        return None, None
    feats = mfcc(wavdata, samplerate).astype(np.float32)
    feats = (feats - mean) / std  # mean and std are module-level statistics defined elsewhere

    model.reset_state()

    if useGPU:
        input_seq = [
            Variable(cuda.to_gpu(feats[i, :].reshape((1, -1))))
            for i in range(feats.shape[0])
        ]
        y = model(input_seq)
        label = Variable(
            cuda.to_gpu(xp.array(label, dtype=xp.int32).reshape((1, -1))))
    else:
        input_seq = [
            Variable(feats[i, :][np.newaxis, :]) for i in range(feats.shape[0])
        ]
        # y = [model(item) for item in input_seq]
        y = model(input_seq)
        label = Variable(xp.array(label, dtype=xp.int32).reshape((1, -1)))

    loss = F.connectionist_temporal_classification(y, label, SIL_idx)
    return y, loss
Example #4
    def check_backward(self, t_data, xs_data, l_length, x_length, grad, gx):
        xs = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)

        loss = functions.connectionist_temporal_classification(
            xs,
            t,
            2,
            input_length=chainer.Variable(x_length),
            label_length=chainer.Variable(l_length))

        loss.grad = grad
        loss.backward()

        func = loss.creator
        xs_data = tuple(x.data for x in xs)
        f = lambda: func.forward((
            x_length,
            l_length,
            t.data,
        ) + xs_data)
        gx_0, gx_1, gx_2, gx_3 = gradient_check.numerical_grad(
            f, (xs_data), (gx, ))
        gradient_check.assert_allclose(xs[0].grad, gx_0, atol=1e-04)
        gradient_check.assert_allclose(xs[1].grad, gx_1, atol=1e-04)
        gradient_check.assert_allclose(xs[2].grad, gx_2, atol=1e-04)
        gradient_check.assert_allclose(xs[3].grad, gx_3, atol=1e-04)
Example #5
File: test_ctc.py  Project: 2php/chainer
    def check_forward(self, t_data, xs_data, l_length, x_length):
        x = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)

        args = (x, t, self.blank_symbol)
        if self.use_length:
            args += (chainer.Variable(x_length), chainer.Variable(l_length))
        loss = functions.connectionist_temporal_classification(*args)
        loss_value = float(loss.data)

        # compute expected value by recursive computation.
        xp = cuda.get_array_module(self.x)
        xt = xp.swapaxes(self.x, 0, 1)
        for b in range(xt.shape[0]):
            for t in range(xt.shape[1]):
                xt[b][t] = numpy.exp(xt[b][t]) / numpy.sum(numpy.exp(xt[b][t]))
        loss_expect = 0
        batch_size = xt.shape[0]
        path_length = 2 * l_length + 1
        for xtb, lb, xlb, plb in zip(xt, self.l, x_length, path_length):
            loss_expect += -math.log(
                self.alpha(xtb, lb, int(xlb - 1), int(plb - 1)) +
                self.alpha(xtb, lb, int(xlb - 1), int(plb - 2)))
        loss_expect /= batch_size
        self.assertAlmostEqual(loss_expect, loss_value, places=5)
Example #6
    def check_forward(self, t_data, xs_data, l_length, x_length):
        x = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)

        args = (x, t, self.blank_symbol)
        if self.use_length:
            args += (chainer.Variable(x_length), chainer.Variable(l_length))
        loss = functions.connectionist_temporal_classification(
            *args, reduce=self.reduce).data

        # compute expected value by recursive computation.
        xp = cuda.get_array_module(self.x)
        xt = xp.swapaxes(self.x, 0, 1)
        for b in range(xt.shape[0]):
            for t in range(xt.shape[1]):
                xt[b][t] = numpy.exp(xt[b][t]) / numpy.sum(numpy.exp(xt[b][t]))
        batch_size = xt.shape[0]
        path_length = 2 * l_length + 1
        loss_expect = xp.zeros((batch_size,), dtype=xp.float32)
        for i in range(batch_size):
            xtb, lb, xlb, plb = xt[i], self.l[i], x_length[i], path_length[i]
            loss_expect[i] = -math.log(
                self.alpha(xtb, lb, int(xlb - 1), int(plb - 1)) +
                self.alpha(xtb, lb, int(xlb - 1), int(plb - 2)))
        if self.reduce == 'mean':
            loss_expect = xp.mean(loss_expect)
        testing.assert_allclose(loss_expect, loss)
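The check_forward tests above compare the Chainer loss against self.alpha, a recursive CTC forward variable whose definition is not part of these excerpts. A minimal sketch of that recursion, under the assumption that x is a (T, n_labels) array of per-frame softmax probabilities, l is the label sequence, and blank is the blank symbol index (the signature is inferred from the call sites above, not taken from the tests themselves):

def alpha(x, l, t, u, blank=2):
    # Total probability of all alignments that emit the extended label
    # sequence [blank, l[0], blank, l[1], ..., blank] up to position u
    # within the first t + 1 frames. Exponential without memoization,
    # which is acceptable for the tiny inputs used in these tests.
    if u < 0:
        return 0.0
    label = blank if u % 2 == 0 else l[u // 2]
    if t == 0:
        # A valid path may start only at the leading blank or the first label.
        return float(x[0][label]) if u <= 1 else 0.0
    total = alpha(x, l, t - 1, u, blank) + alpha(x, l, t - 1, u - 1, blank)
    # Skipping the preceding blank is allowed only between distinct labels.
    if u % 2 == 1 and (u < 2 or l[u // 2] != l[u // 2 - 1]):
        total += alpha(x, l, t - 1, u - 2, blank)
    return float(x[t][label]) * total

The per-sample loss used above, -log(alpha(xtb, lb, xlb - 1, plb - 1) + alpha(xtb, lb, xlb - 1, plb - 2)), sums the two positions a complete path may end in: the last label or the trailing blank.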
Example #7
File: test_loss.py  Project: zpppy/espnet
def test_ctc_loss():
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    import warpctc_pytorch

    from espnet.nets.e2e_asr_th import pad_list

    n_out = 7
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = [
        numpy.random.rand(il, n_out).astype(numpy.float32)
        for il in input_length
    ]
    np_target = [
        numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    # NOTE: np_pred[i] seems to be transposed and used with axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(ch_pred, ch_target, 0,
                                                      input_length,
                                                      label_length).data

    th_pred = pad_list([torch.from_numpy(x) for x in np_pred],
                       0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = warpctc_pytorch.CTCLoss(size_average=True)(
        th_pred, th_target, th_ilen, th_olen).data.numpy()[0]
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
Example #9
File: ctc.py  Project: zhuanaa/espnet
    def __call__(self, hs, ys):
        """CTC forward.

        Args:
            hs (list of chainer.Variable | N-dimension array): Input variable from encoder.
            ys (list of chainer.Variable | N-dimension array): Input variable of decoder.

        Returns:
            chainer.Variable: A variable holding a scalar value of the CTC loss.

        """
        self.loss = None
        ilens = [x.shape[0] for x in hs]
        olens = [x.shape[0] for x in ys]

        # zero padding for hs
        y_hat = linear_tensor(self.ctc_lo, F.dropout(
            F.pad_sequence(hs), ratio=self.dropout_rate))
        y_hat = F.separate(y_hat, axis=1)  # ilen list of batch x hdim

        # zero padding for ys
        y_true = F.pad_sequence(ys, padding=-1)  # batch x olen

        # get length info
        input_length = chainer.Variable(self.xp.array(ilens, dtype=np.int32))
        label_length = chainer.Variable(self.xp.array(olens, dtype=np.int32))
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(input_length.data))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(label_length.data))

        # get ctc loss
        self.loss = F.connectionist_temporal_classification(
            y_hat, y_true, 0, input_length, label_length)
        logging.info('ctc loss:' + str(self.loss.data))

        return self.loss
Example #10
File: e2e_asr.py  Project: zane678/espnet
    def __call__(self, hs, ys):
        '''CTC forward

        :param hs: list of encoder output variables
        :param ys: list of label sequence variables
        :return: a variable holding a scalar value of the CTC loss
        '''
        self.loss = None
        ilens = [x.shape[0] for x in hs]
        olens = [x.shape[0] for x in ys]

        # zero padding for hs
        y_hat = linear_tensor(self.ctc_lo, F.dropout(
            F.pad_sequence(hs), ratio=self.dropout_rate))
        y_hat = F.separate(y_hat, axis=1)  # ilen list of batch x hdim

        # zero padding for ys
        y_true = F.pad_sequence(ys, padding=-1)  # batch x olen

        # get length info
        input_length = chainer.Variable(self.xp.array(ilens, dtype=np.int32))
        label_length = chainer.Variable(self.xp.array(olens, dtype=np.int32))
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(input_length.data))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(label_length.data))

        # get ctc loss
        self.loss = F.connectionist_temporal_classification(
            y_hat, y_true, 0, input_length, label_length)
        logging.info('ctc loss:' + str(self.loss.data))

        return self.loss
Example #11
    def ctc_loss(self, ys, label_batch):
        labels = label_batch
        (out_ys, input_length) = ys

        label_length = [len(l) for l in labels]
        label_length = self.xp.asarray(label_length, dtype=self.xp.int32)

        input_length = self.xp.asarray(input_length, dtype=self.xp.int32)

        word_labels = concat_examples(labels, self.device, padding=self.blank)
        word_loss = F.connectionist_temporal_classification(
            out_ys, word_labels, self.blank, input_length, label_length)

        # # confidence penalty
        # out_ys = F.stack(out_ys, axis=1)
        # out_ys = [F.softmax(out[:l]) for l, out in zip(input_length, out_ys)]

        # entropy = -sum([F.sum(out * F.log(out + 1e-10)) for out in out_ys]) / 200

        # scale = 0.2

        # word_loss = word_loss - scale * entropy

        # if char_ys is not None:

        #     char_labels = concat_examples(char_label, self.device, padding=self.blank)
        #     char_loss = F.connectionist_temporal_classification(char_ys, char_labels, self.blank, input_length, char_label_length)

        print(word_loss)

        return word_loss
Example #12
def test_ctc_loss():
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    from warpctc_pytorch import CTCLoss

    from e2e_asr_attctc_th import pad_list

    n_out = 7
    n_batch = 3
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = [numpy.random.rand(il, n_out).astype(
        numpy.float32) for il in input_length]
    np_target = [numpy.random.randint(
        0, n_out, size=ol, dtype=numpy.int32) for ol in label_length]

    # NOTE: np_pred[i] seems to be transposed and used with axis=-1 in e2e_asr_attctc.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length).data

    th_pred = pad_list([torch.autograd.Variable(torch.from_numpy(x))
                        for x in np_pred]).transpose(0, 1)
    th_target = torch.autograd.Variable(
        torch.from_numpy(numpy.concatenate(np_target)))
    th_ilen = torch.autograd.Variable(torch.from_numpy(input_length))
    th_olen = torch.autograd.Variable(torch.from_numpy(label_length))
    # NOTE: warpctc_pytorch.CTCLoss does not normalize by batch size, while chainer's default setting does
    th_loss = (CTCLoss()(th_pred, th_target, th_ilen,
                         th_olen) / n_batch).data.numpy()[0]
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
Example #13
    def ctc_loss(self, ys, label_batch):
        (word_label, char_label) = label_batch
        (word_ys, input_length) = ys

        word_label_length = [len(l) for l in word_label]
        word_label_length = self.xp.asarray(word_label_length,
                                            dtype=self.xp.int32)

        # char_label_length = [len(l) for l in char_label]
        # char_label_length = self.xp.asarray(char_label_length, dtype=self.xp.int32)

        input_length = self.xp.asarray(input_length, dtype=self.xp.int32)

        word_labels = concat_examples(word_label,
                                      self.device,
                                      padding=self.blank)

        word_loss = F.connectionist_temporal_classification(
            word_ys, word_labels, self.blank, input_length, word_label_length)

        # if char_ys is not None:

        #     char_labels = concat_examples(char_label, self.device, padding=self.blank)
        #     char_loss = F.connectionist_temporal_classification(char_ys, char_labels, self.blank, input_length, char_label_length)
        print(word_loss)

        return word_loss
Example #14
    def check_forward(self, t_data, xs_data):
        x = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)
        loss = functions.connectionist_temporal_classification(x, t, 2)
        loss_value = float(loss.data)

        # compute expected value by recursive computation.
        xp = cuda.get_array_module(self.x)
        xt = xp.swapaxes(self.x, 0, 1)
        for b in range(xt.shape[0]):
            for t in range(xt.shape[1]):
                xt[b][t] = numpy.exp(xt[b][t]) / numpy.sum(numpy.exp(xt[b][t]))
        loss_expect = 0
        batch_size = xt.shape[0]
        for b in range(batch_size):
            loss_expect += -math.log(self.alpha(xt[b],
                                                self.l[b],
                                                self.x.shape[0]-1,
                                                self.l[b].shape[0]-1)
                                     + self.alpha(xt[b],
                                                  self.l[b],
                                                  self.x.shape[0]-1,
                                                  self.l[b].shape[0]-2))
        loss_expect /= batch_size
        self.assertAlmostEqual(loss_expect, loss_value, places=5)
Example #15
 def f(input_length, label_length, t, *x):
     return functions.connectionist_temporal_classification(
         x,
         t,
         self.blank_symbol,
         x_length,
         l_length,
         reduce=self.reduce)
Example #16
def run_ctc():
	x = Variable(x_data)
	x = x[:, :vocab_size_ctc]
	x = functions.swapaxes(x, 1, 3)
	x = functions.reshape(x, (batchsize, -1))
	x = functions.split_axis(x, seq_length, axis=1)

	x_length = Variable(xp.asarray([seq_length, seq_length // 2], dtype=xp.int32))	# input sequence lengths are arbitrary

	loss_ctc = functions.connectionist_temporal_classification(x, label_unigram, blank_symbol, x_length, Variable(length_unigram), reduce="mean")
	loss_ctc.backward()
Example #17
    def check_backward(self, t_data, xs_data):
        xs = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)
        loss = functions.connectionist_temporal_classification(xs, t, 2)
        loss.grad = self.g
        loss.backward()

        func = loss.creator
        xs_data = tuple(x.data for x in xs)
        f = lambda: func.forward((t.data,) + xs_data)
        gl_0, gx_0, gx_1, gx_2, gx_3 = gradient_check.numerical_grad(
            f, ((t.data,) + xs_data), (self.gx,))
        gradient_check.assert_allclose(xs[0].grad, gx_0, atol=1e-04)
        gradient_check.assert_allclose(xs[1].grad, gx_1, atol=1e-04)
        gradient_check.assert_allclose(xs[2].grad, gx_2, atol=1e-04)
        gradient_check.assert_allclose(xs[3].grad, gx_3, atol=1e-04)
Example #18
def test_ctc_loss(in_length, out_length, use_warpctc):
    pytest.importorskip("torch")
    if use_warpctc:
        pytest.importorskip("warpctc_pytorch")
        import warpctc_pytorch

        torch_ctcloss = warpctc_pytorch.CTCLoss(size_average=True)
    else:
        if LooseVersion(torch.__version__) < LooseVersion("1.0"):
            pytest.skip("pytorch < 1.0 doesn't support CTCLoss")
        _ctcloss_sum = torch.nn.CTCLoss(reduction="sum")

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            th_pred = th_pred.log_softmax(2)
            loss = _ctcloss_sum(th_pred, th_target, th_ilen, th_olen)
            # Batch-size average
            loss = loss / th_pred.size(1)
            return loss

    n_out = 7
    input_length = numpy.array(in_length, dtype=numpy.int32)
    label_length = numpy.array(out_length, dtype=numpy.int32)
    np_pred = [
        numpy.random.rand(il, n_out).astype(numpy.float32)
        for il in input_length
    ]
    np_target = [
        numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    # NOTE: np_pred[i] seems to be transposed and used with axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(ch_pred, ch_target, 0,
                                                      input_length,
                                                      label_length).data

    th_pred = pad_list([torch.from_numpy(x) for x in np_pred],
                       0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = torch_ctcloss(th_pred, th_target, th_ilen, th_olen).numpy()
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
Example #19
    def __call__(self, x, phonemes, lengths):
        y = self.forward(x)

        # The input of CTC must be a list or a tuple.
        ys = [y[:, :, i] for i in range(y.shape[2])]

        # The label input of CTC must be a variable or an array.
        phonemes = F.pad_sequence(phonemes, padding=self.n_category - 1)

        nll = F.connectionist_temporal_classification(
            ys,
            phonemes,
            blank_symbol=self.n_category - 1,
            label_length=lengths)
        likelihood = F.exp(-nll)

        chainer.reporter.report({'nll': nll, 'likelihood': likelihood}, self)
        return nll
Example #20
    def check_backward(self, t_data, xs_data, l_length, x_length, grad, gx):
        xs = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)

        loss = functions.connectionist_temporal_classification(
            xs, t, 2, input_length=chainer.Variable(x_length), label_length=chainer.Variable(l_length)
        )

        loss.grad = grad
        loss.backward()

        func = loss.creator
        xs_data = tuple(x.data for x in xs)
        f = lambda: func.forward((x_length, l_length, t.data) + xs_data)
        gx_0, gx_1, gx_2, gx_3 = gradient_check.numerical_grad(f, (xs_data), (gx,))
        gradient_check.assert_allclose(xs[0].grad, gx_0, atol=1e-04)
        gradient_check.assert_allclose(xs[1].grad, gx_1, atol=1e-04)
        gradient_check.assert_allclose(xs[2].grad, gx_2, atol=1e-04)
        gradient_check.assert_allclose(xs[3].grad, gx_3, atol=1e-04)
Example #21
 def calc_actual_loss(self, predictions, grid, labels):
     predictions = F.separate(predictions, axis=0)
     return F.connectionist_temporal_classification(predictions, labels, blank_symbol=self.blank_symbol)
Example #22
 def test_not_iterable(self):
     x = chainer.Variable(numpy.zeros((4, 2, 3), numpy.float32))
     t = chainer.Variable(numpy.zeros((2, 2), numpy.int32))
     with self.assertRaises(ValueError):
         functions.connectionist_temporal_classification(
             tuple(x), t, 0, reduce='invalid_option')
Example #24
 def loss_function(predict, label):
     return F.connectionist_temporal_classification(predict, label, 0)
Example #25
File: test_ctc.py  Project: 2php/chainer
 def test_volatile(self):
     xs_data = numpy.random.uniform(-1, 1, (4, 2, 3)).astype(numpy.float32)
     t_data = numpy.array([[0, 1], [1, 0]]).astype(numpy.int32)
     x = [chainer.Variable(x_data, volatile=True) for x_data in xs_data]
     t = chainer.Variable(t_data, volatile=True)
     functions.connectionist_temporal_classification(x, t, 2)
Example #26
File: test_ctc.py  Project: okuta/chainer
 def test_not_iterable(self):
     x = chainer.Variable(numpy.zeros((4, 2, 3), numpy.float32))
     t = chainer.Variable(numpy.zeros((2, 2), numpy.int32))
     with self.assertRaises(TypeError):
         functions.connectionist_temporal_classification(x, t, 0)
Example #27
 def calc_actual_loss(self, predictions, grid, labels):
     loss = F.connectionist_temporal_classification(predictions, labels,
                                                    self.blank_symbol)
     return loss
Example #28
File: test_ctc.py  Project: okuta/chainer
 def f(input_length, label_length, t, *x):
     return functions.connectionist_temporal_classification(
         x, t, self.blank_symbol, x_length, l_length,
         reduce=self.reduce)
Example #29
    sum_accuracy = 0
    sum_loss = 0
    for i in range(0, N, batchsize):
        x = img_train[perm[i:i + batchsize]]
        y = label_train[perm[i:i + batchsize]]
        padded_y = np.zeros((batchsize, max([len(t) for t in y])))
        for index, item in enumerate(y):
            padded_y[index, :len(item)] = item
        x = Variable(xp.asarray(x).astype(xp.float32))
        output = model(x)
        print(output[0].shape)
        print(output[0][0])
        model.cleargrads()
        loss = F.connectionist_temporal_classification(
            output,
            xp.asarray(padded_y).astype(xp.int32), 0,
            xp.full((len(y), ), 63, dtype=xp.int32),
            xp.asarray([len(t) for t in y]).astype(xp.int32))
        loss.backward()
        optimizer.update()
        print(loss.data)
    """
    print('train mean loss={}, accuracy={}'.format(
        sum_loss / N, sum_accuracy / N))

    # evaluation
    with configuration.using_config('train', False):
        sum_accuracy = 0
        sum_loss = 0
        for i in range(0, N_test, batchsize):
            x = x_test[i:i + batchsize]
Example #31
def main():
	model = Model(args.vocab_size, args.ndim_embedding, args.num_layers, args.ndim_h)
	if args.gpu_device >= 0:
		chainer.cuda.get_device(args.gpu_device).use()
		model.to_gpu()

	train_data, train_labels = generate_data()
	total_loop = int(math.ceil(len(train_data) / args.batchsize))
	train_indices = np.arange(len(train_data), dtype=int)

	xp = model.xp
	x_length_batch = xp.full((args.batchsize,), args.sequence_length, dtype=xp.int32)
	t_length_batch = xp.full((args.batchsize,), args.true_sequence_length, dtype=xp.int32)

	# optimizer
	optimizer = optimizers.Adam(args.learning_rate, 0.9)
	optimizer.setup(model)
	

	for epoch in range(1, args.total_epoch + 1):
		# train loop
		sum_loss = 0
		with chainer.using_config("train", True):
			for itr in range(1, total_loop + 1):
				# sample minibatch
				np.random.shuffle(train_indices)
				x_batch = train_data[train_indices[:args.batchsize]]
				t_batch = train_labels[train_indices[:args.batchsize]]

				# GPU
				if xp is cupy:
					x_batch = cuda.to_gpu(x_batch.astype(xp.int32))
					t_batch = cuda.to_gpu(t_batch.astype(xp.int32))

				# forward
				model.reset_state()
				y_batch = model(x_batch)	# list of variables

				# compute loss
				loss = F.connectionist_temporal_classification(y_batch, t_batch, BLANK, x_length_batch, t_length_batch)
				optimizer.update(lossfun=lambda: loss)

				sum_loss += float(loss.data)

		# evaluate
		with chainer.using_config("train", False):
			# sample minibatch
			np.random.shuffle(train_indices)
			x_batch = train_data[train_indices[:args.batchsize]]
			t_batch = train_labels[train_indices[:args.batchsize]]

			# GPU
			if xp is cupy:
				x_batch = cuda.to_gpu(x_batch.astype(xp.int32))
				t_batch = cuda.to_gpu(t_batch.astype(xp.int32))

			# forward
			model.reset_state()
			y_batch = model(x_batch, split_into_variables=False)
			y_batch = xp.argmax(y_batch.data, axis=2)

			average_error = 0
			for input_sequence, argmax_sequence, true_sequence in zip(x_batch, y_batch, t_batch):
				pred_sequence = []
				for token in argmax_sequence:
					if token == BLANK:
						continue
					pred_sequence.append(int(token))
				print("true:", true_sequence, "pred:", pred_sequence)
				error = compute_character_error_rate(true_sequence.tolist(), pred_sequence)
				average_error += error
			print("CER: {} - loss: {} - lr: {:.4e}".format(int(average_error / args.batchsize * 100), sum_loss / total_loop, optimizer.alpha))
Example #33
def test_ctc_loss(in_length, out_length, ctc_type):
    pytest.importorskip("torch")
    if ctc_type == "warpctc":
        pytest.importorskip("warpctc_pytorch")
        import warpctc_pytorch

        torch_ctcloss = warpctc_pytorch.CTCLoss(size_average=True)
    elif ctc_type == "builtin" or ctc_type == "cudnnctc":
        if LooseVersion(torch.__version__) < LooseVersion("1.0"):
            pytest.skip("pytorch < 1.0 doesn't support CTCLoss")
        _ctcloss_sum = torch.nn.CTCLoss(reduction="sum")

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            th_pred = th_pred.log_softmax(2)
            loss = _ctcloss_sum(th_pred, th_target, th_ilen, th_olen)
            # Batch-size average
            loss = loss / th_pred.size(1)
            return loss

    elif ctc_type == "gtnctc":
        pytest.importorskip("gtn")
        from espnet.nets.pytorch_backend.gtn_ctc import GTNCTCLossFunction

        _ctcloss_sum = GTNCTCLossFunction.apply

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            targets = [t.tolist() for t in th_target]
            log_probs = torch.nn.functional.log_softmax(th_pred, dim=2)
            loss = _ctcloss_sum(log_probs, targets, th_ilen, 0, "none")
            return loss

    n_out = 7
    input_length = numpy.array(in_length, dtype=numpy.int32)
    label_length = numpy.array(out_length, dtype=numpy.int32)
    np_pred = [
        numpy.random.rand(il, n_out).astype(numpy.float32)
        for il in input_length
    ]
    np_target = [
        numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    # NOTE: np_pred[i] seems to be transposed and used with axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(ch_pred, ch_target, 0,
                                                      input_length,
                                                      label_length).data

    th_pred = pad_list([torch.from_numpy(x) for x in np_pred],
                       0.0).transpose(0, 1)
    if ctc_type == "gtnctc":
        # gtn implementation expects targets as list
        th_target = np_target
        # keep as B x T x H for gtn
        th_pred = th_pred.transpose(0, 1)
    else:
        th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = torch_ctcloss(th_pred, th_target, th_ilen, th_olen).numpy()

    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
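Taken together, the examples share one calling convention: the scores are a length-T list or tuple of (batch, n_labels) variables holding unnormalized outputs (the function applies softmax internally, which is why the tests above exponentiate and normalize when computing the expected loss), the targets are an int32 (batch, max_label_length) array, followed by the blank symbol index and, optionally, per-sample input and label lengths. A minimal self-contained call, assuming only numpy and chainer are available, mirroring Example #2:

import numpy
import chainer
from chainer import functions

T, B, K = 4, 2, 3  # time steps, batch size, label set size (index 2 is the blank)
xs_data = numpy.random.uniform(-1, 1, (T, B, K)).astype(numpy.float32)
t_data = numpy.array([[0, 1], [1, 0]], dtype=numpy.int32)

x = [chainer.Variable(x_data) for x_data in xs_data]
t = chainer.Variable(t_data)
loss = functions.connectionist_temporal_classification(x, t, 2)
loss.backward()  # fills x[i].grad for every time step
print(float(loss.data))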