def test_zero_length_target(etype):
    """Run both E2E back-ends on a batch that contains an empty target.

    The second utterance carries ``tokenid=""``.  Nothing is asserted on the
    returned values; the test succeeds when the Chainer and the PyTorch
    forward calls complete and each can be unpacked into three results.

    Args:
        etype: forwarded to ``make_arg`` as its ``etype`` keyword.
    """
    pytest.importorskip('torch')
    args = make_arg(etype=etype)

    import logging
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
    )

    import e2e_asr_attctc as ch
    import e2e_asr_attctc_th as th

    ch_model = ch.E2E(40, 5, args)
    ch_model.cleargrads()
    th_model = th.E2E(40, 5, args)

    # (utterance id, number of frames, target token ids); the features are
    # drawn in this order so the random stream is consumed as before.
    utterances = (
        ("aaa", 200, "1"),
        ("bbb", 100, ""),
        ("cc", 100, "1 2"),
    )
    data = [
        (
            uttid,
            {
                "feat": numpy.random.randn(n_frames, 40).astype(numpy.float32),
                "tokenid": tokens,
            },
        )
        for uttid, n_frames, tokens in utterances
    ]

    ch_ctc, ch_att, ch_acc = ch_model(data)
    th_ctc, th_att, th_acc = th_model(data)
def test_lecun_init_chainer():
    """Check the statistics of freshly initialised Chainer model parameters.

    Seeds ``random``, ``numpy.random`` and ``CHAINER_SEED`` from the
    module-level ``args.seed``, builds ``Loss(E2E(40, 5, args), 0.5)`` and
    verifies that:

    * the CTC output layer has an all-zero bias and weights with mean ~0 and
      variance ~``1 / w.shape[1]``;
    * ``lstm0``/``lstm1`` upward biases sum to a quarter of their size
      (presumably one gate's bias is set to 1 -- confirm against the model);
    * embedding parameters have mean ~0 and variance ~1;
    * every other 1-D parameter is all zeros;
    * every remaining parameter has mean ~0 and variance
      ~``1 / prod(shape[1:])``.
    """
    seed = args.seed
    random.seed(seed)
    numpy.random.seed(seed)
    os.environ["CHAINER_SEED"] = str(seed)

    import e2e_asr_attctc as m
    model = m.Loss(m.E2E(40, 5, args), 0.5)

    def check_moments(values, variance, tol):
        # The same tolerance is used as both relative and absolute bound.
        numpy.testing.assert_allclose(values.mean(), 0.0, rtol=tol, atol=tol)
        numpy.testing.assert_allclose(values.var(), variance, rtol=tol, atol=tol)

    ctc_out = model.predictor.ctc.ctc_lo
    bias = ctc_out.b.data
    assert numpy.all(bias == 0.0)
    weight = ctc_out.W.data
    check_moments(weight, 1.0 / weight.shape[1], 1e-2)

    for name, param in model.namedparams():
        print(name)
        values = param.data
        if "lstm0/upward/b" in name or "lstm1/upward/b" in name:
            assert values.sum() == values.size // 4
        elif "embed" in name:
            check_moments(values, 1.0, 5e-2)
        elif values.ndim == 1:
            assert numpy.all(values == 0.0)
        else:
            check_moments(values, 1.0 / numpy.prod(values.shape[1:]), 5e-2)
def load_pretrained(self, src_dict, idim, odim, args, train_batch, train_reader):
    """Overwrite this module's state with the entries of ``src_dict``.

    Every key of ``src_dict`` must already exist in ``self.state_dict()``;
    after loading, each stored value is compared element-wise with the
    source value.

    When ``args.verbose > 0`` an additional consistency check is run: a
    reference ``Loss(E2E(idim, odim, args), args.mtlalpha)`` from
    ``e2e_asr_attctc_th`` is loaded with ``src_dict`` and its predictor
    output on the first training batch is compared with this module's
    predictor output.
    NOTE: that check switches ``self.predictor`` to eval mode and does not
    switch it back.

    Args:
        src_dict: mapping from parameter name to value to load.
        idim, odim: passed to the reference ``E2E`` (verbose check only).
        args: needs ``verbose``; ``mtlalpha`` is read in the verbose check.
        train_batch: its first element is opened via ``open_kaldi_feat``.
        train_reader: passed to ``open_kaldi_feat`` with that element.

    Returns:
        ``self``.

    Raises:
        AssertionError: on an unknown key, on a value mismatch after
            loading, or when the verbose comparison fails.
    """
    merged = self.state_dict()
    for key, value in src_dict.items():
        assert key in merged, key + " not found"
        merged[key] = value
    self.load_state_dict(merged)

    # Read the state back and make sure every source entry really landed.
    reloaded = self.state_dict()
    for key, value in src_dict.items():
        assert (reloaded[key] == value).all()

    if not args.verbose > 0:
        return self

    import e2e_asr_attctc_th as base
    reference = base.Loss(base.E2E(idim, odim, args), args.mtlalpha)
    reference.load_state_dict(src_dict)
    reference.eval()
    self.predictor.eval()

    # Both predictors must agree on the first training batch.
    with open_kaldi_feat(train_batch[0], train_reader) as data:
        init_ctc, init_att, init_acc = reference.predictor(data)
        re_ctc, re_att, re_acc = self.predictor(data, supervised=True)
        print("init: ", init_ctc, init_att, init_acc)
        print("re: ", re_ctc, re_att, re_acc)
        np.testing.assert_almost_equal(init_ctc.data[0], re_ctc.data[0])
        np.testing.assert_almost_equal(init_att.data[0], re_att.data[0])
        np.testing.assert_almost_equal(init_acc, re_acc)
    return self
def test_lecun_init_torch():
    """Check the statistics of freshly initialised PyTorch model parameters.

    Skipped when ``torch`` is not importable.  Seeds ``random``, ``torch``,
    ``numpy.random`` and ``CHAINER_SEED`` from the module-level
    ``args.seed``, builds ``Loss(E2E(40, 5, args), 0.5)`` and verifies that:

    * the CTC output layer has an all-zero bias and weights with mean ~0 and
      variance ~``1 / w.shape[1]``;
    * embedding parameters have mean ~0 and variance ~1;
    * the ``bias_ih`` of decoder layers 0 and 1 sums to a quarter of its size
      (presumably one gate's bias is set to 1 -- confirm against the model);
    * every other 1-D parameter is all zeros;
    * every remaining parameter has mean ~0 and variance
      ~``1 / prod(shape[1:])``.
    """
    torch = pytest.importorskip("torch")
    seed = args.seed
    random.seed(seed)
    torch.manual_seed(seed)
    numpy.random.seed(seed)
    os.environ["CHAINER_SEED"] = str(seed)

    import e2e_asr_attctc_th as m
    model = m.Loss(m.E2E(40, 5, args), 0.5)

    def check_moments(values, variance, tol):
        # The same tolerance is used as both relative and absolute bound.
        numpy.testing.assert_allclose(values.mean(), 0.0, rtol=tol, atol=tol)
        numpy.testing.assert_allclose(values.var(), variance, rtol=tol, atol=tol)

    ctc_out = model.predictor.ctc.ctc_lo
    bias = ctc_out.bias.data.numpy()
    assert numpy.all(bias == 0.0)
    weight = ctc_out.weight.data.numpy()
    check_moments(weight, 1.0 / weight.shape[1], 1e-2)

    for name, param in model.named_parameters():
        print(name)
        values = param.data.numpy()
        if "embed" in name:
            check_moments(values, 1.0, 5e-2)
        elif ("predictor.dec.decoder.0.bias_ih" in name
              or "predictor.dec.decoder.1.bias_ih" in name):
            assert values.sum() == values.size // 4
        elif values.ndim == 1:
            assert numpy.all(values == 0.0)
        else:
            check_moments(values, 1.0 / numpy.prod(values.shape[1:]), 5e-2)
def test_loss_and_ctc_grad(etype):
    """Compare losses and output-layer gradients of the two E2E back-ends.

    Both models are built with the same arguments and passed through the
    constant-weight initialisers with ``const = 1e-4``.  On one random
    three-utterance batch the test then checks that:

    * ``att.pre_compute_enc_h`` is zero at the same positions in both models;
    * the CTC and attention losses agree;
    * after back-propagating the CTC loss, the gradients of ``ctc.ctc_lo``
      agree;
    * after clearing gradients, re-running the forward pass and
      back-propagating the attention loss, the gradients of ``dec.output``
      agree.

    Args:
        etype: forwarded to ``make_arg`` as its ``etype`` keyword.
    """
    pytest.importorskip('torch')
    args = make_arg(etype=etype)

    import logging
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
    )

    import e2e_asr_attctc as ch
    import e2e_asr_attctc_th as th

    ch_model = ch.E2E(40, 5, args)
    ch_model.cleargrads()
    th_model = th.E2E(40, 5, args)

    const = 1e-4
    init_torch_weight_const(th_model, const)
    init_chainer_weight_const(ch_model, const)

    out_data = "1 2 3 4"
    # (utterance id, number of frames); the features are drawn in this order
    # so the random stream is consumed as before.
    utterances = (("aaa", 200), ("bbb", 100), ("cc", 100))
    data = [
        (
            uttid,
            {
                "feat": numpy.random.randn(n_frames, 40).astype(numpy.float32),
                "tokenid": out_data,
            },
        )
        for uttid, n_frames in utterances
    ]

    def check_layer_grads(ch_link, th_layer):
        # Weight gradients are held to a tighter tolerance than bias ones.
        numpy.testing.assert_allclose(
            ch_link.W.grad, th_layer.weight.grad.data.numpy(),
            rtol=1e-7, atol=1e-8)
        numpy.testing.assert_allclose(
            ch_link.b.grad, th_layer.bias.grad.data.numpy(),
            rtol=1e-5, atol=1e-6)

    ch_ctc, ch_att, ch_acc = ch_model(data)
    th_ctc, th_att, th_acc = th_model(data)

    # Masking: the zero entries of the pre-computed encoder projection must
    # sit at the same positions in both back-ends.
    ch_ench = ch_model.att.pre_compute_enc_h.data
    th_ench = th_model.att.pre_compute_enc_h.data.numpy()
    numpy.testing.assert_equal(ch_ench == 0.0, th_ench == 0.0)

    # Losses of the two identically initialised models must match.
    numpy.testing.assert_allclose(ch_ctc.data, th_ctc.data.numpy())
    numpy.testing.assert_allclose(ch_att.data, th_att.data.numpy())

    # Gradients of the CTC loss w.r.t. the CTC output layer.
    ch_ctc.backward()
    th_ctc.backward()
    check_layer_grads(ch_model.ctc.ctc_lo, th_model.ctc.ctc_lo)

    # Gradients of the attention (cross-entropy) loss w.r.t. the decoder
    # output layer, computed from a fresh forward pass.
    ch_model.cleargrads()
    th_model.zero_grad()
    ch_ctc, ch_att, ch_acc = ch_model(data)
    th_ctc, th_att, th_acc = th_model(data)
    ch_att.backward()
    th_att.backward()
    check_layer_grads(ch_model.dec.output, th_model.dec.output)