def _propagate(ctc_type):
    """Run one forward/backward pass of a chainer E2E model for ``ctc_type``.

    ``batch`` is not defined in this function; it is read from the
    surrounding scope.

    Args:
        ctc_type: Value forwarded to ``make_arg(ctc_type=...)``.

    Returns:
        tuple: ``(loss_data, W_grad, b_grad)`` where ``loss_data`` is the
        ``.data`` of the second model output (treated here as the CTC loss)
        and the gradients are those of ``model.ctc.ctc_lo.W`` and
        ``model.ctc.ctc_lo.b`` after ``backward()``.
    """
    model_args = make_arg(ctc_type=ctc_type)
    # Reset NumPy's global RNG right before the model is constructed.
    np.random.seed(0)
    net = ch_asr.E2E(10, 5, model_args)

    _, ctc_loss, _, _ = net(*batch)
    ctc_loss.backward()

    ctc_out = net.ctc.ctc_lo
    return ctc_loss.data, ctc_out.W.grad, ctc_out.b.grad
def test_mtl_loss(etype):
    """Compare chainer and pytorch E2E losses and gradients for ``etype``.

    Both models are built from the same arguments and initialized through the
    constant-weight helpers with the same constant. The test then checks that
    the zero pattern of the pre-computed encoder projection matches, that the
    CTC and attention losses agree, and that the gradients of the CTC and
    decoder output layers agree for an equally weighted sum of both losses.
    """
    args = make_arg(etype=etype)
    ch_model = ch_asr.E2E(10, 5, args)
    th_model = th_asr.E2E(10, 5, args)

    init_value = 1e-4
    init_torch_weight_const(th_model, init_value)
    init_chainer_weight_const(ch_model, init_value)

    ch_batch = prepare_inputs("chainer")
    th_batch = prepare_inputs("pytorch")

    # The chainer model returns its losses; the pytorch model exposes them
    # as attributes after the forward call.
    _, ch_ctc, ch_att, _ = ch_model(*ch_batch)
    th_model(*th_batch)
    th_ctc = th_model.loss_ctc
    th_att = th_model.loss_att

    # test masking: both backends must have zeros at the same positions
    ch_zero_mask = ch_model.att.pre_compute_enc_h.data == 0.0
    th_zero_mask = th_model.att[0].pre_compute_enc_h.detach().numpy() == 0.0
    np.testing.assert_equal(ch_zero_mask, th_zero_mask)

    # test loss under the shared constant initialization
    # (original note: weights 1.0, bias 0.0, forget-bias 1.0 -- TODO confirm
    # against the init helpers, which are not visible here)
    for ch_term, th_term in ((ch_ctc, th_ctc), (ch_att, th_att)):
        np.testing.assert_allclose(ch_term.data, th_term.detach().numpy())

    # test grads in mtl mode
    ch_loss = ch_ctc * 0.5 + ch_att * 0.5
    th_loss = th_ctc * 0.5 + th_att * 0.5
    ch_model.cleargrads()
    th_model.zero_grad()
    ch_loss.backward()
    th_loss.backward()

    layer_pairs = (
        (ch_model.ctc.ctc_lo, th_model.ctc.ctc_lo),
        (ch_model.dec.output, th_model.dec.output),
    )
    for ch_layer, th_layer in layer_pairs:
        # Weights are compared with a tighter tolerance than biases.
        np.testing.assert_allclose(
            ch_layer.W.grad,
            th_layer.weight.grad.data.numpy(),
            rtol=1e-7,
            atol=1e-8,
        )
        np.testing.assert_allclose(
            ch_layer.b.grad,
            th_layer.bias.grad.data.numpy(),
            rtol=1e-5,
            atol=1e-6,
        )
def test_zero_length_target(etype):
    """Smoke-test both backends on a batch whose second target has length 0.

    There are no assertions: the test passes as long as the forward calls of
    the chainer and pytorch models complete without raising.
    """
    model_args = make_arg(etype=etype)
    target_lens = (4, 0)

    chainer_model = ch_asr.E2E(10, 5, model_args)
    chainer_model.cleargrads()
    torch_model = th_asr.E2E(10, 5, model_args)

    # Each call gets its own fresh ``[4, 0]`` list, as in the original literals.
    chainer_inputs = prepare_inputs("chainer", olens=list(target_lens))
    torch_inputs = prepare_inputs("pytorch", olens=list(target_lens))

    chainer_model(*chainer_inputs)
    torch_model(*torch_inputs)
def test_chainer_save_and_load():
    """Check that ``asr_utils.chainer_load`` restores parameters saved as npz.

    The model parameters are randomized and saved with
    ``chainer.serializers.save_npz``, then zeroed in memory and reloaded.
    Every reloaded parameter must equal the value it had when it was saved.
    """
    args = make_arg()
    model = ch_asr.E2E(10, 5, args)

    # initialize randomly
    for p in model.params():
        p.data = np.random.randn(*p.data.shape)

    # A private temporary directory replaces the deprecated, race-prone
    # ``tempfile.mktemp()`` and is removed even when the save/load below raises.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmppath = os.path.join(tmpdir, "model.npz")
        chainer.serializers.save_npz(tmppath, model)
        p_saved = [p.data for p in model.params()]

        # Rebind every parameter to zeros so that the comparison below can
        # only pass if loading really restores the saved values.
        for p in model.params():
            p.data = np.zeros_like(p.data)
        asr_utils.chainer_load(tmppath, model)

    for p1, p2 in zip(p_saved, model.params()):
        np.testing.assert_array_equal(p1, p2.data)