def test_activation_coverage(act_type):
    """Run one forward pass of an activation-based coverage model.

    Builds the coverage object returned by ``sockeye.coverage.get_coverage``
    for ``act_type``, binds the resulting symbol on CPU, feeds it randomly
    generated inputs and asserts that the updated coverage has the same shape
    as the previous coverage. Only the output shape is verified.

    :param act_type: Value passed to ``get_coverage`` as ``coverage_type``.
    """
    encoder_num_hidden, decoder_num_hidden, coverage_num_hidden = 5, 5, 2
    source_seq_len, batch_size = 10, 4

    # Symbolic placeholders; their concrete shapes are listed in `shapes`.
    source = mx.sym.Variable("source")
    source_length = mx.sym.Variable("source_length")
    prev_hidden = mx.sym.Variable("prev_hidden")
    prev_coverage = mx.sym.Variable("prev_coverage")
    attention_scores = mx.sym.Variable("attention_scores")

    shapes = {
        # source: (batch_size, source_seq_len, encoder_num_hidden)
        "source": (batch_size, source_seq_len, encoder_num_hidden),
        # source_length: (batch_size,)
        "source_length": (batch_size,),
        # prev_hidden: (batch_size, decoder_num_hidden)
        "prev_hidden": (batch_size, decoder_num_hidden),
        # prev_coverage: (batch_size, source_seq_len, coverage_num_hidden)
        "prev_coverage": (batch_size, source_seq_len, coverage_num_hidden),
        # attention_scores: (batch_size, source_seq_len, 1)
        "attention_scores": (batch_size, source_seq_len, 1),
    }

    # Random inputs are drawn in the same order as the entries are listed.
    inputs = {
        "source": gaussian_vector(shape=shapes["source"]),
        "source_length": integer_vector(shape=shapes["source_length"],
                                        max_value=source_seq_len),
        "prev_hidden": gaussian_vector(shape=shapes["prev_hidden"]),
        "prev_coverage": gaussian_vector(shape=shapes["prev_coverage"]),
    }
    raw_scores = uniform_vector(shape=shapes["attention_scores"])
    # Scaled by the total over the whole array, so all scores together sum to 1.
    inputs["attention_scores"] = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(
        coverage_type=act_type, coverage_num_hidden=coverage_num_hidden)
    coverage_func = coverage.on(source, source_length, source_seq_len)
    updated_coverage = coverage_func(prev_hidden, attention_scores, prev_coverage)

    executor = updated_coverage.simple_bind(ctx=mx.cpu(), **shapes)
    for name, data in inputs.items():
        executor.arg_dict[name][:] = data

    result = executor.forward()
    new_coverage = result[0].asnumpy()

    assert new_coverage.shape == shapes["prev_coverage"]
def _test_activation_coverage(act_type):
    """Run one forward pass of an activation-based coverage model and check it.

    A ``CoverageConfig`` (``max_fertility=2``, ``num_hidden=2``, no layer
    normalization) is turned into a coverage symbol, bound on CPU and fed
    randomly generated inputs. Two things are asserted on the output:

    * it has the same shape as the previous coverage;
    * for every batch element, the number of source positions where at least
      one coverage unit is close to ``Activation(0)`` equals the corresponding
      entry of the generated source lengths.

    :param act_type: Used as the coverage ``type`` and as the ``act_type`` of
        the reference activation.
    """
    config_coverage = sockeye.coverage.CoverageConfig(
        type=act_type, max_fertility=2, num_hidden=2, layer_normalization=False)
    encoder_num_hidden, decoder_num_hidden = 5, 5
    source_seq_len, batch_size = 10, 4

    # Symbolic placeholders; their concrete shapes are listed in `shapes`.
    source = mx.sym.Variable("source")
    source_length = mx.sym.Variable("source_length")
    prev_hidden = mx.sym.Variable("prev_hidden")
    prev_coverage = mx.sym.Variable("prev_coverage")
    attention_scores = mx.sym.Variable("attention_scores")

    shapes = {
        # source: (batch_size, source_seq_len, encoder_num_hidden)
        "source": (batch_size, source_seq_len, encoder_num_hidden),
        # source_length: (batch_size,)
        "source_length": (batch_size,),
        # prev_hidden: (batch_size, decoder_num_hidden)
        "prev_hidden": (batch_size, decoder_num_hidden),
        # prev_coverage: (batch_size, source_seq_len, coverage_num_hidden)
        "prev_coverage": (batch_size, source_seq_len, config_coverage.num_hidden),
        # attention_scores: (batch_size, source_seq_len)
        "attention_scores": (batch_size, source_seq_len),
    }

    # Random inputs are drawn in the same order as the entries are listed.
    inputs = {
        "source": gaussian_vector(shape=shapes["source"]),
        "source_length": integer_vector(shape=shapes["source_length"],
                                        max_value=source_seq_len),
        "prev_hidden": gaussian_vector(shape=shapes["prev_hidden"]),
        "prev_coverage": gaussian_vector(shape=shapes["prev_coverage"]),
    }
    raw_scores = uniform_vector(shape=shapes["attention_scores"])
    # Scaled by the total over the whole array, so all scores together sum to 1.
    inputs["attention_scores"] = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(config_coverage)
    coverage_func = coverage.on(source, source_length, source_seq_len)
    updated_coverage = coverage_func(prev_hidden, attention_scores, prev_coverage)

    executor = updated_coverage.simple_bind(ctx=mx.cpu(), **shapes)
    for name, data in inputs.items():
        executor.arg_dict[name][:] = data

    result = executor.forward()
    new_coverage = result[0].asnumpy()
    assert new_coverage.shape == shapes["prev_coverage"]

    # this is needed to modulate the 0 input. The output changes according to the activation type used.
    modulated = mx.nd.Activation(mx.nd.zeros((1, 1)), act_type=act_type).asnumpy()
    # Per position: does any coverage unit match Activation(0)?
    position_matches = np.isclose(new_coverage, modulated, atol=1.e-6).any(axis=2)
    matches_per_sentence = position_matches.sum(axis=1)
    assert (matches_per_sentence == inputs["source_length"]).all()
def test_gru_coverage():
    """Run one forward pass of the "gru" coverage model and check its shape.

    The coverage object from ``sockeye.coverage.get_coverage`` is turned into
    a symbol, bound on CPU and fed randomly generated inputs. The only
    assertion is that the updated coverage has the same shape as the previous
    coverage.
    """
    encoder_num_hidden, decoder_num_hidden, coverage_num_hidden = 5, 5, 2
    source_seq_len, batch_size = 10, 4

    # Symbolic placeholders; their concrete shapes are listed in `shapes`.
    source = mx.sym.Variable("source")
    source_length = mx.sym.Variable("source_length")
    prev_hidden = mx.sym.Variable("prev_hidden")
    prev_coverage = mx.sym.Variable("prev_coverage")
    attention_scores = mx.sym.Variable("attention_scores")

    shapes = {
        # source: (batch_size, source_seq_len, encoder_num_hidden)
        "source": (batch_size, source_seq_len, encoder_num_hidden),
        # source_length: (batch_size,)
        "source_length": (batch_size,),
        # prev_hidden: (batch_size, decoder_num_hidden)
        "prev_hidden": (batch_size, decoder_num_hidden),
        # prev_coverage: (batch_size, source_seq_len, coverage_num_hidden)
        "prev_coverage": (batch_size, source_seq_len, coverage_num_hidden),
        # attention_scores: (batch_size, source_seq_len)
        "attention_scores": (batch_size, source_seq_len),
    }

    # Random inputs are drawn in the same order as the entries are listed.
    inputs = {
        "source": gaussian_vector(shape=shapes["source"]),
        "source_length": integer_vector(shape=shapes["source_length"],
                                        max_value=source_seq_len),
        "prev_hidden": gaussian_vector(shape=shapes["prev_hidden"]),
        "prev_coverage": gaussian_vector(shape=shapes["prev_coverage"]),
    }
    raw_scores = uniform_vector(shape=shapes["attention_scores"])
    # Scaled by the total over the whole array, so all scores together sum to 1.
    inputs["attention_scores"] = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(
        coverage_type="gru", coverage_num_hidden=coverage_num_hidden)
    coverage_func = coverage.on(source, source_length, source_seq_len)
    updated_coverage = coverage_func(prev_hidden, attention_scores, prev_coverage)

    executor = updated_coverage.simple_bind(ctx=mx.cpu(), **shapes)
    for name, data in inputs.items():
        executor.arg_dict[name][:] = data

    result = executor.forward()
    new_coverage = result[0].asnumpy()
    assert new_coverage.shape == shapes["prev_coverage"]
def _test_activation_coverage(act_type):
    """Run one forward pass of an activation-based coverage model and check it.

    A ``CoverageConfig`` (``num_hidden=2``, no layer normalization) is turned
    into a coverage symbol, bound on CPU and fed randomly generated inputs.
    Two things are asserted on the output:

    * it has the same shape as the previous coverage;
    * for every batch element, the number of source positions where at least
      one coverage unit is close to ``Activation(0)`` equals the corresponding
      entry of the generated source lengths.

    :param act_type: Used as the coverage ``type`` and as the ``act_type`` of
        the reference activation.
    """
    config_coverage = sockeye.coverage.CoverageConfig(
        type=act_type, num_hidden=2, layer_normalization=False)
    encoder_num_hidden, decoder_num_hidden = 5, 5
    source_seq_len, batch_size = 10, 4

    # Symbolic placeholders; their concrete shapes are listed in `shapes`.
    source = mx.sym.Variable("source")
    source_length = mx.sym.Variable("source_length")
    prev_hidden = mx.sym.Variable("prev_hidden")
    prev_coverage = mx.sym.Variable("prev_coverage")
    attention_scores = mx.sym.Variable("attention_scores")

    shapes = {
        # source: (batch_size, source_seq_len, encoder_num_hidden)
        "source": (batch_size, source_seq_len, encoder_num_hidden),
        # source_length: (batch_size,)
        "source_length": (batch_size,),
        # prev_hidden: (batch_size, decoder_num_hidden)
        "prev_hidden": (batch_size, decoder_num_hidden),
        # prev_coverage: (batch_size, source_seq_len, coverage_num_hidden)
        "prev_coverage": (batch_size, source_seq_len, config_coverage.num_hidden),
        # attention_scores: (batch_size, source_seq_len)
        "attention_scores": (batch_size, source_seq_len),
    }

    # Random inputs are drawn in the same order as the entries are listed.
    inputs = {
        "source": gaussian_vector(shape=shapes["source"]),
        "source_length": integer_vector(shape=shapes["source_length"],
                                        max_value=source_seq_len),
        "prev_hidden": gaussian_vector(shape=shapes["prev_hidden"]),
        "prev_coverage": gaussian_vector(shape=shapes["prev_coverage"]),
    }
    raw_scores = uniform_vector(shape=shapes["attention_scores"])
    # Scaled by the total over the whole array, so all scores together sum to 1.
    inputs["attention_scores"] = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(config_coverage)
    coverage_func = coverage.on(source, source_length, source_seq_len)
    updated_coverage = coverage_func(prev_hidden, attention_scores, prev_coverage)

    executor = updated_coverage.simple_bind(ctx=mx.cpu(), **shapes)
    for name, data in inputs.items():
        executor.arg_dict[name][:] = data

    result = executor.forward()
    new_coverage = result[0].asnumpy()
    assert new_coverage.shape == shapes["prev_coverage"]

    # this is needed to modulate the 0 input. The output changes according to the activation type used.
    modulated = mx.nd.Activation(mx.nd.zeros((1, 1)), act_type=act_type).asnumpy()
    # Per position: does any coverage unit match Activation(0)?
    position_matches = np.isclose(new_coverage, modulated, atol=1.e-6).any(axis=2)
    matches_per_sentence = position_matches.sum(axis=1)
    assert (matches_per_sentence == inputs["source_length"]).all()
def _test_gru_coverage():
    """Run one forward pass of the "gru" coverage model and check its output.

    A ``CoverageConfig`` (``type="gru"``, ``num_hidden=2``, no layer
    normalization) is turned into a coverage symbol, bound on CPU and fed
    randomly generated inputs. Two things are asserted on the output:

    * it has the same shape as the previous coverage;
    * for every batch element, the number of source positions where at least
      one coverage unit differs from 1 equals the corresponding entry of the
      generated source lengths.
    """
    config_coverage = sockeye.coverage.CoverageConfig(
        type="gru", num_hidden=2, layer_normalization=False)
    encoder_num_hidden, decoder_num_hidden = 5, 5
    source_seq_len, batch_size = 10, 4

    # Symbolic placeholders; their concrete shapes are listed in `shapes`.
    source = mx.sym.Variable("source")
    source_length = mx.sym.Variable("source_length")
    prev_hidden = mx.sym.Variable("prev_hidden")
    prev_coverage = mx.sym.Variable("prev_coverage")
    attention_scores = mx.sym.Variable("attention_scores")

    shapes = {
        # source: (batch_size, source_seq_len, encoder_num_hidden)
        "source": (batch_size, source_seq_len, encoder_num_hidden),
        # source_length: (batch_size,)
        "source_length": (batch_size,),
        # prev_hidden: (batch_size, decoder_num_hidden)
        "prev_hidden": (batch_size, decoder_num_hidden),
        # prev_coverage: (batch_size, source_seq_len, coverage_num_hidden)
        "prev_coverage": (batch_size, source_seq_len, config_coverage.num_hidden),
        # attention_scores: (batch_size, source_seq_len)
        "attention_scores": (batch_size, source_seq_len),
    }

    # Random inputs are drawn in the same order as the entries are listed.
    inputs = {
        "source": gaussian_vector(shape=shapes["source"]),
        "source_length": integer_vector(shape=shapes["source_length"],
                                        max_value=source_seq_len),
        "prev_hidden": gaussian_vector(shape=shapes["prev_hidden"]),
        "prev_coverage": gaussian_vector(shape=shapes["prev_coverage"]),
    }
    raw_scores = uniform_vector(shape=shapes["attention_scores"])
    # Scaled by the total over the whole array, so all scores together sum to 1.
    inputs["attention_scores"] = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(config_coverage)
    coverage_func = coverage.on(source, source_length, source_seq_len)
    updated_coverage = coverage_func(prev_hidden, attention_scores, prev_coverage)

    executor = updated_coverage.simple_bind(ctx=mx.cpu(), **shapes)
    for name, data in inputs.items():
        executor.arg_dict[name][:] = data

    result = executor.forward()
    new_coverage = result[0].asnumpy()
    assert new_coverage.shape == shapes["prev_coverage"]

    # Per position: does any coverage unit differ from 1?
    position_differs = (new_coverage != 1).any(axis=2)
    differing_per_sentence = position_differs.sum(axis=1)
    assert (differing_per_sentence == inputs["source_length"]).all()