def test_shared_value_function_policy_for_discrete_action_space_with_time_rank_folding(
        self):
    """
    Tests a SharedValueFunctionPolicy over a discrete (IntBox) action space where the
    NN, the action-adapter AND the value-function branch each fold the time rank into
    the batch rank before their dense layers and unfold it again afterwards.
    All expected outputs are computed manually with numpy from the policy's variables.
    """
    # state_space with batch- and time-ranks; the NN is a simple single
    # fc-layer leaky-ReLU network (weights/biases are randomly initialized).
    state_space = FloatBox(shape=(3, ), add_batch_rank=True, add_time_rank=True)
    # action_space (4 possible actions), also with batch- and time-ranks.
    action_space = IntBox(4, add_batch_rank=True, add_time_rank=True)

    # Policy with batch-apply (fold/unfold) over the entire pipeline:
    # NN + ActionAdapter + value-function node.
    network_spec = config_from_path("configs/test_lrelu_nn.json")
    # Add folding and unfolding to the network itself.
    network_spec["fold_time_rank"] = True
    network_spec["unfold_time_rank"] = True
    shared_value_function_policy = SharedValueFunctionPolicy(
        network_spec=network_spec,
        action_adapter_spec=dict(fold_time_rank=True, unfold_time_rank=True),
        action_space=action_space,
        value_fold_time_rank=True,
        value_unfold_time_rank=True)
    test = ComponentTest(
        component=shared_value_function_policy,
        input_spaces=dict(
            nn_inputs=state_space,
            actions=action_space,
        ),
        action_space=action_space,
    )
    policy_params = test.read_variable_values(
        shared_value_function_policy.variable_registry)

    # Some NN inputs: batch=2, time=3; fold manually into a (6, 3) batch for
    # the reference forward pass.
    states = state_space.sample(size=(2, 3))
    states_folded = np.reshape(states, newshape=(6, 3))

    # Expected raw NN-output (leaky-ReLU, alpha=0.1), unfolded back to the
    # (batch, time, ...) shape of the input.
    expected_nn_output = np.reshape(
        relu(
            np.matmul(
                states_folded,
                ComponentTest.read_params(
                    "shared-value-function-policy/test-network/hidden-layer",
                    policy_params)), 0.1),
        newshape=states.shape)
    test.test(("get_nn_outputs", states),
              expected_outputs=expected_nn_output,
              decimals=5)

    # Raw action-adapter (logits) output; expected shape=(2, 3, 4):
    # 2=batch, 3=time, 4=action categories.
    expected_action_layer_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params(
            "shared-value-function-policy/action-adapter-0/action-network/action-layer/",
            policy_params))
    expected_action_layer_output = np.reshape(expected_action_layer_output,
                                              newshape=(2, 3, 4))
    test.test(
        ("get_adapter_outputs", states),
        expected_outputs=dict(adapter_outputs=expected_action_layer_output,
                              nn_outputs=expected_nn_output),
        decimals=5)

    # State-values: one per item in the (batch, time) grid.
    expected_state_value_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params(
            "shared-value-function-policy/value-function-node/dense-layer",
            policy_params))
    expected_state_value_output_unfolded = np.reshape(
        expected_state_value_output, newshape=(2, 3, 1))
    test.test(("get_state_values", states, ["state_values"]),
              expected_outputs=dict(
                  state_values=expected_state_value_output_unfolded),
              decimals=5)

    # Already unfolded to (2, 3, 4) above; no further reshape needed.
    expected_action_layer_output_unfolded = expected_action_layer_output
    test.test(("get_state_values_adapter_outputs_and_parameters", states,
               ["state_values", "adapter_outputs"]),
              expected_outputs=dict(
                  state_values=expected_state_value_output_unfolded,
                  adapter_outputs=expected_action_layer_output_unfolded),
              decimals=5)

    # Parameters (probabilities): softmaxed logits, clipped from below at
    # SMALL_NUMBER to avoid log(0) later.
    expected_parameters_output = np.maximum(
        softmax(expected_action_layer_output_unfolded, axis=-1), SMALL_NUMBER)
    test.test(("get_adapter_outputs_and_parameters", states,
               ["adapter_outputs", "parameters", "nn_outputs"]),
              expected_outputs=dict(
                  nn_outputs=expected_nn_output,
                  adapter_outputs=expected_action_layer_output_unfolded,
                  parameters=expected_parameters_output),
              decimals=5)

    print("Probs: {}".format(expected_parameters_output))

    # Greedy actions = argmax over the action dimension.
    expected_actions = np.argmax(expected_action_layer_output_unfolded,
                                 axis=-1)
    test.test(("get_action", states, ["action"]),
              expected_outputs=dict(action=expected_actions))

    # Fetch action AND log-likelihood in one call, then cross-check both.
    out = test.test(("get_action_and_log_likelihood", states))
    action = out["action"]
    llh = out["log_likelihood"]

    # Action log-llh: gather each sampled action's probability along the last
    # axis, then take the log (vectorized; replaces manual per-index lookup).
    expected_action_log_llh_output = np.log(
        np.take_along_axis(expected_parameters_output, action[..., None],
                           axis=-1)[..., 0])
    test.test(("get_log_likelihood", [states, action]),
              expected_outputs=dict(
                  log_likelihood=expected_action_log_llh_output,
                  adapter_outputs=expected_action_layer_output_unfolded),
              decimals=5)
    recursive_assert_almost_equal(expected_action_log_llh_output,
                                  llh,
                                  decimals=5)

    # Deterministic sample: only dtype and (unfolded) shape can be checked.
    out = test.test(("get_deterministic_action", states),
                    expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32
                    or (out["action"].dtype == np.int64))
    self.assertTrue(
        out["action"].shape == (2, 3))  # Make sure output is unfolded.

    # Stochastic sample.
    out = test.test(("get_stochastic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32
                    or (out["action"].dtype == np.int64))
    self.assertTrue(
        out["action"].shape == (2, 3))  # Make sure output is unfolded.

    # Distribution's entropy.
    out = test.test(("get_entropy", states), expected_outputs=None)
    self.assertTrue(out["entropy"].dtype == np.float32)
    self.assertTrue(
        out["entropy"].shape == (2, 3))  # Make sure output is unfolded.
def test_shared_value_function_policy_for_discrete_action_space(self):
    """
    Tests a SharedValueFunctionPolicy over a flat IntBox(3) action space,
    comparing every policy output against a manual numpy forward pass.
    """
    # Single fc-layer leaky-ReLU network; 4 input nodes.
    state_space = FloatBox(shape=(4, ), add_batch_rank=True)
    # 3 possible actions.
    action_space = IntBox(3, add_batch_rank=True)

    # Policy with baseline (shared value-function) action adapter.
    policy = SharedValueFunctionPolicy(
        network_spec=config_from_path("configs/test_lrelu_nn.json"),
        action_space=action_space)
    ctest = ComponentTest(
        component=policy,
        input_spaces=dict(
            nn_inputs=state_space,
            actions=action_space,
        ),
        action_space=action_space,
    )
    var_values = ctest.read_variable_values(policy.variable_registry)

    # A batch of 3 sample states (4 input nodes each).
    input_batch = state_space.sample(size=3)

    # Reference NN output (leaky-ReLU with alpha=0.1).
    nn_out = relu(
        np.matmul(
            input_batch,
            ComponentTest.read_params(
                "shared-value-function-policy/test-network/hidden-layer",
                var_values)), 0.1)
    ctest.test(("get_nn_outputs", input_batch),
               expected_outputs=nn_out,
               decimals=5)

    # Raw action-adapter (logits) output.
    logits_out = np.matmul(
        nn_out,
        ComponentTest.read_params(
            "shared-value-function-policy/action-adapter-0/action-network/action-layer/",
            var_values))
    ctest.test(("get_adapter_outputs", input_batch),
               expected_outputs=dict(adapter_outputs=logits_out,
                                     nn_outputs=nn_out),
               decimals=5)

    # One state-value per batch item.
    value_out = np.matmul(
        nn_out,
        ComponentTest.read_params(
            "shared-value-function-policy/value-function-node/dense-layer",
            var_values))
    ctest.test(("get_state_values", input_batch, ["state_values"]),
               expected_outputs=dict(state_values=value_out),
               decimals=5)

    # Combined state-values + logits fetch.
    ctest.test(("get_state_values_adapter_outputs_and_parameters",
                input_batch, ["state_values", "adapter_outputs"]),
               expected_outputs=dict(state_values=value_out,
                                     adapter_outputs=logits_out),
               decimals=5)

    # Parameters (probabilities): softmaxed logits, floored at SMALL_NUMBER.
    probs_out = np.maximum(softmax(logits_out, axis=-1), SMALL_NUMBER)
    ctest.test(("get_adapter_outputs_and_parameters", input_batch,
                ["adapter_outputs", "parameters"]),
               expected_outputs=dict(adapter_outputs=logits_out,
                                     parameters=probs_out),
               decimals=5)

    print("Probs: {}".format(probs_out))

    # Greedy action = argmax over the logits.
    greedy_actions = np.argmax(logits_out, axis=-1)
    ctest.test(("get_action", input_batch, ["action"]),
               expected_outputs=dict(action=greedy_actions))

    # Fetch action AND its log-likelihood in one call, then cross-check.
    fetched = ctest.test(("get_action_and_log_likelihood", input_batch))
    action = fetched["action"]
    llh = fetched["log_likelihood"]

    # Expected log-llh: log of the sampled action's probability, per item.
    expected_llh = np.log(
        np.array([probs_out[i][action[i]] for i in range(3)]))
    ctest.test(("get_log_likelihood", [input_batch, action],
                "log_likelihood"),
               expected_outputs=dict(log_likelihood=expected_llh),
               decimals=5)
    recursive_assert_almost_equal(expected_llh, llh)

    # Stochastic sample: only dtype/shape can be verified.
    fetched = ctest.test(("get_stochastic_action", input_batch),
                         expected_outputs=None)
    self.assertTrue(fetched["action"].dtype == np.int32
                    or (fetched["action"].dtype == np.int64))
    self.assertTrue(fetched["action"].shape == (3, ))

    # Deterministic sample.
    fetched = ctest.test(("get_deterministic_action", input_batch),
                         expected_outputs=None)
    self.assertTrue(fetched["action"].dtype == np.int32
                    or (fetched["action"].dtype == np.int64))
    self.assertTrue(fetched["action"].shape == (3, ))

    # Distribution's entropy.
    fetched = ctest.test(("get_entropy", input_batch),
                         expected_outputs=None)
    self.assertTrue(fetched["entropy"].dtype == np.float32)
    self.assertTrue(fetched["entropy"].shape == (3, ))
def test_shared_value_function_policy_for_discrete_container_action_space_with_time_rank_folding(
        self):
    """
    Tests a SharedValueFunctionPolicy over a container (Tuple/Dict) action space
    where the NN folds the time rank and the action-adapter and value-function
    branch unfold it again. Expected values are computed manually with numpy.

    NOTE(review): this test uses older getter names (`get_nn_output`,
    `get_action_layer_output`, `get_logits_probabilities_log_probs`, `.variables`)
    than the `get_nn_outputs`/`get_adapter_outputs` style used by sibling tests
    in this file — presumably written against an earlier API version; confirm
    against the current Policy component before relying on it.
    """
    # state_space with batch- and time-ranks; NN is a simple single
    # fc-layer leaky-ReLU network (random weights/biases).
    state_space = FloatBox(shape=(6, ),
                           add_batch_rank=True,
                           add_time_rank=True)
    # Container action space: Tuple(IntBox(2), IntBox(3), Dict(a=IntBox(4))).
    action_space = Tuple(IntBox(2),
                         IntBox(3),
                         Dict(a=IntBox(4), ),
                         add_batch_rank=True,
                         add_time_rank=True)
    # Flat float mirror of the action space (one FloatBox per categorical),
    # used for the probabilities/parameters/logits input spaces below.
    flat_float_action_space = Tuple(FloatBox(shape=(2, )),
                                    FloatBox(shape=(3, )),
                                    Dict(a=FloatBox(shape=(4, )), ),
                                    add_batch_rank=True,
                                    add_time_rank=True)
    # Policy: NN folds the time rank; adapter and value-branch unfold it.
    network_spec = config_from_path("configs/test_lrelu_nn.json")
    network_spec["fold_time_rank"] = True
    shared_value_function_policy = SharedValueFunctionPolicy(
        network_spec=network_spec,
        action_adapter_spec=dict(unfold_time_rank=True),
        action_space=action_space,
        value_unfold_time_rank=True)
    test = ComponentTest(
        component=shared_value_function_policy,
        input_spaces=dict(nn_input=state_space,
                          actions=action_space,
                          probabilities=flat_float_action_space,
                          parameters=flat_float_action_space,
                          logits=flat_float_action_space),
        action_space=action_space,
    )
    policy_params = test.read_variable_values(
        shared_value_function_policy.variables)
    # Common variable-scope prefix for the three action adapters (0, 1, 2).
    base_scope = "shared-value-function-policy/action-adapter-"

    # Some NN inputs: batch=2, time=3; folded manually to (6, 6).
    states = state_space.sample(size=(2, 3))
    states_folded = np.reshape(states, newshape=(6, 6))

    # Expected raw NN-output (still folded; leaky-ReLU, alpha=0.1).
    expected_nn_output = relu(
        np.matmul(
            states_folded, policy_params[
                "shared-value-function-policy/test-network/hidden-layer/dense/kernel"]
        ), 0.1)
    test.test(("get_nn_output", states),
              expected_outputs=dict(output=expected_nn_output),
              decimals=5)

    # Raw per-adapter logits (still folded): one matmul per container component.
    expected_action_layer_output = tuple([
        np.matmul(
            expected_nn_output, policy_params[base_scope +
                                              "0/action-network/action-layer/dense/kernel"]),
        np.matmul(
            expected_nn_output, policy_params[base_scope +
                                              "1/action-network/action-layer/dense/kernel"]),
        dict(a=np.matmul(
            expected_nn_output, policy_params[base_scope +
                                              "2/action-network/action-layer/dense/kernel"]))
    ])
    # Unfold each component back to (batch=2, time=3, num-categories).
    expected_action_layer_output_unfolded = tuple([
        np.reshape(expected_action_layer_output[0], newshape=(2, 3, 2)),
        np.reshape(expected_action_layer_output[1], newshape=(2, 3, 3)),
        dict(a=np.reshape(expected_action_layer_output[2]["a"],
                          newshape=(2, 3, 4)))
    ])
    test.test(("get_action_layer_output", states),
              expected_outputs=dict(
                  output=expected_action_layer_output_unfolded),
              decimals=5)

    # State-values: one per item; unfolded to (2, 3, 1).
    expected_state_value_output = np.matmul(
        expected_nn_output, policy_params[
            "shared-value-function-policy/value-function-node/dense-layer/dense/kernel"]
    )
    expected_state_value_output_unfolded = np.reshape(
        expected_state_value_output, newshape=(2, 3, 1))
    test.test(("get_state_values", states),
              expected_outputs=dict(
                  state_values=expected_state_value_output_unfolded),
              decimals=5)

    # Combined state-values + logits fetch.
    test.test(("get_state_values_logits_probabilities_log_probs", states,
               ["state_values", "logits"]),
              expected_outputs=dict(
                  state_values=expected_state_value_output_unfolded,
                  logits=expected_action_layer_output_unfolded),
              decimals=5)

    # Parameters (probabilities): softmax per container component.
    expected_probabilities_output = tuple([
        softmax(expected_action_layer_output_unfolded[0], axis=-1),
        softmax(expected_action_layer_output_unfolded[1], axis=-1),
        dict(a=softmax(expected_action_layer_output_unfolded[2]["a"],
                       axis=-1))
    ])
    test.test(
        ("get_logits_probabilities_log_probs", states,
         ["logits", "probabilities"]),
        expected_outputs=dict(logits=expected_action_layer_output_unfolded,
                              probabilities=expected_probabilities_output),
        decimals=5)

    print("Probs: {}".format(expected_probabilities_output))

    # Greedy actions: per-component argmax over the category axis.
    expected_actions = tuple([
        np.argmax(expected_action_layer_output_unfolded[0], axis=-1),
        np.argmax(expected_action_layer_output_unfolded[1], axis=-1),
        dict(a=np.argmax(expected_action_layer_output_unfolded[2]["a"],
                         axis=-1), )
    ])
    test.test(("get_action", states),
              expected_outputs=dict(action=expected_actions))

    # Action log-probs: for every container component, look up the chosen
    # action's probability at each (batch, time) position and take the log.
    # Index pattern: probs[component][batch][time][chosen-action].
    expected_action_log_prob_output = tuple([
        np.log(
            np.array([[
                expected_probabilities_output[0][0][0][expected_actions[0]
                                                       [0][0]],
                expected_probabilities_output[0][0][1][expected_actions[0]
                                                       [0][1]],
                expected_probabilities_output[0][0][2][expected_actions[0]
                                                       [0][2]],
            ], [
                expected_probabilities_output[0][1][0][
                    expected_actions[0][1][0]],
                expected_probabilities_output[0][1][1][
                    expected_actions[0][1][1]],
                expected_probabilities_output[0][1][2][
                    expected_actions[0][1][2]],
            ]])),
        np.log(
            np.array([[
                expected_probabilities_output[1][0][0][expected_actions[1]
                                                       [0][0]],
                expected_probabilities_output[1][0][1][expected_actions[1]
                                                       [0][1]],
                expected_probabilities_output[1][0][2][expected_actions[1]
                                                       [0][2]],
            ], [
                expected_probabilities_output[1][1][0][
                    expected_actions[1][1][0]],
                expected_probabilities_output[1][1][1][
                    expected_actions[1][1][1]],
                expected_probabilities_output[1][1][2][
                    expected_actions[1][1][2]],
            ]])),
        dict(a=np.log(
            np.array([[
                expected_probabilities_output[2]["a"][0][0][
                    expected_actions[2]["a"][0][0]],
                expected_probabilities_output[2]["a"][0][1][
                    expected_actions[2]["a"][0][1]],
                expected_probabilities_output[2]["a"][0][2][
                    expected_actions[2]["a"][0][2]],
            ], [
                expected_probabilities_output[2]["a"][1][0][
                    expected_actions[2]["a"][1][0]],
                expected_probabilities_output[2]["a"][1][1][
                    expected_actions[2]["a"][1][1]],
                expected_probabilities_output[2]["a"][1][2][
                    expected_actions[2]["a"][1][2]],
            ]])))
    ])
    test.test(("get_action_log_probs", [states, expected_actions]),
              expected_outputs=dict(
                  action_log_probs=expected_action_log_prob_output,
                  logits=expected_action_layer_output_unfolded),
              decimals=5)

    # Deterministic sample: only dtype and (unfolded) shape can be checked.
    out = test.test(("get_deterministic_action", states),
                    expected_outputs=None)
    self.assertTrue(out["action"][0].dtype == np.int32)
    self.assertTrue(
        out["action"][0].shape == (2, 3))  # Make sure output is unfolded.
    self.assertTrue(out["action"][1].dtype == np.int32)
    self.assertTrue(
        out["action"][1].shape == (2, 3))  # Make sure output is unfolded.
    self.assertTrue(out["action"][2]["a"].dtype == np.int32)
    self.assertTrue(out["action"][2]["a"].shape == (
        2, 3))  # Make sure output is unfolded.

    # Stochastic sample.
    out = test.test(("get_stochastic_action", states), expected_outputs=None)
    self.assertTrue(out["action"][0].dtype == np.int32)
    self.assertTrue(
        out["action"][0].shape == (2, 3))  # Make sure output is unfolded.
    self.assertTrue(out["action"][1].dtype == np.int32)
    self.assertTrue(
        out["action"][1].shape == (2, 3))  # Make sure output is unfolded.
    self.assertTrue(out["action"][2]["a"].dtype == np.int32)
    self.assertTrue(out["action"][2]["a"].shape == (
        2, 3))  # Make sure output is unfolded.

    # Distribution's entropy.
    out = test.test(("get_entropy", states), expected_outputs=None)
    self.assertTrue(out["entropy"][0].dtype == np.float32)
    self.assertTrue(
        out["entropy"][0].shape == (2, 3))  # Make sure output is unfolded.
    self.assertTrue(out["entropy"][1].dtype == np.float32)
    self.assertTrue(
        out["entropy"][1].shape == (2, 3))  # Make sure output is unfolded.
    self.assertTrue(out["entropy"][2]["a"].dtype == np.float32)
    self.assertTrue(out["entropy"][2]["a"].shape == (
        2, 3))  # Make sure output is unfolded.
def test_shared_value_function_policy_for_discrete_action_space_old_api(self):
    """
    Same scenario as `test_shared_value_function_policy_for_discrete_action_space`
    (flat IntBox(3) action space), but driven through the older getter API
    (`get_nn_output`, `get_action_layer_output`,
    `get_state_values_logits_probabilities_log_probs`, `.variables`).

    NOTE(review): this method previously carried the exact same name as the
    newer-API test, so Python's class-body semantics silently discarded one of
    the two (only the last definition of a class attribute survives) — renamed
    so that both tests are discovered and run.
    """
    # state_space: simple single fc-layer leaky-ReLU network, 4 input nodes.
    state_space = FloatBox(shape=(4, ), add_batch_rank=True)
    # action_space (3 possible actions).
    action_space = IntBox(3, add_batch_rank=True)
    # Flat float mirror of the action space for probabilities/logits spaces.
    flat_float_action_space = FloatBox(shape=(3, ), add_batch_rank=True)

    # Policy with baseline action adapter.
    shared_value_function_policy = SharedValueFunctionPolicy(
        network_spec=config_from_path("configs/test_lrelu_nn.json"),
        action_space=action_space)
    test = ComponentTest(
        component=shared_value_function_policy,
        input_spaces=dict(nn_input=state_space,
                          actions=action_space,
                          probabilities=flat_float_action_space,
                          logits=flat_float_action_space),
        action_space=action_space,
    )
    policy_params = test.read_variable_values(
        shared_value_function_policy.variables)

    # Some NN inputs (4 input nodes, batch size=3).
    states = state_space.sample(size=3)

    # Expected raw NN-output (leaky-ReLU, alpha=0.1).
    expected_nn_output = relu(
        np.matmul(
            states, policy_params[
                "shared-value-function-policy/test-network/hidden-layer/dense/kernel"]
        ), 0.1)
    test.test(("get_nn_output", states),
              expected_outputs=dict(output=expected_nn_output),
              decimals=5)

    # Raw action-adapter (logits) output: one logit per action category.
    expected_action_layer_output = np.matmul(
        expected_nn_output, policy_params[
            "shared-value-function-policy/action-adapter-0/action-network/action-layer/dense/kernel"]
    )
    test.test(("get_action_layer_output", states),
              expected_outputs=dict(output=expected_action_layer_output),
              decimals=5)

    # State-values: one for each item in the batch.
    expected_state_value_output = np.matmul(
        expected_nn_output, policy_params[
            "shared-value-function-policy/value-function-node/dense-layer/dense/kernel"]
    )
    test.test(
        ("get_state_values", states),
        expected_outputs=dict(state_values=expected_state_value_output),
        decimals=5)

    # Combined state-values + logits fetch.
    test.test(
        ("get_state_values_logits_probabilities_log_probs", states,
         ["state_values", "logits"]),
        expected_outputs=dict(state_values=expected_state_value_output,
                              logits=expected_action_layer_output),
        decimals=5)

    # Parameters (probabilities): softmaxed logits.
    expected_probabilities_output = softmax(expected_action_layer_output,
                                            axis=-1)
    test.test(
        ("get_logits_probabilities_log_probs", states,
         ["logits", "probabilities"]),
        expected_outputs=dict(logits=expected_action_layer_output,
                              probabilities=expected_probabilities_output),
        decimals=5)

    print("Probs: {}".format(expected_probabilities_output))

    # Greedy action = argmax over the logits.
    expected_actions = np.argmax(expected_action_layer_output, axis=-1)
    test.test(("get_action", states),
              expected_outputs=dict(action=expected_actions))

    # Stochastic sample: only dtype/shape can be verified.
    out = test.test(("get_stochastic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32)
    self.assertTrue(out["action"].shape == (3, ))

    # Deterministic sample.
    out = test.test(("get_deterministic_action", states),
                    expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32)
    self.assertTrue(out["action"].shape == (3, ))

    # Distribution's entropy.
    out = test.test(("get_entropy", states), expected_outputs=None)
    self.assertTrue(out["entropy"].dtype == np.float32)
    self.assertTrue(out["entropy"].shape == (3, ))
def test_shared_value_function_policy_for_discrete_container_action_space(
        self):
    """
    Tests a SharedValueFunctionPolicy over a nested Dict action space
    (a: 2 categories, b/b1: 3, b/b2: 4) against a manual numpy forward pass.
    """
    # Simple single fc-layer leaky-ReLU network; 5 input nodes.
    state_space = FloatBox(shape=(5, ), add_batch_rank=True)
    # Complex nested container action space.
    action_space = dict(type="dict",
                        a=IntBox(2),
                        b=Dict(b1=IntBox(3), b2=IntBox(4)),
                        add_batch_rank=True)
    # Flat float mirror of the action space (one FloatBox per categorical).
    flat_float_action_space = dict(type="dict",
                                   a=FloatBox(shape=(2, )),
                                   b=Dict(b1=FloatBox(shape=(3, )),
                                          b2=FloatBox(shape=(4, ))),
                                   add_batch_rank=True)

    # Policy with baseline action adapter.
    policy = SharedValueFunctionPolicy(
        network_spec=config_from_path("configs/test_lrelu_nn.json"),
        action_space=action_space)
    ctest = ComponentTest(
        component=policy,
        input_spaces=dict(nn_input=state_space,
                          actions=action_space,
                          probabilities=flat_float_action_space,
                          parameters=flat_float_action_space,
                          logits=flat_float_action_space),
        action_space=action_space,
    )
    var_values = ctest.read_variable_values(policy.variables)
    scope_prefix = "shared-value-function-policy/action-adapter-"

    # A batch of 2 sample states.
    input_batch = state_space.sample(size=2)

    # Reference NN output (leaky-ReLU with alpha=0.1).
    hidden_kernel = var_values[
        "shared-value-function-policy/test-network/hidden-layer/dense/kernel"]
    nn_out = relu(np.matmul(input_batch, hidden_kernel), 0.1)
    ctest.test(("get_nn_output", input_batch),
               expected_outputs=dict(output=nn_out),
               decimals=5)

    # Per-component action-layer outputs (adapters 0, 1, 2 map to a, b1, b2).
    kernel_a = var_values[scope_prefix +
                          "0/action-network/action-layer/dense/kernel"]
    kernel_b1 = var_values[scope_prefix +
                           "1/action-network/action-layer/dense/kernel"]
    kernel_b2 = var_values[scope_prefix +
                           "2/action-network/action-layer/dense/kernel"]
    adapter_out = dict(a=np.matmul(nn_out, kernel_a),
                       b=dict(b1=np.matmul(nn_out, kernel_b1),
                              b2=np.matmul(nn_out, kernel_b2)))
    ctest.test(("get_action_layer_output", input_batch),
               expected_outputs=dict(output=adapter_out),
               decimals=5)

    # One state-value per batch item.
    value_kernel = var_values[
        "shared-value-function-policy/value-function-node/dense-layer/dense/kernel"]
    value_out = np.matmul(nn_out, value_kernel)
    ctest.test(("get_state_values", input_batch),
               expected_outputs=dict(state_values=value_out),
               decimals=5)

    # Combined state-values + logits fetch.
    ctest.test(("get_state_values_logits_probabilities_log_probs",
                input_batch, ["state_values", "logits"]),
               expected_outputs=dict(state_values=value_out,
                                     logits=adapter_out),
               decimals=5)

    # Parameters (probabilities): softmax per container component.
    probs = dict(a=softmax(adapter_out["a"], axis=-1),
                 b=dict(b1=softmax(adapter_out["b"]["b1"], axis=-1),
                        b2=softmax(adapter_out["b"]["b2"], axis=-1)))
    ctest.test(("get_logits_probabilities_log_probs", input_batch,
                ["logits", "probabilities"]),
               expected_outputs=dict(logits=adapter_out,
                                     probabilities=probs),
               decimals=5)

    print("Probs: {}".format(probs))

    # Greedy actions: per-component argmax.
    best_actions = dict(a=np.argmax(adapter_out["a"], axis=-1),
                        b=dict(b1=np.argmax(adapter_out["b"]["b1"], axis=-1),
                               b2=np.argmax(adapter_out["b"]["b2"],
                                            axis=-1)))
    ctest.test(("get_action", input_batch),
               expected_outputs=dict(action=best_actions))

    # Sampling ops: only dtype and shape can be verified
    # (stochastic first, then deterministic, matching the fetch order).
    for api_method in ("get_stochastic_action", "get_deterministic_action"):
        fetched = ctest.test((api_method, input_batch),
                             expected_outputs=None)
        for sample in (fetched["action"]["a"], fetched["action"]["b"]["b1"],
                       fetched["action"]["b"]["b2"]):
            self.assertTrue(sample.dtype == np.int32)
            self.assertTrue(sample.shape == (2, ))

    # Distribution's entropy: one float per batch item and component.
    fetched = ctest.test(("get_entropy", input_batch),
                         expected_outputs=None)
    for entropy in (fetched["entropy"]["a"], fetched["entropy"]["b"]["b1"],
                    fetched["entropy"]["b"]["b2"]):
        self.assertTrue(entropy.dtype == np.float32)
        self.assertTrue(entropy.shape == (2, ))