def test_buffer():
    """Check batching, reset, resequencing and mini-batching of agent buffers.

    Builds three fake agent buffers and verifies that:

    * ``get_batch`` on the first observation field returns the expected rows
      for sequential and non-sequential sampling,
    * ``reset_agent`` leaves ``num_experiences`` at 0,
    * ``resequence_and_append`` from two agent buffers yields 20 continuous
      action entries of width 2 in the shared update buffer,
    * ``make_mini_batch`` keeps the same keys and returns one entry for the
      ``[0, 1)`` range.
    """
    first_agent = construct_fake_buffer(1)
    second_agent = construct_fake_buffer(2)
    third_agent = construct_fake_buffer(3)

    # Two sequences of length 1 from the first agent.
    obs_field = first_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(batch_size=2, training_length=1, sequential=True)
    expected = np.array([[171, 172, 173], [181, 182, 183]])
    assert_array(np.array(batch), expected)

    # Two back-to-back sequences of length 3 from the second agent.
    obs_field = second_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(batch_size=2, training_length=3, sequential=True)
    expected = np.array(
        [
            [231, 232, 233],
            [241, 242, 243],
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]
    )
    assert_array(np.array(batch), expected)

    # Non-sequential sampling: the expected windows overlap by two rows.
    obs_field = second_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(batch_size=2, training_length=3, sequential=False)
    expected = np.array(
        [
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]
    )
    assert_array(np.array(batch), expected)

    # Resetting an agent buffer must drop all of its experiences.
    first_agent.reset_agent()
    assert first_agent.num_experiences == 0

    # Merge agents 2 and 3 (in that order) into a single update buffer.
    update_buffer = AgentBuffer()
    for agent_buffer in (second_agent, third_agent):
        agent_buffer.resequence_and_append(
            update_buffer, batch_size=None, training_length=2
        )
    merged_actions = update_buffer[BufferKey.CONTINUOUS_ACTION]
    assert len(merged_actions) == 20
    assert np.array(update_buffer[BufferKey.CONTINUOUS_ACTION]).shape == (20, 2)

    # A one-element mini batch keeps every key of the update buffer.
    mini_batch = update_buffer.make_mini_batch(start=0, end=1)
    assert mini_batch.keys() == update_buffer.keys()
    assert np.array(mini_batch[BufferKey.CONTINUOUS_ACTION]).shape == (1, 2)
def test_buffer():
    """Check batching, reset and update-buffer appends on a processing buffer.

    Uses the fake processing buffer (indexed by agent id) and verifies that:

    * ``get_batch`` on ``"vector_observation"`` returns the expected rows for
      sequential and non-sequential sampling,
    * ``reset_agent`` on one agent's buffer leaves it with length 0,
    * ``append_to_update_buffer`` for agents 3 and 2 yields 20 ``"action"``
      entries of width 2,
    * ``make_mini_batch`` keeps the same keys and returns one entry for the
      ``[0, 1)`` range.
    """
    processing_buffer = construct_fake_processing_buffer()

    # Two sequences of length 1 from agent 1.
    obs_field = processing_buffer[1]["vector_observation"]
    batch = obs_field.get_batch(batch_size=2, training_length=1, sequential=True)
    expected = np.array([[171, 172, 173], [181, 182, 183]])
    assert_array(np.array(batch), expected)

    # Two back-to-back sequences of length 3 from agent 2.
    obs_field = processing_buffer[2]["vector_observation"]
    batch = obs_field.get_batch(batch_size=2, training_length=3, sequential=True)
    expected = np.array(
        [
            [231, 232, 233],
            [241, 242, 243],
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]
    )
    assert_array(np.array(batch), expected)

    # Non-sequential sampling: the expected windows overlap by two rows.
    obs_field = processing_buffer[2]["vector_observation"]
    batch = obs_field.get_batch(batch_size=2, training_length=3, sequential=False)
    expected = np.array(
        [
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]
    )
    assert_array(np.array(batch), expected)

    # Resetting agent 4's buffer must leave it empty.
    processing_buffer[4].reset_agent()
    assert len(processing_buffer[4]) == 0

    # Append agent 3 first, then agent 2, into a single update buffer.
    update_buffer = AgentBuffer()
    for agent_id in (3, 2):
        processing_buffer.append_to_update_buffer(
            update_buffer, agent_id, batch_size=None, training_length=2
        )
    merged_actions = update_buffer["action"]
    assert len(merged_actions) == 20
    assert np.array(update_buffer["action"]).shape == (20, 2)

    # A one-element mini batch keeps every key of the update buffer.
    mini_batch = update_buffer.make_mini_batch(start=0, end=1)
    assert mini_batch.keys() == update_buffer.keys()
    assert np.array(mini_batch["action"]).shape == (1, 2)
def test_buffer():
    """Check batching, padding, reset, resequencing and mini-batching.

    Builds three fake agent buffers and verifies that:

    * ``get_batch`` on the first observation field returns the expected rows
      for sequential and non-sequential sampling,
    * with ``batch_size=None`` and ``training_length=4`` the expected output
      ends with three all-zero padding rows,
    * group entries come back as lists of lists, with empty lists in the
      three padded positions,
    * ``reset_agent`` leaves ``num_experiences`` at 0,
    * ``resequence_and_append`` from two agent buffers yields 20 continuous
      action entries of width 2 in the shared update buffer,
    * ``make_mini_batch`` keeps the same keys, returns ``AgentBufferField``
      values, and returns one entry for the ``[0, 1)`` range.
    """
    first_agent = construct_fake_buffer(1)
    second_agent = construct_fake_buffer(2)
    third_agent = construct_fake_buffer(3)

    # Test get_batch: two sequences of length 1.
    obs_field = first_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(batch_size=2, training_length=1, sequential=True)
    expected = np.array([[171, 172, 173], [181, 182, 183]], dtype=np.float32)
    assert_array(np.array(batch), expected)

    # Test get_batch: two back-to-back sequences of length 3.
    obs_field = second_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(batch_size=2, training_length=3, sequential=True)
    expected = np.array(
        [
            [231, 232, 233],
            [241, 242, 243],
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ],
        dtype=np.float32,
    )
    assert_array(np.array(batch), expected)

    # Test get_batch: non-sequential, the expected windows overlap by two rows.
    obs_field = second_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(batch_size=2, training_length=3, sequential=False)
    expected = np.array(
        [
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
        ]
    )
    assert_array(np.array(batch), expected)

    # Test padding: nine real rows followed by three zero rows.
    obs_field = second_agent[ObsUtil.get_name_at(0)]
    batch = obs_field.get_batch(
        batch_size=None, training_length=4, sequential=True
    )
    expected = np.array(
        [
            [201, 202, 203],
            [211, 212, 213],
            [221, 222, 223],
            [231, 232, 233],
            [241, 242, 243],
            [251, 252, 253],
            [261, 262, 263],
            [271, 272, 273],
            [281, 282, 283],
            [0, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ]
    )
    assert_array(np.array(batch), expected)

    # Test group entries return Lists of Lists. Make sure to pad properly!
    group_field = second_agent[BufferKey.GROUP_CONTINUOUS_ACTION]
    group_batch = group_field.get_batch(
        batch_size=None, training_length=4, sequential=True
    )
    assert all(len(entry) == 3 for entry in group_batch[:-3])
    assert all(len(entry) == 0 for entry in group_batch[-3:])

    # Resetting an agent buffer must drop all of its experiences.
    first_agent.reset_agent()
    assert first_agent.num_experiences == 0

    # Merge agents 2 and 3 (in that order) into a single update buffer.
    update_buffer = AgentBuffer()
    for agent_buffer in (second_agent, third_agent):
        agent_buffer.resequence_and_append(
            update_buffer, batch_size=None, training_length=2
        )
    merged_actions = update_buffer[BufferKey.CONTINUOUS_ACTION]
    assert len(merged_actions) == 20
    assert np.array(update_buffer[BufferKey.CONTINUOUS_ACTION]).shape == (20, 2)

    mini_batch = update_buffer.make_mini_batch(start=0, end=1)
    assert mini_batch.keys() == update_buffer.keys()
    # Make sure the values of the mini batch are AgentBufferField
    assert all(isinstance(field, AgentBufferField) for field in mini_batch.values())
    assert np.array(mini_batch[BufferKey.CONTINUOUS_ACTION]).shape == (1, 2)