def testGamma(self):
    """Checks `common.discounted_future_sum` across several gamma values.

    One 3x4 input is summed with the third argument fixed at 3 while the
    second argument (gamma) takes the values 0.0, 0.9, 1.0 and 2.0. Each
    result is compared with a NumPy reference built from the input and
    its zero-padded left shifts by one and two columns.
    """
    rows = [[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
    inputs = tf.constant(rows, dtype=tf.float32)

    # Build every op up front, in the same order they are asserted below.
    summed = [
        common.discounted_future_sum(inputs, gamma, 3)
        for gamma in (0.0, 0.9, 1.0, 2.0)
    ]

    grid = np.array(rows)

    def shifted(offset):
        # Drop the first `offset` columns and zero-pad the right edge so the
        # shape stays the same as `grid`.
        return np.pad(grid[:, offset:], ((0, 0), (0, offset)), 'constant')

    one_ahead = shifted(1)
    two_ahead = shifted(2)

    # Reference values, listed in the same gamma order as `summed`.
    wanted = [
        grid,                                        # gamma = 0.0
        grid + 0.9 * one_ahead + 0.81 * two_ahead,   # gamma = 0.9
        grid + one_ahead + two_ahead,                # gamma = 1.0
        grid + 2 * one_ahead + 4 * two_ahead,        # gamma = 2.0
    ]

    for expected, op in zip(wanted, summed):
        self.assertAllClose(expected, self.evaluate(op))
def testNumSteps(self):
    """Checks `common.discounted_future_sum` across several step counts.

    With the second argument held at 1.0, the third argument takes the
    values 1, 3 and 20. Each result is compared with a hard-coded
    expectation for the same 3x4 input.
    """
    rows = [[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
    inputs = tf.constant(rows, dtype=tf.float32)

    # Build every op up front, in the same order they are asserted below.
    summed = [
        common.discounted_future_sum(inputs, 1.0, steps)
        for steps in (1, 3, 20)
    ]

    wanted = [
        # 1 step: the result is expected to equal the input.
        rows,
        # 3 steps: each entry plus up to two entries that follow it in its row.
        [[3, 6, 5, 3], [6, 9, 7, 4], [9, 12, 9, 5]],
        # 20 steps (longer than a row): each entry plus all that follow it.
        [[6, 6, 5, 3], [10, 9, 7, 4], [14, 12, 9, 5]],
    ]

    for expected, op in zip(wanted, summed):
        self.assertAllClose(expected, self.evaluate(op))