Пример #1
0
 def test_small_2(self):
     """Check ``task.stochastic_value()`` on a 2x3 grid with one cell set to 1.

     The task is configured with ``success_prob = 0.5``; ``failure_prob`` is
     derived as half of the remaining probability. The returned value table
     and policy are checked with ``compare_values`` / ``compare_policies``.
     """
     # Configure the task before asking it for a solution.
     task.grid = [
         [0, 1, 0],
         [0, 0, 0],
     ]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 0.5
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     # Reference results for this configuration.
     expected_values = [
         [94.041, 1000.000, 0.000],
         [86.082, 73.143, 44.286],
     ]
     expected_policy = [
         ['v', ' ', '*'],
         ['>', '>', '^'],
     ]

     actual_values, actual_policy = task.stochastic_value()
     self.compare_values(expected_values, actual_values)
     self.compare_policies(expected_policy, actual_policy)
Пример #2
0
 def test_small_1(self):
     """Check ``task.stochastic_value()`` on an all-zero 2x3 grid.

     ``success_prob`` is 0.5 and ``failure_prob`` is half of the remainder.
     The computed value table and policy are handed to the comparison
     helpers together with the reference results below.
     """
     # Task configuration.
     task.grid = [
         [0, 0, 0],
         [0, 0, 0],
     ]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 0.5
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     # Reference results.
     want_value = [
         [60.472, 37.193, 0.000],
         [63.503, 44.770, 37.193],
     ]
     want_policy = [
         ['>', '>', '*'],
         ['>', '^', '^'],
     ]

     got_value, got_policy = task.stochastic_value()
     self.compare_values(want_value, got_value)
     self.compare_policies(want_policy, got_policy)
Пример #3
0
 def test_small_2(self):
     """Run the solver on a 2x3 grid whose top-middle cell is set to 1.

     Sets ``success_prob`` to 0.5, derives ``failure_prob`` from it, then
     compares the output of ``task.stochastic_value()`` against the
     reference value table and policy.
     """
     top_row = [0, 1, 0]
     bottom_row = [0, 0, 0]
     task.grid = [top_row, bottom_row]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 0.5
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     reference_value = [[94.041, 1000.000, 0.000], [86.082, 73.143, 44.286]]
     reference_policy = [['v', ' ', '*'], ['>', '>', '^']]

     computed_value, computed_policy = task.stochastic_value()
     self.compare_values(reference_value, computed_value)
     self.compare_policies(reference_policy, computed_policy)
Пример #4
0
 def test_small_1(self):
     """Run the solver on a 2x3 grid containing only zeros.

     Sets ``success_prob`` to 0.5, derives ``failure_prob`` from it, then
     compares the output of ``task.stochastic_value()`` against the
     reference value table and policy.
     """
     task.grid = [[0, 0, 0], [0, 0, 0]]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 0.5
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     reference_value = [[60.472, 37.193, 0.000], [63.503, 44.770, 37.193]]
     reference_policy = [['>', '>', '*'], ['>', '^', '^']]

     computed_value, computed_policy = task.stochastic_value()
     self.compare_values(reference_value, computed_value)
     self.compare_policies(reference_policy, computed_policy)
Пример #5
0
 def test_big_nofail(self):
     """Check ``task.stochastic_value()`` on a 4x4 grid with ``success_prob = 1.0``.

     The bottom row of the grid has its two middle cells set to 1.
     ``failure_prob`` is derived as ``(1.0 - success_prob) / 2.0``. The
     returned value table and policy are checked against the references.
     """
     # Task configuration.
     task.grid = [
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 1, 1, 0],
     ]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 1.0
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     # Reference results, one row per grid row.
     expected_values = [
         [3.000, 2.000, 1.000, 0.000],
         [4.000, 3.000, 2.000, 1.000],
         [5.000, 4.000, 3.000, 2.000],
         [6.000, 1000.000, 1000.000, 3.000],
     ]
     expected_policy = [
         ['>', '>', '>', '*'],
         ['^>', '^>', '^>', '^'],
         ['^>', '^>', '^>', '^'],
         ['^', ' ', ' ', '^'],
     ]

     actual_values, actual_policy = task.stochastic_value()
     self.compare_values(expected_values, actual_values)
     self.compare_policies(expected_policy, actual_policy)
Пример #6
0
 def test_big(self):
     """Check ``task.stochastic_value()`` on a 4x4 grid with ``success_prob = 0.5``.

     The bottom row of the grid has its two middle cells set to 1.
     ``failure_prob`` is half of the probability left over after
     ``success_prob``. The returned value table and policy are checked
     against the references below.
     """
     # Task configuration.
     task.grid = [
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 1, 1, 0],
     ]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 0.5
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     # Reference results, one row per grid row.
     want_value = [
         [57.903, 40.278, 26.066, 0.000],
         [47.055, 36.572, 29.994, 27.270],
         [53.172, 42.023, 37.775, 45.092],
         [77.586, 1000.000, 1000.000, 73.546],
     ]
     want_policy = [
         ['>', 'v', 'v', '*'],
         ['>', '>', '^', '<'],
         ['>', '^', '^', '<'],
         ['^', ' ', ' ', '^'],
     ]

     got_value, got_policy = task.stochastic_value()
     self.compare_values(want_value, got_value)
     self.compare_policies(want_policy, got_policy)
Пример #7
0
 def test_big_nofail(self):
     """Run the solver on a 4x4 grid with ``success_prob`` set to 1.0.

     Three all-zero rows sit above a bottom row whose two middle cells are 1.
     ``failure_prob`` is derived as ``(1.0 - success_prob) / 2.0``, and the
     solver output is compared with the reference value table and policy.
     """
     open_row = [0, 0, 0, 0]
     # Each row is a separate list object, as in a nested literal.
     task.grid = [list(open_row), list(open_row), list(open_row), [0, 1, 1, 0]]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 1.0
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     reference_value = [
         [3.000, 2.000, 1.000, 0.000],
         [4.000, 3.000, 2.000, 1.000],
         [5.000, 4.000, 3.000, 2.000],
         [6.000, 1000.000, 1000.000, 3.000],
     ]
     reference_policy = [
         ['>', '>', '>', '*'],
         ['^>', '^>', '^>', '^'],
         ['^>', '^>', '^>', '^'],
         ['^', ' ', ' ', '^'],
     ]

     computed_value, computed_policy = task.stochastic_value()
     self.compare_values(reference_value, computed_value)
     self.compare_policies(reference_policy, computed_policy)
Пример #8
0
 def test_big(self):
     """Run the solver on a 4x4 grid with ``success_prob`` set to 0.5.

     Three all-zero rows sit above a bottom row whose two middle cells are 1.
     ``failure_prob`` is derived as ``(1.0 - success_prob) / 2.0``, and the
     solver output is compared with the reference value table and policy.
     """
     open_row = [0, 0, 0, 0]
     # Each row is a separate list object, as in a nested literal.
     task.grid = [list(open_row), list(open_row), list(open_row), [0, 1, 1, 0]]
     task.goal = [0, len(task.grid[0]) - 1]  # Goal is in top right corner
     task.success_prob = 0.5
     task.failure_prob = (1.0 - task.success_prob) / 2.0

     reference_value = [
         [57.903, 40.278, 26.066, 0.000],
         [47.055, 36.572, 29.994, 27.270],
         [53.172, 42.023, 37.775, 45.092],
         [77.586, 1000.000, 1000.000, 73.546],
     ]
     reference_policy = [
         ['>', 'v', 'v', '*'],
         ['>', '>', '^', '<'],
         ['>', '^', '^', '<'],
         ['^', ' ', ' ', '^'],
     ]

     computed_value, computed_policy = task.stochastic_value()
     self.compare_values(reference_value, computed_value)
     self.compare_policies(reference_policy, computed_policy)