def test_guarded_treasures_edgecases():
    """Exercise GuardedTreasures with three hand-written agents.

    * An agent that returns 1 only when the latest observation is 1 must
      never finish with a negative total, and must finish with a positive
      total in at least one of the attempts.
    * An agent that always returns 1 must finish with a negative total in
      at least one of the attempts.
    * An agent that always returns 0 must finish a 10-step run with a total
      of exactly 0.

    The first two checks retry with progressively longer runs (10, 20, ...,
    100 steps) and stop at the first attempt showing the expected sign.
    """
    from GuardedTreasures import GuardedTreasures

    max_attempts = 10

    def only_take_guarded_treasures(prompt, *meta):
        # The most recent observation is the last prompt entry.
        current_room_has_guard = prompt[-1]
        return 1 if current_room_has_guard == 1 else 0

    # A success on the final attempt counts as a success: the for/else only
    # fails when *no* attempt produced a positive total.  (The previous
    # `assert i < 10` after a while-loop rejected a success on attempt 10.)
    for attempt in range(1, max_attempts + 1):
        result = run_environment(
            GuardedTreasures, only_take_guarded_treasures, attempt * 10
        )
        assert result['total_reward'] >= 0
        if result['total_reward'] > 0:
            break
    else:
        raise AssertionError(
            "only_take_guarded_treasures never earned a positive total reward"
        )

    def always_take_treasure(prompt, *meta):
        return 1

    for attempt in range(1, max_attempts + 1):
        result = run_environment(
            GuardedTreasures, always_take_treasure, attempt * 10
        )
        if result['total_reward'] < 0:
            break
    else:
        raise AssertionError(
            "always_take_treasure never earned a negative total reward"
        )

    def never_take_treasure(prompt, *meta):
        return 0

    result = run_environment(GuardedTreasures, never_take_treasure, 10)
    assert result['total_reward'] == 0
def test_incentivize_zero_edgecases():
    """Run IncentivizeZero for 10 steps against three fixed agents.

    Asserted totals: always returning 0 gives 9, always returning 1 gives
    -9, and the agent keyed on the previous reward gives 9.
    """
    from IncentivizeZero import IncentivizeZero

    def always_zero(prompt, *meta):
        return 0

    def always_1(prompt, *meta):
        return 1

    def play_zero_if_last_reward_was_5(prompt, *meta):
        # The second-to-last prompt entry is the most recent reward.
        return 0 if prompt[-2] == 5 else 5

    outcome = run_environment(IncentivizeZero, always_zero, 10)
    assert outcome['total_reward'] == 9

    outcome = run_environment(IncentivizeZero, always_1, 10)
    assert outcome['total_reward'] == -9

    outcome = run_environment(
        IncentivizeZero, play_zero_if_last_reward_was_5, 10
    )
    assert outcome['total_reward'] == 9
def test_binocular_vision_edgecases():
    """Build a BinocularVision environment from stub callables and test it.

    The stub game and cameras return the constants 0, 1 and 2.  The
    `repetitive` agent is asserted to total 9 over 10 steps; an agent that
    answers differently for observation 0 than for the paired camera
    observation is asserted to total -9.
    """
    from abstract.BinocularVision import BinocularVision
    from util import cantor_pairing_fnc

    def Game3D(action_sequence):
        return 0

    def LeftCamera(matrix3D):
        return 1

    def RightCamera(matrix3D):
        return 2

    # The only non-zero observation zero_checker accepts: the Cantor pairing
    # of the two constant camera outputs.
    left_view = LeftCamera(0)
    right_view = RightCamera(0)
    expected_obs = cantor_pairing_fnc(left_view, right_view)

    env = BinocularVision(Game3D, LeftCamera, RightCamera)

    outcome = run_environment(env, repetitive, 10)
    assert outcome['total_reward'] == 9

    def zero_checker(prompt, *meta):
        latest = prompt[-1]
        if latest == 0:
            return 1
        elif latest == expected_obs:
            return 2
        else:
            raise ValueError("Zero_checker saw an unexpected observation")

    outcome = run_environment(env, zero_checker, 10)
    assert outcome['total_reward'] == -9
def test_self_insert_edgecases():
    """Wrap a trivial environment with self_insert and test two agents.

    The `repetitive` agent is asserted to total 9 over 10 steps.  An agent
    whose answer depends on whether any prompt entry is iterable is
    asserted to total -9.
    """
    from abstract.SelfInsert import self_insert

    def dummy_env(T, play):
        # Constant (0, 0) result regardless of agent or play history.
        return 0, 0

    class Dummy_Env:
        def __init__(self):
            self.num_legal_actions = 100
            self.num_possible_obs = -1
            self.fnc = dummy_env

    env = self_insert(Dummy_Env)

    outcome = run_environment(env, repetitive, 10)
    assert outcome['total_reward'] == 9

    def tuple_detector(prompt, *meta):
        # 1 as soon as any prompt entry exposes __iter__, otherwise 0.
        saw_iterable = any('__iter__' in dir(entry) for entry in prompt)
        return 1 if saw_iterable else 0

    outcome = run_environment(env, tuple_detector, 10)
    assert outcome['total_reward'] == -9
def test_crying_baby_edgecases():
    """Run CryingBaby for 10 steps against three fixed agents.

    Asserted totals: always returning CRY gives -8, always returning LAUGH
    gives 10, and the agent that branches on the initial observation gives
    10.
    """
    from CryingBaby import CryingBaby, LAUGH, CRY, FEED, DONTFEED

    def always_cries(prompt, *meta):
        return CRY

    def always_laughs(prompt, *meta):
        return LAUGH

    def self_aware(prompt, *meta):
        # prompt[1] is the very first observation of the run.
        am_i_adult = prompt[1] == LAUGH
        return DONTFEED if am_i_adult else LAUGH

    outcome = run_environment(CryingBaby, always_cries, 10)
    # Baby is hardcoded to initially laugh
    assert outcome['total_reward'] == -8

    outcome = run_environment(CryingBaby, always_laughs, 10)
    assert outcome['total_reward'] == 10

    outcome = run_environment(CryingBaby, self_aware, 10)
    assert outcome['total_reward'] == 10
def test_tic_tac_toe():
    """Check that a blind, cycling agent earns reward in each TicTacToe env.

    Each environment is run for 100 steps; if the total reward is exactly 0
    the run is repeated with 1000 steps.  The (possibly repeated) run must
    end with a positive total reward.
    """
    from vanilla.TicTacToe import TicTacToe1, TicTacToe2, TicTacToe3

    def plays_blindly(prompt, *meta):
        # Derive a turn counter from the prompt length and cycle through
        # the values 0..8.  Floor division keeps the action an int on
        # Python 3 as well (true division would return a float such as 1.0).
        return ((1 + len(prompt)) // 3) % 9

    for env in [TicTacToe1, TicTacToe2, TicTacToe3]:
        result = run_environment(env, plays_blindly, 100)
        if result['total_reward'] == 0:
            # Retry with a longer run before declaring failure.
            result = run_environment(env, plays_blindly, 1000)
        assert result['total_reward'] > 0
def test_false_memories_edgecases():
    """Run FalseMemories against stock and prompt-length-sensitive agents.

    `repetitive` is asserted to total 9 and `non_repetitive` -9 over 10
    steps.  Two agents that switch their answer based on prompt length are
    then checked against fixed expected totals.
    """
    from FalseMemories import FalseMemories

    outcome = run_environment(FalseMemories, repetitive, 10)
    assert outcome['total_reward'] == 9

    outcome = run_environment(FalseMemories, non_repetitive, 10)
    assert outcome['total_reward'] == -9

    def lengthchecker(prompt, *meta):
        if len(prompt) > 5:
            return 1
        return 0

    # (number of steps, expected total reward) for lengthchecker.
    lengthchecker_cases = [(2, -1), (3, -2), (4, -1), (5, 0), (6, 1)]
    for num_steps, expected_total in lengthchecker_cases:
        outcome = run_environment(FalseMemories, lengthchecker, num_steps)
        assert outcome['total_reward'] == expected_total

    def impatient(prompt, *meta):
        if len(prompt) < 5:
            return 1
        return 0

    outcome = run_environment(FalseMemories, impatient, 10)
    assert outcome['total_reward'] == 7
def test_dejavu_edgecases():
    """Run DejaVu against stock agents and an alternating agent.

    `repetitive` is asserted to total 9 and `non_repetitive` -9 over 10
    steps.  The `parity` agent is asserted to total -1 over 10 steps and 0
    over 11 steps.
    """
    from DejaVu import DejaVu

    result = run_environment(DejaVu, repetitive, 10)
    assert result['total_reward'] == 9

    result = run_environment(DejaVu, non_repetitive, 10)
    assert result['total_reward'] == -9

    def parity(prompt, *meta):
        # Alternate between 0 and 1 based on a turn counter derived from
        # the prompt length.  Floor division keeps the action an int on
        # Python 3 as well (true division would return 0.0 / 1.0).
        return ((len(prompt) + 1) // 3) % 2

    result = run_environment(DejaVu, parity, 10)
    assert result['total_reward'] == -1

    result = run_environment(DejaVu, parity, 11)
    assert result['total_reward'] == 0
def test_runtime_inspector_edgecases():
    """Run both RuntimeInspector environments against two agents.

    With `repetitive`, PunishFastAgent is asserted to total -9 and
    PunishSlowAgent 9 over 10 steps.  With `timewaster`, whose work grows
    with the prompt length, the asserted totals are swapped.
    """
    from RuntimeInspector import PunishFastAgent, PunishSlowAgent

    fast_env_outcome = run_environment(PunishFastAgent, repetitive, 10)
    slow_env_outcome = run_environment(PunishSlowAgent, repetitive, 10)
    assert fast_env_outcome['total_reward'] == -9
    assert slow_env_outcome['total_reward'] == 9

    # Busy-loops for 25 iterations per prompt entry before answering 0.
    # The loop is intentionally left exactly as written: its running time
    # is what this test exercises.
    def timewaster(prompt, *meta):
        x = 25 * len(prompt)
        while x > 0:
            x = x - 1
        return 0

    fast_env_outcome = run_environment(PunishFastAgent, timewaster, 10)
    slow_env_outcome = run_environment(PunishSlowAgent, timewaster, 10)
    assert fast_env_outcome['total_reward'] == 9
    assert slow_env_outcome['total_reward'] == -9
def test_bandits():
    """Check that a counting agent earns positive reward on every Bandit.

    Each of Bandit1..Bandit5 is run for 10 steps and must end with a
    positive total reward.
    """
    from vanilla.Bandit import Bandit1, Bandit2, Bandit3, Bandit4, Bandit5

    def incrementer(prompt, *meta):
        # Action is a turn counter derived from the prompt length.  Floor
        # division keeps it an int on Python 3 as well (true division
        # would return a float such as 1.0).
        return (1 + len(prompt)) // 3

    for bandit in [Bandit1, Bandit2, Bandit3, Bandit4, Bandit5]:
        result = run_environment(bandit, incrementer, 10)
        assert result['total_reward'] > 0
def test_mazes():
    """Run the Maze environments against a learning agent and a fixed one.

    * `learns_about_bad_moves` must earn a positive total on Maze1..Maze4
      (50 steps, retried with 250 steps on a zero total) and on Maze5
      (100 steps, retried with 500 steps on a zero total).
    * `always_goes_north` must total exactly 0 on every maze in 50 steps.
    """
    from vanilla.Maze import Maze1, Maze2, Maze3, Maze4, Maze5

    def learns_about_bad_moves(prompt, *meta):
        # The prompt is a flat sequence of (reward, observation, action)
        # triples ending with the latest reward and observation.
        if prompt[-2] > 0:
            assert prompt[-1] == 1  # Rewards are always accompanied by reset

        bad_moves = {x: [] for x in range(10)}

        # Observations sit at indices 1, 4, 7, ...; start at 4 so that a
        # previous observation/action pair exists.  (Scanning every odd
        # index, as before, misread rewards and actions as observations.)
        for i in range(4, len(prompt), 3):
            obs = prompt[i]
            prev_obs = prompt[i - 3]
            prev_action = prompt[i - 2]
            prev_reward = prompt[i - 1]

            # A move is bad if it left the agent in the same room, or sent
            # it to room 1 without any reward.
            stayed_put = obs == prev_obs
            unrewarded_reset = obs == 1 and prev_reward == 0
            if stayed_put or unrewarded_reset:
                if prev_action not in bad_moves[prev_obs]:
                    bad_moves[prev_obs].append(prev_action)

        curr_room = prompt[-1]
        while True:
            door = int(random() * 4)
            # When every door is known to be bad, any random door will do.
            if len(bad_moves[curr_room]) == 4:
                return door
            if door not in bad_moves[curr_room]:
                return door

    for maze in [Maze1, Maze2, Maze3, Maze4]:
        result = run_environment(maze, learns_about_bad_moves, 50)
        if result['total_reward'] == 0:
            result = run_environment(maze, learns_about_bad_moves, 250)
        assert result['total_reward'] > 0

    result = run_environment(Maze5, learns_about_bad_moves, 100)
    if result['total_reward'] == 0:
        # Retry on Maze5 itself; the leftover loop variable `maze` would
        # rerun Maze4 here.
        result = run_environment(Maze5, learns_about_bad_moves, 500)
    assert result['total_reward'] > 0

    def always_goes_north(prompt, *meta):
        return 0

    for maze in [Maze1, Maze2, Maze3, Maze4, Maze5]:
        result = run_environment(maze, always_goes_north, 50)
        assert result['total_reward'] == 0
def test_determinism_inspector_edgecases():
    """Run both DeterminismInspector environments against two agents.

    With `repetitive`, PunishDeterministicAgent is asserted to total -9 and
    PunishNondeterministicAgent 9 over 10 steps.  With an agent that returns
    a new value on every call, the asserted totals are swapped.
    """
    from DeterminismInspector import (
        PunishDeterministicAgent,
        PunishNondeterministicAgent,
    )

    det_outcome = run_environment(PunishDeterministicAgent, repetitive, 10)
    nondet_outcome = run_environment(
        PunishNondeterministicAgent, repetitive, 10
    )
    assert det_outcome['total_reward'] == -9
    assert nondet_outcome['total_reward'] == 9

    # One-element list used as a mutable cell shared across calls (and
    # across both runs below), so the agent never returns a value twice.
    memory = [0]

    def never_repeater(prompt, *meta):
        action = memory[0]
        memory[0] = action + 1
        return action

    det_outcome = run_environment(PunishDeterministicAgent, never_repeater, 10)
    nondet_outcome = run_environment(
        PunishNondeterministicAgent, never_repeater, 10
    )
    assert det_outcome['total_reward'] == 9
    assert nondet_outcome['total_reward'] == -9
def awareness_benchmark(T, num_steps, include_slow_envs=False):
    """Run agent `T` on every registered environment and score it.

    Args:
        T: The agent, passed straight through to `run_environment`.
        num_steps: Number of steps to run each environment for; also the
            divisor used to average the normalized reward.
        include_slow_envs: When false (the default), skip every
            environment whose name contains any entry of `slow_envs`.

    Returns:
        A dict mapping environment name to that run's
        'total_normalized_reward' divided by `num_steps`.
    """
    scores = {}

    for env_name, env in envs.items():
        is_slow = any(slow_name in env_name for slow_name in slow_envs)
        if is_slow and not include_slow_envs:
            continue

        outcome = run_environment(env, T, num_steps)
        scores[env_name] = outcome['total_normalized_reward'] / num_steps

    return scores
def test_ignore_rewards_edgecases():
    """Run IgnoreRewards against stock agents and a reward-counting agent.

    Both `repetitive` and `non_repetitive` are asserted to total 9 over 10
    steps.  An agent whose action is the number of positive rewards seen so
    far is asserted to total -7.
    """
    from IgnoreRewards import IgnoreRewards

    outcome = run_environment(IgnoreRewards, repetitive, 10)
    assert outcome['total_reward'] == 9

    outcome = run_environment(IgnoreRewards, non_repetitive, 10)
    assert outcome['total_reward'] == 9

    def count_positive_rewards(prompt, *meta):
        # Rewards occupy every third prompt slot, starting at index 0.
        reward_slots = range(0, len(prompt), 3)
        return sum(1 for slot in reward_slots if prompt[slot] > 0)

    outcome = run_environment(IgnoreRewards, count_positive_rewards, 10)
    assert outcome['total_reward'] == -7
def test_backward_consciousness_edgecases():
    """Run BackwardConsciousness against `repetitive` and a stubborn agent.

    `repetitive` is asserted to total 9 over 10 steps.  The stubborn agent
    is asserted to total 0, -5 and -10 over 5, 10 and 15 steps.
    """
    from BackwardConsciousness import BackwardConsciousness

    outcome = run_environment(BackwardConsciousness, repetitive, 10)
    assert outcome['total_reward'] == 9

    def stubborn_agent(prompt, *meta):
        history_len = len(prompt)

        # Take first two actions 1,2 no matter what
        if history_len < 3:
            return 1
        if history_len < 6:
            return 2

        # Choose later actions entirely based on first two actions
        if prompt[2] == 1 and prompt[5] == 2:
            return 3
        return 4

    # (number of steps, expected total reward) for stubborn_agent.
    for num_steps, expected_total in [(5, 0), (10, -5), (15, -10)]:
        outcome = run_environment(
            BackwardConsciousness, stubborn_agent, num_steps
        )
        assert outcome['total_reward'] == expected_total
def test_guess_the_number():
    """Check reward accounting in each GuessTheNumber environment.

    A cycling agent plays 100 steps while recording every observation equal
    to 0.  The total reward must equal the number of such observations
    minus one.
    """
    from vanilla.GuessTheNumber import GuessTheNumber1
    from vanilla.GuessTheNumber import GuessTheNumber2
    from vanilla.GuessTheNumber import GuessTheNumber3

    for env in [GuessTheNumber1, GuessTheNumber2, GuessTheNumber3]:
        blank_observations = []

        # The default argument deliberately binds this iteration's list to
        # the agent, so each environment gets its own tally.
        def blank_obs_observer(prompt, num_legal_actions, num_possible_obs,
                               blank_obs=blank_observations):
            obs = prompt[-1]
            if obs == 0:
                blank_obs.append(obs)
            # Cycle through 1..10 using a turn counter derived from the
            # prompt length.  Floor division keeps the action an int on
            # Python 3 as well (true division would return a float).
            return (((1 + len(prompt)) // 3) % 10) + 1

        result = run_environment(env, blank_obs_observer, 100)
        assert result['total_reward'] == len(blank_observations) - 1
def test_paper_rock_scissors():
    """Check that always playing PAPER earns reward in each PRS environment.

    The agent also verifies, on every call with a non-zero observation,
    that the latest reward matches the observation: 1 for PAPER, 2 for
    ROCK, and 0 for anything else.  Each environment is run for 50 steps
    and must end with a positive total reward.
    """
    from vanilla.PaperRockScissors import (
        PaperRockScissors1,
        PaperRockScissors2,
        PaperRockScissors3,
    )
    from vanilla.PaperRockScissors import PAPER, ROCK, SCISSORS

    def always_plays_paper(prompt, *meta):
        reward = prompt[-2]
        obs = prompt[-1]

        # An observation of 0 carries nothing to verify.
        if obs != 0:
            if obs == PAPER:
                expected_reward = 1
            elif obs == ROCK:
                expected_reward = 2
            else:
                expected_reward = 0
            assert reward == expected_reward

        return PAPER

    for env in (PaperRockScissors1, PaperRockScissors2, PaperRockScissors3):
        outcome = run_environment(env, always_plays_paper, 50)
        assert outcome['total_reward'] > 0