def test_save_load():
    """Smoke-test save/load round-tripping of a FeedForward policy.

    Builds a small batch-norm MLP, activates it on a ones vector, saves it,
    immediately reloads it, and prints the output again so the two can be
    compared by eye. No assertions — output is for manual inspection.
    """
    cfg = {"rng": np.random}
    dim_in = 8
    dim_out = 8
    spec = {
        "init_std": 0.05,
        "layers": [64, 64],
        "layer_functions": ['relu', 'relu'],
        "layer_extras": ['bn', 'bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }

    policy = FeedForward(dim_in, dim_out, None, cfg)
    policy.build_model(spec)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    ones = np.ones(dim_in)
    result = policy.activate(ones)
    print("\nOUTPUT ON ONES BEFORE SAVING:", result)

    # Round-trip through disk; the post-load output should match the pre-save one.
    policy.save("data/experiments/exp_name/epochs/epoch_0/policy")
    policy.load("data/experiments/exp_name/epochs/epoch_0/policy")
    result = policy.activate(ones)
    print("OUTPUT ON ONES AFTER SAVING:", result)
def test_bn():
    """Exercise a config-driven FeedForward policy and every action parser.

    Loads the policy spec from test_config.json, checks the activation shape
    on a ones input, then swaps in each parser from the `parsers` module and
    prints the raw output, its sum, and the parsed action. Output is printed
    for manual inspection; nothing is asserted.
    """
    cfg = ConfigLoader.load_config(file_name="test_config.json")
    dim_in = 8
    dim_out = 8
    spec = cfg["policy"]

    # Parser is deliberately None here; each candidate parser is assigned below.
    policy = FeedForward(dim_in, dim_out, None, cfg)
    policy.build_model(spec)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    ones = np.ones(dim_in)
    activation = policy.activate(ones)
    print("POLICY OUTPUT ON ONES OF SHAPE", ones.shape, "=", activation.shape, "EXPECTED", dim_out)

    candidates = {
        "linear": parsers.linear_parse,
        "random sample": parsers.random_sample,
        "arg max": parsers.argmax_sample,
    }
    for label, candidate in candidates.items():
        policy.action_parser = candidate
        print("ATTEMPTING PARSER", label)
        print("RAW POLICY OUTPUT:", policy.activate(ones)[0])
        print("SUM:", sum(policy.activate(ones)[0]))
        action = policy.get_action(ones)
        print("POLICY ACTION FROM PARSER", label, "=", action)
        print()
def run_test():
    """Check that set_trainable_flat/get_trainable_flat round-trip a parameter vector.

    Builds a tiny one-hidden-unit policy, overwrites its flat parameter
    vector with random values, and prints before/after/expected so the
    round-trip can be verified by eye.

    NOTE(review): `run_test` is defined more than once in this file; only the
    last definition is callable after import — consider unique names.
    """
    cfg = {"rng": np.random}
    dim_in = 2
    dim_out = 2
    spec = {
        "init_std": 0.05,
        "layers": [1],
        "layer_functions": ['relu'],
        "layer_extras": ['bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }

    policy = FeedForward(dim_in, dim_out, None, cfg)
    policy.build_model(spec)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    new_flat = np.random.randn(policy.num_params)
    print("POLICY FLAT BEFORE SETTING:", policy.get_trainable_flat())
    policy.set_trainable_flat(new_flat)
    print("POLICY FLAT AFTER SETTING:", policy.get_trainable_flat())
    print("FLAT SHOULD NOW BE:", new_flat)
def test_batch_bn():
    """Exercise batched activation and batched action parsing.

    Loads the policy spec from test_config.json, activates the policy on a
    batch of 500 identical ones vectors, then runs get_actions_on_batch with
    each parser from the `parsers` module. Results are printed for manual
    inspection; nothing is asserted.
    """
    cfg = ConfigLoader.load_config(file_name="test_config.json")
    dim_in = 8
    dim_out = 8
    spec = cfg["policy"]
    default_parser = MiscParsingFunctions.parse_policy_action_function(
        cfg["policy"]["action_parser"])

    policy = FeedForward(dim_in, dim_out, default_parser, cfg)
    policy.build_model(spec)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    batch = [np.ones(dim_in) for _ in range(500)]
    activation = policy.activate_batch(batch)
    print("OUTPUT ON ONES BATCH OF 500:", activation.shape, "EXPECTED 500 OF", dim_out)

    candidates = {
        "linear": parsers.linear_parse,
        "random sample": parsers.random_sample,
        "arg max": parsers.argmax_sample,
    }
    for label, candidate in candidates.items():
        policy.action_parser = candidate
        actions = policy.get_actions_on_batch(batch)
        print("POLICY ACTIONS FROM PARSER", label, "=", actions)
        print()
def run_test():
    """Build a two-hidden-layer batch-norm FeedForward policy and print its layers.

    Construction-only smoke test: nothing is activated or asserted.

    NOTE(review): `run_test` is defined more than once in this file; only the
    last definition is callable after import — consider unique names.
    """
    cfg = {"rng": np.random}
    dim_in = 8
    dim_out = 8
    spec = {
        "init_std": 0.05,
        "layers": [64, 64],
        "layer_functions": ['relu', 'relu'],
        "layer_extras": ['bn', 'bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }

    policy = FeedForward(dim_in, dim_out, None, cfg)
    policy.build_model(spec)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)
def run_test():
    """Smoke-test save/load together with virtual batch normalization (VBN).

    Builds a policy from test_config.json and the configured environment's
    input/output shapes, shows the activation before and after computing
    virtual normalization, then saves, rebuilds from scratch, loads, and
    re-applies VBN so the outputs can be compared by eye. No assertions —
    output is for manual inspection.

    Note: `run_test` is defined more than once in this file; only the last
    definition is callable after import.
    """
    # (A stale commented-out hand-rolled config block was removed here; the
    # config now comes exclusively from test_config.json.)
    cfg = ConfigLoader.load_config(file_name="test_config.json")
    env = EnvironmentFactory.get_from_config(cfg)
    input_shape = env.get_policy_input_shape()
    output_shape = env.get_policy_output_shape()
    instructions = cfg["policy"]
    # Seed the RNG from the config so the run is reproducible.
    cfg["rng"] = np.random.RandomState(cfg["seed"])

    policy = FeedForward(input_shape, output_shape, None, cfg)
    policy.build_model(instructions)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    # input_shape may be multi-dimensional; collapse to total element count.
    num = np.prod(input_shape)
    vbn = [np.random.randn(num) for _ in range(1000)]
    inp = np.ones(num)

    out = policy.activate(inp)
    print("\nOUTPUT ON ONES BEFORE VBN:", out)

    policy.compute_virtual_normalization(vbn)
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER VBN:", out)

    # Saving must not perturb the live policy.
    policy.save("data/test")
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER SAVE:", out)

    # Rebuild from scratch and restore from disk.
    del policy
    policy = FeedForward(input_shape, output_shape, None, cfg)
    policy.build_model(instructions)
    policy.load("data/test")
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER LOAD:", out)

    # Re-running VBN on the same reference batch should reproduce the
    # pre-save normalized output.
    policy.compute_virtual_normalization(vbn)
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER LOAD AND VBN:", out)
def test_save_load_vbn():
    """Smoke-test that save/load preserves a virtually-normalized policy.

    Computes virtual batch normalization from a random reference batch, saves
    the policy, perturbs ("jiggles") its trainable parameters, then loads the
    saved state back and prints the output at each step so restoration can be
    verified by eye. No assertions — output is for manual inspection.
    """
    cfg = {"rng": np.random}
    dim_in = 8
    dim_out = 8
    spec = {
        "init_std": 0.05,
        "layers": [64, 64],
        "layer_functions": ['relu', 'relu'],
        "layer_extras": ['bn', 'bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }

    policy = FeedForward(dim_in, dim_out, None, cfg)
    policy.build_model(spec)
    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    reference_batch = [np.random.randn(dim_in) for _ in range(1000)]
    policy.compute_virtual_normalization(reference_batch)

    ones = np.ones(dim_in)
    result = policy.activate(ones)
    print("\nOUTPUT ON ONES WITH VBN BEFORE SAVING:", result)

    policy.save("data/experiments/exp_name/epochs/epoch_0/policy")
    result = policy.activate(ones)
    print("\nOUTPUT ON ONES WITH VBN AFTER SAVING:", result)

    # Perturb the parameters so a successful load is visibly distinguishable.
    noise = np.random.randn(policy.num_params)
    policy.set_trainable_flat(policy.get_trainable_flat() + noise)
    result = policy.activate(ones)
    print("\nJIGGLED OUTPUT ON ONES WITH VBN BEFORE LOADING", result)

    policy.load("data/experiments/exp_name/epochs/epoch_0/policy")
    result = policy.activate(ones)
    print("OUTPUT ON ONES WITH VBN AFTER LOADING:", result)