FREEZE_INTERVAL = 1000 DETERMINISTIC = True if __name__ == "__main__": logging.basicConfig(level=logging.INFO) # --- Parse parameters --- parameters = process_args(sys.argv[1:], Defaults) if parameters.deterministic: rng = np.random.RandomState(123456) else: rng = np.random.RandomState() # --- Instantiate environment --- env = Toy_env(rng) # --- Instantiate qnetwork --- qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon, parameters.momentum, parameters.clip_norm, parameters.freeze_interval, parameters.batch_size, parameters.update_rule, rng) train_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.1) test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.) # --- Instantiate agent --- agent = NeuralAgent(env, qnetwork, parameters.replay_memory_size, max(env.inputDimensions()[i][0]
FREEZE_INTERVAL = 1000 DETERMINISTIC = True if __name__ == "__main__": logging.basicConfig(level=logging.INFO) # --- Parse parameters --- parameters = process_args(sys.argv[1:], Defaults) if parameters.deterministic: rng = np.random.RandomState(123456) else: rng = np.random.RandomState() # --- Instantiate environment --- env = Toy_env(rng) # --- Instantiate qnetwork --- qnetwork = MyQNetwork( env, parameters.rms_decay, parameters.rms_epsilon, parameters.momentum, parameters.clip_delta, parameters.freeze_interval, parameters.batch_size, parameters.update_rule, rng) train_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.1) test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)
FREEZE_INTERVAL = 1000 DETERMINISTIC = True if __name__ == "__main__": logging.basicConfig(level=logging.INFO) # --- Parse parameters --- parameters = process_args(sys.argv[1:], Defaults) if parameters.deterministic: rng = np.random.RandomState(123456) else: rng = np.random.RandomState() # --- Instantiate environment --- env = Toy_env(rng) # --- Instantiate qnetwork --- qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon, parameters.momentum, parameters.clip_delta, parameters.freeze_interval, parameters.batch_size, parameters.network_type, parameters.update_rule, parameters.batch_accumulator, rng) # --- Instantiate agent --- agent = NeuralAgent( env, qnetwork, parameters.replay_memory_size, max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))), parameters.batch_size, rng)
FREEZE_INTERVAL = 1000 DETERMINISTIC = True if __name__ == "__main__": logging.basicConfig(level=logging.INFO) # --- Parse parameters --- parameters = process_args(sys.argv[1:], Defaults) if parameters.deterministic: rng = np.random.RandomState(123456) else: rng = np.random.RandomState() # --- Instantiate environment --- env = Toy_env(rng) # --- Instantiate qnetwork --- qnetwork = MyQNetwork( env, parameters.rms_decay, parameters.rms_epsilon, parameters.momentum, parameters.clip_delta, parameters.freeze_interval, parameters.batch_size, parameters.network_type, parameters.update_rule, parameters.batch_accumulator, rng)