def dora_run_softmax(run_name='default', plot=False):
    """Run a DORA agent with softmax action selection on CartPoleVision.

    Builds a Q-network and an E-network, wires them into a DoraTrainer
    with LLL-softmax selection, and starts training.
    """
    policy = LLL_softmax()
    environment = CartPoleVision()
    q_network = selectNet()
    e_network = selectNet(Enet=True)
    trainer = DoraTrainer(
        q_network,
        e_network,
        environment,
        policy,
        lr=0.01,
        run_name=run_name,
        plot=plot,
    )
    trainer.run()
def optimistic_dqn_run_softmax(run_name='default', plot=False):
    """Train an optimistically-initialized DQN with softmax selection.

    Uses the "optimisticQ" network variant from selectNet on the
    CartPoleVision environment.
    """
    policy = softmax()
    environment = CartPoleVision()
    network = selectNet(Mode="optimisticQ")
    trainer = Trainer(network, environment, policy, run_name=run_name, plot=plot)
    trainer.run()
def dqn_run(run_name='default', plot=False):
    """Train a plain DQN with epsilon-greedy selection on CartPoleVision."""
    policy = epsilon_greedy()
    environment = CartPoleVision()
    network = selectNet()
    trainer = Trainer(network, environment, policy, run_name=run_name, plot=plot)
    trainer.run()
def dqn_run_softmax(run_name='default', plot=False):
    """Train a plain DQN with softmax selection on CartPoleVision."""
    policy = softmax()
    environment = CartPoleVision()
    network = selectNet()
    trainer = Trainer(network, environment, policy, run_name=run_name, plot=plot)
    trainer.run()
def dora_run(env, run_name='default', plot=False, selection='softmax',
             setting=None, log_path='logs'):
    """Run a DORA agent on an arbitrary environment.

    Args:
        env: environment object exposing a ``name`` attribute used to pick
            the matching networks via selectNet.
        run_name: label for this training run.
        plot: whether the trainer should produce live plots.
        selection: action-selection strategy name, either 'epsilon' or
            'softmax' (raises KeyError for anything else).
        setting: trainer configuration; a fresh DefaultSetting() is created
            per call when omitted. (Fix: the previous default of
            ``DefaultSetting()`` was a mutable default argument, evaluated
            once at import time and shared across every call.)
        log_path: directory for trainer logs.
    """
    if setting is None:
        setting = DefaultSetting()
    # Map the strategy name to its LLL selection object; an unknown name
    # deliberately raises KeyError, matching the original behavior.
    selection = {
        'epsilon': LLL_epsilon_greedy(),
        'softmax': LLL_softmax(),
    }[selection]
    Qnet = selectNet('dqn', env.name)
    Enet = selectNet('enet', env.name)
    t = DoraTrainer(Qnet, Enet, env, selection, run_name=run_name,
                    plot=plot, setting=setting, log_path=log_path)
    t.run()