# Inputs for the Schedule built below. t0, t1 and Q are not defined in this
# section and must already be in scope.
# NOTE(review): e0/e1 are presumably the start/end exploration values —
# confirm against Schedule's signature.
e0 = 0.90
e1 = 0.10
decay_fun = sch_exp_decay
cos_ann = True
ann_cyc = 5
schedule = Schedule(
    t0,
    t1,
    e0,
    e1,
    decay_fun,
    cosine_annealing=cos_ann,
    annealing_cycles=ann_cyc,
)

# Policy
policy = EpsilonGreedyPolicy(schedule=schedule, value_function=Q)

# Reward Function
reward_fun = rf_info2d_pos

# Action Pre/Post-Processing Action
act_fun = act_disc2cont

# Agent
lr = 1e-4
gamma = 0.99
# Run doubleQ-DQN sampling from Q_target and bootstrapping from Q
doubleQ = True
rb = True
# NOTE(review): 1e6 is a float literal; confirm the consumer of rb_max_size
# accepts a float (or converts it) where an integer size is required.
rb_max_size = 1e6
rb_batch_size = 64
tau = 0.1