win_num += 1 r = 1 #reward if won elif status == -1: fail_num += 1 r = -1#reward if loss elif progress.sum() != 0: r = 0.9 #some progress is made else: r = -0.3 #nothing #we wanted to reward YOLO bombing as this is a proven strategy if stuck. miner.store_transition(s, a, r, s_) ep_r += r if miner.memory_counter > opt.memory_capacity: miner.optimize_model() if game.get_status() != 0: print('Ep: ', epoch, '| Ep_r: ', round(ep_r, 2)) if status != 0: break s = s_.copy() critic_r += ep_r if (epoch+1) % opt.n_critic == 0: print('=====evaluation=====') print('Epochs:', epoch) print('win number:', win_num) print('fail number:', fail_num)