R.SaveBuffer(OUT_DIR+BUFFER_FILE) print "model saved, replay buffer: ", R.GetOccupency() L.Save(OUT_DIR+LOG_FILE) Ws,bs = Q.get_weights() Q_target.assign(sess, Ws,bs) save_counter.increment(sess) saver.save(sess,OUT_DIR+"model.ckpt", global_step=save_counter.evaluate(sess)) R.SaveBuffer(OUT_DIR+BUFFER_FILE) print "model saved, replay buffer: ", R.GetOccupency() L.Save(OUT_DIR+LOG_FILE) sess.close() sys.exit() # plot statistics R_P_l = L.GetLogByName('policy_left') R_Q_l = L.GetLogByName('network_left') R_P_m = L.GetLogByName('policy_middle') R_Q_m = L.GetLogByName('network_middle') R_P_r = L.GetLogByName('policy_right') R_Q_r = L.GetLogByName('network_right') totalR = L.GetLogByName('total_reward') error = L.GetLogByName('error') value = L.GetLogByName('estimated_value') t = np.arange(R_P_l.size) plt.figure(1) plt.plot(t, R_Q_l, 'b', t, R_P_l, 'r') plt.xlabel('Episodes') plt.ylabel('reward') plt.title('Puck on the left')
L.Save(OUT_DIR + LOG_FILE) Ws, bs = Q.get_weights() Q_target.assign(sess, Ws, bs) save_counter.increment(sess) saver.save(sess, OUT_DIR + "model.ckpt", global_step=save_counter.evaluate(sess)) R.SaveBuffer(OUT_DIR + BUFFER_FILE) print "model saved, replay buffer: ", R.GetOccupency() L.Save(OUT_DIR + LOG_FILE) sess.close() # plot statistics R_P_l = L.GetLogByName('policy_left') R_Q_l = L.GetLogByName('network_left') R_P_m = L.GetLogByName('policy_middle') R_Q_m = L.GetLogByName('network_middle') R_P_r = L.GetLogByName('policy_right') R_Q_r = L.GetLogByName('network_right') t = np.arange(R_P_l.size) plt.figure(1) plt.plot(t, R_Q_l, 'b', t, R_P_l, 'r') plt.figure(2) plt.plot(t, R_Q_m, 'b', t, R_P_m, 'r') plt.figure(3) plt.plot(t, R_Q_r, 'b', t, R_P_r, 'r')
#if UPDATE_TARGET: # Ws,bs = Q.get_weights() # Q_targets[cur_target % len(Q_targets)].assign(sess, Ws,bs) save_counter.increment(sess) saver.save(sess, OUT_DIR + "model.ckpt", global_step=save_counter.evaluate(sess)) R.SaveBuffer(OUT_DIR + BUFFER_FILE) print "model saved, replay buffer: ", R.GetOccupency() L.Save(OUT_DIR + LOG_FILE) sess.close() # plot statistics if DISPLAY_STATISTICS: R_P_l = L.GetLogByName('policy_left') R_Q_l = L.GetLogByName('network_left') R_P_m = L.GetLogByName('policy_middle') R_Q_m = L.GetLogByName('network_middle') R_P_r = L.GetLogByName('policy_right') R_Q_r = L.GetLogByName('network_right') currentR = L.GetLogByName('network_random') error = L.GetLogByName('error') value = L.GetLogByName('estimated_value') t = np.arange(R_P_l.size) plt.figure() plt.subplot(231) plt.plot(t, R_Q_l, 'b', t, R_P_l, 'r') plt.xlabel('Episodes')