# Perform one gradient-descent step on the online Q-network.
sess.run(dqn.trainStep,
         feed_dict={dqn.yInput: y_batch,
                    dqn.actionInput: action_batch,
                    dqn.currentQNet.stateInput: state_batch})

state = next_state

# Optional TensorBoard logging of the merged training summaries (currently disabled):
# if step % 100 == 0:
#     m, opt = sess.run([dqn.merged, dqn.trainStep],
#                       {dqn.yInput: y_batch, dqn.actionInput: action_batch,
#                        dqn.currentQNet.stateInput: state_batch})
#     summary_writer.add_summary(m, step)

# Periodic sync of the target network with the online network (currently disabled):
# if step % UPDATE_TIME == 0:
#     sess.run(dqn.copyCurrentToTargetOperation())

if game_over:
    # Reset the frame stack and record the finished game's score.
    frame_stack.empty()
    game += 1
    game_scores.append(score)

    # Periodically checkpoint the network to disk.
    if game % BACKUP_RATE == 0 and SAVE_NETWORK:
        saver.save(sess, 'saved_networks/' + ENVIRONMENT + '-dqn',
                   global_step=game)
        print('Network backup done')

    # Every 20 games, report the recent average score and log it to TensorBoard.
    if game % 20 == 0:
        print("The average score of the last 20 games is:",
              np.mean(game_scores[-20:]),
              " currently at game ", game, ", step ", step)
        summary_scores = sess.run(avg_Score_l20,
                                  {avg_Score_l20_plhldr: np.mean(game_scores[-20:])})
        summary_writer.add_summary(summary_scores, step)
        print("The average score of all games is:", np.mean(game_scores))
    # else:
    #     print('Game %s finished with score %s' % (game, score))
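# A minimal sketch of how the score-logging ops used above might be created.
# These definitions are assumptions (the actual setup code lies outside this
# excerpt); the names avg_Score_l20_plhldr, avg_Score_l20 and summary_writer
# simply mirror the ones referenced in the loop, using the standard TF 1.x
# summary API. They would run once, before the training loop starts:
#
#     avg_Score_l20_plhldr = tf.placeholder(tf.float32, name='avg_score_last_20')
#     avg_Score_l20 = tf.summary.scalar('avg_score_last_20', avg_Score_l20_plhldr)
#     summary_writer = tf.summary.FileWriter('logs/' + ENVIRONMENT, sess.graph)
#
# At logging time the scalar is evaluated with the current 20-game mean fed
# into the placeholder, and the resulting serialized summary is written with
# the global step so TensorBoard plots score against training progress.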