Пример #1
0
    def load(self, path):
        """Restore the replay memory and its priority array from ``path``.

        The base ``ReplayMemory.load`` runs first; afterwards the priorities
        are read from ``<path>/priority.npy`` into ``self.priority``.

        Args:
            path: Directory that holds the saved replay-memory files.
        """
        ReplayMemory.load(self, path)

        # Clear the old value first (presumably so the previous array can be
        # freed before the new one is read); it stays None if loading fails.
        self.priority = None

        # .npy is a binary format, so open in 'rb'; the context manager
        # closes the handle even when np.load raises.
        with open(path + '/priority.npy', 'rb') as f:
            self.priority = np.load(f)
Пример #2
0
# TensorBoard monitor writing under ./log; register the summaries by name.
# NOTE(review): the second argument looks like a summary group name --
# confirm against TensorboardMonitor.
tensorboard_monitor = TensorboardMonitor('./log', sess, counter)
tensorboard_monitor.add_scalar_summary('score', 'per_game_summary')
tensorboard_monitor.add_scalar_summary('training_loss', 'training_summary')
# One histogram summary per index 0..3 ('Q0_training' .. 'Q3_training').
for i in range(4):
	tensorboard_monitor.add_histogram_summary('Q%d_training' % i, 'training_summary')

# Checkpoint recorder for the DQN and replay memory, targeting ./checkpoints.
checkpoint_monitor = CheckpointRecorder(dqn_agent.dqn, replay_memory, counter, './checkpoints', sess)
# agi gets both monitors as listeners; dqn_agent only the TensorBoard one.
agi.add_listener(checkpoint_monitor)
agi.add_listener(tensorboard_monitor)
dqn_agent.add_listener(tensorboard_monitor)

# Initialize all global TF variables before restoring the checkpoint below.
sess.run(tf.global_variables_initializer())

# Load the DQN and replay memory from the checkpoints labelled 7000000,
# then update the target network (presumably to sync it with the restored
# weights -- confirm in the agent implementation).
dqn_agent.dqn.restore('./checkpoints/dqn/7000000')
replay_memory.load('./checkpoints/replay_memory/7000000')
dqn_agent.update_target_network()

def run():
	"""Run learning episodes until ``counter.count`` reaches 50,000,000.

	Each iteration calls ``agi.learn()``, records the returned score with
	the TensorBoard monitor and prints a one-line progress report with the
	number of frames that elapsed during that call.
	"""
	cur_episode = 0
	# Frame count assumed at start-up; matches the '7000000' checkpoint
	# label used when restoring (presumably the resumed frame count).
	num_frames = 7000000
	while counter.count < 50000000:
		score = agi.learn()

		tensorboard_monitor.record({'score': score})

		# Frames consumed by this episode = counter delta since last report.
		elapsed_frames = counter.count - num_frames
		num_frames = counter.count
		# NOTE(review): `agent` is not defined in the visible code (only
		# `dqn_agent` is) -- confirm it exists at module level.
		# Parenthesised single-argument print works on Python 2 and 3.
		print("Episode %d:  Total Score = %d\t# Frames = %d\tTotal Frames = %d\tEpsilon: %f" % (cur_episode, score, elapsed_frames, num_frames, agent.epsilon))
		cur_episode += 1