Example #1
    def build_dqn(self, params):
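        """Build the full TF1 graph: online and target DeepSense Q-networks,
        target-sync ops, the Q-learning loss with an RMSProp optimizer, and
        TensorBoard summary ops."""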
        with tf.variable_scope(PREDICTION):
            self.s_t = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.replay_memory.history_length,
                       self.replay_memory.num_channels],
                name=HISTORICAL_PRICES
            )
            self.trade_rem_t = tf.placeholder(
                dtype=tf.float32,
                shape=[None,],
                name=TRADE_REM
            )
            
            with tf.variable_scope(DROPOUT_KEEP_PROBS):
                self.q_conv_keep_prob = tf.placeholder(tf.float32)
                self.q_dense_keep_prob = tf.placeholder(tf.float32)
                self.q_gru_keep_prob = tf.placeholder(tf.float32)

        params.dropoutkeepprobs = DropoutKeepProbs(
            self.q_conv_keep_prob,
            self.q_dense_keep_prob,
            self.q_gru_keep_prob
        )
        self.q = DeepSense(params, self.logger, self.sess, self.config, name=Q_NETWORK)
        self.q.build_model((self.s_t, self.trade_rem_t))

        with tf.variable_scope(TARGET):
            self.t_s_t = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.replay_memory.history_length,
                       self.replay_memory.num_channels],
                name=HISTORICAL_PRICES
            )
            self.t_trade_rem_t = tf.placeholder(
                dtype=tf.float32,
                shape=[None,],
                name=TRADE_REM
            )

        params.dropoutkeepprobs = DropoutKeepProbs()
        self.t_q = DeepSense(params, self.logger, self.sess, self.config, name=T_Q_NETWORK)
        self.t_q.build_model((self.t_s_t, self.t_trade_rem_t))
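        # Note: the target network above gets a default DropoutKeepProbs(),
        # which presumably means keep probabilities of 1.0 (no dropout), since
        # the target network is never trained.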

        with tf.variable_scope(UPDATE_TARGET_NETWORK):
            self.q_weights_placeholders = {}
            self.t_weights_assign_ops = {}

            # For each weight in the online Q-network, create a placeholder and
            # an op that assigns the fed value into the matching target weight.
            for name in self.q.weights.keys():
                self.q_weights_placeholders[name] = tf.placeholder(
                    tf.float32, self.q.weights[name].get_shape().as_list())
                self.t_weights_assign_ops[name] = self.t_q.weights[name].assign(
                    self.q_weights_placeholders[name])

        with tf.variable_scope(TRAINING):
            self.target_q = tf.placeholder(tf.float32, [None], name=TARGET_Q)
            self.action = tf.placeholder(tf.int64, [None], name=ACTION)
            
            action_one_hot = tf.one_hot(self.action, self.config[NUM_ACTIONS],
                                        1.0, 0.0, name=ACTION_ONE_HOT)
            q_acted = tf.reduce_sum(self.q.values * action_one_hot,
                                    axis=1, name=Q_ACTED)
                                        
            with tf.variable_scope(LOSS):
                self.delta = self.target_q - q_acted

                self.global_step = tf.Variable(0, trainable=False)

                self.loss = tf.reduce_mean(clipped_error(self.delta), name=LOSS)

            with tf.variable_scope(OPTIMIZER):
                self.learning_rate_step = tf.placeholder(tf.int64, None, name=LEARNING_RATE_STEP)
                self.learning_rate_op = tf.maximum(self.learning_rate_minimum,
                    tf.train.exponential_decay(
                        self.learning_rate,
                        self.learning_rate_step,
                        self.learning_rate_decay_step,
                        self.learning_rate_decay,
                        staircase=True))

                self.optimizer = tf.train.RMSPropOptimizer(
                    self.learning_rate_op, momentum=0.95, epsilon=0.01).minimize(self.loss)

        with tf.variable_scope(SUMMARY):
            scalar_summary_tags = ['average.reward', 'average.loss', 'average.q',
                'episode.max reward', 'episode.min reward', 'episode.avg reward',
                'episode.num of episodes', 'training.learning_rate']

            self.summary_placeholders = {}
            self.summary_ops = {}

            for tag in scalar_summary_tags:
                self.summary_placeholders[tag] = \
                    tf.placeholder('float32', None, name=tag.replace(' ', '_'))
                self.summary_ops[tag] = \
                    tf.summary.scalar(
                        name="{}-{}".format(self.env_name, tag.replace(' ', '_')),
                        tensor=self.summary_placeholders[tag]
                    )

            histogram_summary_tags = ['episode.rewards', 'episode.actions']
            for tag in histogram_summary_tags:
                self.summary_placeholders[tag] = \
                    tf.placeholder('float32', None, name=tag)
                self.summary_ops[tag] = \
                    tf.summary.histogram(
                        tag,
                        self.summary_placeholders[tag]
                    )

        self.sess.run(tf.local_variables_initializer())
        self.sess.run(tf.global_variables_initializer())
        self._saver = tf.train.Saver(list(self.q.weights.values()) + [self.step_op],
                                     max_to_keep=30)
        
        self.load_model()
        self.update_target_network()

        self._summary_writer = tf.summary.FileWriter(self.config[TENSORBOARD_LOG_DIR])
        self._summary_writer.add_graph(self.sess.graph)
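
The helpers referenced above (clipped_error, load_model, update_target_network) are defined elsewhere in the repository; in DQN implementations, clipped_error is typically a Huber-style loss that bounds the gradient of large TD errors. For orientation, update_target_network presumably runs the assign ops built in the UPDATE_TARGET_NETWORK scope, feeding the current online-network weights through their placeholders. A minimal sketch, assuming only the attributes created in this method:

    # Sketch only; not taken from the source repository.
    def update_target_network(self):
        for name in self.q.weights.keys():
            # Fetch the current value of the online-network weight...
            weight = self.sess.run(self.q.weights[name])
            # ...and feed it into the corresponding target-network assign op.
            self.sess.run(self.t_weights_assign_ops[name],
                          feed_dict={self.q_weights_placeholders[name]: weight})
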
Example #2
    def build_dqn(self, params):
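        """Variant of build_dqn that feeds only the price-history input (no
        trade-remaining placeholder or dropout controls) and builds the
        target network with train=False."""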
        with tf.variable_scope(PREDICTION):
            self.s_t = tf.placeholder(dtype=tf.float32,
                                      shape=[
                                          None,
                                          self.replay_memory.history_length,
                                          self.replay_memory.num_channels
                                      ])
        self.q = DeepSense(params,
                           self.logger,
                           self.sess,
                           self.config,
                           name=Q_NETWORK)
        self.q.build_model(self.s_t)

        with tf.variable_scope(TARGET):
            self.t_s_t = tf.placeholder(dtype=tf.float32,
                                        shape=[
                                            None,
                                            self.replay_memory.history_length,
                                            self.replay_memory.num_channels
                                        ])
        self.t_q = DeepSense(params,
                             self.logger,
                             self.sess,
                             self.config,
                             name=T_Q_NETWORK)
        self.t_q.build_model(self.t_s_t, train=False)
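        # train=False presumably builds the target network without
        # training-only behaviour such as dropout.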

        with tf.variable_scope(UPDATE_TARGET_NETWORK):
            self.q_weights_placeholders = {}
            self.t_weights_assign_ops = {}

            # For each weight in the online Q-network, create a placeholder and
            # an op that assigns the fed value into the matching target weight.
            for name in self.q.weights.keys():
                self.q_weights_placeholders[name] = tf.placeholder(
                    tf.float32, self.q.weights[name].get_shape().as_list())
                self.t_weights_assign_ops[name] = self.t_q.weights[name].assign(
                    self.q_weights_placeholders[name])

        with tf.variable_scope(TRAINING):
            self.target_q = tf.placeholder(tf.float32, [None], name=TARGET_Q)
            self.action = tf.placeholder(tf.int64, [None], name=ACTION)

            action_one_hot = tf.one_hot(self.action,
                                        self.env.action_size,
                                        1.0,
                                        0.0,
                                        name=ACTION_ONE_HOT)
            q_acted = tf.reduce_sum(self.q.values * action_one_hot,
                                    axis=1,
                                    name=Q_ACTED)

            with tf.variable_scope(LOSS):
                self.delta = self.target_q - q_acted

                self.global_step = tf.Variable(0, trainable=False)

                self.loss = tf.reduce_mean(clipped_error(self.delta),
                                           name=LOSS)

            with tf.variable_scope(OPTIMIZER):
                self.learning_rate_step = tf.placeholder(
                    tf.int64, None, name=LEARNING_RATE_STEP)
                self.learning_rate_op = tf.maximum(
                    self.learning_rate_minimum,
                    tf.train.exponential_decay(self.learning_rate,
                                               self.learning_rate_step,
                                               self.learning_rate_decay_step,
                                               self.learning_rate_decay,
                                               staircase=True))

                self.optimizer = tf.train.RMSPropOptimizer(
                    self.learning_rate_op, momentum=0.95,
                    epsilon=0.01).minimize(self.loss)

        with tf.variable_scope(SUMMARY):
            scalar_summary_tags = ['average.reward', 'average.loss', 'average.q',
                'episode.max reward', 'episode.min reward', 'episode.avg reward',
                'episode.num of game', 'training.learning_rate']

            self.summary_placeholders = {}
            self.summary_ops = {}

            for tag in scalar_summary_tags:
                self.summary_placeholders[tag] = \
                    tf.placeholder('float32', None, name=tag.replace(' ', '_'))
                self.summary_ops[tag] = \
                    tf.summary.scalar(
                        name="{}-{}".format(self.env_name, tag),
                        tensor=self.summary_placeholders[tag]
                    )

            histogram_summary_tags = ['episode.rewards', 'episode.actions']
            for tag in histogram_summary_tags:
                self.summary_placeholders[tag] = \
                    tf.placeholder('float32', None, name=tag.replace(' ', '_'))
                self.summary_ops[tag] = \
                    tf.summary.histogram(
                        name=tag,
                        values=self.summary_placeholders[tag]
                    )

        self._summary_writer = tf.summary.FileWriter(
            self.config[TENSORBOARD_LOG_DIR])
        self._summary_writer.add_graph(self.sess.graph)

        self.sess.run(tf.global_variables_initializer())
        self._saver = tf.train.Saver(list(self.q.weights.values()) + [self.step_op],
                                     max_to_keep=30)

        self.load_model()
        self.update_target_network()
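
Neither example shows where target_q comes from. In standard DQN, which this graph follows, the target is r + gamma * max_a' Q_target(s', a'), computed with the target network and fed into the TRAINING placeholders. A minimal sketch against Example #2's graph, assuming numpy is imported as np, a self.discount attribute exists, and terminal is a 0/1 float array:

    # Sketch only; not taken from the source repository.
    def train_step(self, s_t, action, reward, s_t_plus_1, terminal, step):
        # Target-network Q-values for the successor states.
        q_t_plus_1 = self.sess.run(self.t_q.values,
                                   feed_dict={self.t_s_t: s_t_plus_1})
        max_q_t_plus_1 = np.max(q_t_plus_1, axis=1)
        # Bellman target; zero bootstrap at terminal states.
        target_q = reward + (1.0 - terminal) * self.discount * max_q_t_plus_1
        # One optimizer step; learning_rate_step drives the decay schedule.
        _, loss = self.sess.run(
            [self.optimizer, self.loss],
            feed_dict={
                self.s_t: s_t,
                self.action: action,
                self.target_q: target_q,
                self.learning_rate_step: step,
            })
        return loss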