Example #1
    def train(self):
        """
            1. Build the basic TensorFlow operators and the algorithm. Note that this step
               only "defines" and "constructs" the graph; no real training or computation
               happens here yet.
        """
        # First construct a dataflow graph
        temp_graph = tf.Graph()
        with temp_graph.as_default():
            # Define placeholders for the training dataset. Running train this way always
            # failed with:
            #   You must feed a value for placeholder tensor 'x' with dtype float and shape [?,?]
            # The cause: placeholders must be fed through feed_dict on every sess.run;
            # see the sketch after this example.
            # x = tf.placeholder(dtype=tf.float32, shape=[None, None], name='x')
            # y = tf.placeholder(dtype=tf.float32, shape=[None], name='y')

            # Define the trainable parameters w and b: weight is initialized with random
            # values between -1 and 1, bias is a variable initialized to 0
            weight = tf.Variable(tf.random_uniform([1, self.__x_train.shape[1]], -1.0, 1.0))
            bias = tf.Variable(tf.zeros([1]))

            # Define the binary-classification sigmoid model y = 1/(1+exp(-(w*x + b)))
            # y_pre = tf.div(1.0,
            #                tf.add(1.0,
            #                       tf.exp(tf.negative(tf.reduce_sum(tf.multiply(weight, self.__x_train),
            #                                                        1
            #                                                       ) + bias)
            #                             )
            #                      )
            #               )
            # Or just use tf's built-in sigmoid function directly
            y_pre = tf.sigmoid(tf.reduce_sum(tf.multiply(weight, self.__x_train), 1) + bias)

            # Define the loss as the averaged negative log-likelihood:
            #   (-y*log(y_pre) - (1-y)*log(1-y_pre)) / number of samples
            # Why this form? It comes down to the difference between least squares in linear
            # regression and maximum likelihood in logistic regression: least squares makes
            # the "error" between predictions and ground truth as small as possible, while
            # maximum likelihood makes the probability of the observed training samples as
            # large as possible. The log-likelihood below follows from that; for gradient
            # descent we simply negate it so it can be minimized.
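            # Derivation sketch: with labels y in {0, 1} and prediction p = y_pre, the
            # per-sample likelihood is P(y|x) = p^y * (1-p)^(1-y), so the log-likelihood
            # over m samples is sum_i[y_i*log(p_i) + (1-y_i)*log(1-p_i)]. Negating and
            # averaging gives the loss computed below (binary cross-entropy):
            #   L = -(1/m) * sum_i[y_i*log(p_i) + (1-y_i)*log(1-p_i)]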
            loss0 = self.__y_train * tf.log(y_pre)
            loss1 = (1 - self.__y_train) * tf.log(1 - y_pre)
            loss = tf.reduce_sum(- loss0 - loss1) / self.__x_train.shape[0]
            # Define the optimizer (gradient descent); the goal is to minimize the loss
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
            train = optimizer.minimize(loss)
            # Initialize the variables
            init = tf.global_variables_initializer()

        """
            2.正式训练
        """
        # 建立会话
        with tf.Session(graph=temp_graph) as sess:
            # Only now does real computation actually happen
            sess.run(init)
            print('Initial parameters: weight=', sess.run(weight), ', bias=', sess.run(bias))
            # Fit the model: run gradient descent 1000 times to obtain the best w and b
            for step in range(1000):
                sess.run(train)
                if step % 100 == 0:
                    print("第%u步:权重:%s,偏置:%f,损失:%f" %
                          (step, weight.eval(), bias.eval(), loss.eval()))
                self.__weight = weight.eval()
                self.__bias = bias.eval()
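
A minimal sketch of the placeholder approach the comments above gave up on: the error occurs because sess.run(train) is executed without feeding x and y via feed_dict. The shapes and the x_data/y_data arrays here are hypothetical and only illustrate the fix:

# Sketch (TF 1.x); x_data / y_data are hypothetical NumPy training arrays
import numpy as np
import tensorflow as tf

x_data = np.random.rand(100, 3).astype(np.float32)       # hypothetical features
y_data = (x_data.sum(axis=1) > 1.5).astype(np.float32)   # hypothetical 0/1 labels

graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(dtype=tf.float32, shape=[None, 3], name='x')
    y = tf.placeholder(dtype=tf.float32, shape=[None], name='y')
    weight = tf.Variable(tf.random_uniform([1, 3], -1.0, 1.0))
    bias = tf.Variable(tf.zeros([1]))
    y_pre = tf.sigmoid(tf.reduce_sum(tf.multiply(weight, x), 1) + bias)
    loss = tf.reduce_mean(-y * tf.log(y_pre) - (1 - y) * tf.log(1 - y_pre))
    train = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    init = tf.global_variables_initializer()

with tf.Session(graph=graph) as sess:
    sess.run(init)
    for step in range(1000):
        # Placeholders must be fed on every run; this is what the original code was missing
        sess.run(train, feed_dict={x: x_data, y: y_data})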
Example #2
from typing import Tuple

import tensorflow as tf  # TF 1.x (uses tf.log)


def tf_xywh_to_grid(all_true_xy: tf.Tensor, all_true_wh: tf.Tensor, layer: int,
                    h: Helper) -> Tuple[tf.Tensor, tf.Tensor]:
    """Convert ground-truth xy and wh from normalized image scale to grid scale.

    Parameters
    ----------
    all_true_xy : tf.Tensor
        box centers, normalized to the image
    all_true_wh : tf.Tensor
        box sizes, normalized to the image
    layer : int
        output layer index
    h : Helper
        helper holding out_hw, xy_offset, and anchors for each layer

    Returns
    -------
    Tuple[tf.Tensor, tf.Tensor]
        grid_true_xy, grid_true_wh, shape = [out_h, out_w, anchor_num, 2]
    """
    with tf.name_scope('xywh_to_grid_%d' % layer):
        # scale to grid units, then subtract each cell's offset -> cell-relative xy
        grid_true_xy = (all_true_xy *
                        h.out_hw[layer][::-1]) - h.xy_offset[layer]
        # express wh as a log-ratio against the layer's anchors
        grid_true_wh = tf.log(all_true_wh / h.anchors[layer])
    return grid_true_xy, grid_true_wh
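
Since both transforms are invertible, the reverse mapping follows directly from the two lines above. A minimal sketch of a hypothetical companion helper (not part of the original module), under the same Helper assumptions:

def tf_grid_to_xywh(grid_true_xy: tf.Tensor, grid_true_wh: tf.Tensor, layer: int,
                    h: Helper) -> Tuple[tf.Tensor, tf.Tensor]:
    """Hypothetical inverse of tf_xywh_to_grid: recover normalized xy and wh."""
    with tf.name_scope('grid_to_xywh_%d' % layer):
        # undo the offset subtraction and the grid scaling
        all_true_xy = (grid_true_xy + h.xy_offset[layer]) / h.out_hw[layer][::-1]
        # undo the log-ratio: wh = anchors * exp(grid_wh)
        all_true_wh = tf.exp(grid_true_wh) * h.anchors[layer]
    return all_true_xy, all_true_wh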
Example #3
    def _setup_actor_critic_loss(self, actor, critic, num_actions):
        # One-hot encoding of the actions actually taken in the batch
        actions_one_hot = tf.placeholder(tf.float32, [None, num_actions])

        # Probability the policy assigned to each taken action
        action_probability = tf.reduce_sum(actor * actions_one_hot, axis=1)

        # Apply a small floor before the log so it stays finite
        log_prob = tf.log(tf.maximum(action_probability, self._log_noise))
        # Advantage = return - baseline; stop_gradient keeps the actor loss
        # from backpropagating into the critic
        advantage = self._R - tf.stop_gradient(critic)
        # Sum of p*log(p) over actions (the negative of the policy entropy)
        entropy = tf.reduce_sum(tf.log(tf.maximum(actor, self._log_noise)) * actor, axis=1)

        # Policy-gradient loss plus an entropy bonus that encourages exploration
        actor_loss = -(tf.reduce_sum((log_prob * advantage), axis=0) + tf.reduce_sum((-1 * self._entropy_beta * entropy), axis=0))
        # Squared error between returns and the critic's value estimates
        critic_loss = tf.reduce_sum(tf.square(self._R - critic), axis=0)

        # Combined loss; the critic term is down-weighted by 0.5
        loss = 0.5 * critic_loss + actor_loss

        return loss, actions_one_hot
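
For context, a rough sketch of how this method might be wired into the agent's graph construction. The attribute values, the RMSProp choice, and the learning rate are all assumptions for illustration, not the original configuration:

        # Hypothetical wiring (TF 1.x); actor is a [batch, num_actions] softmax
        # policy and critic a [batch] value estimate, built elsewhere in the network
        self._R = tf.placeholder(tf.float32, [None])   # discounted returns, fed at train time
        self._log_noise = 1e-6                         # floor so log() never sees zero
        self._entropy_beta = 0.01                      # weight of the entropy bonus

        loss, actions_one_hot = self._setup_actor_critic_loss(actor, critic, num_actions)
        train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(loss)
        # sess.run(train_op, feed_dict={actions_one_hot: taken_actions_one_hot,
        #                               self._R: discounted_returns, ...})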