def _build_motion_graph(self): """ Motion graph updates poses using depth as input """ self.motion_net = MotionNetwork( self.cfg.MOTION, mode='global', # use global optimization mode is_training=False) images = self.images_placeholder[tf.newaxis] depths = self.depths_placeholder[tf.newaxis] poses = self.poses_placeholder[tf.newaxis] intrinsics = self.intrinsics_placeholder[tf.newaxis] edge_inds = tf.unstack(self.edges_placeholder, num=2, axis=-1) # convert pose matricies into SE3 object Ts = VideoSE3Transformation(matrix=poses) batch, num = Ts.shape() Ts, intrinsics = self.motion_net.forward( Ts, images, depths, intrinsics, inds=edge_inds, num_fixed=self.fixed_placeholder) # convert SE3 object back to matrix representation self.outputs['poses'] = tf.squeeze(Ts.matrix(), 0) self.outputs['intrinsics'] = intrinsics
class DeepV2D:
    def __init__(self, cfg, ckpt, is_calibrated=True, use_fcrn=False,
                 use_regressor=True, image_dims=None, mode='keyframe'):
        self.cfg = cfg
        self.ckpt = ckpt
        self.mode = mode

        self.use_fcrn = use_fcrn
        self.use_regressor = use_regressor
        self.is_calibrated = is_calibrated

        if image_dims is not None:
            self.image_dims = image_dims
        else:
            if cfg.STRUCTURE.MODE == 'concat':
                self.image_dims = [cfg.INPUT.FRAMES, cfg.INPUT.HEIGHT, cfg.INPUT.WIDTH]
            else:
                self.image_dims = [None, cfg.INPUT.HEIGHT, cfg.INPUT.WIDTH]

        self.outputs = {}
        self._create_placeholders()
        self._build_motion_graph()
        self._build_depth_graph()
        self._build_reprojection_graph()
        self._build_visibility_graph()
        self._build_point_cloud_graph()

        self.depths = []
        self.poses = []

        if self.use_fcrn:
            self._build_fcrn_graph()

        self.saver = tf.train.Saver(tf.model_variables())

    def set_session(self, sess):
        self.sess = sess
        sess.run(tf.global_variables_initializer())
        self.saver.restore(self.sess, self.ckpt)

        if self.use_fcrn:
            fcrn_vars = {}
            for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="FCRN"):
                fcrn_vars[var.name.replace('FCRN/', '').replace(':0', '')] = var

            fcrn_saver = tf.train.Saver(fcrn_vars)
            fcrn_saver.restore(sess, 'models/NYU_FCRN.ckpt')

    def _create_placeholders(self):
        frames, ht, wd = self.image_dims
        self.images_placeholder = tf.placeholder(tf.float32, [frames, ht, wd, 3])

        if self.mode == 'keyframe':
            self.depths_placeholder = tf.placeholder(tf.float32, [1, ht, wd])
        else:
            self.depths_placeholder = tf.placeholder(tf.float32, [frames, ht, wd])

        self.poses_placeholder = tf.placeholder(tf.float32, [frames, 4, 4])
        self.intrinsics_placeholder = tf.placeholder(tf.float32, [4])
        self.init_placeholder = tf.placeholder(tf.bool, [])

        # placeholders for storing graph adj_list and edges
        self.edges_placeholder = tf.placeholder(tf.int32, [None, 2])
        self.adj_placeholder = tf.placeholder(tf.int32, [None, None])

    def _build_motion_graph(self):
        self.motion_net = MotionNetwork(self.cfg.MOTION, mode=self.mode,
                                        use_regressor=self.use_regressor,
                                        is_calibrated=self.is_calibrated,
                                        is_training=False)

        images = self.images_placeholder[tf.newaxis]
        depths = self.depths_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        do_init = self.init_placeholder

        intrinsics = self.intrinsics_placeholder[tf.newaxis]
        edge_inds = tf.unstack(self.edges_placeholder, num=2, axis=-1)

        # convert pose matrices into an SE3 object
        Ts = VideoSE3Transformation(matrix=poses)

        Ts, intrinsics = self.motion_net.forward(Ts, images, depths, intrinsics,
                                                 edge_inds, init=do_init)

        self.outputs['poses'] = tf.squeeze(Ts.matrix(), 0)
        self.outputs['intrinsics'] = intrinsics[0]
        self.outputs['weights'] = self.motion_net.weights_history[-1]

    def _build_depth_graph(self):
        self.depth_net = DepthNetwork(self.cfg.STRUCTURE, is_training=False)
        images = self.images_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]

        # convert pose matrices into an SE3 object
        Ts = VideoSE3Transformation(matrix=poses)

        adj_list = None
        if self.mode == 'global':
            adj_list = self.adj_placeholder

        depths = self.depth_net.forward(Ts, images, intrinsics, adj_list)
        self.outputs['depths'] = depths

    def _build_point_cloud_graph(self):
        """ Use poses and depth maps to create point cloud """
        depths = self.depths_placeholder[tf.newaxis]
        images = self.images_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]
        intrinsics = intrinsics_vec_to_matrix(intrinsics)

        depths_pad = tf.pad(depths, [[0, 0], [0, 0], [0, 1], [0, 1]], "CONSTANT")

        depths_grad = \
            (depths_pad[:, :, 1:, :-1] - depths_pad[:, :, :-1, :-1])**2 + \
            (depths_pad[:, :, :-1, 1:] - depths_pad[:, :, :-1, :-1])**2

        # don't use large depths for point cloud and ignore boundary regions
        valid = (depths < 5.0) & (depths_grad < 0.01)

        # depths, intrinsics = rescale_depths_and_intrinsics(depths, intrinsics, downscale=4)
        batch, num, ht, wd = tf.unstack(tf.shape(depths), num=4)

        ii, jj = tf.meshgrid(tf.range(1), tf.range(0, num))
        ii = tf.reshape(ii, [-1])
        jj = tf.reshape(jj, [-1])

        Ts = VideoSE3Transformation(matrix=poses)
        X0 = projective_ops.backproject(depths, intrinsics)

        # transform point cloud into coordinate system defined by first frame
        X1 = (Ts.gather(ii) * Ts.gather(jj).inv())(X0)

        crop_h = 12
        crop_w = 32

        X1 = X1[:, :, crop_h:-crop_h, crop_w:-crop_w]
        valid = valid[:, :, crop_h:-crop_h, crop_w:-crop_w]
        images = images[:, :, crop_h:-crop_h, crop_w:-crop_w, ::-1]

        X1 = tf.reshape(X1, [-1, 3])
        colors = tf.reshape(images, [-1, 3])

        valid_inds = tf.where(tf.reshape(valid, [-1]))
        valid_inds = tf.reshape(valid_inds, [-1])

        X1 = tf.gather(X1, valid_inds, axis=0)
        colors = tf.gather(colors, valid_inds, axis=0)

        self.outputs['point_cloud'] = (X1, colors)

    def _build_reprojection_graph(self):
        """ Used to project depth from keyframes onto new frame """
        EPS = 1e-8
        depths = self.depths_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]

        batch, num, ht, wd = tf.unstack(tf.shape(depths), num=4)
        Ts = VideoSE3Transformation(matrix=poses)
        intrinsics = intrinsics_vec_to_matrix(intrinsics)

        ii, jj = tf.meshgrid(tf.range(0, num), tf.range(num, num + 1))
        ii = tf.reshape(ii, [-1])
        jj = tf.reshape(jj, [-1])

        Tij = Ts.gather(jj) * Ts.gather(ii).inv()
        X0 = projective_ops.backproject(depths, intrinsics)
        X1 = Tij(X0)

        coords = projective_ops.project(X1, intrinsics)
        depths = X1[..., 2]

        indices = tf.cast(coords[..., ::-1] + .5, tf.int32)
        indices = tf.reshape(indices, [-1, 2])
        depths = tf.reshape(depths, [-1])

        depth = tf.scatter_nd(indices, depths, [ht, wd])
        count = tf.scatter_nd(indices, tf.ones_like(depths), [ht, wd])

        depth = depth / (count + EPS)
        self.outputs['depth_reprojection'] = depth

    def _build_visibility_graph(self):
        """ Find induced optical flow between pairs of frames """
        depths = self.depths_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]

        Ts = VideoSE3Transformation(matrix=poses)
        ii, jj = tf.unstack(self.edges_placeholder, num=2, axis=-1)
        intrinsics = intrinsics_vec_to_matrix(intrinsics)

        depths, intrinsics = rescale_depths_and_intrinsics(depths, intrinsics, downscale=4)
        ht = tf.cast(tf.shape(depths)[2], tf.float32)
        wd = tf.cast(tf.shape(depths)[3], tf.float32)

        depths = tf.gather(depths, ii, axis=1)
        Tij = Ts.gather(jj) * Ts.gather(ii).inv()

        flow = Tij.induced_flow(depths, intrinsics)
        coords = Tij.transform(depths, intrinsics)

        flo_graph = tf.sqrt(tf.reduce_sum(flow**2, axis=-1))
        flo_graph = tf.reduce_mean(flo_graph, [-1, -2])

        contained = tf.to_float(
            (coords[..., 0] > 0.0) & (coords[..., 0] < wd) &
            (coords[..., 1] > 0.0) & (coords[..., 1] < ht))

        vis_graph = tf.reduce_mean(contained, [-1, -2])
        self.outputs['visibility'] = (flo_graph[0], vis_graph[0], flow)

    def _build_fcrn_graph(self):
        """ Build single image initialization graph """
        images = self.images_placeholder
        batch, ht, wd, _ = tf.unstack(tf.shape(images), num=4)

        with tf.variable_scope("FCRN") as scope:
            # crop out border and flip color channels
            fcrn_input = tf.image.resize_area(images[:, 4:-4, 6:-6, ::-1], [228, 304])

            net = fcrn.ResNet50UpProj({'data': fcrn_input}, batch, 1, False)
            fcrn_output = tf.stop_gradient(net.get_output())
            fcrn_output = tf.image.resize_bilinear(fcrn_output, [ht, wd])

        self.outputs['fcrn'] = tf.squeeze(fcrn_output, -1)

    def compute_visibility_matrix(self):
        """ Computes matrices of optical flow and visibility between all pairs of frames
        Ex. flo_matrix[i, j] is the mean optical flow between camera i and camera j
        Ex. vis_matrix[i, j] is the portion of points in camera i visible in camera j
        """
        num = len(self.images)
        ii, jj = np.meshgrid(np.arange(num), np.arange(num))
        ii = np.reshape(ii, [-1])
        jj = np.reshape(jj, [-1])
        edges = np.stack([jj, ii], axis=-1)

        feed_dict = {
            self.depths_placeholder: self.depths,
            self.poses_placeholder: self.poses,
            self.edges_placeholder: edges,
            self.intrinsics_placeholder: self.intrinsics
        }

        flo_graph, vis_graph, flow = self.sess.run(self.outputs['visibility'],
                                                   feed_dict=feed_dict)
        flo_matrix = flo_graph.reshape(num, num)
        vis_matrix = vis_graph.reshape(num, num)
        return flo_matrix, vis_matrix, flow

    def reproject_depth(self, query_pose):
        """ Use depth estimates and poses to estimate depth map at a new camera location """
        poses = np.concatenate([self.poses, query_pose[np.newaxis]], axis=0)

        feed_dict = {
            self.depths_placeholder: self.depths,
            self.poses_placeholder: poses,
            self.intrinsics_placeholder: self.intrinsics
        }

        depth = self.sess.run(self.outputs['depth_reprojection'], feed_dict=feed_dict)
        return fill_depth(depth)

    def deepv2d_init(self):
        if self.use_fcrn:
            if self.mode == 'keyframe':
                feed_dict = {self.images_placeholder: self.images[[0]]}
            else:
                feed_dict = {self.images_placeholder: self.images}

            self.depths = self.sess.run(self.outputs['fcrn'], feed_dict=feed_dict)

        else:
            if self.mode == 'keyframe':
                images = np.stack([self.images[0]] * self.images.shape[0], axis=0)
                poses = np.stack([np.eye(4)] * self.images.shape[0], axis=0)

                feed_dict = {
                    self.images_placeholder: images,
                    self.poses_placeholder: poses,
                    self.intrinsics_placeholder: self.intrinsics
                }

            else:
                ii = np.arange(self.images.shape[0])
                adj = np.stack([ii, ii], axis=-1)

                feed_dict = {
                    self.images_placeholder: self.images,
                    self.poses_placeholder: self.poses,
                    self.adj_placeholder: adj,
                    self.intrinsics_placeholder: self.intrinsics
                }

            self.depths = self.sess.run(self.outputs['depths'], feed_dict=feed_dict)

    def update_poses(self, itr=0):
        n = self.images.shape[0]

        if self.mode == 'keyframe':
            ii, jj = np.meshgrid(np.arange(1), np.arange(1, n))
        else:
            ii, jj = np.meshgrid(np.arange(n), np.arange(n))

        ii = ii.reshape(-1)
        jj = jj.reshape(-1)

        v = ~np.equal(ii, jj)  # don't use pairs with self loop
        edges = np.stack([ii[v], jj[v]], axis=-1)

        feed_dict = {
            self.images_placeholder: self.images,
            self.depths_placeholder: self.depths,
            self.poses_placeholder: self.poses,
            self.edges_placeholder: edges,
            self.init_placeholder: (itr == 0),
            self.intrinsics_placeholder: self.intrinsics
        }

        # execute pose subgraph
        outputs = [self.outputs['poses'], self.outputs['intrinsics'], self.outputs['weights']]
        self.poses, self.intrinsics, self.weights = self.sess.run(outputs, feed_dict=feed_dict)

        if not self.cfg.MOTION.IS_CALIBRATED:
            print("intrinsics (fx, fy, cx, cy): ", self.intrinsics)

    def update_depths(self, itr=0):
        n = self.images.shape[0]
        inds_list = []

        if self.mode == 'keyframe':
            feed_dict = {
                self.images_placeholder: self.images,
                self.poses_placeholder: self.poses,
                self.intrinsics_placeholder: self.intrinsics
            }

            self.depths = self.sess.run(self.outputs['depths'], feed_dict=feed_dict)

        else:
            for i in range(n):
                inds = np.arange(n).tolist()
                inds.remove(i)
                inds = [i] + inds
                inds_list.append(inds)

            adj_list = np.array(inds_list, dtype=np.int32)

            if n <= 4:
                feed_dict = {
                    self.images_placeholder: self.images,
                    self.poses_placeholder: self.poses,
                    self.adj_placeholder: adj_list,
                    self.intrinsics_placeholder: self.intrinsics
                }

                self.depths = self.sess.run(self.outputs['depths'], feed_dict=feed_dict)

            else:
                # we need to split up inference to fit in memory
                s = 2
                for i in range(0, n, s):
                    feed_dict = {
                        self.images_placeholder: self.images,
                        self.poses_placeholder: self.poses,
                        self.adj_placeholder: adj_list[i:i + s],
                        self.intrinsics_placeholder: self.intrinsics
                    }

                    self.depths[i:i + s] = self.sess.run(self.outputs['depths'],
                                                         feed_dict=feed_dict)

    def vizualize_output(self, inds=[0]):
        feed_dict = {
            self.images_placeholder: self.images,
            self.depths_placeholder: self.depths,
            self.poses_placeholder: self.poses,
            self.intrinsics_placeholder: self.intrinsics
        }

        keyframe_image = self.images[0]
        keyframe_depth = self.depths[0]

        image_depth = vis.create_image_depth_figure(keyframe_image, keyframe_depth)
        cv2.imwrite('depth.png', image_depth[:, image_depth.shape[1] // 2:])

        cv2.imshow('image_depth', image_depth / 255.0)
        print("Press any key to continue")
        cv2.waitKey()

        # use depth map to create point cloud
        point_cloud, point_colors = self.sess.run(self.outputs['point_cloud'],
                                                  feed_dict=feed_dict)

        print("Press q to exit")
        vis.visualize_prediction(point_cloud, point_colors, self.poses)

    def __call__(self, images, intrinsics=None, iters=5, viz=False):
        n_frames = len(images)
        self.images = np.stack(images, axis=0)

        if intrinsics is None:
            # initialize intrinsics from the stacked image dimensions
            fx = self.images.shape[2] * 1.2
            fy = self.images.shape[2] * 1.2
            cx = self.images.shape[2] / 2.0
            cy = self.images.shape[1] / 2.0
            intrinsics = np.stack([fx, fy, cx, cy])  # (fx, fy, cx, cy)

        self.intrinsics = intrinsics

        poses = np.eye(4).reshape(1, 4, 4)
        poses = np.tile(poses, [n_frames, 1, 1])
        self.poses = poses

        # initialize reconstruction
        self.deepv2d_init()

        for i in range(iters):
            self.update_poses(i)
            self.update_depths()

        if viz:
            self.vizualize_output()

        return self.depths, self.poses
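# --- Illustrative usage (not from the original source) -----------------------
# A minimal sketch of driving DeepV2D end-to-end on a short clip. The function
# name, the `image_files` argument, and the cfg/ckpt values are assumptions
# for illustration; only the DeepV2D class itself comes from this file.
def run_deepv2d_example(image_files, cfg, ckpt):
    # load frames with OpenCV (BGR uint8, all the same size)
    images = [cv2.imread(f) for f in image_files]

    deepv2d = DeepV2D(cfg, ckpt, use_fcrn=True, mode='keyframe')
    with tf.Session() as sess:
        deepv2d.set_session(sess)
        # intrinsics=None falls back to the heuristic (fx, fy, cx, cy) init
        depths, poses = deepv2d(images, iters=5)

    return depths, poses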
class DeepV2DSLAM:
    def __init__(self, cfg, ckpt, n_keyframes=1, rate=2, use_fcrn=True,
                 viz=True, mode='global', image_dims=[None, 480, 640]):
        self.cfg = cfg
        self.ckpt = ckpt
        self.viz = viz
        self.mode = mode
        self.use_fcrn = use_fcrn
        self.image_dims = image_dims

        self.index = 0
        self.keyframe_inds = []
        self.images = []
        self.depths = []
        self.poses = []

        # tracking config parameters
        self.n_keyframes = n_keyframes  # number of keyframes to use
        self.rate = rate                # how often to sample new frames
        self.window = 3                 # add edges if frames are within distance

        # build tensorflow graphs
        self.outputs = {}
        self._create_placeholders()
        self._build_motion_graph()
        self._build_depth_graph()
        self._build_reprojection_graph()
        self._build_visibility_graph()
        self._build_point_cloud_graph()

        if self.use_fcrn:
            self._build_fcrn_graph()

        self.saver = tf.train.Saver(tf.model_variables())

    def set_session(self, sess):
        self.sess = sess
        self.saver.restore(self.sess, self.ckpt)

        if self.use_fcrn:
            fcrn_vars = {}
            for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="FCRN"):
                fcrn_vars[var.name.replace('FCRN/', '').replace(':0', '')] = var

            fcrn_saver = tf.train.Saver(fcrn_vars)
            fcrn_saver.restore(sess, 'models/NYU_FCRN.ckpt')

    def start_visualization(self, cinematic=False, render_path=None, clear_points=False):
        """ Start interactive slam visualization in separate process """
        # new points and poses get added to the queue
        self.queue = Queue()
        self.vis_counter = 0

        self.viz = vis.InteractiveViz(self.queue, cinematic, render_path, clear_points)
        self.viz.start()

    def _create_placeholders(self):
        frames, ht, wd = self.image_dims
        self.images_placeholder = tf.placeholder(tf.float32, [frames, ht, wd, 3])

        if self.mode == 'keyframe':
            self.depths_placeholder = tf.placeholder(tf.float32, [1, ht, wd])
        else:
            self.depths_placeholder = tf.placeholder(tf.float32, [frames, ht, wd])

        self.poses_placeholder = tf.placeholder(tf.float32, [frames, 4, 4])
        self.intrinsics_placeholder = tf.placeholder(tf.float32, [4])

        # placeholders for storing graph adj_list and edges
        self.edges_placeholder = tf.placeholder(tf.int32, [None, 2])
        self.adj_placeholder = tf.placeholder(tf.int32, [None, None])

        self.fixed_placeholder = tf.placeholder(tf.int32, [])
        self.init_placeholder = tf.placeholder(tf.bool, [])

    def _build_motion_graph(self):
        """ Motion graph updates poses using depth as input """
        self.motion_net = MotionNetwork(self.cfg.MOTION,
                                        mode='global',  # use global optimization mode
                                        is_training=False)

        images = self.images_placeholder[tf.newaxis]
        depths = self.depths_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]
        edge_inds = tf.unstack(self.edges_placeholder, num=2, axis=-1)

        # convert pose matrices into SE3 object
        Ts = VideoSE3Transformation(matrix=poses)
        batch, num = Ts.shape()

        Ts, intrinsics = self.motion_net.forward(Ts, images, depths, intrinsics,
                                                 inds=edge_inds,
                                                 num_fixed=self.fixed_placeholder)

        # convert SE3 object back to matrix representation
        self.outputs['poses'] = tf.squeeze(Ts.matrix(), 0)
        self.outputs['intrinsics'] = intrinsics

    def _build_depth_graph(self):
        """ Depth graph updates depth using poses as input """
        self.depth_net = DepthNetwork(self.cfg.STRUCTURE, is_training=False)
        images = self.images_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]
        Ts = VideoSE3Transformation(matrix=poses)

        adj_list = None
        if self.mode == 'global':
            adj_list = self.adj_placeholder

        depths = self.depth_net.forward(Ts, images, intrinsics, adj_list)
        self.outputs['depths'] = depths

    def _build_visibility_graph(self):
        depths = self.depths_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]

        Ts = VideoSE3Transformation(matrix=poses)
        ii, jj = tf.unstack(self.edges_placeholder, num=2, axis=-1)
        intrinsics = intrinsics_vec_to_matrix(intrinsics)

        depths, intrinsics = rescale_depths_and_intrinsics(depths, intrinsics, downscale=4)
        ht = tf.cast(tf.shape(depths)[2], tf.float32)
        wd = tf.cast(tf.shape(depths)[3], tf.float32)

        depths = tf.gather(depths, ii, axis=1)
        Tij = Ts.gather(jj) * Ts.gather(ii).inv()

        flow = Tij.induced_flow(depths, intrinsics)
        coords = Tij.transform(depths, intrinsics)

        # translation only (note: rotation_mask is defined for this purpose but
        # is not currently passed to induced_flow, so flow_translation == flow)
        rotation_mask = [1.0, 1.0, 1.0, 0.0, 0.0, 0.0]
        flow_translation = Tij.induced_flow(depths, intrinsics)

        flo_graph = tf.sqrt(tf.reduce_sum(flow**2, axis=-1))
        flo_graph = tf.reduce_mean(flo_graph, [-1, -2])

        pos_graph = tf.sqrt(tf.reduce_sum(flow_translation**2, axis=-1))
        pos_graph = tf.reduce_mean(pos_graph, [-1, -2])

        contained = tf.to_float(
            (coords[..., 0] > 0.0) & (coords[..., 0] < wd) &
            (coords[..., 1] > 0.0) & (coords[..., 1] < ht))

        vis_graph = tf.reduce_mean(contained, [-1, -2])
        self.outputs['visibility'] = (flo_graph[0], vis_graph[0])

    def _build_fcrn_graph(self):
        """ Build single image initialization graph """
        images = self.images_placeholder
        batch, ht, wd, _ = tf.unstack(tf.shape(images), num=4)

        with tf.variable_scope("FCRN") as scope:
            # crop out border and flip color channels
            fcrn_input = tf.image.resize_area(images[:, 4:-4, 6:-6, ::-1], [228, 304])

            net = fcrn.ResNet50UpProj({'data': fcrn_input}, batch, 1, False)
            fcrn_output = tf.stop_gradient(net.get_output())
            fcrn_output = tf.image.resize_bilinear(fcrn_output, [ht, wd])

        self.outputs['fcrn'] = tf.squeeze(fcrn_output, -1)

    def compute_visibility_graph(self, edges=None):
        """ Computes matrices of optical flow and visibility between all pairs of frames
        Ex. flo_matrix[i, j] is the mean optical flow between camera i and camera j
        Ex. vis_matrix[i, j] is the portion of points in camera i visible in camera j
        """
        vis_matrix = False
        if edges is None:
            num = len(self.keyframe_images)
            vis_matrix = True
            ii, jj = np.meshgrid(np.arange(num), np.arange(num))
            ii = np.reshape(ii, [-1])
            jj = np.reshape(jj, [-1])
            edges = np.stack([jj, ii], axis=-1)

        feed_dict = {
            self.depths_placeholder: np.stack(self.keyframe_depths, axis=0),
            self.poses_placeholder: np.stack(self.keyframe_poses, axis=0),
            self.edges_placeholder: edges,
            self.intrinsics_placeholder: self.intrinsics
        }

        flo_graph, pos_graph = self.sess.run(self.outputs['visibility'],
                                             feed_dict=feed_dict)

        if vis_matrix:
            flo_matrix = flo_graph.reshape(num, num)
            pos_matrix = pos_graph.reshape(num, num)
            return flo_matrix, pos_matrix

        return flo_graph, pos_graph

    def _build_point_cloud_graph(self):
        """ Use poses and depth maps to create point cloud """
        depths = self.depths_placeholder[tf.newaxis]
        images = self.images_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]
        intrinsics = intrinsics_vec_to_matrix(intrinsics)

        depths_pad = tf.pad(depths, [[0, 0], [0, 0], [0, 1], [0, 1]], "CONSTANT")

        depths_grad = \
            (depths_pad[:, :, 1:, :-1] - depths_pad[:, :, :-1, :-1])**2 + \
            (depths_pad[:, :, :-1, 1:] - depths_pad[:, :, :-1, :-1])**2

        # don't use large depths for point cloud and ignore boundary regions
        valid = (depths < 6.0) & (depths_grad < 0.05)

        batch, num, ht, wd = tf.unstack(tf.shape(depths), num=4)

        Ts = VideoSE3Transformation(matrix=poses)
        X0 = projective_ops.backproject(depths, intrinsics)

        # transform point cloud into world coordinates
        X1 = Ts.inv()(X0)

        crop_h0 = 20
        crop_h1 = 12
        crop_w = 32

        X1 = X1[:, :, crop_h0:-crop_h1, crop_w:-crop_w]
        valid = valid[:, :, crop_h0:-crop_h1, crop_w:-crop_w]
        images = images[:, :, crop_h0:-crop_h1, crop_w:-crop_w, ::-1]

        X1 = tf.reshape(X1, [-1, 3])
        colors = tf.reshape(images, [-1, 3])

        valid_inds = tf.where(tf.reshape(valid, [-1]))
        valid_inds = tf.reshape(valid_inds, [-1])

        X1 = tf.gather(X1, valid_inds, axis=0)
        colors = tf.gather(colors, valid_inds, axis=0)

        self.outputs['point_cloud'] = (X1, colors)

    def _build_reprojection_graph(self):
        """ Used to project depth from keyframes onto new frame """
        EPS = 1e-8
        depths = self.depths_placeholder[tf.newaxis]
        poses = self.poses_placeholder[tf.newaxis]
        intrinsics = self.intrinsics_placeholder[tf.newaxis]

        batch, num, ht, wd = tf.unstack(tf.shape(depths), num=4)
        Ts = VideoSE3Transformation(matrix=poses)
        intrinsics = intrinsics_vec_to_matrix(intrinsics)

        ii, jj = tf.meshgrid(tf.range(0, num), tf.range(num, num + 1))
        ii = tf.reshape(ii, [-1])
        jj = tf.reshape(jj, [-1])

        Tij = Ts.gather(jj) * Ts.gather(ii).inv()
        X0 = projective_ops.backproject(depths, intrinsics)
        X1 = Tij(X0)

        coords = projective_ops.project(X1, intrinsics)
        depths = X1[..., 2]

        indices = tf.cast(coords[..., ::-1] + .5, tf.int32)
        indices = tf.reshape(indices, [-1, 2])
        depths = tf.reshape(depths, [-1])

        depth = tf.scatter_nd(indices, depths, [ht, wd])
        count = tf.scatter_nd(indices, tf.ones_like(depths), [ht, wd])

        depth = depth / (count + EPS)
        self.outputs['depth_reprojection'] = depth

    def reproject_depth(self, query_pose, margin=2):
        """ Use depth estimates and poses to estimate depth map at a new camera location """
        keyframe_pose = self.poses[self.keyframe_inds[-1]]
        poses = np.stack([keyframe_pose, query_pose], axis=0)

        keyframe_depth = self.depths[self.keyframe_inds[-1]]
        depths = keyframe_depth[np.newaxis]

        feed_dict = {
            self.depths_placeholder: depths,
            self.poses_placeholder: poses,
            self.intrinsics_placeholder: self.intrinsics
        }

        depth = self.sess.run(self.outputs['depth_reprojection'], feed_dict=feed_dict)
        return fill_depth(depth)

    def deepv2d_init(self):
        if self.use_fcrn:
            feed_dict = {
                self.images_placeholder: np.stack(self.images, axis=0)
            }
            depths_init = self.sess.run(self.outputs['fcrn'], feed_dict=feed_dict)

        else:
            ii = np.arange(len(self.images))
            adj = np.stack([ii, ii], axis=-1)

            feed_dict = {
                self.images_placeholder: np.stack(self.images, axis=0),
                self.poses_placeholder: np.stack(self.poses, axis=0),
                self.adj_placeholder: adj,
                self.intrinsics_placeholder: self.intrinsics
            }
            depths_init = self.sess.run(self.outputs['depths'], feed_dict=feed_dict)

        self.depths = [depth for depth in depths_init]

    def update_poses(self, fixed=1, margin=3):
        """ Update the poses by executing the motion graph, fix first keyframe """
        n_images = len(self.images)
        start_idx = max(self.keyframe_inds[0] - margin, 0)

        edges = []
        for i in self.keyframe_inds:
            for j in range(start_idx, n_images):
                if (i != j) and (abs(i - j) <= self.window):
                    edges.append((i, j))

        edges = np.stack(edges, axis=0) - start_idx

        images = np.stack(self.images[start_idx:], axis=0)
        depths = np.stack(self.depths[start_idx:], axis=0)
        poses = np.stack(self.poses[start_idx:], axis=0)

        if not fixed:
            fixed = 0

        feed_dict = {
            self.images_placeholder: images,
            self.depths_placeholder: depths,
            self.poses_placeholder: poses,
            self.edges_placeholder: edges,
            self.fixed_placeholder: np.int32(fixed),
            self.init_placeholder: False,
            self.intrinsics_placeholder: self.intrinsics
        }

        # execute pose subgraph
        poses = self.sess.run(self.outputs['poses'], feed_dict=feed_dict)

        # update the poses
        for j in range(poses.shape[0]):
            self.poses[start_idx + j] = poses[j]

        self.pose_cur = self.poses[-1]

    def update_depths(self, fixed=1, margin=3):
        """ Update the depths by executing the depth graph """
        n_images = len(self.images)
        start_idx = max(self.keyframe_inds[0] - margin, 0)

        # faster if we batch multiple depth updates together
        inds = self.keyframe_inds
        if fixed and len(self.keyframe_inds) > 1:
            inds = inds[fixed:]  # fix depth for first keyframe

        adj_list = []
        for i in inds:
            adj_inds = []
            for j in range(start_idx, n_images):
                if (i != j) and (abs(i - j) <= self.window):
                    adj_inds.append(j)

            # make sure all adj lists are the same size
            if len(adj_inds) < 2 * self.window:
                adj_inds = np.random.choice(adj_inds, 2 * self.window,
                                            replace=True).tolist()

            adj_inds = [i] + adj_inds
            adj_list.append(np.array(adj_inds, dtype=np.int32))

        adj_list = np.stack(adj_list, axis=0) - start_idx

        images = np.stack(self.images[start_idx:], axis=0)
        poses = np.stack(self.poses[start_idx:], axis=0)

        feed_dict = {
            self.images_placeholder: images,
            self.poses_placeholder: poses,
            self.adj_placeholder: adj_list,
            self.intrinsics_placeholder: self.intrinsics,
        }

        depths = self.sess.run(self.outputs['depths'], feed_dict=feed_dict)

        # update the keyframe depths
        for i, keyframe_index in enumerate(inds):
            self.depths[keyframe_index] = depths[i]

    def visualize_output(self, keyframe_index):
        """ Backproject a point cloud then add point cloud to visualization """
        self.vis_counter += 1

        keyframe_image = self.images[keyframe_index]
        keyframe_depth = self.depths[keyframe_index]
        keyframe_pose = self.poses[keyframe_index]

        feed_dict = {
            self.images_placeholder: keyframe_image[np.newaxis],
            self.depths_placeholder: keyframe_depth[np.newaxis],
            self.poses_placeholder: keyframe_pose[np.newaxis],
            self.intrinsics_placeholder: self.intrinsics
        }

        keyframe_point_cloud, keyframe_point_colors = \
            self.sess.run(self.outputs['point_cloud'], feed_dict=feed_dict)

        pointcloud = (keyframe_point_cloud, keyframe_point_colors)

        # only add the point cloud once every 4 frames
        if self.vis_counter % 4 == 0:
            self.queue.put((pointcloud, keyframe_pose))
        else:
            self.queue.put((None, keyframe_pose))

    def display_keyframes(self):
        """ display image / depth keyframe pairs """
        if len(self.keyframe_inds) > 0:
            image_stack = []
            for keyframe_index in self.keyframe_inds:
                keyframe_image = self.images[keyframe_index]
                keyframe_depth = self.depths[keyframe_index]

                image_and_depth = vis.create_image_depth_figure(keyframe_image,
                                                                keyframe_depth)
                image_stack.append(image_and_depth)

            image_stack = np.concatenate(image_stack, axis=0)
            if len(self.keyframe_inds) > 1:
                image_stack = cv2.resize(image_stack, None, fx=0.5, fy=0.5)

            cv2.imshow('keyframes', image_stack / 255.0)
            cv2.waitKey(10)

    def track(self, image):
        """ track the new frame """
        keyframe_image = self.images[self.keyframe_inds[-1]]
        images = np.stack([keyframe_image, image], axis=0)

        keyframe_pose = self.poses[self.keyframe_inds[-1]]
        poses = np.stack([keyframe_pose, self.pose_cur], axis=0)

        keyframe_depth = self.depths[self.keyframe_inds[-1]]
        depths = keyframe_depth[np.newaxis]

        edges = np.array([[0, 1]], dtype=np.int32)
        fixed = np.int32(0)

        feed_dict = {
            self.images_placeholder: images,
            self.depths_placeholder: depths,
            self.poses_placeholder: poses,
            self.edges_placeholder: edges,
            self.fixed_placeholder: fixed,
            self.init_placeholder: False,
            self.intrinsics_placeholder: self.intrinsics
        }

        updated_poses = self.sess.run(self.outputs['poses'], feed_dict=feed_dict)

        # relative pose between keyframe and new pose
        dP = np.matmul(updated_poses[1], np.linalg.inv(updated_poses[0]))

        # tracking probably lost, attempt recovery; sometimes caused by gaps between frames
        if pose_distance(dP) > 0.8:
            feed_dict = {
                self.images_placeholder: images,
                self.depths_placeholder: depths,
                self.poses_placeholder: poses,
                self.edges_placeholder: edges,
                self.fixed_placeholder: fixed,
                self.init_placeholder: True,
                self.intrinsics_placeholder: self.intrinsics
            }

            updated_poses = self.sess.run(self.outputs['poses'], feed_dict=feed_dict)
            dP = np.matmul(updated_poses[1], np.linalg.inv(updated_poses[0]))

        self.pose_cur = np.matmul(dP, keyframe_pose)
        return pose_distance(dP)

    def __call__(self, image, intrinsics=None):
        if intrinsics is not None:
            self.intrinsics = intrinsics

        ht, wd, _ = image.shape  # get image dimensions
        did_make_new_keyframe = False

        if len(self.images) < 4:  # tracking has not yet begun
            if self.index % self.rate == 0:
                self.images.append(image)
                self.depths.append(np.ones((ht, wd)))
                self.poses.append(np.eye(4))

            # initialize the tracker!
            if len(self.images) == 4:
                self.deepv2d_init()

                # set the keyframes
                self.keyframe_inds = np.random.randint(0, 4, self.n_keyframes)
                self.keyframe_inds = sorted(self.keyframe_inds.tolist())

                for i in range(3):
                    self.update_poses(fixed=False)
                    self.update_depths(fixed=False)

        else:
            dist = self.track(image)

            if dist > 0.8:
                new_keyframe_index = len(self.images) - 1
                query_pose = self.poses[new_keyframe_index]

                depth_new = self.reproject_depth(query_pose)
                self.depths[new_keyframe_index] = depth_new
                self.keyframe_inds.append(new_keyframe_index)

                if len(self.keyframe_inds) > self.n_keyframes:
                    old_keyframe_index = self.keyframe_inds.pop(0)
                    self.visualize_output(old_keyframe_index)

                self.update_poses(fixed=2)
                self.update_depths()

            if self.index % self.rate == 0 and (dist > 0.1):
                self.images.append(image)
                self.depths.append(np.ones((ht, wd)))
                self.poses.append(self.pose_cur)

                self.update_poses(fixed=2)
                self.update_depths()

                # make a new keyframe
                if len(self.images) - self.keyframe_inds[-1] >= self.window:
                    new_keyframe_index = self.keyframe_inds[-1] + 2
                    query_pose = self.poses[new_keyframe_index]

                    depth_new = self.reproject_depth(query_pose)
                    self.depths[new_keyframe_index] = depth_new
                    self.keyframe_inds.append(new_keyframe_index)

                    if len(self.keyframe_inds) > self.n_keyframes:
                        old_keyframe_index = self.keyframe_inds.pop(0)
                        self.visualize_output(old_keyframe_index)

                    self.update_poses(fixed=2)
                    self.update_depths()

        self.display_keyframes()
        self.index += 1
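# --- Illustrative usage (not from the original source) -----------------------
# A minimal sketch of feeding a frame stream into the SLAM front end. The
# `frame_stream` iterable and the intrinsics vector (fx, fy, cx, cy) are
# assumptions for illustration; only DeepV2DSLAM itself comes from this file.
def run_slam_example(frame_stream, intrinsics, cfg, ckpt):
    slam = DeepV2DSLAM(cfg, ckpt, n_keyframes=2)

    with tf.Session() as sess:
        slam.set_session(sess)
        slam.start_visualization()

        # each call tracks the frame, updates keyframes, and refreshes the display
        for image in frame_stream:
            slam(image, intrinsics)

    return slam.poses, slam.depths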
def build_train_graph_stage2(self, cfg, num_gpus=1):
    with tf.name_scope("training_schedule"):
        global_step = tf.Variable(0, name='global_step', trainable=False)
        gs = tf.to_float(global_step)

        if cfg.TRAIN.RENORM:
            rmax = tf.clip_by_value(5.0 * (gs / 2.5e4) + 1.0, 1.0, 5.0)  # rmax schedule
            dmax = tf.clip_by_value(8.0 * (gs / 2.5e4), 0.0, 8.0)        # dmax schedule
            rmin = 1.0 / rmax
            schedule = {'rmax': rmax, 'rmin': rmin, 'dmax': dmax}
        else:
            schedule = None

        LR_DECAY = int(0.8 * self.training_steps)
        lr = tf.train.exponential_decay(cfg.TRAIN.LR, global_step, LR_DECAY,
                                        0.2, staircase=True)

        stereo_optim = tf.train.RMSPropOptimizer(lr)
        motion_optim = tf.train.RMSPropOptimizer(MOTION_LR_FRACTION * lr)

    id_batch, images_batch, poses_batch, gt_batch, filled_batch, pred_batch, \
        intrinsics_batch = self.dl.next()

    images_batch = tf.split(images_batch, num_gpus)
    poses_batch = tf.split(poses_batch, num_gpus)
    gt_batch = tf.split(gt_batch, num_gpus)
    filled_batch = tf.split(filled_batch, num_gpus)
    pred_batch = tf.split(pred_batch, num_gpus)
    intrinsics_batch = tf.split(intrinsics_batch, num_gpus)

    tower_motion_grads = []
    tower_stereo_grads = []
    tower_predictions = []
    tower_losses = []
    write_ops = []

    for gpu_id in range(num_gpus):
        motion_net = MotionNetwork(cfg.MOTION, reuse=gpu_id > 0)
        depth_net = DepthNetwork(cfg.STRUCTURE, schedule=schedule, reuse=gpu_id > 0)

        images = images_batch[gpu_id]
        poses = poses_batch[gpu_id]
        depth_gt = gt_batch[gpu_id]
        depth_filled = filled_batch[gpu_id]
        depth_pred = pred_batch[gpu_id]
        intrinsics = intrinsics_batch[gpu_id]

        Gs = VideoSE3Transformation(matrix=poses)
        batch, frames, height, width, _ = images.get_shape().as_list()

        with tf.name_scope("depth_input"):
            input_prob = tf.train.exponential_decay(2.0, global_step, LR_DECAY,
                                                    0.02, staircase=False)
            rnd = tf.random_uniform([], 0, 1)
            depth_input = tf.cond(rnd < input_prob,
                                  lambda: depth_filled,
                                  lambda: depth_pred)

        with tf.device('/gpu:%d' % gpu_id):
            # motion inference
            Ts, kvec = motion_net.forward(None, images,
                                          depth_input[:, tf.newaxis], intrinsics)

            stop_cond = global_step < cfg.TRAIN.GT_POSE_ITERS
            Ts = cond_transform(stop_cond, Ts.copy(stop_gradients=True), Ts)
            kvec = tf.cond(stop_cond, lambda: tf.stop_gradient(kvec), lambda: kvec)

            # depth inference
            depth_pr = depth_net.forward(Ts, images, kvec)

            depth_loss = depth_net.compute_loss(depth_gt, log_error=(gpu_id == 0))
            motion_loss = motion_net.compute_loss(Gs, depth_filled[:, tf.newaxis],
                                                  intrinsics, log_error=(gpu_id == 0))

            if 1:
                # compute all gradients jointly
                total_loss = cfg.TRAIN.DEPTH_WEIGHT * depth_loss + motion_loss
                var_list = tf.trainable_variables()
                grads = gradients(total_loss, var_list)

            else:
                # split backward pass
                motion_vars = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES,
                                                scope="motion")
                stereo_vars = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES,
                                                scope="stereo")

                so3, translation = Ts.so3, Ts.translation
                stereo_grads = gradients(depth_loss, [so3, translation] + stereo_vars)

                diff_so3, diff_translation, stereo_grads = \
                    stereo_grads[0], stereo_grads[1], stereo_grads[2:]

                motion_grads = tf.gradients(
                    [motion_loss, so3, translation], motion_vars,
                    grad_ys=[tf.ones_like(motion_loss), diff_so3, diff_translation])

                grads = stereo_grads + motion_grads
                var_list = stereo_vars + motion_vars

            motion_gvs = []
            stereo_gvs = []

            for (g, v) in zip(grads, var_list):
                if 'stereo' in v.name and (g is not None):
                    if cfg.TRAIN.CLIP_GRADS:
                        g = tf.clip_by_value(g, -1.0, 1.0)
                    stereo_gvs.append((g, v))

                if 'motion' in v.name and (g is not None):
                    if cfg.TRAIN.CLIP_GRADS:
                        g = tf.clip_by_value(g, -1.0, 1.0)
                    motion_gvs.append((g, v))

            tower_motion_grads.append(motion_gvs)
            tower_stereo_grads.append(stereo_gvs)
            tower_predictions.append(depth_pr)
            tower_losses.append(depth_loss)

            if gpu_id == 0:
                self.total_loss = depth_loss

    # use last gpu to compute batch norm statistics
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    tower_motion_gvs = average_gradients(tower_motion_grads)
    tower_stereo_gvs = average_gradients(tower_stereo_grads)

    with tf.name_scope("train_op"):
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(
                stereo_optim.apply_gradients(tower_stereo_gvs),
                motion_optim.apply_gradients(tower_motion_gvs),
                tf.assign(global_step, global_step + 1))

    self.write_op = self.dl.write(id_batch, tf.concat(tower_predictions, axis=0))
    self.total_loss = tf.reduce_mean(tf.stack(tower_losses, axis=0))

    tf.summary.scalar("total_loss", self.total_loss)
    tf.summary.scalar("learning_rate", lr)
    tf.summary.scalar("input_prob", input_prob)
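# --- Illustrative helper sketch (not from the original source) ---------------
# `average_gradients` above is imported from elsewhere in the repo; this is a
# minimal re-implementation of the standard multi-tower averaging idiom, shown
# only to document the expected input format: one list of (gradient, variable)
# pairs per GPU, with variables appearing in the same order across towers.
def average_gradients_sketch(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # stack the per-tower gradients for one variable and take the mean
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), 0)
        # all towers share the variable, so keep the first tower's reference
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads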
def build_train_graph_stage1(self, cfg, num_gpus=1):
    id_batch, images_batch, poses_batch, gt_batch, filled_batch, pred_batch, \
        intrinsics_batch = self.dl.next()

    images_batch = tf.split(images_batch, num_gpus)
    poses_batch = tf.split(poses_batch, num_gpus)
    gt_batch = tf.split(gt_batch, num_gpus)
    filled_batch = tf.split(filled_batch, num_gpus)
    pred_batch = tf.split(pred_batch, num_gpus)
    intrinsics_batch = tf.split(intrinsics_batch, num_gpus)

    with tf.name_scope("training_schedule"):
        global_step = tf.Variable(0, name='global_step', trainable=False)
        lr = tf.train.exponential_decay(cfg.TRAIN.LR, global_step, 5000,
                                        0.5, staircase=True)
        optim = tf.train.RMSPropOptimizer(MOTION_LR_FRACTION * lr)

    tower_grads = []
    tower_losses = []

    for gpu_id in range(num_gpus):
        images = images_batch[gpu_id]
        poses = poses_batch[gpu_id]
        depth_gt = gt_batch[gpu_id]
        depth_filled = filled_batch[gpu_id]
        depth_pred = pred_batch[gpu_id]
        intrinsics = intrinsics_batch[gpu_id]

        Gs = VideoSE3Transformation(matrix=poses)
        motion_net = MotionNetwork(cfg.MOTION, bn_is_training=True, reuse=gpu_id > 0)

        with tf.device('/gpu:%d' % gpu_id):
            depth_input = tf.expand_dims(depth_filled, 1)
            Ts, kvec = motion_net.forward(None, images, depth_input, intrinsics)

            total_loss = motion_net.compute_loss(Gs, depth_input, intrinsics,
                                                 log_error=(gpu_id == 0))
            tower_losses.append(total_loss)

            var_list = tf.trainable_variables()
            grads = gradients(total_loss, var_list)

            gvs = []
            for (g, v) in zip(grads, var_list):
                if g is not None:
                    if cfg.TRAIN.CLIP_GRADS:
                        g = tf.clip_by_value(g, -1.0, 1.0)
                    gvs.append((g, v))

            tower_grads.append(gvs)

    # use last gpu to compute batch norm statistics
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.name_scope("train_op"):
        gvs = average_gradients(tower_grads)
        total_loss = tf.reduce_mean(tf.stack(tower_losses, axis=0))

        with tf.control_dependencies(update_ops):
            self.train_op = optim.apply_gradients(gvs, global_step)

        self.write_op = None
        self.total_loss = total_loss

    tf.summary.scalar("learning_rate", lr)
    tf.summary.scalar("total_loss", total_loss)
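# --- Illustrative training-loop sketch (not from the original source) --------
# A minimal outline of how the stage-1 graph above might be driven; the
# `trainer` object (an instance exposing the methods above and a dataloader),
# the step count, and the logging interval are assumptions for illustration.
def run_stage1_example(trainer, cfg, num_gpus=1, steps=10000):
    trainer.build_train_graph_stage1(cfg, num_gpus=num_gpus)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(steps):
            _, loss = sess.run([trainer.train_op, trainer.total_loss])
            if step % 100 == 0:
                print("step %d, loss %f" % (step, loss))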