def add_training_inputs(model, roidb=None): """Create network input ops and blobs used for training. To be called *after* model_builder.create(). """ # Implementation notes: # Typically, one would create the input ops and then the rest of the net. # However, creating the input ops depends on loading the dataset, which # can take a few minutes for COCO. # We prefer to avoid waiting so debugging can fail fast. # Thus, we create the net *without input ops* prior to loading the # dataset, and then add the input ops after loading the dataset. # Since we defer input op creation, we need to do a little bit of surgery # to place the input ops at the start of the network op list. assert model.train, 'Training inputs can only be added to a trainable model' if roidb is not None: # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1 model.roi_data_loader = RoIDataLoader( roidb, num_loaders=cfg.DATA_LOADER.NUM_THREADS, minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE, blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY) orig_num_op = len(model.net._net.op) blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True) for gpu_id in range(cfg.NUM_GPUS): with c2_utils.NamedCudaScope(gpu_id): for blob_name in blob_names: workspace.CreateBlob(core.ScopedName(blob_name)) model.net.DequeueBlobs(model.roi_data_loader._blobs_queue_name, blob_names) # A little op surgery to move input ops to the start of the net diff = len(model.net._net.op) - orig_num_op new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff] del model.net._net.op[:] model.net._net.op.extend(new_op)
def __init__(
    self, roidb, num_loaders=4, minibatch_queue_size=64,
    blobs_queue_capacity=8
):
    """Set up loader state, shuffle the roidb, and spawn loader threads."""
    self._roidb = roidb
    self._num_loaders = num_loaders
    self._num_gpus = cfg.NUM_GPUS
    self._lock = threading.Lock()
    # Permutation over roidb indices, consumed via a moving cursor
    self._perm = deque(range(len(self._roidb)))
    self._cur = 0  # _perm cursor
    # The minibatch queue holds prepared training data in host (CPU)
    # memory. With N > 1 GPUs, each queued element is a *partial*
    # minibatch contributing 1 / N of the examples to the overall
    # minibatch.
    self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
    self._blobs_queue_capacity = blobs_queue_capacity
    # Random queue name so multiple RoIDataLoader instances don't collide
    self._loader_id = uuid.uuid4()
    self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
    self.coordinator = Coordinator()
    self._output_names = get_minibatch_blob_names()
    # Loader threads construct (partial) minibatches and put them on the
    # minibatch queue
    self._shuffle_roidb_inds()
    self.create_threads()
def create_model(weights_file):
    """Build, initialize, and start a trainable model on a single GPU.

    Adapted from utils.train.setup_model_for_training. Creates the net,
    attaches a RoIDataLoaderSimple over the training roidb, splices the
    dequeue input ops to the front of the net, loads weights from
    ``weights_file``, and starts the data-loader threads (with prefill).

    Returns the ready-to-train model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        # Optional memory optimization pass over the net
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
    model.roi_data_loader = RoIDataLoaderSimple(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    # Count ops before adding the input (dequeue) ops so we can find them
    orig_num_op = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    # Single-GPU setup: only gpu 0 gets input blobs and a dequeue op
    with c2_utils.NamedCudaScope(0):
        for blob_name in blob_names:
            workspace.CreateBlob(core.ScopedName(blob_name))
        model.net.DequeueBlobs(
            model.roi_data_loader._blobs_queue_name, blob_names
        )
    # A little op surgery to move input ops to the start of the net
    diff = len(model.net._net.op) - orig_num_op
    new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff]
    del model.net._net.op[:]
    model.net._net.op.extend(new_op)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)
    # NOTE(review): blob created manually here, presumably because the net
    # reads it before any op writes it — confirm against the model def.
    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)
    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return model
def add_training_inputs(model, roidb=None):
    """Create network input ops and blobs used for training.

    Must be called *after* model_builder.create(): the net is built first
    (so debugging fails fast without waiting on dataset load), then the
    input ops are appended here and rotated to the front of the op list.
    """
    assert model.train, 'Training inputs can only be added to a trainable model'
    if roidb is not None:
        # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
        model.roi_data_loader = RoIDataLoader(
            roidb,
            num_loaders=cfg.DATA_LOADER.NUM_THREADS,
            minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
            blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
        )
    num_ops_before = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    # One dequeue op (plus pre-created blobs) per GPU, in that GPU's scope
    for gpu in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu):
            for name in blob_names:
                workspace.CreateBlob(core.ScopedName(name))
            model.net.DequeueBlobs(
                model.roi_data_loader._blobs_queue_name, blob_names
            )
    # The dequeue ops were appended at the tail; rotate them to the front
    # so inputs are populated before any other op executes.
    num_input_ops = len(model.net._net.op) - num_ops_before
    all_ops = list(model.net._net.op)
    reordered = all_ops[-num_input_ops:] + all_ops[:-num_input_ops]
    del model.net._net.op[:]
    model.net._net.op.extend(reordered)
def __init__(
    self, roidb, num_loaders=4, minibatch_queue_size=64,
    blobs_queue_capacity=8
):
    """Set up loader state, a memcached channel for exchanging blobs,
    and spawn the loader threads.

    The memcached instance at 127.0.0.1:11212 is used as a side channel:
    for each exchanged blob there is a data key (initialized to []) and a
    status key '<name>_s' (initialized to 'yidu' — presumably "already
    read"; TODO confirm protocol against the consumer side).
    """
    self._mc = pylibmc.Client(
        ["127.0.0.1:11212"], binary=True,
        behaviors={"tcp_nodelay": True, "ketama": True})
    # All blob names exchanged through memcached: RoIs, indices, frozen
    # Fast R-CNN labels, and the per-FPN-level (2-6) RPN cls/bbox outputs.
    # Built in a loop instead of 28 copy-pasted set() calls.
    mc_keys = ['rois', 'inds', 'freeze_fastrcnn_label']
    for lvl in range(2, 7):
        mc_keys.append('rpn_cls_probs_fpn{}'.format(lvl))
        mc_keys.append('rpn_bbox_pred_fpn{}'.format(lvl))
    # Status flags first, then empty data slots (same order as before).
    for key in mc_keys:
        self._mc.set(key + '_s', 'yidu')
    for key in mc_keys:
        self._mc.set(key, [])
    self._roidb = roidb
    self._lock = threading.Lock()
    self._perm = deque(range(len(self._roidb)))
    self._cur = 0  # _perm cursor
    # The minibatch queue holds prepared training data in host (CPU) memory.
    # When training with N > 1 GPUs, each element in the minibatch queue
    # is actually a partial minibatch which contributes 1 / N of the
    # examples to the overall minibatch.
    self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
    self._blobs_queue_capacity = blobs_queue_capacity
    # Random queue name in case one instantiates multiple RoIDataLoaders
    self._loader_id = uuid.uuid4()
    self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
    # Loader threads construct (partial) minibatches and put them on the
    # minibatch queue
    self._num_loaders = num_loaders
    self._num_gpus = cfg.NUM_GPUS
    self.coordinator = Coordinator()
    self._output_names = get_minibatch_blob_names()
    self._inds = []
    self._shuffle_roidb_inds0()
    self._shuffle_roidb_inds()
    self.create_threads()