def main(args):
    """Merge several weights files into one weights file.

    Loads the config named by ``args.cfg``, passes every entry of
    ``args.weights_list`` through ``cache_url`` (the list is updated in
    place), initializes a single inference model on GPU 0 from all of the
    weights files in order, and saves the fetched parameter blobs to
    ``args.output_wts``.

    Args:
        args: Parsed command-line namespace providing ``cfg``,
            ``weights_list``, ``preffix_list`` and ``output_wts``. A missing
            (``None``) or empty ``preffix_list`` means an empty prefix is
            used for every weights file.
    """
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    for i, weights_file in enumerate(args.weights_list):
        args.weights_list[i] = cache_url(weights_file, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    # Truthiness (rather than len()) also covers preffix_list being None,
    # e.g. when the command-line option was not supplied.
    preffix_list = args.preffix_list or [""] * len(args.weights_list)

    model = model_builder.create(cfg.MODEL.TYPE, train=False)
    # Initialize GPU from weights files
    for i, weights_file in enumerate(args.weights_list):
        nu.initialize_gpu_from_weights_file(
            model, weights_file, gpu_id=0, preffix=preffix_list[i]
        )
    nu.broadcast_parameters(model)

    # Save all parameters, keyed by unscoped name; the first blob found for
    # a given unscoped name wins.
    blobs = {}
    for param in model.params:
        scoped_name = str(param)
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        if unscoped_name not in blobs and workspace.HasBlob(scoped_name):
            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)

    # Save merged weights file
    save_object(dict(blobs=blobs), args.output_wts)
def test_restore_checkpoint():
    """Save a checkpoint, perturb the weights, reload, and compare blobs."""
    # Build a training model and give it initial weights
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)

    # Attach the training inputs and instantiate the net
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)

    # Register the checkpoint for iteration 0
    iter_num = 0
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(
        output_dir, 'model_iter{}.pkl'.format(iter_num)
    )
    checkpoints = {iter_num: chk_file_path}

    # Write the weights out and remember what they were
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    orig_gpu_0_params, orig_all_params = get_params(model)

    # Overwrite the weights, then load the checkpoint back in
    init_weights(model)
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)

    # Every original blob must match its restored counterpart
    for blob_name, original in orig_all_params.items():
        np.testing.assert_array_equal(original, restored_all_params[blob_name])

    # Every restored blob must match the GPU 0 blob it was broadcast from
    for blob_name, restored in restored_all_params.items():
        gpu_0_name = c2_utils.UnscopeName(blob_name)
        np.testing.assert_array_equal(restored, orig_gpu_0_params[gpu_0_name])
def test_restore_checkpoint():
    """Round-trip model weights through a checkpoint file and verify them.

    The weights are saved, re-initialized, reloaded onto GPU 0 and broadcast;
    the restored blobs are then compared against the ones captured right
    after saving.
    """
    # Create Model
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)

    # Fill input blobs
    model_builder.add_training_inputs(
        model,
        roidb=combined_roidb_for_training(
            cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
        ),
    )
    workspace.CreateNet(model.net)

    # The checkpoint is written for iteration 0 inside the training output dir
    chk_file_path = os.path.join(
        get_output_dir(cfg.TRAIN.DATASETS, training=True),
        'model_iter{}.pkl'.format(0),
    )

    # Save model weights and snapshot the parameters
    nu.save_model_to_weights_file(chk_file_path, model)
    saved_params = get_params(model)
    orig_gpu_0_params = saved_params[0]
    orig_all_params = saved_params[1]

    # Change the model weights
    init_weights(model)

    # Reload the weights in the model
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    restored_all_params = get_params(model)[1]

    # Check if all params are loaded correctly
    for name in orig_all_params:
        np.testing.assert_array_equal(
            orig_all_params[name], restored_all_params[name]
        )

    # Check if broadcast_parameters works
    for name in restored_all_params:
        np.testing.assert_array_equal(
            restored_all_params[name],
            orig_gpu_0_params[c2_utils.UnscopeName(name)],
        )
def create_model():
    """Build the model and look for saved model checkpoints in case we can
    resume from one.

    When ``cfg.TRAIN.AUTO_RESUME`` is set:

    * if ``model_final.pkl`` exists in the output directory, no model is
      built; every ``model_epoch<N>.pkl`` file found there is recorded in
      ``checkpoints`` under ``N`` and the final model under ``'final'``;
    * otherwise the initialization weights are optionally copied into the
      output directory (``cfg.TRAIN.COPY_WEIGHTS``) and the highest-numbered
      ``model_epoch<N>.pkl`` file (``N > 0``) replaces the initialization
      weights, with training starting at ``N + 1``.

    Returns:
        A ``(model, weights_file, start_iter, checkpoints, output_dir)``
        tuple. The first three entries are ``None`` when the final model
        already exists.
    """
    logger = logging.getLogger(__name__)
    start_iter = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    weights_file = cfg.TRAIN.WEIGHTS
    checkpoint_pattern = re.compile(r'(?<=model_epoch)\d+(?=\.pkl)')

    if cfg.TRAIN.AUTO_RESUME:
        # Check for the final model (indicates training already finished)
        final_path = os.path.join(output_dir, 'model_final.pkl')
        if os.path.exists(final_path):
            logger.info('model_final.pkl exists; no need to train!')
            for f in os.listdir(output_dir):
                iter_string = checkpoint_pattern.findall(f)
                if iter_string:
                    checkpoints[int(iter_string[0])] = os.path.join(
                        output_dir, f
                    )
            checkpoints['final'] = final_path
            return None, None, None, checkpoints, output_dir

        if cfg.TRAIN.COPY_WEIGHTS:
            copyfile(
                weights_file,
                os.path.join(output_dir, os.path.basename(weights_file))
            )
            logger.info('Copy {} to {}'.format(weights_file, output_dir))

        # Find the most recent checkpoint (highest number). The comparison is
        # made against the best checkpoint number seen so far, not against
        # start_iter (which is that number + 1); otherwise a checkpoint that
        # directly follows an already-seen one would be skipped. As before,
        # a checkpoint numbered 0 is not used as a resume point.
        latest_iter = 0
        resume_weights_file = None
        for f in os.listdir(output_dir):
            iter_string = checkpoint_pattern.findall(f)
            if iter_string:
                checkpoint_iter = int(iter_string[0])
                if checkpoint_iter > latest_iter:
                    latest_iter = checkpoint_iter
                    resume_weights_file = f

        if resume_weights_file is not None:
            # Start one iteration immediately after the checkpoint iter and
            # override the initialization weights with the found checkpoint
            start_iter = latest_iter + 1
            weights_file = os.path.join(output_dir, resume_weights_file)
            logger.info(
                '========> Resuming from checkpoint {} at start iter {}'.
                format(weights_file, start_iter))

    logger.info('Building model: {}'.format(cfg.MODEL.TYPE))
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)
    return model, weights_file, start_iter, checkpoints, output_dir
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Build the test-time model described by the global cfg.

    The weights in ``weights_file`` are loaded for ``gpu_id`` and the model's
    networks are created in the Caffe2 workspace.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id
    )
    model_builder.add_inference_inputs(model)

    # Nets that are always created
    for net_name in ('net', 'conv_body_net'):
        workspace.CreateNet(getattr(model, net_name))

    # Nets that depend on the enabled heads
    if cfg.MODEL.MASK_ON:
        workspace.CreateNet(model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        workspace.CreateNet(model.keypoint_net)
    return model
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Create an inference model from the global cfg and load its weights.

    Test-time weights are read from ``weights_file``; the main net, the conv
    body net and, when enabled in cfg, the mask and keypoint nets are created
    in the Caffe2 workspace. Returns the model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model,
        weights_file,
        gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)

    # Collect the attribute names of the nets to create, in creation order
    net_names = ['net', 'conv_body_net']
    if cfg.MODEL.MASK_ON:
        net_names.append('mask_net')
    if cfg.MODEL.KEYPOINTS_ON:
        net_names.append('keypoint_net')

    for net_name in net_names:
        workspace.CreateNet(getattr(model, net_name))
    return model
def generate_rpn_on_range(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Generate RPN proposals for a dataset, or an index range of it, on a
    single GPU and save them to a pickle file in ``output_dir``.

    Returns the boxes, scores and ids produced by
    ``generate_proposals_on_roidb`` together with the path of the written
    file.
    """
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN

    roidb, start_ind, end_ind, total_num_images = get_roidb(
        dataset_name, ind_range
    )
    abs_output_dir = os.path.abspath(output_dir)
    logger.info('Output will be saved to: {:s}'.format(abs_output_dir))

    # Build the inference net and load its weights
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=gpu_id)
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)

    boxes, scores, ids = generate_proposals_on_roidb(
        model,
        roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )

    cfg_yaml = envu.yaml_dump(cfg)
    # The file name carries the index range when one was requested
    rpn_name = (
        'rpn_proposals.pkl' if ind_range is None
        else 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    )
    rpn_file = os.path.join(output_dir, rpn_name)
    payload = dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml)
    save_object(payload, rpn_file)
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file
def generate_rpn_on_range(
    weights_file, dataset_name, _proposal_file_ignored, output_dir,
    ind_range=None, gpu_id=0
):
    """Run RPN inference on all images of a dataset, or on an index range of
    them, using a single GPU.

    The proposals and a YAML dump of cfg are saved under ``output_dir``; the
    function returns ``(boxes, scores, ids, rpn_file)``.
    """
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN

    roidb, start_ind, end_ind, total_num_images = get_roidb(
        dataset_name, ind_range
    )
    logger.info(
        'Output will be saved to: {:s}'.format(os.path.abspath(output_dir))
    )

    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=gpu_id)
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)

    proposals = generate_proposals_on_roidb(
        model, roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )
    boxes, scores, ids = proposals

    cfg_yaml = yaml.dump(cfg)

    # Pick the output file name: ranged runs get the range in the name
    rpn_name = 'rpn_proposals.pkl'
    if ind_range is not None:
        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    rpn_file = os.path.join(output_dir, rpn_name)

    save_object(
        dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file
    )
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file
def create_model(weights_file):
    """Build a training model fed by a ``RoIDataLoaderSimple`` and load
    ``weights_file`` into it.

    Adapted from utils.train.setup_model_for_training. Returns the model
    after its data loader has been started.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)

    # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
    training_roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model.roi_data_loader = RoIDataLoaderSimple(
        training_roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )

    # Remember how many ops the net had before the input ops are appended
    net_ops = model.net._net.op
    num_ops_before = len(net_ops)

    input_blob_names = roi_data_minibatch.get_minibatch_blob_names(
        is_training=True
    )
    with c2_utils.NamedCudaScope(0):
        for name in input_blob_names:
            workspace.CreateBlob(core.ScopedName(name))
        model.net.DequeueBlobs(
            model.roi_data_loader._blobs_queue_name, input_blob_names
        )

    # A little op surgery to move input ops to the start of the net
    num_added = len(net_ops) - num_ops_before
    reordered_ops = net_ops[-num_added:] + net_ops[:-num_added]
    del net_ops[:]
    net_ops.extend(reordered_ops)

    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)
    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)

    # Start loading mini-batches and enqueuing blobs
    loader = model.roi_data_loader
    loader.register_sigint_handler()
    loader.start(prefill=True)
    return model
def create_model():
    """Build the model and look for saved model checkpoints in case we can
    resume from one.

    When ``cfg.TRAIN.AUTO_RESUME`` is set:

    * if ``model_final.pkl`` exists in the output directory, no model is
      built and only the final model path is reported;
    * otherwise the highest-numbered ``model_iter<N>.pkl`` file (``N > 0``)
      in the output directory replaces the initialization weights and
      training starts at iteration ``N + 1``.

    Returns:
        A ``(model, weights_file, start_iter, checkpoints, output_dir)``
        tuple. When the final model already exists the first three entries
        are ``None`` and ``checkpoints`` is ``{'final': <path>}``.
    """
    logger = logging.getLogger(__name__)
    start_iter = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    weights_file = cfg.TRAIN.WEIGHTS

    if cfg.TRAIN.AUTO_RESUME:
        # Check for the final model (indicates training already finished)
        final_path = os.path.join(output_dir, 'model_final.pkl')
        if os.path.exists(final_path):
            logger.info('model_final.pkl exists; no need to train!')
            return None, None, None, {'final': final_path}, output_dir

        # Find the most recent checkpoint (highest iteration number). The
        # comparison is made against the best checkpoint iteration seen so
        # far, not against start_iter (which is that iteration + 1);
        # otherwise a checkpoint that directly follows an already-seen one
        # would be skipped. As before, a checkpoint numbered 0 is not used
        # as a resume point.
        latest_iter = 0
        resume_weights_file = None
        for f in os.listdir(output_dir):
            iter_string = re.findall(r'(?<=model_iter)\d+(?=\.pkl)', f)
            if iter_string:
                checkpoint_iter = int(iter_string[0])
                if checkpoint_iter > latest_iter:
                    latest_iter = checkpoint_iter
                    resume_weights_file = f

        if resume_weights_file is not None:
            # Start one iteration immediately after the checkpoint iter and
            # override the initialization weights with the found checkpoint
            start_iter = latest_iter + 1
            weights_file = os.path.join(output_dir, resume_weights_file)
            logger.info(
                '========> Resuming from checkpoint {} at start iter {}'.
                format(weights_file, start_iter)
            )

    logger.info('Building model: {}'.format(cfg.MODEL.TYPE))
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)
    return model, weights_file, start_iter, checkpoints, output_dir
def initialize_mixed_model_from_cfg(weights_list, preffix_list, gpu_id=0):
    """Initialize a model from the global cfg using several weights files.

    Loads and combines multiple test-time weights files, each paired with the
    prefix at the same position in ``preffix_list``, and creates the networks
    in the Caffe2 workspace.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    assert len(weights_list) == len(preffix_list)

    # Load the weights files one after another, in list order
    for position, path in enumerate(weights_list):
        net_utils.initialize_gpu_from_weights_file(
            model, path, gpu_id=gpu_id, preffix=preffix_list[position]
        )
    model_builder.add_inference_inputs(model)

    workspace.CreateNet(model.net)
    workspace.CreateNet(model.conv_body_net)

    # Head-specific nets are created only when the matching cfg flag is set
    optional_nets = (
        ('MASK_ON', 'mask_net'),
        ('KEYPOINTS_ON', 'keypoint_net'),
        ('TRACKING_ON', 'track_net'),
    )
    for flag_name, net_name in optional_nets:
        if getattr(cfg.MODEL, flag_name):
            workspace.CreateNet(getattr(model, net_name))
    return model
def initialize_model_from_cfg(weights_file, gpu_id=0, int8=True):
    """Initialize a model from the global cfg.

    Loads test-time weights and creates the networks in the Caffe2 workspace.
    For the main, mask and keypoint nets, Int8 init/predict net files are
    loaded from the directory named by the ``INT8PATH`` environment variable
    when such files exist.

    Environment variables read: ``INT8PATH``, ``INT8PTXT`` ("1" selects the
    ``.pbtxt`` text format instead of binary ``.pb``), ``DEBUGMODE``,
    ``DNOOPT``, ``COSIM`` and ``DPROFILE``.

    Args:
        weights_file: Weights file passed to
            ``net_utils.initialize_gpu_from_weights_file``.
        gpu_id: Device id; the value ``-2`` selects the IDEEP device type for
            loaded net definitions and enables the IDEEP optimization pass.
        int8: When false and ``COSIM`` is set, ``INT8PATH`` is ignored.

    Returns:
        ``(model, ob, ob_mask, ob_keypoint)`` where the last three are the
        "TimeObserver" observers added when ``DPROFILE`` is "1", else
        ``None``.
    """
    ob = None
    ob_mask = None
    ob_keypoint = None
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)
    int8_path = os.environ.get('INT8PATH')

    def LoadModuleFile(fname):
        """Parse the NetDef stored in fname (text or binary format)."""
        from caffe2.proto import caffe2_pb2
        use_text_format = os.environ.get('INT8PTXT') == "1"
        # Binary protobuf files must be read as bytes; text mode would hand
        # ParseFromString a decoded str on Python 3.
        with open(fname, 'r' if use_text_format else 'rb') as f:
            if use_text_format:
                import google.protobuf.text_format as ptxt
                net_def = ptxt.Parse(f.read(), caffe2_pb2.NetDef())
            else:
                net_def = caffe2_pb2.NetDef()
                net_def.ParseFromString(f.read())
        if gpu_id == -2:
            device_opts = caffe2_pb2.DeviceOption()
            device_opts.device_type = caffe2_pb2.IDEEP
            for op in net_def.op:
                op.device_option.CopyFrom(device_opts)
        return net_def

    def CreateNet(net):
        """Apply any Int8 net files to net, then create it in the workspace."""
        int8_file_path = int8_path if int8_path else ''
        suffix = '.pbtxt' if os.environ.get('INT8PTXT') == "1" else '.pb'
        # The name is read before the predict net may be replaced below
        module_name = net.Proto().name
        int8_predict_file = (
            int8_file_path + '/' + module_name + '_predict_int8' + suffix
        )
        int8_init_file = (
            int8_file_path + '/' + module_name + '_init_int8' + suffix
        )
        if os.path.isfile(int8_init_file):
            logging.warning('Loading Int8 init file for module {}'.format(
                module_name))
            workspace.RunNetOnce(LoadModuleFile(int8_init_file))
        if os.path.isfile(int8_predict_file):
            logging.warning('Loading Int8 predict file for module {}'.format(
                module_name))
            net.Proto().CopyFrom(LoadModuleFile(int8_predict_file))
        if os.environ.get('DEBUGMODE') == "1":
            # Give unnamed ops a name derived from their type and position
            for i, op in enumerate(net.Proto().op):
                if len(op.name) == 0:
                    op.name = op.type.lower() + str(i)
        if gpu_id == -2 and os.environ.get('DNOOPT') != "1":
            logging.warning('Optimize module {}....................'.format(
                net.Proto().name))
            tf.optimizeForIDEEP(net)
        if os.environ.get('DEBUGMODE') == "1":
            # SerializeToString() returns bytes, so the .pb dump needs "wb"
            with open("{}_opt_predict_net.pb".format(net.Proto().name),
                      "wb") as fid:
                fid.write(net.Proto().SerializeToString())
            with open("{}_opt_predict_net.pbtxt".format(net.Proto().name),
                      "w") as fid:
                fid.write(str(net.Proto()))
        workspace.CreateNet(net)

    # Equality (not identity) is kept so that int8=0 behaves like int8=False.
    # CreateNet reads int8_path through the closure, so this takes effect.
    if os.environ.get('COSIM') and int8 == False:  # noqa: E712
        int8_path = None

    profiling = os.environ.get('DPROFILE') == "1"

    CreateNet(model.net)
    if profiling:
        logging.warning('need profile, add observer....................')
        ob = model.net.AddObserver("TimeObserver")
    workspace.CreateNet(model.conv_body_net)
    if cfg.MODEL.MASK_ON:
        CreateNet(model.mask_net)
        if profiling:
            ob_mask = model.mask_net.AddObserver("TimeObserver")
    if cfg.MODEL.KEYPOINTS_ON:
        CreateNet(model.keypoint_net)
        if profiling:
            ob_keypoint = model.keypoint_net.AddObserver("TimeObserver")
    return model, ob, ob_mask, ob_keypoint
def main(args):
    """Write the requested nets of a model as dot graphs and open them in
    xdot.

    ``args.opts`` may contain the keywords 'minimal', 'train', 'forward',
    'shapes' and 'params'. With 'shapes' the model is initialized from
    ``args.model_file`` and run once on data from the first training dataset;
    otherwise the model is only built.
    """
    # Decode the option keywords
    opts = args.opts if args.opts is not None else ()
    minimal = 'minimal' in opts
    train = 'train' in opts
    forward = 'forward' in opts
    with_shapes = 'shapes' in opts
    hide_params = 'params' not in opts

    if with_shapes and args.model_file is None:
        raise ValueError('Specify model file')

    def get_dot_graph(net, shapes):
        if minimal:
            return net_drawer.GetPydotGraphMinimal(net, rankdir="BT")
        return net_drawer.GetPydotGraph(
            net, rankdir="BT", shapes=shapes, hide_params=hide_params
        )

    # Get model
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    cfg.NUM_GPUS = 1
    cfg.VIS_NET = True
    if forward:
        cfg.MODEL.FORWARD_ONLY = True
    assert_and_infer_cfg(cache_urls=False)

    if with_shapes and train:
        raise NotImplementedError

    if not with_shapes:
        model = model_builder.create(cfg.MODEL.TYPE, train=train)
    else:
        # Run model to get shape information of all blobs
        model = infer_engine.initialize_model_from_cfg(args.model_file)
        workspace.RunNetOnce(model.param_init_net)
        nu.broadcast_parameters(model)
        roidb = JsonDataset(cfg.TRAIN.DATASETS[0]).get_roidb()
        with c2_utils.NamedCudaScope(0):
            if cfg.MODEL.TRACKING_ON:
                first_two = [roidb[0], roidb[1]]
                im_list = [cv2.imread(entry['image']) for entry in first_two]
                infer_engine.multi_im_detect_all(model, im_list, [None, None])
            else:
                infer_engine.im_detect_all(model, roidb[0]['image'], None)

    subprocess.call(["killall", "xdot"])

    # Visualize all specified nets
    mode_suffix = 'train' if train else 'infer'
    for net_name in args.net_names:
        net = getattr(model, net_name, None)
        if not net:
            continue
        print('processing graph {}...'.format(net_name))
        graph = get_dot_graph(net.Proto(), shapes=with_shapes)

        # Save graph
        graph_dir = os.path.join(args.output_dir, cfg.MODEL.TYPE)
        if not os.path.exists(graph_dir):
            os.makedirs(graph_dir)
        dot_name = os.path.join(
            graph_dir, '{}_{}.dot'.format(net_name, mode_suffix)
        )
        graph.write_dot(dot_name)
        subprocess.Popen(['xdot', dot_name])