def __init__(self, id):
    # self.cap = cv2.VideoCapture(id)
    self.cap = WebcamVideoStream(src=id).start()

    self.width = 1280   # 640
    self.height = 720   # 360

    self.display_lincomb = False
    self.crop = True
    self.score_threshold = 0.15
    self.top_k = 30
    self.display_masks = True
    self.display_fps = False
    self.display_text = True
    self.display_bboxes = True
    self.display_scores = False
    self.fast_nms = True
    self.cross_class_nms = True

    self.config = 'yolact_plus_base_config'
    print('Config specified. Parsed %s from the file name.\n' % self.config)
    set_cfg(self.config)

    print('Loading model...', end='')
    self.trained_model = 'weights/yolact_plus_base_54_800000.pth'
    self.model = Yolact()
    self.model.load_weights(self.trained_model)
    self.model.detect.use_fast_nms = self.fast_nms
    self.model.detect.use_cross_class_nms = self.cross_class_nms
    self.model.eval()
    self.model = self.model.to(device, non_blocking=True)
    print(' Done.')

    self.model_path = SavePath.from_str(self.trained_model)
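# The snippet above swaps cv2.VideoCapture for WebcamVideoStream, presumably
# imutils.video.WebcamVideoStream, which polls the camera on a background
# thread so .read() returns the latest frame without blocking. A minimal
# stand-alone capture loop as a sketch (src=0 and the Esc key are assumptions):
from imutils.video import WebcamVideoStream
import cv2

stream = WebcamVideoStream(src=0).start()
try:
    while True:
        frame = stream.read()      # latest frame, non-blocking
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) == 27:   # Esc to quit
            break
finally:
    stream.stop()
    cv2.destroyAllWindows()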
def __init__(self, trained_model: str, save_json=True, output_dir=None,
             output_name="detection", output_num=5):
    """
    YOLACT initialization. Parameters:
    - save_json    whether to save the results as a JSON file
    - output_dir   when save_json is True, the directory the JSON file is written to
    - output_name  name of the saved JSON file
    - output_num   # ? apparently the number of categories to output
    """
    # step 0: initialize members
    self.save_json = save_json  # NOTE: didn't know this pattern existed -- good to learn
    self.detections = None
    self.output_num = output_num

    # step 1: if JSON output was requested, create the Detections object described above
    if self.save_json and output_dir is not None:
        self.detections = Detections(output_dir, output_name)

    # step 2: initialize the YOLACT network
    with torch.no_grad():
        set_cfg("yolact_base_config")
        torch.cuda.set_device(1)
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        self.net = Yolact()
        # TODO: the weights file needs to be adjusted for your setup
        # self.net.load_weights('./weights/yolact_base_54_800000.pth')
        self.net.load_weights(trained_model)
        self.net.eval()
        self.net = self.net.cuda()
        print("load model complete")
class YOLACT_MODEL():
    def __init__(self, opts):
        # concat the two files to one file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)
        set_cfg('yolact_resnet50_config')

        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")
        self.net.eval()
        self.net = self.net.cuda()
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']

    # Run inference on an image and return the rendered output image.
    def detect(self, img):
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        output_image = self.display(preds, frame, None, None,
                                    undo_transform=False,
                                    score_threshold=self.threshold)
        return output_image

    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False,
                mask_alpha=0.45, top_k=100, score_threshold=0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
                img_gpu = img_gpu * masks[0]

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
        return img_numpy
def __init__(self, model_pth, output_num=5):
    self.output_num = output_num
    with torch.no_grad():
        set_cfg("yolact_base_config")
        torch.cuda.set_device(0)
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        self.net = Yolact()
        self.net.load_weights(model_pth)
        self.net.eval()
        self.net = self.net.cuda()
        print("load model complete")
def __init__(
        self,
        weight_path='C:/Users/user/yolact_notes/weights/yolact_darknet53_249_2000.pth',
        save_path='C:/Users/user/yolact_notes/pear_output'):
    set_cfg('pear_config')
    self.save_path = save_path
    self.weight_path = weight_path
    self.net = Yolact()
    self.net.load_weights(self.weight_path)
    self.net.eval()
    self.net = self.net.cuda()
    print('model loaded...')
    self.net.detect.cross_class_nms = True
    self.net.detect.use_fast_nms = True
def __init__(self):
    parse_args(self)
    self.args.config = 'yolact_edge_mobilenetv2_config'
    set_cfg(self.args.config)
    self.args.trained_model = '/home/ht/catkin_ws/src/instance_segmentation/scripts/weights/yolact_edge_mobilenetv2_124_10000.pth'
    self.args.top_k = 10
    self.args.score_threshold = 0.3
    self.args.trt_batch_size = 3
    self.args.disable_tensorrt = False
    self.args.use_fp16_tensorrt = False
    self.args.use_tensorrt_safe_mode = True
    self.args.cuda = True
    self.args.fast_nms = True
    self.args.display_masks = True
    self.args.display_bboxes = True
    self.args.display_text = True
    self.args.display_scores = True
    self.args.display_lincomb = False
    self.args.fast_eval = False
    self.args.deterministic = False
    self.args.no_crop = False
    self.args.crop = True
    self.args.calib_images = '/home/ht/catkin_ws/src/instance_segmentation/scripts/data/coco/calib_images'

    setup_logger(logging_level=logging.INFO)
    self.logger = logging.getLogger('yolact.eval')

    self.color_cache = defaultdict(lambda: {})

    with torch.no_grad():
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        self.logger.info('Loading model...')
        self.net = Yolact(training=False)
        if self.args.trained_model is not None:
            self.net.load_weights(self.args.trained_model, args=self.args)
        else:
            self.logger.warning('No weights loaded!')
        self.net.eval()
        self.logger.info('Model loaded.')

        convert_to_tensorrt(self.net, cfg, self.args, transform=BaseTransform())
def __init__(self, model_path="./weights/yolact_im700_54_800000.pth", use_cuda=False):
    print('Loading model...', end='')
    self.use_cuda = use_cuda
    self.trained_model = model_path
    self.net = Yolact()
    self.net.load_weights(self.trained_model)
    self.net.eval()
    if self.use_cuda:
        self.net = self.net.cuda()
    self.net.detect.use_fast_nms = True
    self.net.detect.use_cross_class_nms = False
    cfg.mask_proto_debug = False
    print(' Done.')
def setWeights(self, filename: str) -> None:
    if filename == '':
        raise YolactException('Empty filename for network weights')
    self.weights_file = filename
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if self.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(self.weights_file, self.cuda)
        self.net.eval()
        if self.cuda:
            self.net = self.net.cuda()
    toc = time.perf_counter_ns()
    logging.debug('Time to load weights %f s', 1e-9 * (toc - tic))
    self.sigInitialized.emit()
def __init__(self, opts):
    # concat the two files to one file
    # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
    #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
    #     call(script, shell=True)
    set_cfg('yolact_resnet50_config')

    cudnn.benchmark = True
    cudnn.fastest = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    self.net = Yolact()
    self.net.load_weights(opts['checkpoint'])
    print("done.")
    self.net.eval()
    self.net = self.net.cuda()
    self.net.detect.use_fast_nms = True
    cfg.mask_proto_debug = False

    self.color_cache = defaultdict(lambda: {})
    self.threshold = opts['threshold']
def load_model(model_file):
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    set_cfg('yolact_plus_resnet50_config')
    net = Yolact()
    net.load_weights(model_file)
    net.eval()
    return net
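# A minimal usage sketch for load_model above, assuming the standard yolact
# repo layout (FastBaseTransform, postprocess) and a CUDA build; the weight
# filename and example.jpg are placeholders, and 0.3 is an arbitrary threshold.
import cv2
import torch
from utils.augmentations import FastBaseTransform
from layers.output_utils import postprocess

net = load_model('weights/yolact_plus_resnet50_54_800000.pth')

with torch.no_grad():
    frame = torch.from_numpy(cv2.imread('example.jpg')).cuda().float()
    batch = FastBaseTransform()(frame.unsqueeze(0))
    preds = net(batch)
    h, w = frame.shape[:2]
    # postprocess returns (classes, scores, boxes, masks), sorted by score
    classes, scores, boxes, masks = postprocess(preds, w, h, score_threshold=0.3)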
def init_model(transform):
    args = parse_args()
    if args.config is not None:
        print(args.config)
        set_cfg(args.config)

    cfg.mask_proto_debug = False

    if args.trained_model == 'interrupt':
        args.trained_model = SavePath.get_interrupt('weights/')
    elif args.trained_model == 'latest':
        args.trained_model = SavePath.get_latest('weights/', cfg.name)

    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' % args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        net = net.cuda()
        net = CustomDataParallel(net).cuda()
        # NOTE: this rebinding shadows the `transform` argument passed in above
        transform = torch.nn.DataParallel(FastBaseTransform()).cuda()

    return net, args
def __init__(
        self,
        weights='./crow_vision_yolact/data/yolact/weights/weights_yolact_kuka_17/crow_base_35_457142.pth',
        config=None,
        batchsize=1,
        top_k=25,
        score_threshold=0.1,
        display_text=True,
        display_bboxes=True,
        display_masks=True,
        display_scores=True):
    self.score_threshold = score_threshold
    self.top_k = top_k
    self.batchsize = batchsize

    # initialize a yolact net for inference
    ## YOLACT setup
    # setup config
    if config is not None:
        if '.obj' in config:
            with open(config, 'rb') as f:
                config = dill.load(f)
        set_cfg(config)

    self.class_names_tuple = get_class_names_tuple()

    parse_args([
        '--top_k=' + str(top_k),
        '--score_threshold=' + str(score_threshold),
        '--display_text=' + str(display_text),
        '--display_bboxes=' + str(display_bboxes),
        '--display_masks=' + str(display_masks),
        '--display_scores=' + str(display_scores),
    ])

    # CUDA setup for yolact
    torch.backends.cudnn.fastest = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # YOLACT net itself
    with torch.no_grad():
        net = Yolact().cuda(torch.cuda.current_device())
        net.load_weights(weights)
        net.eval()
        net.detect.use_fast_nms = True
        net.detect.use_cross_class_nms = False
    self.net = net
    print("YOLACT network available as self.net")

    # for debug, benchmark
    self.duration = 0.0
def __init__(self, problem):
    super().__init__()

    from utils.augmentations import FastBaseTransform
    self.FastBaseTransform = FastBaseTransform
    import cv2
    self.cv2 = cv2
    import matplotlib.pyplot as plt
    self.plt = plt
    from layers.output_utils import postprocess, undo_image_transformation
    self.postprocess = postprocess
    self.undo_image_transformation = undo_image_transformation
    from utils import timer
    self.timer = timer

    import sys
    syspathsave = None
    if 'yolact' not in sys.path[1]:
        import copy
        syspathsave = copy.copy(sys.path)
        sys.path.insert(1, '../yolact/')

    from yolact import Yolact
    from train import MultiBoxLoss
    import data as D
    self.D = D
    from collections import defaultdict
    self.color_cache = defaultdict(lambda: {})

    net = Yolact()
    net.train()
    net.init_weights(backbone_path='../yolact/weights/' + D.cfg.backbone.path)

    criterion = MultiBoxLoss(num_classes=D.cfg.num_classes,
                             pos_threshold=D.cfg.positive_iou_threshold,
                             neg_threshold=D.cfg.negative_iou_threshold,
                             negpos_ratio=D.cfg.ohem_negpos_ratio)
    self.net = net
    self.criterion = criterion

    if syspathsave:
        sys.path = syspathsave
def convert_to_onnx_with_hydra(cfg: DictConfig):
    # create folder for onnx
    createFolderOnnx(cfg)

    # set cfg
    set_cfg(cfg.onnx.yolact_cfg)

    model = Yolact()
    model.load_weights(cfg.onnx.model_ckpt_path)
    model.eval()
    model = model.cpu()

    dummy_input = torch.rand((cfg.onnx.model_batch_size,
                              cfg.onnx.model_channel_input,
                              cfg.onnx.model_height_input,
                              cfg.onnx.model_width_input))

    torch.onnx.export(model,
                      dummy_input,
                      cfg.onnx.model_onnx_path,
                      verbose=cfg.onnx.verbose,
                      opset_version=cfg.onnx.opset_version)
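# A hedged sanity check for the exported file, assuming onnxruntime is
# installed and the export above succeeded; "yolact.onnx" stands in for
# cfg.onnx.model_onnx_path, and the 1x3x550x550 input shape mirrors the
# default YOLACT image size -- both are assumptions here.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("yolact.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
dummy = np.random.rand(1, 3, 550, 550).astype(np.float32)
outputs = sess.run(None, {inp.name: dummy})
for out in outputs:
    print(out.shape)  # one entry per exported output head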
def prepare_model(args):
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        init_path = args.save_folder + cfg.backbone.path
        print('Initializing weights...', init_path)
        if os.path.isfile(init_path):
            yolact_net.init_weights(backbone_path=init_path)
        else:
            print("no init weight, use empty")
    return yolact_net
def main(args):
    rospy.init_node('yolact_ros')

    rospack = rospkg.RosPack()
    yolact_path = rospack.get_path('yolact_ros')
    model_path_str = yolact_path + "/scripts/yolact/weights/yolact_base_54_800000.pth"
    model_path = SavePath.from_str(model_path_str)
    set_cfg(model_path.model_name + '_config')

    with torch.no_grad():
        results_path_str = yolact_path + "/scripts/yolact/results"
        if not os.path.exists(results_path_str):
            os.makedirs(results_path_str)

        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(model_path_str)
        net.eval()
        print(' Done.')

        net = net.cuda()
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        ic = image_converter(net)
        try:
            rospy.spin()
        except KeyboardInterrupt:
            print("Shutting down")
        cv2.destroyAllWindows()
def load_weights(filename, cuda):
    """Load YOLACT network weights"""
    global ynet
    if filename == '':
        raise ValueError('Empty filename for network weights')
    print('#### CUDA ENABLED', cuda)
    print(f'Loading weights from {filename}')
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
        ynet = Yolact()
        ynet.load_weights(filename, False)
        ynet.eval()
    toc = time.perf_counter_ns()
    logging.debug(f'Time to load weights: {1e-9 * (toc - tic)}')
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S']  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            loss_avgs[avg].reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())
                losses = {k: v.mean() for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0]
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], [])
                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam the weights folder
        SavePath.remove_interrupt(args.save_folder)
        yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
class MattingService:
    def __init__(self, model_path="./weights/yolact_im700_54_800000.pth", use_cuda=False):
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path
        self.net = Yolact()
        self.net.load_weights(self.trained_model)
        self.net.eval()
        if self.use_cuda:
            self.net = self.net.cuda()
        self.net.detect.use_fast_nms = True
        self.net.detect.use_cross_class_nms = False
        cfg.mask_proto_debug = False
        print(' Done.')

    def process(self, image, top_k=1, score_threshold=0.6):
        # TODO Currently we do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
        with torch.no_grad():
            if image is not None:
                if ':' in image:
                    inp, _image_name = image.split(':')
                    self._infer_image(self.net, inp, _image_name, top_k, score_threshold)
                else:
                    _image_name = image.split('/')[-1].split('.')[0] + '.png'
                    out = os.path.join('results/', _image_name)
                    self._infer_image(self.net, image, out, top_k, score_threshold)
                return _image_name

    def _infer_image(self, net: Yolact, path, save_path, top_k, score_threshold):
        if self.use_cuda:
            frame = torch.from_numpy(cv2.imread(path)).cuda().float()
        else:
            frame = torch.from_numpy(cv2.imread(path)).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)
        img_numpy = self.post_process(preds, frame, None, None, top_k, score_threshold,
                                      undo_transform=False)

        if save_path is None:
            img_numpy = img_numpy[:, :, (2, 1, 0, 3)]
            plt.subplot()
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            cv2.imwrite(save_path, img_numpy)

    @staticmethod
    def post_process(dets_out, img, h, w, top_k=1, score_threshold=0.6, undo_transform=True):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        # After this, mask is of size [num_dets, h, w, 1]
        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)
        if num_dets_to_consider == 0:
            return final_res

        masks = masks[:num_dets_to_consider, :, :, None]
        _mask = (masks * 255).byte().cpu().numpy()[0]
        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()
        return final_res
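# A minimal usage sketch for MattingService, assuming a CUDA build, a local
# test image (samples/person.jpg is a placeholder), and an existing results/
# directory, which process() writes to when no "input:output" form is given:
service = MattingService(use_cuda=True)
out_name = service.process('samples/person.jpg', top_k=1, score_threshold=0.6)
print('wrote', out_name)  # RGBA image; the top mask lands in the alpha channel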
def detect():
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'
    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        ######
        # If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
        # cudnn.benchmark = True
        # cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        ######

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        if not os.path.exists(save_path):
            os.mkdir(save_path)

        img_names = [name for name in os.listdir(img_path)
                     if name.endswith('.jpg') or name.endswith('.png')]

        # for img_name in tqdm(img_names):
        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))

            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time use %.3fs'
                  % (img_name, time.time() - start))  # inference time use 0.023s, 550x550

            # start = time.time()
            h, w = img.shape[2:]
            result = postprocess(preds, w, h, crop_masks=True,
                                 score_threshold=0.3)  # classes, scores, boxes, masks -- sorted by score
            # top_k = 10
            # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is it worth keeping only the top_k detections?
            # print('clw: postprocess time use %.3fs' % (time.time() - start))  # 0.001s

            # Walk result[0] in order; the first entry with class id 0 is the pear,
            # which also gives us the corresponding mask.
            # start = time.time()
            bFindPear = False
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0 and not bFindPear:
                    pear_mask = result[3][i].cpu().numpy()
                    bFindPear = True

            # Extract the outline from the pear's mask
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            # print('pear_mask.sum:', pear_mask.sum())        # 124250.0
            # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
            # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
            roundness = compute_roundness(pear_outline)
            ###
            result.append(roundness)
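# get_outline_from_mask and compute_roundness are not included in the snippet
# above. A plausible implementation, assuming "roundness" means the standard
# isoperimetric ratio 4*pi*area / perimeter**2 (1.0 for a perfect circle):
import cv2
import numpy as np

def get_outline_from_mask(mask, w, h):
    """Binarize the mask and return a same-size image with a 1-px outline."""
    binary = (np.asarray(mask).reshape(h, w) > 0.5).astype(np.uint8)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    outline = np.zeros_like(binary)
    cv2.drawContours(outline, contours, -1, 1, thickness=1)
    return outline

def compute_roundness(outline):
    """4*pi*A / P**2 for the largest contour in the outline image."""
    contours, _ = cv2.findContours(outline.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cnt = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, closed=True)
    return 4.0 * np.pi * area / (perimeter ** 2 + 1e-6)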
                                      undo_transform=False)
        cv2.imshow("YOLACT", img_numpy)
        if cv2.waitKey(33) == 27:
            break
    cv2.destroyAllWindows()
    camera.release()
    return


if __name__ == '__main__':
    rospy.init_node('test')
    sub_img = Get_image()

    print('Loading model...', end='')
    with torch.no_grad():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        net = Yolact()
        net.load_weights('/home/chien/ros_yolact/src/yolact/src/weights/yolact_base_1333_8000.pth')
        net.eval()
        net = net.cuda()
        print(' Done.')

        while not rospy.is_shutdown():
            cv2.imshow("YOLACT1", sub_img.cv_image)
            image = torch.from_numpy(sub_img.cv_image).cuda().float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = net(batch)
            img_numpy = prep_display(preds, image, None, None,
        if isinstance(child, tf.keras.Model):
            parse_module(child, weights)
        elif isinstance(child, tf.keras.layers.Conv2D):
            layer_weights = weights.pop()
            print(child, layer_weights[0].shape)
            child.set_weights(layer_weights)
        elif isinstance(child, tf.keras.layers.BatchNormalization):
            layer_weights = weights.pop()  # pop before printing so the shape matches this layer
            print(child, layer_weights[0].shape)
            child.set_weights(layer_weights)
        else:
            continue
    return True


model = Yolact()

darknet53_modules = [model.backbone._preconv] + model.backbone.conv_layers
for module in darknet53_modules:
    parse_module(module, darknet53_weights)

proto_net = model.proto_net
parse_module(proto_net, proto_net_weights)

fpn = model.fpn
parse_module(fpn, fpn_weights)

pred = model.prediction_layers[0]
parse_module(pred, pred_weights)

segmantic_seg_conv = model.semantic_seg_conv
parse_module(segmantic_seg_conv, segmantic_seg_conv_weights)
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            loss_avgs[avg].reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean() for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])
                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True)

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow

                    log.log('train', loss=loss_info, epoch=epoch, iter=iteration,
                            lr=round(cur_lr, 10), elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def train():
    #1: Create the folder where training results will be saved.
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    #2: Prepare the train dataset through the API provided by MSCOCO.
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    # If the train-validation technique is used, prepare an eval dataset as well.
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    #3: Create an object of the implemented Yolact() class and set it to train mode.
    # Caution: net and yolact_net share the same object in memory.
    #   However, net is later redefined as a combined Yolact + MultiBoxLoss
    #   object for training, so yolact_net is kept as a separate handle for
    #   accessing the Yolact network on its own.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    #######################################################################
    ####### RESUME handling ###############################################
    #4: args.log and args.resume implement logging during training and
    #   restarting from the interruption point when training is unavoidably
    #   stopped midway, so look into them in detail only if you need them.
    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)
    ####### END ###########################################################
    #######################################################################

    #5: Set up YOLACT's optimizer and loss function.
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    #6: When using multiple GPUs, split the batch size across the GPUs.
    #   If the parts don't add up to the total batch size, something is wrong, so exit.
    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    #7: Weave the net configured so far together with the loss function into one
    #   more integrated net. Calling net now detects bboxes, filters them once
    #   through fast NMS, compares against the ground truth to compute the loss,
    #   and on multi-GPU setups automatically splits the work across devices.
    #   yolact_net still points only to the Yolact() contained inside net.
    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    #8: Freeze all of yolact_net's batch-normalization layers, push a tensor of
    #   zeros through the model to initialize the parameters, then switch the
    #   batch-normalization layers back to train mode. This roundabout procedure
    #   exists because the author does not want to reset the mean/variance
    #   values already stored in batch normalization.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    #9: Loss counters.
    #   Create variables to hold the bbox localization loss and the class
    #   confidence loss, then compute the size of one epoch and how many
    #   epochs to run from the batch size and the dataset size.
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)  # cw: to disallow negative input... GOOD
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    #10: Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    #    step_index is the index used for learning-rate decay.
    #    data_loader is the class that prepares and hands over the dataset in
    #    order during training; the object is created and stored here.
    step_index = 0
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    #11: Define a lambda for saving intermediate state to save_path when a given
    #    epoch and iteration are reached; time_avg and loss_avgs are MovingAverage
    #    objects declared to report the in-progress loss as a moving average.
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    #12: where the main training happens (#A ~ #F)
    print('Begin training!')
    print()

    # A
    # try-except lets you stop training with ctrl+c (KeyboardInterrupt) while
    # still saving the progress made so far. To restart from the interruption
    # point, pass the --resume argument when running train.py.
    try:
        # Repeat for num_epochs as computed in #9.
        for epoch in range(num_epochs):
            # B
            # If started with --resume, continue until the restart iteration is
            # reached. Data is then pulled from data_loader and the loss computed;
            # if the target iteration is hit midway, break to end the epoch.
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # If we've trained as much as intended, stop.
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # If the requested iteration count exceeds max_iter, finish training at max_iter.
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Handle config values scheduled to change at a specific iteration.
                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            loss_avgs[avg].reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # C
                # [learning rate adjustment]
                # Early in training (relative to lr_warmup_until), adjust the lr to speed things up a bit.
                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Perform learning-rate decay each time a given iteration is reached.
                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # D
                # Loss computation.
                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Run forward propagation and compute this iteration's loss from the
                # result via the loss function. For the details, see the resnet101
                # class in Backbone.py, the Yolact class in yolact.py, and the
                # MultiBoxLoss class in MultiBoxLoss.py together.
                # (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean() for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])
                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # E
                # Run backward propagation and, if the value is finite,
                # apply it to the parameters via optimizer.step().
                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # F
                # Prints the elapsed time and intermediate loss values during
                # training so progress can be monitored.
                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True)

                # Write the log to a file
                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow

                    log.log('train', loss=loss_info, epoch=epoch, iter=iteration,
                            lr=round(cur_lr, 10), elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                # ~F
                # One pass through the loop advances one iteration.
                iteration += 1

                # Periodically save the training state.
                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # When running with train-validation, if this epoch hits the
            # validation interval, run validation once and measure mAP.
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)

    #13: If training is stopped with Ctrl+C, save the weights to save_folder
    #    before exiting so training can be restarted later.
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
from config import PASCAL_CLASSES, COLORS, get_params, ROOT_DIR
from data.coco_dataset import ObjectDetectionDataset
from utils import learning_rate_schedule
from utils.utils import postprocess, denormalize_image
from yolact import Yolact

# Todo Add your custom dataset
tf.random.set_seed(1234)

NAME_OF_DATASET = "pascal"
CLASS_NAMES = PASCAL_CLASSES

# -----------------------------------------------------------------------------------------------
# create model and dataloader
train_iter, input_size, num_cls, lrs_schedule_params, loss_params, parser_params, model_params = get_params(NAME_OF_DATASET)

model = Yolact(**model_params)

dateset = ObjectDetectionDataset(dataset_name=NAME_OF_DATASET,
                                 tfrecord_dir=os.path.join(ROOT_DIR, "data", NAME_OF_DATASET),
                                 anchor_instance=model.anchor_instance,
                                 **parser_params)

train_dataset = dateset.get_dataloader(subset='train', batch_size=1)
valid_dataset = dateset.get_dataloader(subset='val', batch_size=1)

# -----------------------------------------------------------------------------------------------
# Restore CheckPoints
# Choose the optimizer, loss function, metrics, and learning rate schedule
lr_schedule = learning_rate_schedule.Yolact_LearningRateSchedule(**lrs_schedule_params)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)

ckpt_dir = os.path.join(ROOT_DIR, "checkpoints")
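# The "Restore CheckPoints" step is only named above, not shown. A common
# tf.train.CheckpointManager pattern that would fit here, as a sketch (the
# tracked step variable and max_to_keep=5 are assumptions):
checkpoint = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(checkpoint, directory=ckpt_dir, max_to_keep=5)
checkpoint.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print("Restored from {}".format(manager.latest_checkpoint))
else:
    print("Initializing from scratch.")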
# Copyright (C) 2019 * Ltd. All rights reserved.
#
# Editor       : VIM
# File name    : convert_weight.py
# Author       : YunYang1994
# Created date : 2019-07-27 18:07:20
# Description  :
#
# ================================================================

import torch
import numpy as np
from yolact import Yolact

with torch.no_grad():
    model = Yolact()
    model.eval()
    model.load_weights("./yolact_darknet53_54_800000.pth")
    modules = model.children()


def parse_layer(layer, weights):
    assert isinstance(layer, torch.nn.Conv2d) or isinstance(layer, torch.nn.BatchNorm2d)
    print("=> Parsing ", layer)
    if isinstance(layer, torch.nn.Conv2d):
        weight, bias = layer.weight.detach().numpy(), layer.bias
        weight = np.transpose(weight, [2, 3, 1, 0])  # k_h, k_w, in_channels, out_channels
        if bias is None:
            weights.append([weight])
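# Why transpose with [2, 3, 1, 0]: PyTorch stores conv kernels as
# (out_channels, in_channels, k_h, k_w), while tf.keras expects
# (k_h, k_w, in_channels, out_channels). A quick shape check using a
# hypothetical 7x7, 3->64 first conv:
w_torch = np.zeros((64, 3, 7, 7), dtype=np.float32)
w_tf = np.transpose(w_torch, [2, 3, 1, 0])
assert w_tf.shape == (7, 7, 3, 64)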
    # print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000 * avg_seconds))


if __name__ == '__main__':
    # Dataset and labels
    valid_dataset = COCODetection(image_path='./data/coco/images/val2017/',
                                  info_file='./data/coco/annotations/instances_val2017.json',
                                  transform=BaseTransform(),
                                  has_gt=True)
    prep_coco_cats()

    # Model
    print('Loading model...', end='')
    model = Yolact()
    model.load_weights(args.trained_model)
    model.eval()
    model = model.cuda() if args.cuda else model.cpu()
    print(' Done.')

    # Core entry point
    with torch.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        if args.cuda:
            torch.backends.cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
def train(rank, args): if args.num_gpus > 1: multi_gpu_rescale(args) if rank == 0: if not os.path.exists(args.save_folder): os.mkdir(args.save_folder) # set up logger setup_logger(output=os.path.join(args.log_folder, cfg.name), distributed_rank=rank) logger = logging.getLogger("yolact.train") w = SummaryHelper(distributed_rank=rank, log_dir=os.path.join(args.log_folder, cfg.name)) w.add_text("argv", " ".join(sys.argv)) logger.info("Args: {}".format(" ".join(sys.argv))) import git with git.Repo(search_parent_directories=True) as repo: w.add_text("git_hash", repo.head.object.hexsha) logger.info("git hash: {}".format(repo.head.object.hexsha)) try: logger.info("Initializing torch.distributed backend...") dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=args.num_gpus, rank=rank) except Exception as e: logger.error("Process group URL: {}".format(args.dist_url)) raise e dist.barrier() if torch.cuda.device_count() > 1: logger.info('Multiple GPUs detected! Turning off JIT.') collate_fn = detection_collate if cfg.dataset.name == 'YouTube VIS': dataset = YoutubeVIS(image_path=cfg.dataset.train_images, info_file=cfg.dataset.train_info, configs=cfg.dataset, transform=SSDAugmentationVideo(MEANS)) if cfg.dataset.joint == 'coco': joint_dataset = COCODetection( image_path=cfg.joint_dataset.train_images, info_file=cfg.joint_dataset.train_info, transform=SSDAugmentation(MEANS)) joint_collate_fn = detection_collate if args.validation_epoch > 0: setup_eval() val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images, info_file=cfg.dataset.valid_info, configs=cfg.dataset, transform=BaseTransformVideo(MEANS)) collate_fn = collate_fn_youtube_vis elif cfg.dataset.name == 'FlyingChairs': dataset = FlyingChairs(image_path=cfg.dataset.trainval_images, info_file=cfg.dataset.trainval_info) collate_fn = collate_fn_flying_chairs else: dataset = COCODetection(image_path=cfg.dataset.train_images, info_file=cfg.dataset.train_info, transform=SSDAugmentation(MEANS)) if args.validation_epoch > 0: setup_eval() val_dataset = COCODetection(image_path=cfg.dataset.valid_images, info_file=cfg.dataset.valid_info, transform=BaseTransform(MEANS)) # Set cuda device early to avoid duplicate model in master GPU if args.cuda: torch.cuda.set_device(rank) # Parallel wraps the underlying module, but when saving and loading we don't want that yolact_net = Yolact() net = yolact_net net.train() # I don't use the timer during training (I use a different timing method). # Apparently there's a race condition with multiple GPUs. 
# use timer for experiments timer.disable_all() # Both of these can set args.resume to None, so do them before the check if args.resume == 'interrupt': args.resume = SavePath.get_interrupt(args.save_folder) elif args.resume == 'latest': args.resume = SavePath.get_latest(args.save_folder, cfg.name) if args.resume is not None: logger.info('Resuming training, loading {}...'.format(args.resume)) yolact_net.load_weights(args.resume, args=args) if args.start_iter == -1: args.start_iter = SavePath.from_str(args.resume).iteration else: logger.info('Initializing weights...') yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path) if cfg.flow.train_flow: criterion = OpticalFlowLoss() else: criterion = MultiBoxLoss(num_classes=cfg.num_classes, pos_threshold=cfg.positive_iou_threshold, neg_threshold=cfg.negative_iou_threshold, negpos_ratio=3) if args.cuda: cudnn.benchmark = True net.cuda(rank) criterion.cuda(rank) net = nn.parallel.DistributedDataParallel(net, device_ids=[rank], output_device=rank, broadcast_buffers=False, find_unused_parameters=True) # net = nn.DataParallel(net).cuda() # criterion = nn.DataParallel(criterion).cuda() optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=args.decay) # loss counters loc_loss = 0 conf_loss = 0 iteration = max(args.start_iter, 0) w.set_step(iteration) last_time = time.time() epoch_size = len(dataset) // args.batch_size // args.num_gpus num_epochs = math.ceil(cfg.max_iter / epoch_size) # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index step_index = 0 from data.sampler_utils import InfiniteSampler, build_batch_data_sampler infinite_sampler = InfiniteSampler(dataset, seed=args.random_seed, num_replicas=args.num_gpus, rank=rank, shuffle=True) train_sampler = build_batch_data_sampler(infinite_sampler, images_per_batch=args.batch_size) data_loader = data.DataLoader( dataset, num_workers=args.num_workers, collate_fn=collate_fn, multiprocessing_context="fork" if args.num_workers > 1 else None, batch_sampler=train_sampler) data_loader_iter = iter(data_loader) if cfg.dataset.joint: joint_infinite_sampler = InfiniteSampler(joint_dataset, seed=args.random_seed, num_replicas=args.num_gpus, rank=rank, shuffle=True) joint_train_sampler = build_batch_data_sampler( joint_infinite_sampler, images_per_batch=args.batch_size) joint_data_loader = data.DataLoader( joint_dataset, num_workers=args.num_workers, collate_fn=joint_collate_fn, multiprocessing_context="fork" if args.num_workers > 1 else None, batch_sampler=joint_train_sampler) joint_data_loader_iter = iter(joint_data_loader) dist.barrier() save_path = lambda epoch, iteration: SavePath( cfg.name, epoch, iteration).get_path(root=args.save_folder) time_avg = MovingAverage() data_time_avg = MovingAverage(10) global loss_types # Forms the print order loss_avgs = {k: MovingAverage(100) for k in loss_types} def backward_and_log(prefix, net_outs, targets, masks, num_crowds, extra_loss=None): optimizer.zero_grad() out = net_outs["pred_outs"] wrapper = ScatterWrapper(targets, masks, num_crowds) losses = criterion(out, wrapper, wrapper.make_mask()) losses = {k: v.mean() for k, v in losses.items()} # Mean here because Dataparallel if extra_loss is not None: assert type(extra_loss) == dict losses.update(extra_loss) loss = sum([losses[k] for k in losses]) # Backprop loss.backward() # Do this to free up vram even if loss is not finite if torch.isfinite(loss).item(): optimizer.step() # Add the loss to the moving 
average for bookkeeping for k in losses: loss_avgs[k].add(losses[k].item()) w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k), losses[k].item()) return losses logger.info('Begin training!') # try-except so you can use ctrl+c to save early and stop training try: for epoch in range(num_epochs): # Resume from start_iter if (epoch + 1) * epoch_size < iteration: continue while True: data_start_time = time.perf_counter() datum = next(data_loader_iter) dist.barrier() data_end_time = time.perf_counter() data_time = data_end_time - data_start_time if iteration != args.start_iter: data_time_avg.add(data_time) # Stop if we've reached an epoch if we're resuming from start_iter if iteration == (epoch + 1) * epoch_size: break # Stop at the configured number of iterations even if mid-epoch if iteration == cfg.max_iter: break # Change a config setting if we've reached the specified iteration changed = False for change in cfg.delayed_settings: if iteration >= change[0]: changed = True cfg.replace(change[1]) # Reset the loss averages because things might have changed for avg in loss_avgs: avg.reset() # If a config setting was changed, remove it from the list so we don't keep checking if changed: cfg.delayed_settings = [ x for x in cfg.delayed_settings if x[0] > iteration ] # Warm up by linearly interpolating the learning rate from some smaller value if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr: set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init) elif cfg.lr_schedule == 'cosine': set_lr( optimizer, args.lr * ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) * .5)) # Adjust the learning rate at the given iterations, but also if we resume from past that iteration while cfg.lr_schedule == 'step' and step_index < len( cfg.lr_steps ) and iteration >= cfg.lr_steps[step_index]: step_index += 1 set_lr(optimizer, args.lr * (args.gamma**step_index)) global lr w.add_scalar('meta/lr', lr) if cfg.dataset.name == "FlyingChairs": imgs_1, imgs_2, flows = prepare_flow_data(datum) net_outs = net(None, extras=(imgs_1, imgs_2)) # Compute Loss optimizer.zero_grad() losses = criterion(net_outs, flows) losses = {k: v.mean() for k, v in losses.items() } # Mean here because Dataparallel loss = sum([losses[k] for k in losses]) # Backprop loss.backward( ) # Do this to free up vram even if loss is not finite if torch.isfinite(loss).item(): optimizer.step() # Add the loss to the moving average for bookkeeping for k in losses: loss_avgs[k].add(losses[k].item()) w.add_scalar('loss/%s' % k, losses[k].item()) elif cfg.dataset.joint or not cfg.dataset.is_video: if cfg.dataset.joint: joint_datum = next(joint_data_loader_iter) dist.barrier() # Load training data # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there images, targets, masks, num_crowds = prepare_data( joint_datum) else: images, targets, masks, num_crowds = prepare_data( datum) extras = { "backbone": "full", "interrupt": False, "moving_statistics": { "aligned_feats": [] } } net_outs = net(images, extras=extras) out = net_outs["pred_outs"] # Compute Loss optimizer.zero_grad() wrapper = ScatterWrapper(targets, masks, num_crowds) losses = criterion(out, wrapper, wrapper.make_mask()) losses = {k: v.mean() for k, v in losses.items() } # Mean here because Dataparallel loss = sum([losses[k] for k in losses]) # Backprop loss.backward( ) # Do this to free up vram even if loss is not finite if torch.isfinite(loss).item(): 
optimizer.step() # Add the loss to the moving average for bookkeeping for k in losses: loss_avgs[k].add(losses[k].item()) w.add_scalar('joint/%s' % k, losses[k].item()) # Forward Pass if cfg.dataset.is_video: # reference frames references = [] moving_statistics = {"aligned_feats": [], "conf_hist": []} for idx, frame in enumerate(datum[:0:-1]): images, annots = frame extras = { "backbone": "full", "interrupt": True, "keep_statistics": True, "moving_statistics": moving_statistics } with torch.no_grad(): net_outs = net(images, extras=extras) moving_statistics["feats"] = net_outs["feats"] moving_statistics["lateral"] = net_outs["lateral"] keys_to_save = ("outs_phase_1", "outs_phase_2") for key in set(net_outs.keys()) - set(keys_to_save): del net_outs[key] references.append(net_outs) # key frame with annotation, but not compute full backbone frame = datum[0] images, annots = frame frame = ( images, annots, ) images, targets, masks, num_crowds = prepare_data(frame) extras = { "backbone": "full", "interrupt": not cfg.flow.base_backward, "moving_statistics": moving_statistics } gt_net_outs = net(images, extras=extras) if cfg.flow.base_backward: losses = backward_and_log("compute", gt_net_outs, targets, masks, num_crowds) keys_to_save = ("outs_phase_1", "outs_phase_2") for key in set(gt_net_outs.keys()) - set(keys_to_save): del gt_net_outs[key] # now do the warp if len(references) > 0: reference_frame = references[0] extras = { "backbone": "partial", "moving_statistics": moving_statistics } net_outs = net(images, extras=extras) extra_loss = yolact_net.extra_loss( net_outs, gt_net_outs) losses = backward_and_log("warp", net_outs, targets, masks, num_crowds, extra_loss=extra_loss) cur_time = time.time() elapsed = cur_time - last_time last_time = cur_time w.add_scalar('meta/data_time', data_time) w.add_scalar('meta/iter_time', elapsed) # Exclude graph setup from the timing information if iteration != args.start_iter: time_avg.add(elapsed) if iteration % 10 == 0: eta_str = str( datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0] if torch.cuda.is_available(): max_mem_mb = torch.cuda.max_memory_allocated( ) / 1024.0 / 1024.0 # torch.cuda.reset_max_memory_allocated() else: max_mem_mb = None logger.info("""\ eta: {eta} epoch: {epoch} iter: {iter} \ {losses} {loss_total} \ time: {time} data_time: {data_time} lr: {lr} {memory}\ """.format(eta=eta_str, epoch=epoch, iter=iteration, losses=" ".join([ "{}: {:.3f}".format(k, loss_avgs[k].get_avg()) for k in losses ]), loss_total="T: {:.3f}".format( sum([loss_avgs[k].get_avg() for k in losses])), data_time="{:.3f}".format(data_time_avg.get_avg()), time="{:.3f}".format(elapsed), lr="{:.6f}".format(lr), memory="max_mem: {:.0f}M".format(max_mem_mb))) if rank == 0 and iteration % 100 == 0: if cfg.flow.train_flow: import flowiz as fz from layers.warp_utils import deform_op tgt_size = (64, 64) flow_size = flows.size()[2:] vis_data = [] for pred_flow in net_outs: vis_data.append(pred_flow) deform_gt = deform_op(imgs_2, flows) flows_pred = [ F.interpolate(x, size=flow_size, mode='bilinear', align_corners=False) for x in net_outs ] deform_preds = [ deform_op(imgs_2, x) for x in flows_pred ] vis_data.append( F.interpolate(flows, size=tgt_size, mode='area')) vis_data = [ F.interpolate(flow[:1], size=tgt_size) for flow in vis_data ] vis_data = [ fz.convert_from_flow( flow[0].data.cpu().numpy().transpose( 1, 2, 0)).transpose( 2, 0, 1).astype('float32') / 255 for flow in vis_data ] def convert_image(image): image = F.interpolate(image, 
                            image = F.interpolate(image, size=tgt_size, mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend([convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        vis_data = [F.interpolate(flow[:1], size=tgt_size) for flow in vis_data]
                        vis_data = [fz.convert_from_flow(flow[0].data.cpu().numpy().transpose(1, 2, 0))
                                    .transpose(2, 0, 1).astype('float32') / 255
                                    for flow in vis_data]

                        input_image = F.interpolate(images, size=tgt_size, mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()

    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        return

    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
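
# The loop above interleaves three learning-rate policies: a linear warmup up to
# cfg.lr_warmup_until, a cosine decay over cfg.max_iter, and a step decay at
# cfg.lr_steps. Below is a minimal standalone sketch of the combined schedule,
# approximating the precedence the loop applies; the function name lr_at and its
# plain-float arguments are illustrative, not part of this codebase.
import math

def lr_at(iteration, base_lr, warmup_init, warmup_until, max_iter,
          schedule='step', lr_steps=(), gamma=0.1):
    """Learning rate the training loop above would apply at `iteration`."""
    # Linear warmup from warmup_init up to base_lr
    if warmup_until > 0 and iteration <= warmup_until and warmup_init < base_lr:
        return (base_lr - warmup_init) * (iteration / warmup_until) + warmup_init
    # Cosine decay from base_lr toward 0 across the whole run
    if schedule == 'cosine':
        return base_lr * ((math.cos(math.pi * iteration / max_iter) + 1.) * .5)
    # Step decay: multiply by gamma once per boundary already passed
    step_index = sum(1 for s in lr_steps if iteration >= s)
    return base_lr * (gamma ** step_index)

# e.g. lr_at(500, 1e-3, 1e-4, 1000, 800000) == 5.5e-4 (halfway through warmup)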
if args.cuda:
    cudnn.fastest = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

if args.resume and not args.display:
    with open(args.ap_data_file, 'rb') as f:
        ap_data = pickle.load(f)
    calc_map(ap_data)
    exit()

dataset = None

print('Loading model...', end='')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
print(' Done.')

if args.cuda:
    net = net.cuda()

net.detect.use_fast_nms = args.fast_nms
net.detect.use_cross_class_nms = args.cross_class_nms
cfg.mask_proto_debug = args.mask_proto_debug

scan = Scan(rgb_paths=rgb_paths,
            depth_paths=depth_paths,
            pose_paths=pose_paths,
            cam_intr=cam_intr,
            mesh_plot=mesh_plot,
            scannet_data=scannet_data,
            mask_net=net,
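
# Side note: torch.set_default_tensor_type is a process-wide switch (and is
# deprecated in recent PyTorch releases). A sketch of the explicit alternative,
# keeping device placement local instead of global; this is not a drop-in
# replacement here, since parts of this codebase rely on the global default:
device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
net = net.to(device)
# Inputs then need the same move before each forward pass, e.g.:
# batch = batch.to(device)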
class YOLACT_MODEL():
    def __init__(self, opts):
        # concat the two weight files into one file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)
        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")
        self.net.eval()
        self.net = self.net.cuda()
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        self.mode = opts['mode']

    # Run inference on a single image and return the annotated result.
    def detect(self, img):
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        return self.display(preds, frame, None, None, undo_transform=False,
                            score_threshold=self.threshold)

    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False,
                mask_alpha=0.45, top_k=100, score_threshold=0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb=False,
                            crop_masks=True, score_threshold=score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
            classes, scores, boxes = [x[:top_k].detach().cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found, so just output the original image
            # (return the same (image, boxes, scores) tuple as the path below)
            return ((img_gpu * 255).byte().detach().cpu().numpy(), boxes, scores)

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending on the source
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        show_mask = True
        show_box = True
        if self.mode == "mask_only":
            show_box = False
        if self.mode == "box_only":
            show_mask = False
        print("mode :", self.mode)
        print("show_mask :", show_mask)
        print("show_box :", show_box)

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if show_mask and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                                for j in range(num_dets_to_consider)], dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper.
            # This whole block should be equivalent to:
            #     for j in range(num_dets_to_consider):
            #         img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if show_box:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score)

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

        return (img_numpy, boxes, scores)
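
# A minimal usage sketch for YOLACT_MODEL, assuming a CUDA device is available
# (the class pins tensors to the GPU) and the three opts keys it reads
# ('checkpoint', 'threshold', 'mode'); the checkpoint and image paths below
# are illustrative:
import cv2

opts = {
    'checkpoint': 'weights/yolact_resnet50_54_800000.pth',
    'threshold': 0.3,
    'mode': 'all',  # anything other than "mask_only"/"box_only" draws both
}
model = YOLACT_MODEL(opts)

img = cv2.imread('example.jpg')  # BGR uint8 array, as OpenCV loads it
annotated, boxes, scores = model.detect(img)
cv2.imwrite('example_out.jpg', annotated)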