def main():
    parse_args()
    rospy.init_node('yolact_ros', anonymous=True)

    if args.config is not None:
        set_cfg(args.config)

    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' % args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        if args.cuda:
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        detect_ = DetectImg(net)

        try:
            rospy.spin()
        except KeyboardInterrupt:
            print("Shutting down")
        cv2.destroyAllWindows()
def predict(self, image_array: np.ndarray):
    """
    :param image_array: image as a numpy array
    Format of returned boxes is [x1, y1, x2, y2]; individual centers are tuples.
    :return: entire mask, individual masks, boxes, centers
    """
    with torch.no_grad():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        frame = torch.from_numpy(image_array).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))

        net = Yolact()
        net.detect.use_fast_nms = True
        net.detect.use_cross_class_nms = True
        net.load_weights(self.weights)
        net.eval()

        preds = net(batch)
        mask_entire, boxes = prep_display(preds, frame, None, None, undo_transform=False)

        if len(boxes) < 1:
            return mask_entire, None, None, None

        mask_dict = {}
        centers_dict = {}
        boxes_dict = {}
        for index in range(len(boxes)):
            current_box = boxes[index]
            # Crop the per-instance mask out of the full-frame mask
            mask_dict[index] = mask_entire[current_box[1]:current_box[3],
                                           current_box[0]:current_box[2]]
            center = Segment.find_center(mask_dict[index])
            if not center:
                adjusted_center = None
            else:
                adjusted_center = Segment.adjust_centers(center, current_box)
            centers_dict[index] = adjusted_center
            boxes_dict[index] = current_box

        return mask_entire, mask_dict, centers_dict, boxes_dict
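# Illustrative usage sketch (not part of the original source): assumes predict() is a
# method of the Segment class referenced above and that its constructor accepts a path
# to a trained weights file. The constructor keyword and file names are hypothetical.
import cv2

segment = Segment(weights='yolact/weights/yolact_im400_53_7000.pth')  # hypothetical constructor
image = cv2.imread('example.png')  # uint8 BGR array, as expected by FastBaseTransform

mask_entire, masks, centers, boxes = segment.predict(image)
if masks is not None:
    for idx, box in boxes.items():
        print('instance %d: box=%s center=%s' % (idx, box, centers[idx]))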
if args.resume and not args.display:
    with open(args.ap_data_file, 'rb') as f:
        ap_data = pickle.load(f)
    calc_map(ap_data)
    exit()

if args.image is None and args.video is None and args.images is None:
    dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info,
                            transform=BaseTransform(), has_gt=cfg.dataset.has_gt)
    prep_coco_cats()
else:
    dataset = None

print('Loading model...', end='')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
print(' Done.')

if args.cuda:
    net = net.cuda()

net.detect.use_fast_nms = args.fast_nms
cfg.mask_proto_debug = args.mask_proto_debug

detect_ = detect()
detect_.evalvideo(net, args.video)
# print("config") # print(opt.config) estimator = PoseNet(num_points=num_points, num_obj=num_obj) estimator.cuda() estimator.load_state_dict(torch.load(opt.model)) estimator.eval() refiner = PoseRefineNet(num_points=num_points, num_obj=num_obj) refiner.cuda() refiner.load_state_dict(torch.load(opt.refine_model)) refiner.eval() yolact = Yolact() yolact.load_weights(opt.trained_model) yolact.eval() yolact.cuda() torch.set_default_tensor_type('torch.cuda.FloatTensor') yolact.detect.use_fast_nms = opt.fast_nms yolact.detect.use_cross_class_nms = opt.cross_class_nms # evalimage(net, args.image) import matplotlib.pyplot as plt def prep_display(dets_out, img,
class Yolact_ROS(object):
    def __init__(self, model_path, with_cuda, yolact_config, fast_nms, threshold,
                 display_cv, top_k):
        self.top_k = top_k
        self.threshold = threshold
        self.display_cv = display_cv

        print("loading Yolact ...")
        with torch.no_grad():
            set_cfg(yolact_config)
            print("Configuration: ", yolact_config)

            if with_cuda:
                cudnn.benchmark = True
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')
            print("use cuda: ", with_cuda)

            self.net = Yolact()
            self.net.load_weights(model_path)
            print("Model: ", model_path)
            self.net.eval()

            if with_cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = fast_nms
            print("use fast nms: ", fast_nms)
            print("Yolact loaded")

    def prediction(self, img):
        self.net.detect.cross_class_nms = True
        cfg.mask_proto_debug = False

        with torch.no_grad():
            frame = torch.Tensor(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))

            # Use a monotonic timer (time.clock() was removed in Python 3.8)
            time_start = time.perf_counter()
            preds = self.net(batch)

            h, w, _ = img.shape
            t = postprocess(preds, w, h, visualize_lincomb=False,
                            crop_masks=True, score_threshold=self.threshold)
            torch.cuda.synchronize()

            masks = t[3][:self.top_k]
            classes, scores, bboxes = [x[:self.top_k].cpu().numpy() for x in t[:3]]
            time_elapsed = time.perf_counter() - time_start

            num_dets_to_consider = min(self.top_k, classes.shape[0])
            for i in range(num_dets_to_consider):
                if scores[i] < self.threshold:
                    num_dets_to_consider = i
                    break

            # Default to empty outputs so the return below is always defined
            masks_msg = np.zeros((0,), dtype=np.uint8)
            scores_msg = np.zeros(0)
            class_label_msg = np.empty(0, dtype="S20")
            bboxes_msg = np.zeros([0, 4], dtype=int)

            if num_dets_to_consider >= 1:
                masks = masks[:num_dets_to_consider, :, :, None]
                masks_msg = masks.cpu().detach().numpy()
                masks_msg = masks_msg.astype(np.uint8)
                scores_msg = np.zeros(num_dets_to_consider)
                class_label_msg = np.empty(num_dets_to_consider, dtype="S20")
                bboxes_msg = np.zeros([num_dets_to_consider, 4], dtype=int)

                for i in reversed(range(num_dets_to_consider)):
                    scores_msg[i] = scores[i]
                    class_label_msg[i] = cfg.dataset.class_names[classes[i]]
                    bboxes_msg[i] = bboxes[i]
                    print(class_label_msg[i].decode(), "%.2f" % (scores_msg[i]))

            os.system('cls' if os.name == 'nt' else 'clear')
            print("%.2f" % (1 / time_elapsed), "hz")

            if self.display_cv:
                self.display(frame, masks, classes, scores, bboxes, num_dets_to_consider)

            return masks_msg, class_label_msg, scores_msg, bboxes_msg

    def display(self, img, masks, pred_classes, scores, bboxes, num_dets_to_consider,
                mask_alpha=0.75):
        img_gpu = img / 255.0

        if num_dets_to_consider == 0:
            return (img_gpu * 255).byte().cpu().numpy()

        use_class_color = True
        colors = torch.cat([
            self.get_color(i, pred_classes, use_class_color,
                           on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for i in range(num_dets_to_consider)
        ], dim=0)

        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        inv_alph_masks = masks * (-mask_alpha) + 1

        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        for i in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = bboxes[i, :]
            color = self.get_color(i, pred_classes, use_class_color)
            score = scores[i]

            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            _class = cfg.dataset.class_names[pred_classes[i]]
            text_str = '%s: %.2f' % (_class, score)

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                             font_thickness)[0]
            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]

            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

        cv2.imshow("yolact", img_numpy)
        cv2.waitKey(1)

    def get_color(self, i, pred_classes, class_color, on_gpu=None):
        color_cache = defaultdict(lambda: {})
        color_idx = (pred_classes[i] * 5 if class_color else i * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color
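# Illustrative usage sketch (not part of the original source). The weight path, config
# name, and threshold values below are assumptions; adjust them to the local setup.
import cv2

ros_detector = Yolact_ROS(
    model_path='yolact/weights/yolact_im400_53_7000.pth',  # hypothetical path
    with_cuda=True,
    yolact_config='yolact_base_config',
    fast_nms=True,
    threshold=0.3,
    display_cv=False,
    top_k=15,
)

img = cv2.imread('example.png')
masks, labels, scores, boxes = ros_detector.prediction(img)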
class Real_time_yolact():
    def __init__(self, cuda=True, detect=False):
        self.trained_model = 'yolact/weights/yolact_im400_53_7000.pth'
        self.config = 'yolact_base_config'

        if self.config is not None:
            yolact_module.set_cfg(self.config)

        if self.trained_model == 'interrupt':
            trained_model = yolact_module.SavePath.get_interrupt('weights/')
        elif self.trained_model == 'latest':
            trained_model = yolact_module.SavePath.get_latest('weights/', cfg.name)

        if self.config is None:
            model_path = yolact_module.SavePath.from_str(trained_model)
            # TODO: Bad practice? Probably want to do a name lookup instead.
            config = model_path.model_name + '_config'
            print('Config not specified. Parsed %s from the file name.\n' % config)
            yolact_module.set_cfg(config)

        if detect:
            cfg.eval_mask_branch = False

        with torch.no_grad():
            if cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            self.net = Yolact()
            self.net.load_weights(self.trained_model)
            self.net.eval()

            if cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = True
            self.net.detect.use_cross_class_nms = False
            cfg.mask_proto_debug = False

    def segmentation(self, img):
        with torch.no_grad():
            h, w, _ = img.shape
            frame = torch.from_numpy(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
            classes, scores, boxes, masks = yolact_module.prep_display(
                5, preds, frame, 0.5, h, w,
                undo_transform=True, class_color=False, mask_alpha=0.45, fps_str='')

            if not len(masks):
                return np.zeros((img.shape[0], img.shape[1]))

            mask = masks[0]
            mask = mask.cpu().numpy()
            h, w = mask.shape

            filled_mask = np.zeros([h, w])
            contours = yolact_module.cv_contours(np.uint8(mask))
            C = len(contours)
            contours = sorted(contours, key=lambda x: cv2.contourArea(x))
            # Fill the biggest contour
            cv2.drawContours(filled_mask, contours, C - 1, 255, thickness=-1)

            return filled_mask

    def process(self, image_1, image_2):
        # Get segmentation masks as numpy arrays
        mask_2 = self.segmentation(img=image_2)
        mask_2 = np.uint8(mask_2)
        return mask_2
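# Illustrative usage sketch (not part of the original source): feeds two frames to
# Real_time_yolact.process(), which segments the second frame and returns a filled
# uint8 mask of the largest detected instance. File names are hypothetical.
import cv2

segmenter = Real_time_yolact(cuda=True)
frame_1 = cv2.imread('frame_001.png')
frame_2 = cv2.imread('frame_002.png')
mask = segmenter.process(frame_1, frame_2)
cv2.imwrite('mask_002.png', mask)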
def train():
    if cfg.dataset is None:
        print("Missing dataset in config!")
        exit(-1)

    save_folder = Path(args.save_folder)
    save_folder.mkdir(exist_ok=True, parents=True)

    epoch_status_file_path = Path(args.epoch_status_file)

    dataset = COCODetection(
        image_path=cfg.dataset.train_images,
        info_file=cfg.dataset.train_info,
        transform=SSDAugmentation(cfg, MEANS),
        label_map=cfg.dataset.get_valid_label_map(),
    )

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(
            image_path=cfg.dataset.valid_images,
            info_file=cfg.dataset.valid_info,
            transform=BaseTransform(cfg, MEANS),
            label_map=cfg.dataset.get_valid_label_map(),
        )

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact(cfg)
    net = yolact_net
    net.train()

    if args.log:
        log = Log(
            cfg.name,
            args.log_folder,
            dict(args._get_kwargs()),
            overwrite=(args.resume is None),
            log_gpu_stats=args.log_gpu,
        )

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == "interrupt":
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == "latest":
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print("Resuming training, loading {}...".format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print("Initializing weights...")
        yolact_net.init_weights(backbone_path=cfg.backbone.path)

    optimizer = optim.SGD(
        net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay
    )
    criterion = MultiBoxLoss(
        num_classes=cfg.num_classes,
        pos_threshold=cfg.positive_iou_threshold,
        neg_threshold=cfg.negative_iou_threshold,
        negpos_ratio=cfg.ohem_negpos_ratio,
        cfg=cfg,
    )

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(",")]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                "Error: Batch allocation (%s) does not sum to batch size (%s)."
                % (args.batch_alloc, args.batch_size)
            )
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    print(f"\n\t ==> Number of iterations per epoch: {epoch_size}")
    num_epochs = min(math.ceil(cfg.max_iter / epoch_size), cfg.max_num_epochs)
    print(f"\t ==> Number of epochs: {num_epochs}\n")

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(
        dataset,
        args.batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        collate_fn=detection_collate,
        pin_memory=True,
    )

    save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(
        root=args.save_folder
    )

    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print("Begin training!")
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            with epoch_status_file_path.open("w", encoding="utf-8") as epoch_status_file:
                json.dump({"cur_epoch": epoch}, epoch_status_file)

            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached the end of this epoch (when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            loss_avgs[avg].reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(
                        optimizer,
                        (args.lr - cfg.lr_warmup_init)
                        * (iteration / cfg.lr_warmup_until)
                        + cfg.lr_warmup_init,
                    )

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while (
                    step_index < len(cfg.lr_steps)
                    and iteration >= cfg.lr_steps[step_index]
                ):
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean() for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])
                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(
                            seconds=(cfg.max_iter - iteration) * time_avg.get_avg()
                        )
                    ).split(".")[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum(
                        [[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses],
                        [],
                    )

                    print(
                        (
                            "[%3d] %7d ||"
                            + (" %s: %.3f |" * len(losses))
                            + " T: %.3f || ETA: %s || timer: %.3f"
                        )
                        % tuple(
                            [epoch, iteration] + loss_labels + [total, eta_str, elapsed]
                        ),
                        flush=True,
                    )

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info["T"] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = iteration % 10 == 0  # nvidia-smi is sloooow

                    log.log(
                        "train",
                        loss=loss_info,
                        epoch=epoch,
                        iter=iteration,
                        lr=round(cur_lr, 10),
                        elapsed=elapsed,
                    )

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print("Saving state, iter:", iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (
                            args.keep_latest_interval <= 0
                            or iteration % args.keep_latest_interval != args.save_interval
                        ):
                            print("Deleting old save...")
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(
                        epoch,
                        iteration,
                        yolact_net,
                        val_dataset,
                        log if args.log else None,
                    )

        # Compute validation mAP after training is finished
        compute_validation_map(
            epoch, iteration, yolact_net, val_dataset, log if args.log else None
        )
    except KeyboardInterrupt:
        if args.interrupt:
            print("Stopping early. Saving network...")

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            # Wait for all torch processes to finish their task
            time.sleep(1)

            yolact_net.save_weights(save_path(epoch, repr(iteration) + "_interrupt"))
        exit()

    print("Saving weights...")
    yolact_net.save_weights(save_path(epoch, repr(iteration) + "_end"))
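# Illustrative entry-point sketch (not part of the original source). train() reads its
# settings from a module-level `args` namespace, so a minimal launcher could look like
# the following; the parse_args() helper is an assumption borrowed from the ROS script above.
if __name__ == "__main__":
    parse_args()  # hypothetical: populates the global `args` used throughout train()
    if args.config is not None:
        set_cfg(args.config)
    train()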