def __init__(self, fileName):
    """Load an RGB image, run the module-level detector on it and show the result.

    Args:
        fileName: path of the image file to read.
    """
    # self.rgb = np.array(Image.open(fileName).convert("RGB"))
    self.rgb = utils.read_image(fileName, color=True)
    print(self.rgb.shape)
    # NOTE(review): relies on a module-level `model` detector — confirm it is
    # initialized before this class is constructed.
    bboxes, labels, scores, masks = model.predict([self.rgb])
    bbox, label, score, mask = (bboxes[0],
                                np.asarray(labels[0], dtype=np.int32),
                                scores[0], masks[0])
    print(bbox)
    # BUG FIX: ('onigiri') is just the string 'onigiri' — parentheses alone do
    # not create a tuple — so label_names[i] indexed single characters.
    # A one-element tuple requires the trailing comma.
    vis_bbox(self.rgb, bbox, label=label, score=score, mask=mask,
             label_names=('onigiri',), contour=False, labeldisplay=True)
    plt.show()
def get_predictions(self, img_name='', image=None, plot=False):
    """
    Gets the bounding box prediction for the image and returns them in
    tensor of bboxes in the format: [[x1, y1, x2, y2, score], ...]

    Args:
        img_name: file name under settings.INPUT_FOLDER; when non-empty it
            takes precedence over `image`.
        image: an already-loaded image array (used when img_name is '').
        plot: when True, draw the detections with vis_bbox and show them.

    Returns:
        torch.FloatTensor of [x1, y1, x2, y2, score] rows (empty when the
        model produced no detections).
    """
    if img_name:
        img = cv2.imread(os.path.join(settings.INPUT_FOLDER, img_name))
    else:
        img = image
    # Channel-reversed (BGR->RGB), channel-first copy kept for plotting.
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, self.imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)
    if use_cuda():
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))
    with torch.no_grad():
        outputs = self.model(img)
        outputs = postprocess(outputs, Dataset.NUM_CLASSES[Dataset.SIGNET_RING],
                              self.confthre, self.nmsthre)
    bboxes = list()
    colors = list()
    bboxes_with_scores = list()
    # postprocess returns None for an image with no surviving detections.
    if outputs[0] is not None:
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
            print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
            print('\t+ Conf: %.5f' % cls_conf.item())
            # Map the network-space box back to original-image coordinates;
            # yolobox2label takes/returns [y1, x1, y2, x2] order.
            box = yolobox2label([y1, x1, y2, x2], info_img)
            bboxes.append(box)
            colors.append(BOX_COLOR)
            # Reorder to [x1, y1, x2, y2] and append the combined
            # objectness * class confidence as the detection score.
            tmp = [box[1], box[0], box[3], box[2]]
            tmp.append(conf * cls_conf)
            bboxes_with_scores.append(tmp)
    if plot:
        vis_bbox(img_raw, bboxes, instance_colors=colors, linewidth=2)
        plt.show()
    return torch.FloatTensor(bboxes_with_scores)
def draw(model, datatype, imgsize):
    """Render detections for every id in the VOC test list and save under draw/.

    Args:
        model: YOLOv3 network; switched to eval mode here and back to train
            mode before returning.
        datatype: 'voc' reads from VOC2007/JPEGImages; anything else reads
            COCO train2017 images named by zero-padded integer id.
        imgsize: network input size passed to preprocess().
    """
    model.eval()
    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()
    id_list_file = os.path.join(
        '../../VOCdevkit/VOC2007', 'ImageSets/Main/{0}.txt'.format('test'))
    # BUG FIX: the file handle was leaked; read the id list via a context manager.
    with open(id_list_file) as f:
        ids = [line.strip() for line in f]
    for i in tqdm(range(len(ids))):
        if datatype == 'voc':
            img_file = os.path.join('../../VOCdevkit/VOC2007', 'JPEGImages',
                                    ids[i] + '.jpg')
        else:
            # BUG FIX: the original referenced `id_`, which existed only inside
            # the list comprehension above and raises NameError in Python 3.
            # COCO file names are the zero-padded integer image id, and the
            # '012' zero-pad spec only applies to numeric types, so convert.
            img_file = os.path.join('COCO', 'train2017',
                                    '{:012d}'.format(int(ids[i])) + '.jpg')
        img = cv2.imread(img_file)
        img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
        img, info_img_t = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
        img = np.transpose(img / 255., (2, 0, 1))
        img = torch.from_numpy(img).float().unsqueeze(0)
        img = Variable(img.type(torch.cuda.FloatTensor))
        # Inference only: no_grad avoids building an autograd graph per image.
        with torch.no_grad():
            outputs = model(img)
        outputs = postprocess(outputs, 80, 0.5, 0.5)
        if outputs[0] is not None:
            bboxes = list()
            classes = list()
            colors = list()
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
                cls_id = coco_class_ids[int(cls_pred)]
                box = yolobox2label([y1, x1, y2, x2], info_img_t)
                bboxes.append(box)
                classes.append(cls_id)
                colors.append(coco_class_colors[int(cls_pred)])
            vis_bbox(
                img_raw, bboxes, label=classes, label_names=coco_class_names,
                instance_colors=colors, linewidth=2)
            plt.savefig('draw/' + ids[i] + '.jpg')
            # BUG FIX: close the figure so memory does not grow over the loop.
            plt.close()
    model.train()
def main():
    """
    Visualize the detection result for the given image and the pre-trained model.

    Loads a YOLOv3 model from darknet weights (--weights_path) or a pytorch
    checkpoint (--ckpt), runs it on --image, prints each detection and draws
    the result. With --background the figure is saved to ./output.png instead
    of being displayed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--cfg', type=str, default='config/yolov3_default.cfg')
    parser.add_argument('--ckpt', type=str, help='path to the checkpoint file')
    parser.add_argument('--weights_path', type=str, default=None,
                        help='path to weights file')
    parser.add_argument('--image', type=str)
    parser.add_argument(
        '--background', action='store_true', default=False,
        help='background(no-display mode. save "./output.png")')
    parser.add_argument('--detect_thresh', type=float, default=None,
                        help='confidence threshold')
    args = parser.parse_args()

    with open(args.cfg, 'r') as f:
        # FIX: yaml.load without an explicit Loader is deprecated and unsafe;
        # the config is plain YAML, so SafeLoader suffices.
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(cfg['MODEL'])

    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']
    if args.detect_thresh:
        confthre = args.detect_thresh

    img = cv2.imread(args.image)
    # BGR->RGB, HWC->CHW copy of the raw image, kept for visualization.
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    if args.gpu >= 0:
        model.cuda(args.gpu)
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    else:
        print("loading checkpoint %s" % (args.ckpt))
        model.load_state_dict(torch.load(args.ckpt))

    model.eval()

    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, 80, confthre, nmsthre)

    if outputs[0] is None:
        # FIX: typo "Deteted" -> "Detected".
        print("No Objects Detected!!")
        return

    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()

    bboxes = list()
    classes = list()
    colors = list()
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
        cls_id = coco_class_ids[int(cls_pred)]
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Label: %s, Conf: %.5f' %
              (coco_class_names[cls_id], cls_conf.item()))
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        classes.append(cls_id)
        colors.append(coco_class_colors[int(cls_pred)])

    # Select the non-interactive Agg backend BEFORE pyplot is imported
    # when running in no-display mode.
    if args.background:
        import matplotlib
        matplotlib.use('Agg')
    from utils.vis_bbox import vis_bbox
    import matplotlib.pyplot as plt

    vis_bbox(img_raw, bboxes, label=classes, label_names=coco_class_names,
             instance_colors=colors, linewidth=2)
    if args.background:
        plt.savefig('output.png')
    else:
        # FIX: plt.show() was commented out, so non-background runs produced
        # no visible output at all.
        plt.show()
def main():
    """Mask R-CNN (Chainer) demo: run the detector on --image and save the
    visualization to output.png.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--modelfile')
    parser.add_argument('--image', type=str)
    parser.add_argument('--roi_size', '-r', type=int, default=7,
                        help='ROI size for mask head input')
    # NOTE: store_false means passing --roialign DISABLES RoIAlign.
    parser.add_argument('--roialign', action='store_false', default=True,
                        help='default: True')
    parser.add_argument('--contour', action='store_true', default=False,
                        help='visualize contour')
    parser.add_argument('--background', action='store_true', default=False,
                        help='background(no-display mode)')
    parser.add_argument('--extractor', choices=('resnet50', 'resnet101'),
                        default='resnet50', help='extractor network')
    args = parser.parse_args()
    # Select the non-interactive Agg backend BEFORE pyplot is imported.
    if args.background:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plot
    from utils.vis_bbox import vis_bbox
    from chainercv.datasets import voc_bbox_label_names
    from mask_rcnn_resnet import MaskRCNNResNet
    from chainercv import utils
    # Build the selected backbone; both variants share the other options.
    if args.extractor == 'resnet50':
        model = MaskRCNNResNet(n_fg_class=80, roi_size=args.roi_size,
                               n_layers=50, roi_align=args.roialign)
    elif args.extractor == 'resnet101':
        model = MaskRCNNResNet(n_fg_class=80, roi_size=args.roi_size,
                               n_layers=101, roi_align=args.roialign)
    chainer.serializers.load_npz(args.modelfile, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    img = utils.read_image(args.image, color=True)
    # predict() takes/returns per-image lists; this demo uses one image.
    bboxes, rois, labels, scores, masks = model.predict([img])
    print(bboxes, rois)
    bbox, roi, label, score, mask = bboxes[0], rois[0], np.asarray(
        labels[0], dtype=np.int32), scores[0], masks[0]
    #print(bbox, np.asarray(label,dtype=np.int32), score, mask)
    # COCO class names indexed by label id ('background' at index zero).
    coco_label_names = (
        'background',  # class zero
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'street sign', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
        'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
        'giraffe', 'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate',
        'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
        'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
        'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        'mirror', 'dining table', 'window', 'desk', 'toilet', 'door', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
        'toothbrush')
    # NOTE(review): `roi` is passed both as the boxes argument and again as a
    # second positional argument while `bbox` goes unused — looks suspicious;
    # verify against utils.vis_bbox's signature before changing.
    vis_bbox(img, roi, roi, label=label, score=score, mask=mask,
             label_names=coco_label_names, contour=args.contour,
             labeldisplay=True)
    #plot.show()
    filename = "output.png"
    plot.savefig(filename)
def main():
    """Mask R-CNN (Chainer) demo with COCO class-id remapping: run the
    detector on --image, show and save the visualization to output.png.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--modelfile')
    parser.add_argument('--image', type=str)
    parser.add_argument('--roi_size', '-r', type=int, default=14,
                        help='ROI size for mask head input')
    # NOTE: store_false means passing --roialign DISABLES RoIAlign.
    parser.add_argument('--roialign', action='store_false', default=True,
                        help='default: True')
    parser.add_argument('--contour', action='store_true', default=False,
                        help='visualize contour')
    parser.add_argument('--background', action='store_true', default=False,
                        help='background(no-display mode)')
    parser.add_argument('--bn2affine', action='store_true', default=False,
                        help='batchnorm to affine')
    parser.add_argument('--extractor', choices=('resnet50', 'resnet101'),
                        default='resnet50', help='extractor network')
    args = parser.parse_args()
    # network class id --> coco label id (COCO skips some ids, e.g. 12, 26).
    test_class_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17,
                      18, 19, 20, 21, 22, 23, 24, 25,
                      27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
                      43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
                      57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74,
                      75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89,
                      90]
    # Select the non-interactive Agg backend BEFORE pyplot is imported.
    if args.background:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plot
    from utils.vis_bbox import vis_bbox
    from chainercv.datasets import voc_bbox_label_names
    from mask_rcnn_resnet import MaskRCNNResNet
    from chainercv import utils
    if args.extractor == 'resnet50':
        model = MaskRCNNResNet(n_fg_class=80, roi_size=args.roi_size,
                               pretrained_model=args.modelfile, n_layers=50,
                               roi_align=args.roialign,
                               class_ids=test_class_ids)
    elif args.extractor == 'resnet101':
        model = MaskRCNNResNet(n_fg_class=80, roi_size=args.roi_size,
                               pretrained_model=args.modelfile, n_layers=101,
                               roi_align=args.roialign,
                               class_ids=test_class_ids)
    chainer.serializers.load_npz(args.modelfile, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    # Optionally fold batch-norm layers into affine layers before inference.
    if args.bn2affine:
        bn_to_affine(model)
    img = utils.read_image(args.image, color=True)
    # predict() takes/returns per-image lists; this demo uses one image.
    bboxes, labels, scores, masks = model.predict([img])
    bbox, label, score, mask = bboxes[0], np.asarray(
        labels[0], dtype=np.int32), scores[0], masks[0]
    #print(bbox, np.asarray(label,dtype=np.int32), score, mask)
    # COCO class names indexed by label id ('background' at index zero).
    coco_label_names = (
        'background',  # class zero
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'street sign', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
        'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
        'giraffe', 'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate',
        'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
        'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
        'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        'mirror', 'dining table', 'window', 'desk', 'toilet', 'door', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
        'toothbrush')
    vis_bbox(img, bbox, label=label, score=score, mask=mask,
             label_names=coco_label_names, contour=args.contour,
             labeldisplay=True)
    # show() is a no-op under the Agg backend in --background mode.
    plot.show()
    filename = "output.png"
    plot.savefig(filename)
def main(image=None, gpu=-1,
         weights_path=f"{Path(__file__).parent}/weights/yolov3.weights",
         background=False):
    """
    Visualize the detection result for the given image and the pre-trained model.

    Args (also exposed as CLI flags, which take precedence over these defaults):
        image: path to the input image.
        gpu: GPU id, or a negative value for CPU.
        weights_path: darknet weights file (used unless --ckpt is given).
        background: when truthy, save the figure under ./output/ and return
            its absolute path instead of showing it.
    """
    print(weights_path)
    my_path = Path(__file__).parent
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=gpu)
    parser.add_argument('--cfg', type=str,
                        default=my_path / 'config/yolov3_default.cfg')
    parser.add_argument('--ckpt', type=str,
                        help='path to the checkpoint file')
    parser.add_argument('--weights_path', type=str, default=weights_path,
                        help='path to weights file')
    parser.add_argument('--image', type=str, default=image)
    # NOTE(review): type=bool does not parse "False" from the command line as
    # False (any non-empty string is truthy); kept for interface compatibility.
    parser.add_argument('--background', type=bool, default=background,
                        help='background(no-display mode. save "./output.png")')
    parser.add_argument('--detect_thresh', type=float, default=0.5,
                        help='confidence threshold')
    args = parser.parse_args()

    with open(args.cfg, 'r') as f:
        # FIX: yaml.load without an explicit Loader is deprecated and unsafe.
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(cfg['MODEL'])
    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']
    if args.detect_thresh:
        confthre = args.detect_thresh

    img = imread(args.image)
    if img is None:
        print("load image failed")
        print(args.image)
        return

    # BGR->RGB, HWC->CHW copy of the raw image, kept for visualization.
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    if args.gpu >= 0:
        model.cuda(args.gpu)
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))

    assert args.weights_path or args.ckpt, \
        'One of --weights_path and --ckpt must be specified'

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    elif args.ckpt:
        print("loading checkpoint %s" % (args.ckpt))
        state = torch.load(args.ckpt)
        # A training snapshot wraps the weights; a bare state dict does not.
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    model.eval()

    with torch.no_grad():
        outputs = model(img)
    # FIX: removed leftover debug code that torch.load()ed "outputs1.pt"
    # (crashes whenever that file is absent) plus the unused locals it fed
    # (`rere`, `a = "hoho"`).
    outputs = postprocess(outputs, 80, confthre, nmsthre)

    if outputs[0] is None:
        # FIX: typo "Deteted" -> "Detected".
        print("No Objects Detected!!")
        return

    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()

    bboxes = list()
    classes = list()
    colors = list()
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
        cls_id = coco_class_ids[int(cls_pred)]
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Label: %s, Conf: %.5f' %
              (coco_class_names[cls_id], cls_conf.item()))
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        classes.append(cls_id)
        colors.append(coco_class_colors[int(cls_pred)])

    # Select the non-interactive Agg backend in no-display mode.
    if args.background:
        import matplotlib
        matplotlib.use('Agg')
    from utils.vis_bbox import vis_bbox

    vis_bbox(
        img_raw, bboxes, label=classes, label_names=coco_class_names,
        instance_colors=colors, linewidth=2)

    if args.background:
        output = Path("./output")
        output.mkdir(parents=True, exist_ok=True)
        now = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
        output /= f"output-{now}.png"
        plt.savefig(output)
        return str(output.absolute())
        # return plt_to_qpixmap(plt.gca())
    else:
        plt.show()
def main():
    """
    Single-image YOLOv3 demo for the 'coco' or 'drone' dataset: run the model
    on args.image and either show the result (--window) or save ./output.png.
    """
    args = parse_args()
    print("------------------------------------")
    print(" use {} dataset for demo. ".format(args.data))
    print("------------------------------------")
    assert args.data in ['coco', 'drone']

    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')

    # [TBM] gen n_classes from coco-format json file..
    if args.data == 'coco':
        cfg_path = 'config/yolov3_default.cfg'
        n_classes = 80
    if args.data == 'drone':
        cfg_path = 'config/yolov3_visdrone_default.cfg'
        n_classes = 10

    with open(cfg_path, 'r') as f:
        # FIX: yaml.load without an explicit Loader is deprecated and unsafe.
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(n_classes=n_classes)
    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']
    if args.detect_thresh:
        confthre = args.detect_thresh

    img = cv2.imread(args.image)
    assert img is not None
    # BGR->RGB, HWC->CHW copy of the raw image, kept for visualization.
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize)  # info = (h, w, nh, nw, dx, dy)
    img = torch.from_numpy(img).float().unsqueeze(0)

    model = model.to(device)
    img = Variable(img.to(device, dtype=torch.float32))

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    else:
        print("loading checkpoint %s" % (args.ckpt))
        model.load_state_dict(torch.load(args.ckpt))

    model.eval()
    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, n_classes, confthre, nmsthre)

    # [TBM] gen label_names from coco-format json file..
    if args.data == 'coco':
        class_names, class_ids, class_colors = get_coco_label_names()
    if args.data == 'drone':
        class_names, class_ids, class_colors = get_visdrone_label_names()

    # FIX: postprocess returns None for an image with no surviving
    # detections; iterating it raised TypeError in the original.
    if outputs[0] is None:
        print("No Objects Detected!!")
        return

    bboxes = list()
    classes = list()
    colors = list()
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
        cls_id = class_ids[int(cls_pred)]
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Label: %s, Conf: %.5f' %
              (class_names[cls_id], cls_conf.item()))
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        classes.append(cls_id)
        colors.append(class_colors[int(cls_pred)])

    vis_bbox(img_raw, bboxes, label=classes, label_names=class_names,
             instance_colors=colors, linewidth=2)
    if args.window:
        plt.show()
    else:
        out_path = './output.png'
        plt.savefig(out_path, bbox_inches=0, pad_inches=0, dpi=100)
elif cls_id == 65: # 신호등 z_world = 1065 / width * 1060 else: # 나머지 z_world = 1065 / width * 1060 x_world = (x_cent - 960) / 1060 * z_world y_world = (y_cent - 600) / 1060 * z_world distance = round( ((x_world**2) + (y_world**2) + (z_world**2))**0.5 / 1000, 2) world.append((x_world, y_world, z_world)) dist.append(distance) fig, ax = vis_bbox( img_raw, bboxes, label=classes, score=scores, label_names=coco_class_names, sigma=sigmas, sigma_scale_img=sigma_scale_img, world=world, distance=dist, sigma_scale_xy=2., sigma_scale_wh=2., # 2-sigma show_inner_bound=False, # do not show inner rectangle for simplicity instance_colors=colors, linewidth=3) fig.savefig('./demo/result/' + split_list[j][i]) print('{} save'.format(split_list[j][i]))
def main():
    """
    Visualize the detection result for the given image and the pre-trained model.

    Digestpath variant: loads a YOLOv3 model from darknet weights or a pytorch
    checkpoint, runs it on --image, prints each detection and draws the boxes;
    with --background the figure is saved to ./output.png instead of shown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--cfg', type=str,
                        default='config/yolov3_default_digestpath.cfg')
    parser.add_argument('--ckpt', type=str, help='path to the checkpoint file')
    parser.add_argument('--weights_path', type=str, default=None,
                        help='path to weights file')
    parser.add_argument('--image', type=str)
    parser.add_argument(
        '--background', action='store_true', default=False,
        help='background(no-display mode. save "./output.png")')
    parser.add_argument('--detect_thresh', type=float, default=None,
                        help='confidence threshold')
    parser.add_argument('--dataset',
                        help='dataset to work with: {}'.format(
                            Dataset.print_choices()),
                        type=int, default=Dataset.SIGNET_RING)
    args = parser.parse_args()

    with open(args.cfg, 'r') as f:
        # FIX: yaml.load without an explicit Loader is deprecated and unsafe.
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(cfg['MODEL'])
    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']
    if args.detect_thresh:
        confthre = args.detect_thresh

    img = cv2.imread(args.image)
    # BGR->RGB, HWC->CHW copy of the raw image, kept for visualization.
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    if args.gpu >= 0:
        model.cuda(args.gpu)
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))

    assert args.weights_path or args.ckpt, \
        'One of --weights_path and --ckpt must be specified'

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    elif args.ckpt:
        print("loading checkpoint %s" % (args.ckpt))
        state = torch.load(args.ckpt)
        # A training snapshot wraps the weights; a bare state dict does not.
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    model.eval()

    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, Dataset.NUM_CLASSES[args.dataset],
                              confthre, nmsthre)

    if outputs[0] is None:
        # FIX: typo "Deteted" -> "Detected".
        print("No Objects Detected!!")
        return

    bboxes = list()
    colors = list()
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Conf: %.5f' % cls_conf.item())
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        colors.append(BOX_COLOR)

    # Select the non-interactive Agg backend in no-display mode.
    if args.background:
        matplotlib.use('Agg')

    vis_bbox(img_raw, bboxes, instance_colors=colors, linewidth=2)
    if args.background:
        plt.savefig('output.png')
    else:
        # FIX: show only when a display is expected; calling plt.show()
        # under the Agg backend was a warning-emitting no-op.
        plt.show()
def main():
    """
    YOLOv3 trainer. See README for details.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda
    os.makedirs(args.checkpoint_dir, exist_ok=True)

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f)
    print("successfully loaded config file: ", cfg)

    momentum = cfg['TRAIN']['MOMENTUM']
    decay = cfg['TRAIN']['DECAY']
    burn_in = cfg['TRAIN']['BURN_IN']
    iter_size = cfg['TRAIN']['MAXITER']
    # NOTE(review): STEPS comes from the config as a string and is eval()'d —
    # acceptable for trusted configs only.
    steps = eval(cfg['TRAIN']['STEPS'])
    batch_size = cfg['TRAIN']['BATCHSIZE']
    subdivision = cfg['TRAIN']['SUBDIVISION']
    ignore_thre = cfg['TRAIN']['IGNORETHRE']
    random_resize = cfg['AUGMENTATION']['RANDRESIZE']
    base_lr = cfg['TRAIN']['LR'] / batch_size / subdivision
    datatype = cfg['TRAIN']['DATATYPE']

    print('effective_batch_size = batch_size * iter_size = %d * %d' %
          (batch_size, subdivision))

    # Learning rate setup: 4th-power warmup over the first burn_in iters,
    # then piecewise-constant decay (x0.1 at steps[0], x0.01 at steps[1]).
    def burnin_schedule(i):
        if i < burn_in:
            factor = pow(i / burn_in, 4)
        elif i < steps[0]:
            factor = 1.0
        elif i < steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    # Initiate model
    model = YOLOv3(cfg['MODEL'], ignore_thre=ignore_thre)

    if args.weights_path:
        print("loading darknet weights....", args.weights_path)
        parse_yolo_weights(model, args.weights_path)
    elif args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        state = torch.load(args.checkpoint)
        # A training snapshot wraps the weights; a bare state dict does not.
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    if cuda:
        print("using cuda")
        model = model.cuda()

    if args.tfboard:
        print("using tfboard")
        from tensorboardX import SummaryWriter
        tblogger = SummaryWriter(args.tfboard)

    model.train()

    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()

    imgsize = cfg['TRAIN']['IMGSIZE']
    if datatype=='voc':
        dataset = VOCDataset(model_type=cfg['MODEL']['TYPE'],
                             data_dir='../../VOCdevkit/VOC2007',
                             img_size=imgsize,
                             augmentation=cfg['AUGMENTATION'],
                             debug=args.debug)
        print('load voc dataset successfully')
    else:
        dataset = COCODataset(model_type=cfg['MODEL']['TYPE'],
                              data_dir='COCO/',
                              img_size=imgsize,
                              augmentation=cfg['AUGMENTATION'],
                              debug=args.debug)
        print('load COCO dataset successfully')
        # NOTE(review): the COCO evaluator is only used on the non-voc path
        # below, so it is constructed here — confirm intended nesting.
        evaluator = COCOAPIEvaluator(model_type=cfg['MODEL']['TYPE'],
                                     data_dir='COCO/',
                                     img_size=cfg['TEST']['IMGSIZE'],
                                     confthre=cfg['TEST']['CONFTHRE'],
                                     nmsthre=cfg['TEST']['NMSTHRE'])

    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=args.n_cpu)
    dataiterator = iter(dataloader)

    dtype = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # optimizer setup
    # set weight decay only on conv.weight
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'conv.weight' in key:
            params += [{'params': value,
                        'weight_decay': decay * batch_size * subdivision}]
        else:
            params += [{'params': value, 'weight_decay': 0.0}]
    optimizer = optim.SGD(params, lr=base_lr, momentum=momentum, dampening=0,
                          weight_decay=decay * batch_size * subdivision)

    iter_state = 0

    if args.checkpoint:
        # Resume optimizer state and iteration counter from the snapshot.
        if 'optimizer_state_dict' in state.keys():
            optimizer.load_state_dict(state['optimizer_state_dict'])
            iter_state = state['iter'] + 1

    # Learning-rate control (translated): LambdaLR sets the learning rate of
    # each parameter group to the initial lr times burnin_schedule(iter).
    # When last_epoch=-1, the initial lr is used as lr.
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # start training loop
    for iter_i in range(iter_state, iter_size + 1):

        # Periodic evaluation every 2 * eval_interval iterations.
        if iter_i % (args.eval_interval*2) == 0 and iter_i > 0:
            if datatype=='voc':
                result=evals(model)
                print(result)
            else:
                ap50_95, ap50 = evaluator.evaluate(model)
                print(ap50_95, ap50)
                # evaluate() leaves the model in eval mode; restore training.
                model.train()
                if args.tfboard:
                    tblogger.add_scalar('val/COCOAP50', ap50, iter_i)
                    tblogger.add_scalar('val/COCOAP50_95', ap50_95, iter_i)

        # Periodically dump visualizations of the whole test list.
        if iter_i % (40000) == 0 and iter_i > 0:
            draw(model,datatype,imgsize)

        # subdivision loop: accumulate gradients over `subdivision` batches
        # before a single optimizer step (effective batch =
        # batch_size * subdivision).
        optimizer.zero_grad()
        for inner_iter_i in range(subdivision):
            try:
                imgs, targets, info_img, id_ = next(dataiterator)  # load a batch
            except StopIteration:
                # Dataloader exhausted: start a fresh epoch.
                dataiterator = iter(dataloader)
                imgs, targets, info_img, id_ = next(dataiterator)  # load a batch
            imgs = Variable(imgs.type(dtype))
            targets = Variable(targets.type(dtype), requires_grad=False)
            loss = model(imgs, targets)
            loss.backward()

        optimizer.step()
        scheduler.step()

        if iter_i % 10 == 0:
            # logging
            current_lr = scheduler.get_lr()[0] * batch_size * subdivision
            print('[Iter %d/%d] [lr %f] '
                  '[Losses: xy %f, wh %f, conf %f, cls %f, total %f, imgsize %d]'
                  % (iter_i, iter_size, current_lr,
                     model.loss_dict['xy'], model.loss_dict['wh'],
                     model.loss_dict['conf'], model.loss_dict['cls'],
                     model.loss_dict['l2'], imgsize))
            if args.tfboard:
                tblogger.add_scalar('train/total_loss',
                                    model.loss_dict['l2'], iter_i)

            # random resizing (translated): change the input size; YOLOv3 is
            # fully convolutional, so its parameters are unaffected by the
            # image-size change.
            if random_resize:
                imgsize = (random.randint(0, 9) % 10 + 10) * 32
                dataset.img_shape = (imgsize, imgsize)
                dataset.img_size = imgsize
                dataloader = torch.utils.data.DataLoader(
                    dataset, batch_size=batch_size, shuffle=True,
                    num_workers=args.n_cpu)
                dataiterator = iter(dataloader)

        # Every 100 iters, visualize detections on an image from the last
        # loaded batch and save it under output/.
        if iter_i % 100 == 0:
            model.eval()
            if datatype=='voc':
                img_file = os.path.join('../../VOCdevkit/VOC2007',
                                        'JPEGImages', id_[0] + '.jpg')
            else:
                img_file = os.path.join('COCO', 'train2017',
                                        '{:012}'.format(id_) + '.jpg')
            img = cv2.imread(img_file)
            img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
            img, info_img_t = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
            img = np.transpose(img / 255., (2, 0, 1))
            img = torch.from_numpy(img).float().unsqueeze(0)
            img = Variable(img.type(torch.cuda.FloatTensor))
            outputs = model(img)  # outputs.shape : torch.Size([1, 12348, 85])
            outputs = postprocess(outputs, 80, 0.5, 0.5)
            # imgs.shape : torch.Size([1, 3, 608, 608])
            # outputs[0].shape : torch.Size([3, 7])
            # targets.shape : torch.Size([1, 50, 5])
            if outputs[0] is not None:
                bboxes = list()
                classes = list()
                colors = list()
                info_img=tuple(info_img)
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
                    cls_id = coco_class_ids[int(cls_pred)]
                    box = yolobox2label([y1, x1, y2, x2], info_img_t)
                    bboxes.append(box)
                    classes.append(cls_id)
                    colors.append(coco_class_colors[int(cls_pred)])
                vis_bbox(
                    img_raw, bboxes, label=classes,
                    label_names=coco_class_names,
                    instance_colors=colors, linewidth=2)
                plt.savefig('output/'+str(iter_i)+'.jpg')
            model.train()

        # save checkpoint
        if iter_i > 0 and (iter_i % args.checkpoint_interval == 0):
            torch.save({'iter': iter_i,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        },
                       os.path.join(args.checkpoint_dir,
                                    "snapshot" + str(iter_i) + ".ckpt"))
    if args.tfboard:
        tblogger.close()
def main():
    """
    Batch demo for the 'coco' or 'drone' dataset: run YOLOv3 on every
    args.step-th image in args.in_dir and save the visualized results as PNGs
    under args.out_dir/<input dir name>/.
    """
    args = parse_args()
    print("------------------------------------")
    print(" use {} dataset for demo. ".format(args.data))
    print("------------------------------------")
    assert args.data in ['coco', 'drone']

    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')
    # FIX: torch.cuda.Event / torch.cuda.synchronize crash on the CPU path
    # the code explicitly supports; only time with CUDA events on CUDA.
    cuda_timing = device.type == 'cuda'

    # [TBM] gen n_classes from coco-format json file..
    if args.data == 'coco':
        cfg_path = 'config/yolov3_default.cfg'
        n_classes = 80
    if args.data == 'drone':
        cfg_path = 'config/yolov3_visdrone_default.cfg'
        n_classes = 10

    with open(cfg_path, 'r') as f:
        # FIX: yaml.load without an explicit Loader is deprecated and unsafe.
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(n_classes=n_classes)
    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']
    if args.detect_thresh:
        confthre = args.detect_thresh

    model = model.to(device)

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    else:
        print("loading checkpoint %s" % (args.ckpt))
        model.load_state_dict(torch.load(args.ckpt))

    model.eval()

    dir_name = os.path.basename(os.path.dirname(args.in_dir + '/'))
    out_dir = os.path.join(args.out_dir, dir_name)
    os.makedirs(out_dir, exist_ok=True)

    img_files = os.listdir(args.in_dir)
    img_files.sort()

    if cuda_timing:
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

    for i in range(0, len(img_files), args.step):
        filename = img_files[i]
        img_path = os.path.join(args.in_dir, filename)
        img = cv2.imread(img_path)
        assert img is not None

        if cuda_timing:
            start.record()
        # BGR->RGB, HWC->CHW copy of the raw image, kept for visualization.
        img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
        img, info_img = preprocess(img, imgsize)  # info = (h, w, nh, nw, dx, dy)
        img = torch.from_numpy(img).float().unsqueeze(0)
        img = Variable(img.to(device, dtype=torch.float32))

        with torch.no_grad():
            outputs = model(img)
            outputs = postprocess(outputs, n_classes, confthre, nmsthre)
        if cuda_timing:
            end.record()
            torch.cuda.synchronize()

        # [TBM] gen label_names from coco-format json file..
        if args.data == 'coco':
            class_names, class_ids, class_colors = get_coco_label_names()
        if args.data == 'drone':
            class_names, class_ids, class_colors = get_visdrone_label_names()

        bboxes, classes, colors = list(), list(), list()
        # Normalize "no detections" to an empty list so the loop below and
        # vis_bbox still run (producing an image without boxes).
        if outputs[0] is None:
            outputs[0] = []

        if args.verbose:
            print("=====================================")
            if cuda_timing:
                # elapsed_time() is in milliseconds -> 1000/ms = fps.
                print("{}, {:.2f} [fps]".format(
                    filename, 1000.0 / start.elapsed_time(end)))
            else:
                print(filename)

        for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
            cls_id = class_ids[int(cls_pred)]
            if args.verbose:
                print(int(x1), int(y1), int(x2), int(y2),
                      float(conf), int(cls_pred))
                print('\t+ Label: %s, Conf: %.5f' %
                      (class_names[cls_id], cls_conf.item()))
            box = yolobox2label([y1, x1, y2, x2], info_img)
            bboxes.append(box)
            classes.append(cls_id)
            colors.append(class_colors[int(cls_pred)])
        if args.verbose:
            print()

        vis_bbox(img_raw, bboxes, label=classes, label_names=class_names,
                 instance_colors=colors, linewidth=2)
        basename, _ = os.path.splitext(filename)
        out_path = os.path.join(out_dir, '{}.png'.format(basename))
        plt.savefig(out_path, bbox_inches=0, pad_inches=0, dpi=100)
        plt.close()

    print("Done!")