def init_model(transform):

    parser = argparse.ArgumentParser()
    parser.add_argument("--confidence", dest="confidence", help="Object Confidence to filter predictions", default=0.25)
    parser.add_argument("--nms_thresh", dest="nms_thresh", help="NMS Threshhold", default=0.4)
    parser.add_argument("--reso", dest='reso', help=
    "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="160", type=str)
    args, unknown = parser.parse_known_args()

    cfgfile = "./cfg/yolov4.cfg"
    weightsfile = "./weights/yolov4.pth"

    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 80
    # bbox_attrs = 5 + num_classes  # (x, y, w, h, objectness) + per-class scores
    class_names = load_class_names("./data/coco.names")

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    if CUDA:
        model.cuda()

    model.eval()
    return (model, class_names, CUDA), None
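
A minimal caller sketch for init_model (the transform argument is unused in this snippet, and the trailing None mirrors the two-tuple return convention above):

(model, class_names, CUDA), _ = init_model(transform=None)
print(len(class_names), 'classes loaded; CUDA available:', CUDA)
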
def load_model(model_config_file, weight_file, frame_size):
    model = Darknet(model_config_file, inference=True)
    # NOTE: map_location here assumes a CUDA device is available.
    checkpoint = torch.load(
        weight_file, map_location=torch.device('cuda'))
    model.load_state_dict(checkpoint['state_dict'])
    
    model.eval()
    model.cuda()
    return model
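
load_model above pins map_location to CUDA, so it fails on CPU-only machines. A device-agnostic variant is a small change (a sketch, assuming the same Darknet checkpoint layout):

def load_model_any_device(model_config_file, weight_file):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Darknet(model_config_file, inference=True)
    checkpoint = torch.load(weight_file, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    return model.to(device).eval()
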
def call_yolov4(cfgfile='../yolov4.cfg',
                weightfile='../yolov4.weights',
                use_cuda=True):
    m = Darknet(cfgfile)
    m.load_weights(weightfile)

    if use_cuda:
        m.cuda().eval()
    else:
        m.eval()

    return m
Example #4
def load_model(opts, frame_size):
    cfg_file_path = os.path.join(
        opts.model_config_dir, "yolov4_{}.cfg".format(frame_size))
    model = Darknet(cfg_file_path, inference=True)
    weight_file = os.path.join(
        opts.weights_dir, "yolov4_{}.pth".format(frame_size))
    checkpoint = torch.load(
        weight_file, map_location='cuda:{}'.format(opts.gpu_id))
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    if not opts.no_cuda:
        model.cuda(opts.gpu_id)

    # Zero grad for parameters
    for param in model.parameters():
        param.grad = None
    return model
Example #5
def detect(cfgfile, weightfile, imgfile):
    m = Darknet(cfgfile)

    m.print_network()
    m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    use_cuda = torch.cuda.is_available()  # was a module-level global in the source snippet
    if use_cuda:
        m.cuda()
    m.eval()
    img = Image.open(imgfile).convert('RGB')
    sized = img.resize((m.width, m.height))
    num_classes = 80  # was a module-level global in the source snippet
    start = time.time()
    num = 1
    for i in range(num):
        boxes = do_detect(m, sized, 0.5, num_classes, 0.4, use_cuda)

    finish = time.time()
    print('%s: Predicted in %f seconds.' % (imgfile, (finish - start) / num))

    namesfile = 'data/coco.names'  # was a module-level global in the source snippet
    class_names = load_class_names(namesfile)
    plot_boxes(img, boxes, 'predictions.jpg', class_names)
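
A minimal invocation sketch for detect (the paths are hypothetical; point them at your own checkout):

if __name__ == '__main__':
    detect('cfg/yolov4.cfg', 'weights/yolov4.weights', 'data/dog.jpg')
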
Example #6
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 80
    bbox_attrs = 5 + num_classes
    class_names = load_class_names("data/coco.names")

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    if CUDA:
        model.cuda()

    model.eval()
    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            sized = cv2.resize(frame, (model.width, model.height))
            sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
            boxes = do_detect(model, sized, 0.5, 0.4, CUDA)

            orig_im = plot_boxes_cv2(frame, boxes, class_names=class_names)
class Yolov4Node(object):
    def __init__(self, cfgfile, weightfile):
        rospy.on_shutdown(self.shutdown_cb)
        self.model = Darknet(cfgfile)
        self.model.print_network()
        self.model.load_weights(weightfile)
        self.model.eval()
        print('Loading weights from %s... Done!' % (weightfile))

        self.num_classes = 80
        if self.num_classes == 20:
            namesfile = os.path.dirname(__file__) + '/data/voc.names'
        elif self.num_classes == 80:
            namesfile = os.path.dirname(__file__) + '/data/coco.names'
        else:
            namesfile = os.path.dirname(__file__) + '/data/names'
        self.class_names = load_class_names(namesfile)

        self.use_cuda = 1
        if self.use_cuda:
            self.model.cuda()

        self.cvbridge = CvBridge()
        self.pub_bbox = rospy.Publisher('~det2d_result',
                                        Detection2D,
                                        queue_size=1)
        self.sub_image = rospy.Subscriber("/camera/color/image_raw",
                                          ROSImage,
                                          self.image_cb,
                                          queue_size=1)
        self.detection_srv = rospy.Service("~yolo_detect", Detection2DTrigger,
                                           self.srv_cb)
        print(rospy.get_name() + ' is ready.')

    def srv_cb(self, req):
        try:
            cv_image = self.cvbridge.imgmsg_to_cv2(req.image, "rgb8")
            # print("Get image...")
        except CvBridgeError as e:
            print(e)
            return
        img_sized = cv2.resize(cv_image, (self.model.width, self.model.height))
        boxes_batch = do_detect(self.model, img_sized, 0.5, 0.2, self.use_cuda)

        detection_msg = Detection2D()
        detection_msg.header.stamp = rospy.Time.now()
        detection_msg.header.frame_id = req.image.header.frame_id

        # Batch size != 1
        if len(boxes_batch) != 1:
            print("Batch size != 1, cannot handle it")
            exit(-1)
        boxes = boxes_batch[0]

        # print('num_detections:', len(boxes))
        for index, box in enumerate(boxes):
            # print('box:', box)
            bbox_msg = BBox2D()
            bbox_msg.center.x = math.floor(box[0] * req.image.width)
            bbox_msg.center.y = math.floor(box[1] * req.image.height)
            bbox_msg.size_x = math.floor(box[2] * req.image.width)
            bbox_msg.size_y = math.floor(box[3] * req.image.height)
            bbox_msg.id = box[6]
            bbox_msg.score = box[5]
            bbox_msg.class_name = self.class_names[bbox_msg.id]
            detection_msg.boxes.append(bbox_msg)

        # cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
        result_img = plot_boxes_cv2(cv_image,
                                    boxes,
                                    savename=None,
                                    class_names=self.class_names,
                                    interest_classes=INTEREST_CLASSES)
        detection_msg.result_image = self.cvbridge.cv2_to_imgmsg(
            result_img, "bgr8")

        print('return {} detection results'.format(len(boxes)))
        return Detection2DTriggerResponse(result=detection_msg)

    def image_cb(self, msg):
        try:
            cv_image = self.cvbridge.imgmsg_to_cv2(msg, "rgb8")
            rospy.loginfo("Get image")
        except CvBridgeError as e:
            print(e)
            return
        img_sized = cv2.resize(cv_image, (self.model.width, self.model.height))
        boxes_batch = do_detect(self.model, img_sized, 0.4, 0.3, self.use_cuda)

        detection_msg = Detection2D()
        detection_msg.header.stamp = rospy.Time.now()
        detection_msg.header.frame_id = msg.header.frame_id
        detection_msg.result_image = msg

        # Batch size != 1
        if len(boxes_batch) != 1:
            print("Batch size != 1, cannot handle it")
            exit(-1)
        boxes = boxes_batch[0]

        # print('num_detections:', len(boxes))
        for index, box in enumerate(boxes):
            # print('box:', box)
            bbox_msg = BBox2D()
            bbox_msg.center.x = math.floor(box[0] * msg.width)
            bbox_msg.center.y = math.floor(box[1] * msg.height)
            bbox_msg.size_x = math.floor(box[2] * msg.width)
            bbox_msg.size_y = math.floor(box[3] * msg.height)
            bbox_msg.id = box[6]
            bbox_msg.score = box[5]
            bbox_msg.class_name = self.class_names[bbox_msg.id]
            detection_msg.boxes.append(bbox_msg)

        result_img = plot_boxes_cv2(cv_image,
                                    boxes,
                                    savename=None,
                                    class_names=self.class_names,
                                    interest_classes=INTEREST_CLASSES)
        detection_msg.result_image = self.cvbridge.cv2_to_imgmsg(
            result_img, "bgr8")
        self.pub_bbox.publish(detection_msg)

        result_img = cv2.cvtColor(result_img, cv2.COLOR_RGB2BGR)
        cv2.imshow('Yolo demo', result_img)
        cv2.waitKey(1)

    def shutdown_cb(self):
        rospy.loginfo("Shutdown " + rospy.get_name())
        if hasattr(self, 'model'): del self.model
        if hasattr(self, 'cvbridge'): del self.cvbridge
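
The ROS node above is defined but never launched in this snippet; a minimal entry-point sketch (the node name and cfg/weight paths are hypothetical):

if __name__ == '__main__':
    rospy.init_node('yolov4_node')
    node = Yolov4Node('./cfg/yolov4.cfg', './weights/yolov4.weights')
    rospy.spin()
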


if __name__ == "__main__":
    logging = init_logger(log_dir='log')
    cfg = get_args(**Cfg)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device {device}')

    model = Darknet(cfg.model_config)

    model.print_network()
    model.load_weights(cfg.weights_file)
    model.eval()  # switch the model out of training mode

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    model.to(device=device)

    annotations_file_path = cfg.gt_annotations_path
    with open(annotations_file_path) as annotations_file:
        try:
            annotations = json.load(annotations_file)
        except json.JSONDecodeError:
            print("annotations file is not valid JSON")
            exit(1)
    test(
        model=model,
class PatchTrainer(object):
    def __init__(self, mode):

        cfgfile = './cfg/yolov4.cfg'
        weightfile = '../common_data/yolov4.weights'

        self.darknet_model = Darknet(cfgfile).cuda()
        self.darknet_model.print_network()
        self.darknet_model.load_weights(weightfile)
        self.darknet_model.eval()
        print('Loading weights from %s... Done!' % (weightfile))

    def train(self):
        """
        Optimize a patch to generate an adversarial example.
        :return: Nothing
        """

        img_size = 800
        batch_size = 1
        n_epochs = 5000
        max_lab = 14

        # Dataset prepare

        data = CocoTrainPerson(dataType="train2017", num_use=100)
        dataloader = DataLoader(data, batch_size=1,
                                shuffle=False)  # load the data with a DataLoader

        ATTACK_AREA_RATE = 0.1
        decay_epoch = 100
        k = 0

        for i_batch, batch_data in enumerate(dataloader):
            img, mask, bbox, class_label = batch_data[0][0], batch_data[1][
                0], batch_data[2][0], batch_data[3][0]

            ##############################################################################

            img_name = batch_data[4][0]
            mask_area = torch.sum(mask)

            print('---------------')
            print(img_name)
            print('---------------')

            record_dir = '../common_data/yolo4_dap_attack/disappear/area'

            record_path = os.path.join(record_dir,
                                       img_name.split('.')[0] + '.txt')

            # use segment SLIC
            base_SLIC_seed_num = 3000
            img_np = img.numpy().transpose(1, 2, 0)
            mask_np = mask.numpy()
            numSegments = int(base_SLIC_seed_num / (500 * 500) *
                              torch.sum(mask))
            segments_np = slic(image=img_np,
                               n_segments=numSegments,
                               sigma=0,
                               slic_zero=True,
                               mask=mask_np)
            segments_tensor = torch.from_numpy(segments_np).float().cuda()
            segments_label = torch.unique(segments_tensor)
            segments_label = segments_label[1:]

            zero_layer = torch.zeros_like(segments_tensor)
            one_layer = torch.ones_like(segments_tensor)

            bbox_x1 = bbox[0]
            bbox_y1 = bbox[1]
            bbox_w = bbox[2]
            bbox_h = bbox[3]

            bbox_x_c = bbox_x1 + bbox_w / 2
            bbox_y_c = bbox_y1 + bbox_h / 2
            bbox_x_c_int = int(bbox_x_c)
            bbox_y_c_int = int(bbox_y_c)

            # 3 load attack region
            load_patch_dir = '../common_data/NES_search_test_1107/' + img_name.split(
                '_')[0]

            load_patch_list = os.listdir(load_patch_dir)
            load_patch_list.sort()
            wat_num_max = 0
            for i_name in load_patch_list:
                wat_num = int(i_name.split('_')[0])
                if wat_num > wat_num_max:
                    wat_num_max = wat_num
            for i_name in load_patch_list:
                wat_num = int(i_name.split('_')[0])
                if wat_num == wat_num_max:
                    max_name = i_name
                    break

            load_patch = os.path.join(load_patch_dir, max_name)

            load_img = Image.open(load_patch).convert('RGB')
            load_img = transforms.ToTensor()(load_img)
            region_mask = 2 * load_img - img.cpu()
            region_mask = torch.sum(region_mask, dim=0) / 3
            region_mask = torch.where(mask > 0, region_mask,
                                      torch.zeros_like(region_mask))

            attack_region_tmp_pil = transforms.ToPILImage()(region_mask.cpu())
            attack_region_tmp_pil.save('013k.png')
            # process mask
            region_mask_new = torch.zeros_like(region_mask).cuda()
            for i in range(segments_label.shape[0]):
                sp = segments_label[i]
                right_color = (torch.where(segments_tensor == sp,
                                           region_mask.cuda(),
                                           one_layer * (-10))).cpu()
                right_color = torch.mean(right_color[right_color != -10])
                color_layer = torch.ones_like(segments_tensor).fill_(
                    right_color)
                region_mask_new = torch.where(segments_tensor == sp,
                                              color_layer, region_mask_new)
            region_mask = region_mask_new
            region_mask_unique = torch.unique(region_mask)

            for i in range(region_mask_unique.shape[0]):
                thres = region_mask_unique[i]
                # region_mask_tmp = torch.zeros_like(region_mask)
                region_mask_tmp = torch.where(region_mask > thres, one_layer,
                                              zero_layer)
                pixel_num = torch.sum(region_mask_tmp)
                if pixel_num < mask_area * ATTACK_AREA_RATE:
                    break

            attack_region_search_top = region_mask_tmp
            attack_region_search_top = get_conv_envl(attack_region_search_top)

            attack_region_tmp_pil = transforms.ToPILImage()(
                attack_region_search_top.cpu())

            attack_region_tmp_pil.save('012k.png')

            attack_region_tmp = attack_region_search_top

            attack_region_tmp = attack_region_tmp.cuda()
            now_area = float(torch.sum(attack_region_tmp) / mask_area)
            print('---------------')
            print('You have used ', now_area, 'area.')
            print('---------------')
            # start from a gray patch (sigmoid(0) = 0.5)
            adv_patch_w = torch.zeros(3, 500, 500).cuda()

            adv_patch_w.requires_grad_(True)

            optimizer = torch.optim.Adam([{
                'params': adv_patch_w,
                'lr': 0.1
            }],
                                         amsgrad=True)

            t_op_num = 1500
            min_max_iou_record = 1
            for t_op_step in range(t_op_num):
                adv_patch = torch.sigmoid(adv_patch_w)
                patched_img = torch.where(attack_region_tmp > 0, adv_patch,
                                          img.cuda()).unsqueeze(0)

                patched_img_rsz = F.interpolate(patched_img, (608, 608),
                                                mode='bilinear').cuda()

                yolov4_output = self.darknet_model(patched_img_rsz)

                bbox_pred, cls_pred, obj_pred = yolov4_output

                bbox_pred = bbox_pred.squeeze()

                total_loss = torch.max(obj_pred)

                total_loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                person_conf = (cls_pred * obj_pred)[0, :, 0]

                ground_truth_bbox = [
                    bbox_x1, bbox_y1, bbox_x1 + bbox_w, bbox_y1 + bbox_h
                ]
                ground_truth_bbox = torch.Tensor(ground_truth_bbox).unsqueeze(
                    0).cuda() / 500

                ground_truth_bboxs = ground_truth_bbox.repeat(
                    bbox_pred.shape[0], 1)

                iou = compute_iou_tensor(bbox_pred, ground_truth_bboxs)

                # ----------------------------------
                # ------------------------
                # early stop

                # test (early-stop check)
                patched_img_cpu = patched_img.cpu().squeeze()
                test_confidence_threshold = 0.45

                # NOTE: attack_prob and det_bboxes below come from a
                # detector-specific decode step that is not shown in this
                # snippet; as written, they are undefined at first use here.
                ov_test_thrs_index = torch.where(
                    attack_prob > test_confidence_threshold)[0]

                final_pbbox = det_bboxes[:,
                                         class_label * 4:(class_label + 1) * 4]
                ground_truth_bboxs_final = ground_truth_bbox.repeat(
                    final_pbbox.shape[0], 1)
                iou = compute_iou_tensor(final_pbbox, ground_truth_bboxs_final)
                attack_prob_select_by_iou_ = attack_prob[iou > 0.05]
                attack_prob_select_by_iou_ = attack_prob_select_by_iou_[
                    attack_prob_select_by_iou_ > test_confidence_threshold]

                # stop if no such class found
                if attack_prob_select_by_iou_.shape[0] == 0:
                    print('Break at', t_op_step, 'no bbox found')
                    # save image
                    patched_img_cpu_pil = transforms.ToPILImage()(
                        patched_img_cpu)
                    out_file_path = os.path.join(
                        '../common_data/yolo4_dap_attack/success' +
                        str(int(ATTACK_AREA_RATE * 100)), img_name)
                    patched_img_cpu_pil.save(out_file_path)
                    break

                # max IoU among same-class detections
                final_pbbox = det_bboxes[ov_test_thrs_index][:, class_label *
                                                             4:(class_label +
                                                                1) * 4]
                ground_truth_bboxs_final = ground_truth_bbox.repeat(
                    final_pbbox.shape[0], 1)
                iou = compute_iou_tensor(final_pbbox, ground_truth_bboxs_final)
                iou_max = torch.max(iou)
                if iou_max < 0.05:
                    print('Break at', t_op_step, 'iou final max:',
                          torch.max(iou))
                    # save image
                    patched_img_cpu_pil = transforms.ToPILImage()(
                        patched_img_cpu)
                    out_file_path = os.path.join(
                        '../common_data/yolo4_dap_attack/success' +
                        str(int(ATTACK_AREA_RATE * 100)), img_name)
                    patched_img_cpu_pil.save(out_file_path)

                    break

                # report
                ground_truth_bboxs = ground_truth_bbox.repeat(1000, 1)
                final_pbbox = det_bboxes[ov_test_thrs_index][:, class_label *
                                                             4:(class_label +
                                                                1) * 4]
                ground_truth_bboxs_final = ground_truth_bbox.repeat(
                    final_pbbox.shape[0], 1)
                iou = compute_iou_tensor(final_pbbox, ground_truth_bboxs_final)

                max_iou = torch.max(iou)
                if max_iou < min_max_iou_record:
                    min_max_iou_record = max_iou
                    txt_save_dir = '../common_data/yolo4_dap_attack/iou' + str(
                        int(ATTACK_AREA_RATE * 100))
                    txt_save_path = os.path.join(
                        txt_save_dir,
                        img_name.split('.')[0] + '.txt')
                    with open(txt_save_path, 'w') as f:
                        text = str(float(max_iou))
                        f.write(text)

                if t_op_step % 100 == 0:

                    iou_sort = torch.sort(
                        iou, descending=True)[0][:6].detach().clone().cpu()

                    print(t_op_step, 'iou t-cls  :', iou_sort)

                    # iou over 0.5, confidence print
                    final_pbbox = det_bboxes[:, class_label *
                                             4:(class_label + 1) * 4]
                    iou = compute_iou_tensor(
                        final_pbbox,
                        ground_truth_bbox.repeat(final_pbbox.shape[0], 1))
                    attack_prob = det_labels[:, class_label]
                    attack_prob_select_by_iou_ = attack_prob[iou > 0.05]

                    attack_prob_select_by_iou_sort = torch.sort(
                        attack_prob_select_by_iou_,
                        descending=True)[0][:6].detach().cpu()
                    print(t_op_step, 'right cls cf:',
                          attack_prob_select_by_iou_sort)

                    print()
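
compute_iou_tensor is used throughout the example above but never defined in the snippet. A minimal sketch of a batched IoU over (x1, y1, x2, y2) boxes, assuming both inputs are N x 4 tensors on the same scale (the source implementation may differ):

def compute_iou_tensor(boxes_a, boxes_b):
    # intersection rectangle corners
    x1 = torch.max(boxes_a[:, 0], boxes_b[:, 0])
    y1 = torch.max(boxes_a[:, 1], boxes_b[:, 1])
    x2 = torch.min(boxes_a[:, 2], boxes_b[:, 2])
    y2 = torch.min(boxes_a[:, 3], boxes_b[:, 3])
    # clamp to zero so disjoint boxes contribute no intersection
    inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    return inter / (area_a + area_b - inter + 1e-9)
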
Example #10
def train(model, device, config, epochs=5, batch_size=1, save_cp=True, log_step=20, img_scale=0.5):
    # TODO: add resume support. What state does resume need? Everything in
    # config, everything in yolov4-custom.cfg, the weights, the epoch index,
    # and where the learning rate left off.
    
    
    # Create the datasets
    # config.train_label is the path to the label file, e.g. data/coins.txt
    train_dataset = Yolo_dataset(config.train_label, config, train=True)
    val_dataset = Yolo_dataset(config.val_label, config, train=False)

    # Dataset sizes
    n_train = len(train_dataset)
    n_val = len(val_dataset)

    # Create the dataloaders.
    # Observed behaviour: with pin_memory=False, num_workers=0 (no worker
    # subprocesses) it works; with num_workers=8 it hangs regardless of
    # pin_memory; with pin_memory=True, num_workers=0 it works.
    # So the hang is caused by num_workers > 0 spawning worker processes.
    # The dataset loads images with OpenCV, and some OpenCV functions spawn
    # their own threads by default; threads nested inside worker processes
    # easily deadlock (whether it hangs can depend on the operating system).
    # Fix 1 (recommended): call cv2.setNumThreads(0) right after importing
    # cv2 in the dataset module, disabling OpenCV's internal threading.
    # Fix 2 (not recommended): load and preprocess images with PIL instead;
    # PIL is slower than OpenCV.
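    # A minimal sketch of fix 1 (an assumption about where cv2 is imported;
    # it must run once, before the DataLoader forks its workers):
    #
    #     import cv2
    #     cv2.setNumThreads(0)  # disable OpenCV's internal threading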
    train_loader = DataLoader(train_dataset, batch_size=config.batch // config.subdivisions, shuffle=True,
                              num_workers=8, pin_memory=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch // config.subdivisions, shuffle=False,
                              num_workers=8, pin_memory=True, drop_last=False, collate_fn=val_collate)
                            
    if config.only_evaluate or config.evaluate_when_train:
        tgtFile = makeTgtJson(val_loader, config.categories)

    writer = SummaryWriter(log_dir=config.TRAIN_TENSORBOARD_DIR,
                           filename_suffix=f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}',
                           comment=f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}')
    
    # Maximum number of iterations
    max_itr = config.TRAIN_EPOCHS * n_train
    
    # Global iteration counter
    global_step = 0

    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {config.batch}
        Subdivisions:    {config.subdivisions}
        Learning rate:   {config.learning_rate}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images size:     {config.width}
        Optimizer:       {config.TRAIN_OPTIMIZER}
        Dataset classes: {config.classes}
        Train label path:{config.train_label}
        Pretrained:      {config.pretrainedWeight is not None or config.Pretrained is not None}
    ''')
    if config.only_evaluate:
        if config.use_darknet_cfg:
            eval_model = Darknet(config.cfgfile)
        else:
            raise NotImplementedError
        if torch.cuda.device_count() > 1:
            eval_model.load_state_dict(model.module.state_dict())
        else:
            eval_model.load_state_dict(model.state_dict())
        eval_model.to(device)
        eval_model.eval()
        resFile = evaluate(eval_model, config.val_label, config.dataset_dir, device==torch.device("cuda"))
        if resFile is None:
            debugPrint("detect 0 boxes in the val set")
            return
        cocoEvaluate(tgtFile, resFile)
        return

    # learning rate setup
    # Custom schedule: ramp the LR up during burn-in, then step it down in stages.
    def burnin_schedule(i):
        # i is the iteration index, not the epoch
        if i < config.burn_in:  # quartic ramp-up phase
            # factor multiplies the configured learning rate
            factor = pow(i / config.burn_in, 4)
        elif i < config.steps[0]:  # stage 1
            factor = 1.0
        elif i < config.steps[1]:  # stage 2
            factor = 0.1
        else:  # stage 3
            factor = 0.01
        return factor
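
    # Worked example (burn_in and steps values assumed for illustration):
    # with burn_in=1000 and steps=[400000, 450000], iteration 500 gives
    # factor = (500/1000)**4 = 0.0625; iteration 10000 gives 1.0;
    # iteration 420000 gives 0.1; anything past 450000 gives 0.01.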

    if config.TRAIN_OPTIMIZER.lower() == 'adam':  # adam is the default
        optimizer = optim.Adam(
            model.parameters(),
            lr=config.learning_rate / config.batch,  # effective LR = configured value / batch size
            betas=(0.9, 0.999),  # Adam hyperparameter; the default is usually fine
            eps=1e-08,  # Adam hyperparameter; the default is usually fine
        )
        )
    elif config.TRAIN_OPTIMIZER.lower() == 'sgd':
        optimizer = optim.SGD(
            params=model.parameters(),
            lr=config.learning_rate / config.batch,
            momentum=config.momentum,
            weight_decay=config.decay,
        )

    # PyTorch's dedicated interface for adjusting the learning rate
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # Loss module applied after the YOLO network (the main network only maps an image to three output tensors); it has no learnable parameters.
    criterion = Yolo_loss(device=device, batch=config.batch // config.subdivisions, n_classes=config.classes)

    save_prefix = 'Yolov4_epoch'
    saved_models = deque()
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_step = 0
        model.train()
        logging.info("===Train===")
        for i, batch in enumerate(train_loader):
            global_step += 1
            epoch_step += 1
            images = batch[0]
            bboxes = batch[1]

            images = images.to(device=device, dtype=torch.float32)
            bboxes = bboxes.to(device=device)

            bboxes_pred = model(images)
            loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2 = criterion(bboxes_pred, bboxes)
            loss.backward()

            epoch_loss += loss.item()

            if global_step % config.subdivisions == 0:
                optimizer.step()
                scheduler.step()
                model.zero_grad()
            
            logging.info("Epoch:[{:3}/{}],step:[{:3}/{}],total loss:{:.2f}|lr:{:.5f}".format(epoch + 1, epochs, i + 1, len(train_loader), loss.item(), scheduler.get_last_lr()[0]))

            if global_step % (log_step * config.subdivisions) == 0:  # log_step defaults to 20; counted in iterations
                
                writer.add_scalar('train/Loss', loss.item(), global_step)
                writer.add_scalar('train/loss_xy', loss_xy.item(), global_step)
                writer.add_scalar('train/loss_wh', loss_wh.item(), global_step)
                writer.add_scalar('train/loss_obj', loss_obj.item(), global_step)
                writer.add_scalar('train/loss_cls', loss_cls.item(), global_step)
                writer.add_scalar('train/loss_l2', loss_l2.item(), global_step)
                writer.add_scalar('lr', scheduler.get_last_lr()[0] * config.batch, global_step)
                
                logging.debug('Train step_{}: loss : {},loss xy : {},loss wh : {},'
                            'loss obj : {},loss cls : {},loss l2 : {},lr : {}'
                            .format(global_step, loss.item(), loss_xy.item(),
                                    loss_wh.item(), loss_obj.item(),
                                    loss_cls.item(), loss_l2.item(),
                                    scheduler.get_last_lr()[0] * config.batch))
        if save_cp:  # True
            # Create the checkpoints directory
            if not os.path.exists(config.checkpoints):
                os.makedirs(config.checkpoints, exist_ok=True)  # exist_ok=True tolerates an existing directory; exist_ok=False would raise if it already exists
                logging.info('Created checkpoint directory')
            save_path = os.path.join(config.checkpoints, f'{save_prefix}{epoch + 1}.weights')                
            # Handle the torch.nn.DataParallel special case
            if torch.cuda.device_count() > 1:
                model.module.save_weights(save_path)
            else:
                model.save_weights(save_path)                
            logging.info(f'Checkpoint {epoch + 1} saved !')
            # Keep only the newest keep_checkpoint_max checkpoints; older ones are deleted automatically
            saved_models.append(save_path)
            if len(saved_models) > config.keep_checkpoint_max > 0:
                model_to_remove = saved_models.popleft()
                try:
                    os.remove(model_to_remove)
                except OSError:
                    logging.info(f'failed to remove {model_to_remove}')

        if config.evaluate_when_train:
            try:
                model.eval()
                resFile = evaluate(model, config.val_label, config.dataset_dir, device==torch.device("cuda"), config.width, config.height)
                if resFile is None:
                    continue
                stats = cocoEvaluate(tgtFile, resFile)

                logging.info("===Val===")
                logging.info("Epoch:[{:3}/{}],AP:{:.3f}|AP50:{:.3f}|AP75:{:.3f}|APs:{:.3f}|APm:{:.3f}|APl:{:.3f}".format(
                    epoch + 1, epochs, stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]))
                logging.info("Epoch:[{:3}/{}],AR1:{:.3f}|AR10:{:.3f}|AR100:{:.3f}|ARs:{:.3f}|ARm:{:.3f}|ARl:{:.3f}".format(
                    epoch + 1, epochs, stats[6], stats[7], stats[8], stats[9], stats[10], stats[11]))


                writer.add_scalar('train/AP', stats[0], global_step)
                writer.add_scalar('train/AP50', stats[1], global_step)
                writer.add_scalar('train/AP75', stats[2], global_step)
                writer.add_scalar('train/AP_small', stats[3], global_step)
                writer.add_scalar('train/AP_medium', stats[4], global_step)
                writer.add_scalar('train/AP_large', stats[5], global_step)
                writer.add_scalar('train/AR1', stats[6], global_step)
                writer.add_scalar('train/AR10', stats[7], global_step)
                writer.add_scalar('train/AR100', stats[8], global_step)
                writer.add_scalar('train/AR_small', stats[9], global_step)
                writer.add_scalar('train/AR_medium', stats[10], global_step)
                writer.add_scalar('train/AR_large', stats[11], global_step)
            except Exception as e:
                debugPrint("evaluate meets an exception, here is the exception info:")
                traceback.print_exc()
                debugPrint("ignore error in evaluate and continue training")

    writer.close()
Example #11
        # print("item[0]", item[3])
        # print("item[1]", item[1])
    # boxes: [batch, num1 + num2 + num3, 1, 4]
    # confs: [batch, num1 + num2 + num3, num_classes]
    boxes = torch.cat(boxes_list, dim=1)
    confs = torch.cat(confs_list, dim=1)
    return [boxes, confs]
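
The comments above document the merged head outputs. A quick sanity check under those assumed shapes (boxes: [batch, N, 1, 4]; confs: [batch, N, num_classes]); the helper name is hypothetical:

def check_merged_shapes(boxes, confs, num_classes=80):
    assert boxes.dim() == 4 and boxes.shape[2] == 1 and boxes.shape[3] == 4
    assert confs.dim() == 3 and confs.shape[2] == num_classes
    assert boxes.shape[:2] == confs.shape[:2]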

if __name__ == "__main__":    
    use_cuda = torch.cuda.is_available()
    print("is cuda available", use_cuda)
    device = torch.device("cuda" if use_cuda else "cpu")
    # detector
    detector = Darknet(cfgfile).to(device)
    detector.load_weights(weightfile)
    detector.eval()

    # tracker
    cfg_track = edict({})
    if cfg_track_path is not None:
        assert(os.path.isfile(cfg_track_path))
    with open(cfg_track_path, 'r') as fo:
        cfg_track.update(yaml.safe_load(fo))  # yaml.load without an explicit Loader is deprecated
    # print("cfg_track", cfg_track.DEEPSORT)

    tracker = build_tracker(cfg_track, use_cuda=use_cuda)

    # video parameters
    cap = cv2.VideoCapture(video_path)
    im_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    im_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
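
    # The snippet stops after reading the video parameters; below is a minimal
    # frame-loop sketch reusing do_detect exactly as the earlier examples call
    # it (the tracker update step is omitted since its API is not shown here):
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        sized = cv2.resize(frame, (detector.width, detector.height))
        sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
        boxes = do_detect(detector, sized, 0.5, 0.4, use_cuda)
    cap.release()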
Example #12
class PatchTrainer(object):
    def __init__(self, mode):
        
        cfgfile = './cfg/yolov4.cfg'
        weightfile = '../common_data/yolov4.weights'

        self.darknet_model = Darknet(cfgfile).cuda()
        self.darknet_model.print_network()
        self.darknet_model.load_weights(weightfile)
        self.darknet_model.eval()
        print('Loading weights from %s... Done!' % (weightfile))


    def train(self):
        """
        Optimize a patch to generate an adversarial example.
        :return: Nothing
        """

        img_size = 800
        batch_size = 1
        n_epochs = 5000
        max_lab = 14

        # ATTACK_TASK = 'target'

        # TARGET_CLASS = 'dog'
        TARGET_CLASS = 16
        ATTACK_TASK = 'untarget'
        # output = self.darknet_model(img)



        # Dataset prepare
        

        data = CocoTrainPerson(dataType="train2017", num_use=100)
        dataloader = DataLoader(data, batch_size=1, shuffle=False)  # load the data with a DataLoader


        ATTACK_AREA_RATE = 0.1
        decay_epoch = 100
        k = 0



        for i_batch, batch_data in enumerate(dataloader):
            img, mask, bbox, class_label = batch_data[0][0], batch_data[1][0], batch_data[2][0], batch_data[3][0]



            ##############################################################################

            img_name = batch_data[4][0]
            mask_area = torch.sum(mask)
            # if img_name.split('_')[0] != '000000001815':
            #     continue

            print('---------------')
            print(img_name)
            print('---------------')

            record_dir = '../common_data/dap/disappear/area'


            record_path = os.path.join(record_dir, img_name.split('.')[0]+'.txt')


            
            # use segment SLIC
            base_SLIC_seed_num = 3000
            img_np = img.numpy().transpose(1,2,0)
            mask_np = mask.numpy()
            numSegments = int(base_SLIC_seed_num/(500*500)*torch.sum(mask))
            segments_np = slic(image=img_np, n_segments=numSegments, sigma=0, slic_zero=True, mask=mask_np)
            segments_tensor = torch.from_numpy(segments_np).float().cuda()
            segments_label = torch.unique(segments_tensor)
            segments_label = segments_label[1:]


            # define theta_m
            # pay attention to the center and the boundary

            # (0) prepare stack of sp
            # (1) find the center sp
            # (2) find the boundary sp

            # (0) prepare stack of sp
            zero_layer = torch.zeros_like(segments_tensor)
            one_layer = torch.ones_like(segments_tensor)
            # segments_stack = torch.stack([torch.where(segments_tensor==segments_label[j], segments_tensor, zero_layer) for j in range(segments_label.shape[0])], dim=0)
            

            
            # # (1) find the center sp
            bbox_x1 = bbox[0]
            bbox_y1 = bbox[1]
            bbox_w = bbox[2]
            bbox_h = bbox[3]

            bbox_x_c = bbox_x1 + bbox_w/2
            bbox_y_c = bbox_y1 + bbox_h/2
            bbox_x_c_int = int(bbox_x_c)
            bbox_y_c_int = int(bbox_y_c)





            # 3 load attack region 
            load_patch_dir = '../common_data/NES_search_test_1107/'+img_name.split('_')[0]

            load_patch_list = os.listdir(load_patch_dir)
            load_patch_list.sort()
            wat_num_max = 0
            for i_name in load_patch_list:
                wat_num = int(i_name.split('_')[0])
                if wat_num > wat_num_max:
                    wat_num_max = wat_num
            for i_name in load_patch_list:
                wat_num = int(i_name.split('_')[0])
                if wat_num == wat_num_max:
                    max_name = i_name
                    break

            load_patch = os.path.join(load_patch_dir, max_name)

            load_img = Image.open(load_patch).convert('RGB')
            load_img = transforms.ToTensor()(load_img)
            region_mask = 2 * load_img - img.cpu()
            region_mask = torch.sum(region_mask, dim=0) / 3
            region_mask = torch.where(mask > 0, region_mask, torch.zeros_like(region_mask))


            attack_region_tmp_pil = transforms.ToPILImage()(region_mask.cpu())
            attack_region_tmp_pil.save('013k.png')
            # process mask
            region_mask_new = torch.zeros_like(region_mask).cuda()
            for i in range(segments_label.shape[0]):
                sp = segments_label[i]
                right_color = (torch.where(segments_tensor == sp, region_mask.cuda(), one_layer * (-10))).cpu()
                right_color = torch.mean(right_color[right_color != -10])
                color_layer = torch.ones_like(segments_tensor).fill_(right_color)
                region_mask_new = torch.where(segments_tensor == sp, color_layer, region_mask_new)
            region_mask = region_mask_new
            region_mask_unique = torch.unique(region_mask)



            ATTACK_AREA_RATE = 0
            for enlarge_i in range(20):
                ATTACK_AREA_RATE = ATTACK_AREA_RATE + 0.002



                for i in range(region_mask_unique.shape[0]):
                    thres = region_mask_unique[i]
                    # region_mask_tmp = torch.zeros_like(region_mask)
                    region_mask_tmp = torch.where(region_mask>thres, one_layer, zero_layer)
                    pixel_num = torch.sum(region_mask_tmp)
                    if pixel_num < mask_area * ATTACK_AREA_RATE:
                        break

                attack_region_search_top = region_mask_tmp
                attack_region_search_top = get_conv_envl(attack_region_search_top)

                attack_region_tmp_pil = transforms.ToPILImage()(attack_region_search_top.cpu())
                
                attack_region_tmp_pil.save('012k.png')




                # # attack square region
                # attack_area = mask_area * 0.15
                # w_div_h = bbox_w/bbox_h
                # # square aspect ratio ("zheng fang xing")
                # w_div_h = 1
                # attack_h = torch.sqrt(attack_area/w_div_h)
                # attack_w = attack_h * w_div_h
                # attack_x_c = bbox_x1+bbox_w/2
                # attack_y_c = bbox_y1+bbox_h/2
                # attack_x1 = int(attack_x_c - attack_w/2)
                # attack_x2 = attack_x1 + int(attack_w)
                # attack_y1 = int(attack_y_c - attack_h/2)
                # attack_y2 = attack_y1 + int(attack_h)
                # attack_mask = torch.zeros_like(img)
                # attack_mask[:,attack_y1:attack_y2,attack_x1:attack_x2] = 1
                # attack_region_square = attack_mask.cuda()
                # attack_region_square_pil = transforms.ToPILImage()(attack_region_square.cpu())
                # attack_region_square_pil.save('014k.png')



                # # attack 4 square region
                # attack_area = mask_area * 0.15
                # w_div_h = bbox_w/bbox_h
                # # square aspect ratio ("zheng fang xing")
                # w_div_h = 1

                # basic_map = torch.zeros(10,10)
                # basic_map[2,2] = 1
                # basic_map[2,7] = 1
                # basic_map[7,2] = 1
                # basic_map[7,7] = 1
                # basic_map = basic_map.unsqueeze(0).unsqueeze(0)
                # basic_map = F.interpolate(basic_map,(int(bbox_h),int(bbox_w))).squeeze()
                # # basic_map_pil = transforms.ToPILImage()(basic_map)
                # # basic_map_pil.show()
                # four_square_map = torch.zeros_like(mask)
                # four_square_map[int(bbox_y1):int(bbox_y1)+int(bbox_h),int(bbox_x1):int(bbox_x1)+int(bbox_w)] = basic_map

                
                # # basic_map_pil = transforms.ToPILImage()(four_square_map)
                # # basic_map_pil.show()

                # four_square_map = four_square_map.cpu()

                
                # for i in range(20):
                #     four_square_map_np = four_square_map.numpy()
                #     # erode
                #     kernel = np.ones((3,3),np.uint8)  
                #     four_square_map_np = cv2.dilate(four_square_map_np, kernel, iterations = 1)
                #     four_square_map_tmp = torch.from_numpy(four_square_map_np)
                #     if torch.sum(four_square_map_tmp) < attack_area:
                #         four_square_map = four_square_map_tmp
                #     else:
                #         break



                # attack_region_four_square = four_square_map.cuda()


                
                # # scattered-dot ("shan dian") pattern
                # # init grid

                # densy = 5

                # unit_w = 13*densy
                # unit_h = 13*densy
                # sandian = torch.zeros(unit_w,unit_h)

                # '''
                # log:
                # 10,5,10,5 : 0.04   work! at 700
                # 10,5,10,6 : 0.0333 work! at 2040

                
                # '''
                # # adv_mask_1_layer = adv_mask_1_layer.reshape(100,5,100,5)
                # sandian = sandian.reshape(13,densy,13,densy)
                # # adv_mask_1_layer = adv_mask_1_layer.reshape(25,20,25,20)
                # # adv_mask_1_layer = adv_mask_1_layer.reshape(20,25,20,25)
                # # adv_mask_1_layer = adv_mask_1_layer.reshape(10,50,10,50)

                # sandian[:,int((densy-1)/2),:,int((densy-1)/2)] = 1
                # sandian = sandian.reshape(unit_w, unit_h)

                # sandian = sandian.unsqueeze(0).unsqueeze(0)
                # sandian = F.interpolate(sandian, (500, 500), mode='nearest').squeeze()
                
                # sandian_region = sandian*mask

                # sandian_pil = transforms.ToPILImage()(sandian_region)
                # # sandian_pil.show()

                # sandian_region = sandian_region.cuda()

                # sandian_region[230:500, 230:300] = 0

                # sandian_region[230:300, 150:380] = 0


                # real attack start

                # attack_region_tmp = attack_region_rand
                # attack_region_tmp = attack_region_fast
                attack_region_tmp = attack_region_search_top
                # attack_region_tmp = attack_region_four_square
                # attack_region_tmp = sandian_region

                attack_region_tmp = attack_region_tmp.cuda()
                now_area = float(torch.sum(attack_region_tmp) / mask_area)
                print('---------------')
                print('You have used ', now_area, 'area.')
                print('---------------')
                # start from a gray patch (sigmoid(0) = 0.5)
                adv_patch_w = torch.zeros(3, 500, 500).cuda()

                adv_patch_w.requires_grad_(True)

                optimizer = torch.optim.Adam([
                    {'params': adv_patch_w, 'lr': 0.1}
                ], amsgrad=True)

                t_op_num = 800
                min_max_iou_record = 1
                for t_op_step in range(t_op_num):
                    adv_patch = torch.sigmoid(adv_patch_w)
                    patched_img = torch.where(attack_region_tmp>0, adv_patch, img.cuda()).unsqueeze(0)
                    # @@@@!!!!!
                    # patched_img = torch.where(mask.cuda()>0, patched_img, mask.unsqueeze(0).repeat(3,1,1).cuda())
                    # patched_img = torch.where(attack_mask > 0, adv_patch_clamp, img.cuda())

                    # patched_img_unsq = patched_img.unsqueeze(0)
                    patched_img_rsz = F.interpolate(patched_img, (608, 608), mode='bilinear').cuda()


                    yolov4_output = self.darknet_model(patched_img_rsz)

                    bbox_pred, cls_pred, obj_pred = yolov4_output

                    bbox_pred = bbox_pred.squeeze()

                    total_loss = torch.max(obj_pred)

                    total_loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                    person_conf = (cls_pred * obj_pred)[0,:,0]

                    ground_truth_bbox = [bbox_x1, bbox_y1, bbox_x1 + bbox_w, bbox_y1 + bbox_h]
                    ground_truth_bbox = torch.Tensor(ground_truth_bbox).unsqueeze(0).cuda() / 500

                    ground_truth_bboxs = ground_truth_bbox.repeat(bbox_pred.shape[0], 1)

                    iou = compute_iou_tensor(bbox_pred, ground_truth_bboxs)

                    person_conf_iou = person_conf[iou>0.45]

                    if torch.max(person_conf_iou) < 0.4:
                        print('break! at', torch.max(person_conf_iou))
                        with open(record_path, 'w') as f:
                            text = str(float(torch.sum(attack_region_tmp).cpu()/mask_area))
                            f.write(text)
                        break
                    if t_op_step % 100 == 0:
                        print(t_op_step, 'max_obj=', total_loss)
                if torch.max(person_conf_iou) < 0.4:
                    break
                    

            ##############################################################################

        # NOTE: in the source, the following block is unreachable dead code
        # ('with None:' would fail at runtime); it is preserved here behind
        # 'if False:' because it documents an alternative scatter-patch variant.
        if False:


            # img  : 3,500,500
            # mask : 500,500
            # bbox : x1,y1,w,h
            # class_label : tensor[]

            mask_area = torch.sum(mask)

            img_name = batch_data[4][0]

            print('---------------')
            print(img_name)
            print('---------------')

            record_dir = '/disk2/mycode/0511models/common_data/scatter_patch/disappear/area'

            success_dir = '/disk2/mycode/0511models/common_data/scatter_patch/disappear/img'

            record_path = os.path.join(record_dir, img_name.split('.')[0]+'.txt')



            # get attack region
            # scattered-dot ("shan dian") pattern
            # init grid

            densy = 5

            unit_w = 13 * densy
            unit_h = 13 * densy
            sandian = torch.zeros(unit_w, unit_h)

            '''
            log:
            10,5,10,5 : 0.04   work! at 700
            10,5,10,6 : 0.0333 work! at 2040


            '''
            # adv_mask_1_layer = adv_mask_1_layer.reshape(100,5,100,5)
            sandian = sandian.reshape(13, densy, 13, densy)
            # adv_mask_1_layer = adv_mask_1_layer.reshape(25,20,25,20)
            # adv_mask_1_layer = adv_mask_1_layer.reshape(20,25,20,25)
            # adv_mask_1_layer = adv_mask_1_layer.reshape(10,50,10,50)

            sandian[:, int((densy - 1) / 2), :, int((densy - 1) / 2)] = 1
            sandian = sandian.reshape(unit_w, unit_h)

            sandian = sandian.unsqueeze(0).unsqueeze(0)
            sandian = F.interpolate(sandian, (500, 500), mode='nearest').squeeze()

            sandian_region = sandian.cpu() * mask.cpu()

            sandian_region = sandian_region.cpu()

            attack_area = mask_area * ATTACK_AREA_RATE
            for i in range(20):
                sandian_region_np = sandian_region.numpy()
                # erode
                kernel = np.ones((3, 3), np.uint8)
                sandian_region_np = cv2.dilate(sandian_region_np, kernel, iterations=1)
                sandian_region_tmp = torch.from_numpy(sandian_region_np)
                if torch.sum(sandian_region_tmp) < attack_area:
                    sandian_region = sandian_region_tmp
                else:
                    break

            sandian_region = sandian_region.cuda()



            # get adv pattern
            adv_patch = torch.rand_like(img).cuda()
            adv_patch.requires_grad_(True)
            optimizer = torch.optim.Adam([
                {'params': adv_patch, 'lr': 0.01}
            ], amsgrad=True)


            attack_mask = sandian_region
            attack_mask = attack_mask.cuda()

            print('use area', torch.sum(attack_mask)/mask_area)

            for step in range(3000):
                

                adv_patch_clamp = torch.clamp(adv_patch, 0, 1)
                patched_img = torch.where(attack_mask > 0, adv_patch_clamp, img.cuda())

                patched_img_unsq = patched_img.unsqueeze(0)
                patched_img_rsz = F.interpolate(patched_img_unsq, (608, 608), mode='bilinear').cuda()


                yolov4_output = self.darknet_model(patched_img_rsz)

                bbox_pred, cls_pred, obj_pred = yolov4_output



                total_loss = torch.max(obj_pred)

                total_loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                
                if step % 10 == 0:
                    patched_img_cpu = patched_img.cpu()
                    patched_img_cpu_pil = transforms.ToPILImage()(patched_img_cpu)
                    patched_img_cpu_pil.save('2.png')

                    print(total_loss)


                    # num_classes = self.darknet_model.num_classes
                    # if num_classes == 20:
                    #     namesfile = 'data/voc.names'
                    # elif num_classes == 80:
                    #     namesfile = 'data/coco.names'
                    # else:
                    #     namesfile = 'data/x.names'
                    # class_names = load_class_names(namesfile)

                    # imgfile = '2.png'
                    # img_t = cv2.imread(imgfile)
                    # sized = cv2.resize(img_t, (self.darknet_model.width, self.darknet_model.height))
                    # sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)

                    # boxes = do_detect(self.darknet_model, sized, 0.4, 0.6, use_cuda)
                    # plot_boxes_cv2(img_t, boxes[0], savename='predictions.jpg', class_names=class_names)



            # if os.path.exists(record_path):
            #     continue


            # if img_name != '000000131419.jpg':
                
            #     continue
            # else:
            #     if k == 0:
            #         k = k + 1
            #         continue
            

            # img_show = (img + mask.unsqueeze(0).repeat(3,1,1))/2
            # img_show = transforms.ToPILImage()(img_show)
            # img_show.show()

            # get attack region ---- attack_mask
            bbox_x1 = bbox[0]
            bbox_y1 = bbox[1]
            bbox_w = bbox[2]  # - bbox_x1
            bbox_h = bbox[3]  # - bbox_y1
            bbox_area = torch.sum(mask)
            # attack_area = bbox_area * ATTACK_AREA_RATE * 0.4
            # w_div_h = bbox_w/bbox_h
            # attack_h = torch.sqrt(attack_area/w_div_h)
            # attack_w = attack_h * w_div_h
            #
            # attack_x_c = bbox_x1+bbox_w/2
            # attack_y_c = bbox_y1+bbox_h/2
            #
            #
            # attack_x1 = int(attack_x_c - attack_w/2)
            # attack_x2 = attack_x1 + int(attack_w)
            # attack_y1 = int(attack_y_c - attack_h/2)
            # attack_y2 = attack_y1 + int(attack_h)

            ATTACK_AREA_RATE = 0.0
            for i_enlarge in range(10):
                ATTACK_AREA_RATE = ATTACK_AREA_RATE + 0.05

                # # define 4 pattern
                # # attack 4 square region
                # attack_area = mask_area * ATTACK_AREA_RATE
                # w_div_h = bbox_w / bbox_h
                # # square aspect ratio ("zheng fang xing")
                # w_div_h = 1
                #
                # basic_map = torch.zeros(10, 10)
                # basic_map[2, 2] = 1
                # basic_map[2, 7] = 1
                # basic_map[7, 2] = 1
                # basic_map[7, 7] = 1
                # basic_map = basic_map.unsqueeze(0).unsqueeze(0)
                # basic_map = F.interpolate(basic_map, (int(bbox_h), int(bbox_w))).squeeze()
                # # basic_map_pil = transforms.ToPILImage()(basic_map)
                # # basic_map_pil.show()
                # four_square_map = torch.zeros_like(mask)
                # four_square_map[int(bbox_y1):int(bbox_y1) + int(bbox_h),
                # int(bbox_x1):int(bbox_x1) + int(bbox_w)] = basic_map
                #
                # # basic_map_pil = transforms.ToPILImage()(four_square_map)
                # # basic_map_pil.show()
                #
                # four_square_map = four_square_map.cpu()
                #
                # for i in range(20):
                #     four_square_map_np = four_square_map.numpy()
                #     # erode
                #     kernel = np.ones((3, 3), np.uint8)
                #     four_square_map_np = cv2.dilate(four_square_map_np, kernel, iterations=1)
                #     four_square_map_tmp = torch.from_numpy(four_square_map_np)
                #     if torch.sum(four_square_map_tmp) < attack_area:
                #         four_square_map = four_square_map_tmp
                #     else:
                #         break
                #
                # attack_region_four_square = four_square_map.cuda()

                # scattered-dot ("shan dian") pattern
                # init grid

                densy = 5

                unit_w = 13 * densy
                unit_h = 13 * densy
                sandian = torch.zeros(unit_w, unit_h)

                '''
                log:
                10,5,10,5 : 0.04   work! at 700
                10,5,10,6 : 0.0333 work! at 2040


                '''
                # adv_mask_1_layer = adv_mask_1_layer.reshape(100,5,100,5)
                sandian = sandian.reshape(13, densy, 13, densy)
                # adv_mask_1_layer = adv_mask_1_layer.reshape(25,20,25,20)
                # adv_mask_1_layer = adv_mask_1_layer.reshape(20,25,20,25)
                # adv_mask_1_layer = adv_mask_1_layer.reshape(10,50,10,50)

                sandian[:, int((densy - 1) / 2), :, int((densy - 1) / 2)] = 1
                sandian = sandian.reshape(unit_w, unit_h)

                sandian = sandian.unsqueeze(0).unsqueeze(0)
                sandian = F.interpolate(sandian, (500, 500), mode='nearest').squeeze()

                sandian_region = sandian.cpu() * mask.cpu()

                sandian_region = sandian_region.cpu()

                attack_area = mask_area * ATTACK_AREA_RATE
                for i in range(20):
                    sandian_region_np = sandian_region.numpy()
                    # erode
                    kernel = np.ones((3, 3), np.uint8)
                    sandian_region_np = cv2.dilate(sandian_region_np, kernel, iterations=1)
                    sandian_region_tmp = torch.from_numpy(sandian_region_np)
                    if torch.sum(sandian_region_tmp) < attack_area:
                        sandian_region = sandian_region_tmp
                    else:
                        break

                sandian_region = sandian_region.cuda()





                # get adv pattern
                adv_patch = torch.rand_like(img).cuda()
                adv_patch.requires_grad_(True)
                optimizer = optim.Adam([
                    {'params': adv_patch, 'lr': 0.01}
                ], amsgrad=True)

                # hyper parameters
                lambda_rpn = 100.0
                lambda_balance1 = 0.00375 # 0.005
                lambda_balance2 = 0.002
                lambda_tv = 0.0001
                attack_epochs = 300

                # training start
                adv_patch_clamp = adv_patch

                min_max_iou_record = 10
                img = img.cuda()
                mask = mask.cuda()
                attack_mask = sandian_region
                attack_mask = attack_mask.cuda()

                print('use area', torch.sum(attack_mask)/mask_area)


                # refer_img_extend = refer_img_extend.cuda()
                for epoch in range(attack_epochs):
                    patched_img = torch.where(attack_mask > 0, adv_patch_clamp, img)
                    patched_img_255 = patched_img
                    patched_img_unsq = patched_img_255.unsqueeze(0)
                    patched_img_rsz = F.interpolate(patched_img_unsq, (608, 608), mode='bilinear').cuda()

                    # YOLOv4 output
                    output = self.darknet_model(patched_img_rsz)
                    obj_prob = self.prob_extractor(output)
                    top_prob = self.top_prob_extractor(output)[0]
                    top_prob_thr = top_prob[top_prob > 0.3]

                    list_boxes = output

                    nms_thresh = 0.4
                    conf_thresh = 0.5

                    anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
                    num_anchors = 9
                    anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
                    strides = [8, 16, 32]
                    anchor_step = len(anchors) // num_anchors
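                    # Decode each of the three YOLO heads with its own anchor triplet,
                    # rescaling anchor sizes into feature-map units via the head stride.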
                    detect_result_list = []
                    for i in range(3):
                        masked_anchors = []
                        for m in anchor_masks[i]:
                            masked_anchors += anchors[m * anchor_step:(m + 1) * anchor_step]
                        masked_anchors = [anchor / strides[i] for anchor in masked_anchors]
                        # decode_result1 = get_region_boxes1(list_boxes[i].detach().cpu().numpy(), conf_thresh, 80, masked_anchors,
                        #                                len(anchor_masks[i]))
                        decode_result = get_region_boxes_tensor(list_boxes[i], conf_thresh, 80,
                                                          masked_anchors,
                                                          len(anchor_masks[i]))
                        # xs, ys, ws, hs, det_confs, cls_conf_logits
                        detect_result_list.append(decode_result)



                    detect_result = torch.cat(detect_result_list, dim=0)
                    proposals = detect_result[:, :5]  # x_c,y_c,w,h, det_confs
                    cls_conf_logits = detect_result[:, 5:]
                    cls_conf = torch.softmax(cls_conf_logits, dim=1)

                    img_cpu = img.cpu()

                    # for i in range(10):
                    #     max_c_index = torch.sort(cls_conf[:, 0] * proposals[:, 4], descending=True)[1][i]
                    #     max_c_bbox = proposals[max_c_index][:4]
                    #     x1 = int((max_c_bbox[0] - max_c_bbox[2] / 2) * 500)
                    #     x2 = int((max_c_bbox[0] + max_c_bbox[2] / 2) * 500)
                    #     y1 = max(int((max_c_bbox[1] - max_c_bbox[3] / 2) * 500),0)
                    #     y2 = int((max_c_bbox[1] + max_c_bbox[3] / 2) * 500)
                    #
                    #     mask_2 = torch.zeros_like((img_cpu))
                    #     mask_2[:, y1:y2, x1:x2] = 1
                    #     img_cpu2 = (img_cpu+mask_2)/2
                    #     img_cpu_pil = transforms.ToPILImage()(img_cpu2)
                    #     img_cpu_pil.show()
                    #     print()

                    # RPN loss
                    # r1 : objectness scores
                    # r2 : box coordinates x, y, w, h

                    # rpn score target is 0
                    rpn_score = proposals[:, 4]
                    loss_r1 = l2_norm(rpn_score - 0)

                    # rpn box target is smaller the boxes
                    rpn_ctx = proposals[:,0].unsqueeze(-1)
                    rpn_cty = proposals[:,1].unsqueeze(-1)
                    rpn_w = proposals[:, 2].unsqueeze(-1)
                    rpn_h = proposals[:, 3].unsqueeze(-1)
                    rpn_box_x1y1x2y2 = [rpn_ctx-rpn_w/2, rpn_cty-rpn_h/2, rpn_ctx+rpn_w/2, rpn_cty+rpn_h/2]
                    rpn_box_x1y1x2y2 = torch.cat(rpn_box_x1y1x2y2, dim=-1)
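                    # the regression target collapses every box to its own center point,
                    # so loss_r2 (weighted by objectness) shrinks confident boxes to zero size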
                    rpn_box_target = torch.cat([rpn_ctx,rpn_cty,rpn_ctx,rpn_cty], dim=-1)
                    loss_r2 = l1_norm(rpn_score.unsqueeze(-1).repeat(1,4)*(rpn_box_x1y1x2y2 - rpn_box_target))

                    test_bbox_reg = False # True
                    if test_bbox_reg:
                        rpn_delta_target = torch.zeros_like(rpn_box_x1y1x2y2).fill_(-0.1)
                        loss_r2_part2 = l1_norm(rpn_score.unsqueeze(-1).repeat(1,4)*(rpn_box_x1y1x2y2 - rpn_delta_target))
                        loss_r2 = (loss_r2 + loss_r2_part2)/2

                    rpn_dimension = proposals.shape[0] / 3
                    rpn_loss = lambda_rpn*(loss_r1 + lambda_balance1 * loss_r2) / rpn_dimension


                    # Regress Loss
                    attack_prob = cls_conf[:, class_label]
                    training_confidence_threshold = 0.38
                    ov_thrs_index = torch.where(attack_prob > training_confidence_threshold) # for certain class
                    ov_thrs_prob = attack_prob[ov_thrs_index]

                    # final_roi for the attacked class is available from here on

                    final_roi = rpn_box_x1y1x2y2[ov_thrs_index]  # for certain class
                    final_roi_target = rpn_box_target[ov_thrs_index]


                    if epoch > decay_epoch:
                        lambda_reg = 10
                        lambda_cls = 100
                    else:
                        lambda_reg = 0
                        lambda_cls = 0

                    reg_loss = lambda_reg * l2_norm(final_roi - final_roi_target)

                    if test_bbox_reg:

                        pbbox = rpn_box_x1y1x2y2[ov_thrs_index]
                        tbbox = torch.zeros_like(pbbox).fill_(-0.1)
                        reg_loss_part2 = lambda_reg * l2_norm(pbbox-tbbox)
                        reg_loss = (reg_loss + reg_loss_part2)/2

                    mean_target_prob = torch.mean(cls_conf[:, class_label])
                    o_score = 0 # ~ logit

                    # Class Loss
                    assert ATTACK_TASK == 'target' or ATTACK_TASK == 'untarget'
                    if ATTACK_TASK == 'target':
                        classification_select = cls_conf[ov_thrs_index]
                        classification_select_log = torch.log(classification_select)
                        target_class = torch.ones(classification_select_log.shape[0]) * TARGET_CLASS
                        target_class = target_class.cuda().long()
                        object_xent = F.nll_loss(classification_select_log, target_class)

                        cls_loss = lambda_cls * (mean_target_prob + lambda_balance2 * object_xent)

                        # NOTE: det_labels belongs to an alternative decoding path that is
                        # disabled above; keep these lines commented out to avoid a NameError.
                        # target_cls_conf = torch.sum(torch.sort(det_labels[:, TARGET_CLASS], descending=True)[0][:10])
                        # cls_loss_new = torch.sum(torch.sort(det_labels[:, class_label], descending=True)[0][:10]) - target_cls_conf

                    elif ATTACK_TASK == 'untarget':
                        classification_select = cls_conf_logits[ov_thrs_index].cuda()
                        target_class = torch.ones(classification_select.shape[0]) * class_label
                        target_class = target_class.cuda().long()
                        object_xent = F.cross_entropy(classification_select, target_class)
                        cls_loss = lambda_cls * (mean_target_prob - lambda_balance2 * object_xent)

                        # cls_loss_new = torch.sum(torch.sort(det_labels[:, class_label],descending=True)[0][:10])
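
                    # Targeted mode minimizes NLL toward TARGET_CLASS; untargeted mode
                    # maximizes cross-entropy away from class_label (hence the sign flip
                    # on lambda_balance2 between the two branches above).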


                    total_loss = rpn_loss + cls_loss + reg_loss

                    # if epoch > 100:
                    #     total_loss = cls_loss_new#rpn_loss + reg_loss

                    total_loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                    #  # adaptive epsilon
                    # target_image_epsilon = 1# 0.2
                    # ratio = epoch / (decay_epoch + 1e-3) * 2
                    # epsilon = target_image_epsilon + (1-target_image_epsilon) * np.exp(-ratio)
                    #
                    # # use epsilon to clamp the adv patch
                    # refer_img_lower_bound = refer_img_extend - epsilon
                    # refer_img_lower_bound = torch.max(refer_img_lower_bound, torch.zeros_like(refer_img_lower_bound))
                    # refer_img_upper_bound = refer_img_extend + epsilon
                    # refer_img_upper_bound = torch.min(refer_img_upper_bound, torch.ones_like(refer_img_upper_bound))
                    #
                    # adv_patch_clamp = torch.max(adv_patch, refer_img_lower_bound)
                    # adv_patch_clamp = torch.min(adv_patch_clamp, refer_img_upper_bound)

                    adv_patch_clamp = torch.clamp(adv_patch, 0, 1)
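                    # Clamping keeps the patch a valid image in [0, 1] while gradients
                    # still flow back to adv_patch on the next forward pass.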


                    # ----------------------------------
                    # early-stop test
                    # establish gt bbox
                    ground_truth_bbox = [bbox_x1, bbox_y1, bbox_x1 + bbox_w, bbox_y1 + bbox_h]
                    ground_truth_bbox = torch.Tensor(ground_truth_bbox).unsqueeze(0).cuda() / 500
                    patched_img_cpu = patched_img.cpu()
                    test_confidence_threshold = 0.5
                    iou_threshold = 0.5
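                    # The attack succeeds once no proposal both overlaps the ground-truth
                    # box (IoU > iou_threshold) and keeps a combined score above
                    # test_confidence_threshold for the attacked class.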
                    iou = compute_iou_tensor(rpn_box_x1y1x2y2, ground_truth_bbox.repeat(rpn_box_x1y1x2y2.shape[0], 1))
                    # use class_label rather than a hard-coded 0 so the test matches the attacked class
                    attack_prob_select_by_iou_ = (cls_conf[:, class_label] * rpn_score)[iou > iou_threshold]
                    attack_prob_select_by_iou_ = attack_prob_select_by_iou_[attack_prob_select_by_iou_>test_confidence_threshold]

                    # stop if no such class found
                    if attack_prob_select_by_iou_.size()[0] == 0:
                        print('Break at', epoch, 'no bbox found')
                        # save image
                        out_file_path = os.path.join(success_dir, img_name)
                        patched_img_cpu_pil = transforms.ToPILImage()(patched_img_cpu)
                        patched_img_cpu_pil.save(out_file_path)

                        txt_save_path = os.path.join(record_dir, img_name.split('.')[0] + '.txt')
                        with open(txt_save_path, 'w') as f:
                            text = str(float(torch.sum(attack_mask)/mask_area))
                            f.write(text)
                        break
                if attack_prob_select_by_iou_.size()[0] == 0:
                    break


            print('-------------------------')

        # ------------------------------------------------------------------------------------------------



    def generate_patch(self, type):
        """
        Generate an initial patch as a starting point for optimization.

        :param type: One of 'gray', 'random' or 'trained_patch'.
        :return: The initial patch as a CPU tensor.
        """
        if type == 'gray':
            adv_patch_cpu = torch.full((3, 500, 500), 0.5)
        elif type == 'random':
            adv_patch_cpu = torch.rand((3, 500, 500))
        elif type == 'trained_patch':
            patchfile = 'patches/object_score.png'
            patch_img = Image.open(patchfile).convert('RGB')
            patch_size = self.config.patch_size
            tf = transforms.Resize((patch_size, patch_size))
            patch_img = tf(patch_img)
            tf = transforms.ToTensor()
            adv_patch_cpu = tf(patch_img)
        else:
            raise ValueError("type must be 'gray', 'random' or 'trained_patch'")

        return adv_patch_cpu

    def read_image(self, path):
        """
        Read an input image to be used as a patch

        :param path: Path to the image to be read.
        :return: Returns the transformed patch as a pytorch Tensor.
        """
        patch_img = Image.open(path).convert('RGB')
        tf = transforms.Resize((self.config.patch_size, self.config.patch_size))
        patch_img = tf(patch_img)
        tf = transforms.ToTensor()

        adv_patch_cpu = tf(patch_img)
        return adv_patch_cpu
def train(
    model,
    device,
    config,
    epochs=5,
    save_cp=True,
    log_step=20,
):
    # Get dataloaders
    train_dataset = Yolo_BEV_dataset(config, split="train")
    val_dataset = Yolo_BEV_dataset(config, split="val")

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.batch // config.subdivisions,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        drop_last=True,
        collate_fn=collate,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.batch // config.subdivisions,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        drop_last=True,
        collate_fn=collate,
    )

    # define summary writer
    writer = SummaryWriter(
        log_dir=config.TRAIN_TENSORBOARD_DIR,
        filename_suffix=
        f"OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}",
        comment=
        f"OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}",
    )

    # log
    n_train = len(train_dataset)
    n_val = len(val_dataset)
    global_step = 0
    logging.info(f"""Starting training:
        Epochs:          {epochs}
        Batch size:      {config.batch}
        Subdivisions:    {config.subdivisions}
        Learning rate:   {config.learning_rate}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Input height:    {config.height}
        Input width:     {config.width}
        Optimizer:       {config.TRAIN_OPTIMIZER}
        Dataset classes: {config.classes}
    """)

    # learning rate setup
    def burnin_schedule(i):
        if i < config.burn_in:
            factor = pow(i / config.burn_in, 4)
        elif i < config.steps[0]:
            factor = 1.0
        elif i < config.steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor
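    # For example, assuming burn_in=1000: at i=500 the factor is (0.5)^4 = 0.0625,
    # it reaches 1.0 at i=1000, then drops to 0.1 and 0.01 once i passes
    # config.steps[0] and config.steps[1] respectively.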

    # optimizer + scheduler
    if config.TRAIN_OPTIMIZER.lower() == "adam":
        optimizer = optim.Adam(
            model.parameters(),
            lr=config.learning_rate / config.batch,
            betas=(0.9, 0.999),
            eps=1e-08,
        )
    elif config.TRAIN_OPTIMIZER.lower() == "sgd":
        optimizer = optim.SGD(
            params=model.parameters(),
            lr=config.learning_rate / config.batch,
            momentum=config.momentum,
            weight_decay=config.decay,
        )

    # scheduler multiplies learning rate by a factor calculated on epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # loss function
    criterion = Yolo_loss(
        cfg=config,
        device=device,
    )

    # start training
    save_prefix = "Yolov4_BEV_flat_epoch"
    saved_models = deque()
    min_eval_loss = math.inf  # best validation loss seen so far
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_step = 0

        with tqdm(total=n_train,
                  desc=f"Epoch {epoch + 1}/{epochs}",
                  unit="img",
                  ncols=75) as pbar:
            for i, batch in enumerate(train_loader):
                # get batch
                global_step += 1
                epoch_step += 1
                images = batch[0].float().to(device=device)
                labels = batch[1]

                # compute loss
                preds = model(images)[0]
                loss, loss_xy, loss_wl, loss_rot, loss_obj, loss_noobj = criterion(
                    preds, labels)
                loss.backward()

                epoch_loss += loss.item()

                # update weights
                if global_step % config.subdivisions == 0:
                    optimizer.step()
                    scheduler.step()
                    model.zero_grad()

                # log
                if global_step % (log_step * config.subdivisions) == 0:
                    writer.add_scalar("train/Loss", loss.item(), global_step)
                    writer.add_scalar("train/loss_xy", loss_xy.item(),
                                      global_step)
                    writer.add_scalar("train/loss_wl", loss_wl.item(),
                                      global_step)
                    writer.add_scalar("train/loss_rot", loss_rot.item(),
                                      global_step)
                    writer.add_scalar("train/loss_obj", loss_obj.item(),
                                      global_step)
                    writer.add_scalar("train/loss_noobj", loss_noobj.item(),
                                      global_step)
                    writer.add_scalar("lr",
                                      scheduler.get_lr()[0] * config.batch,
                                      global_step)
                    pbar.set_postfix({
                        "loss (batch)":
                        loss.item(),
                        "loss_xy":
                        loss_xy.item(),
                        "loss_wl":
                        loss_wl.item(),
                        "loss_rot":
                        loss_rot.item(),
                        "loss_obj":
                        loss_obj.item(),
                        "loss_noobj":
                        loss_noobj.item(),
                        "lr":
                        scheduler.get_lr()[0] * config.batch,
                    })
                    logging.debug(
                        "Train step_{}: loss : {},loss xy : {},loss wl : {},"
                        "loss rot : {},loss obj : {},loss noobj : {},lr : {}".
                        format(
                            global_step,
                            loss.item(),
                            loss_xy.item(),
                            loss_wl.item(),
                            loss_rot.item(),
                            loss_obj.item(),
                            loss_noobj.item(),
                            scheduler.get_lr()[0] * config.batch,
                        ))

                pbar.update(images.shape[0])

            # evaluate model every second epoch
            if epoch % 2 == 0:
                eval_model = Darknet(cfg.cfgfile,
                                     inference=True,
                                     model_type="BEV_flat")
                if torch.cuda.device_count() > 1:
                    eval_model.load_state_dict(model.module.state_dict())
                else:
                    eval_model.load_state_dict(model.state_dict())
                eval_model.to(device)
                eval_model.eval()

                eval_loss = 0.0
                eval_loss_xy = 0.0
                eval_loss_wl = 0.0
                eval_loss_rot = 0.0
                eval_loss_obj = 0.0
                eval_loss_noobj = 0.0
                with tqdm(total=n_val,
                          desc=f"Eval {(epoch + 1) // 2}",
                          unit="img",
                          ncols=75) as epbar:
                    for i, batch in enumerate(val_loader):
                        # get batch
                        global_step += 1
                        epoch_step += 1
                        images = batch[0].float().to(device=device)
                        labels = batch[1]

                        # compute loss with the frozen eval copy
                        labels_pred = eval_model(images)[0]
                        loss, loss_xy, loss_wl, loss_rot, loss_obj, loss_noobj = criterion(
                            labels_pred, labels)
                        eval_loss += loss.item()
                        eval_loss_xy += loss_xy.item()
                        eval_loss_wl += loss_wl.item()
                        eval_loss_rot += loss_rot.item()
                        eval_loss_obj += loss_obj.item()
                        eval_loss_noobj += loss_noobj.item()

                        epbar.update(images.shape[0])

                # log
                logging.debug(
                    "Val step_{}: loss : {},loss xy : {},loss wl : {},"
                    "loss rot : {},loss obj : {},loss noobj : {},lr : {}".
                    format(
                        global_step,
                        eval_loss,
                        eval_loss_xy,
                        eval_loss_wl,
                        eval_loss_rot,
                        eval_loss_obj,
                        eval_loss_noobj,
                        scheduler.get_lr()[0] * config.batch,
                    ))

                del eval_model

            # save checkpoint
            if save_cp and eval_loss < min_eval_loss:
                min_eval_loss = eval_loss
                try:
                    os.makedirs(config.checkpoints, exist_ok=True)
                    logging.info("Created checkpoint directory")
                except OSError:
                    pass
                save_path = os.path.join(config.checkpoints,
                                         f"{save_prefix}{epoch + 1}.pth")
                torch.save(model.state_dict(), save_path)
                logging.info(f"Checkpoint {epoch + 1} saved !")
                saved_models.append(save_path)
                if len(saved_models) > config.keep_checkpoint_max > 0:
                    model_to_remove = saved_models.popleft()
                    try:
                        os.remove(model_to_remove)
                    except OSError:
                        logging.info(f"failed to remove {model_to_remove}")

    writer.close()
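

# A minimal, hypothetical driver for train(); `cfg` stands for a config object
# exposing the fields referenced above (cfgfile, batch, subdivisions, epochs,
# ...). The entry point itself is an assumption, not part of the original script.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(cfg.cfgfile, model_type="BEV_flat").to(device)
    train(model, device, cfg, epochs=cfg.epochs)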
Example #14
    parser.add_argument('--thres',
                        default=0.0,
                        type=float,
                        help='conf thres in loss function')
    args = parser.parse_args()

    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)

    #model
    cfgfile = "models/yolov4.cfg"
    weightfile = "models/yolov4.weights"
    darknet_model = Darknet(cfgfile)
    darknet_model.load_weights(weightfile)
    darknet_model = darknet_model.eval().cuda()

    config = '/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
    checkpoint = './models/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
    rcnn_model = init_detector(config, checkpoint, device=torch.device('cuda'))

    # set dataset
    dataset = Coco(args.input_dir)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False)

    # set attacker
    attacker = Attacker(steps=args.steps,
                        thres=args.thres,
                        device=torch.device('cuda'))
Example #15
class DetectorYoloPytorch:
    """ Adaptor for the pytorch Yolo implementation (Tianxiaomo/pytorch-YOLOv4) """
    def __init__(self, config):
        sys.path.append(config["repo_path"])
        from tool.darknet2pytorch import Darknet  # noqa

        self.model = Darknet(config["config_file"])
        self.detection_threshold = config["detection_threshold"]
        self.nms_threshold = config["nms_threshold"]

        self.model.print_network()
        self.model.load_weights(config["weights_file"])
        print("Loading weights from %s... Done!" % (config["weights_file"]))
        self.use_cuda = config["use_cuda"]
        if self.use_cuda:
            self.model.cuda()
        self.class_names = load_class_names(config["names_file"])

    def detect(self, frame, rescale_detections=True, recolor=False):
        orig_height, orig_width = frame.shape[:2]
        frame = cv2.resize(frame, (self.model.width, self.model.height))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        detections = self._do_detect(frame)
        width = orig_width if rescale_detections else self.model.width
        height = orig_height if rescale_detections else self.model.height
        dets = []
        for k, d in enumerate(detections[0]):
            d[0] *= width
            d[1] *= height
            d[2] *= width
            d[3] *= height
            p = d[4]
            label = self.class_names[d[6]]
            dets.append(
                Detection(
                    np.array((d[0:2], d[2:4])),
                    data={
                        "label": label,
                        "p": p
                    },
                ))
        if recolor:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return dets, frame

    def _do_detect(self, img):
        """ Adapted from torch_utils.py -> do_detect() """
        self.model.eval()

        if isinstance(img, np.ndarray) and len(img.shape) == 3:  # single cv2 image, HWC
            img = torch.from_numpy(img.transpose(
                2, 0, 1)).float().div(255.0).unsqueeze(0)
        elif isinstance(img, np.ndarray) and len(img.shape) == 4:  # batch of images, NHWC
            img = torch.from_numpy(img.transpose(0, 3, 1,
                                                 2)).float().div(255.0)
        else:
            print("unknown image type")
            exit(-1)

        if self.use_cuda:
            img = img.cuda()
        # torch.autograd.Variable is a no-op in modern PyTorch; run inference
        # without tracking gradients instead
        with torch.no_grad():
            output = self.model(img)
        return post_processing(img, self.detection_threshold,
                               self.nms_threshold, output)
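

# A minimal sketch of driving the adaptor above, assuming a config dict with the
# keys read in __init__; all paths are placeholders, not part of the original code.
if __name__ == "__main__":
    config = {
        "repo_path": "./pytorch-YOLOv4",
        "config_file": "./cfg/yolov4.cfg",
        "weights_file": "./weights/yolov4.weights",
        "names_file": "./data/coco.names",
        "detection_threshold": 0.5,
        "nms_threshold": 0.4,
        "use_cuda": torch.cuda.is_available(),
    }
    detector = DetectorYoloPytorch(config)
    frame = cv2.imread("example.jpg")  # hypothetical test image
    dets, _ = detector.detect(frame)
    for det in dets:
        print(det.data["label"], det.data["p"])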