Example #1
def main(args=None):
    retinanet = torch.load('/home/pytorch-retinanet/csv_retinanet_35.pt')
    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    path = '/home/test_images_apart/image24624-blur-5-300.jpg'

    img = skimage.io.imread(path)

    if len(img.shape) == 2:
        img = skimage.color.gray2rgb(img)

    img = img.astype(np.float32) / 255.0

    img_tensor = torch.tensor(img, dtype=torch.float32)
    img_tensor = img_tensor.unsqueeze(0)
    img_tensor = img_tensor.permute(0, 3, 1, 2)
    print(img_tensor.size())
    print(type(img_tensor))
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(
            img_tensor.cuda().float())
        print(transformed_anchors)
    print(type(img))
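Note: this snippet feeds the model pixels scaled to [0, 1] but skips the Normalizer() step that every dataset pipeline below applies. If the checkpoint was trained with that pipeline, the input should be normalized the same way. A minimal sketch, assuming Normalizer() uses the usual ImageNet statistics (verify against the repo's own implementation):

# Sketch: apply the same per-channel normalization the training pipeline used.
# The ImageNet mean/std below are an assumption -- check the repo's Normalizer.
mean = np.array([[[0.485, 0.456, 0.406]]], dtype=np.float32)
std = np.array([[[0.229, 0.224, 0.225]]], dtype=np.float32)
img = (img - mean) / std  # apply right after the division by 255.0 above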
Example #2
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser = parser.parse_args(args)

    os.makedirs('after_augmentation_image_sample', exist_ok=True)

    set_name = 'test'

    dataset_sample = CocoDataset(
        parser.coco_path,
        set_name=set_name,
        # transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        transform=transforms.Compose(
            [Normalizer(), AugmenterWithImgaug(),
             Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_sample,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_sample = DataLoader(dataset_sample,
                                   num_workers=1,
                                   collate_fn=collater,
                                   batch_sampler=sampler)

    unnormalize = UnNormalizer()

    for idx, data in enumerate(dataloader_sample):

        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

        img[img < 0] = 0
        img[img > 255] = 255

        img = np.transpose(img, (1, 2, 0))

        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        # Iterate over the annotations of the (single) image in the batch.
        for annot in data['annot'][0]:
            annot = annot.data.numpy()
            x1 = int(annot[0])
            y1 = int(annot[1])
            x2 = int(annot[2])
            y2 = int(annot[3])

            cv2.rectangle(img, (x1, y1), (x2, y2),
                          color=(0, 0, 255),
                          thickness=2)

        # Write once per image, after all boxes are drawn.
        cv2.imwrite(
            'D:\\StreetView\\RenHong\\Pytorch_RetinaNet\\after_augmentation_image_sample\\'
            + 'sample_from_({})_'.format(set_name) + str(idx) + '.jpg',
            img)

    print("finish")
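The unnormalize / clip / transpose / color-convert sequence above recurs in almost every example on this page. A small helper, assuming the repo's UnNormalizer, makes the pattern explicit:

import numpy as np
import cv2

def tensor_to_bgr(img_tensor, unnormalize):
    """Hypothetical helper: CHW float tensor -> HWC uint8 BGR image for cv2."""
    img = np.array(255 * unnormalize(img_tensor)).copy()
    img = np.clip(img, 0, 255)          # same effect as the two masked assignments
    img = np.transpose(img, (1, 2, 0))  # CHW -> HWC
    return cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR)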
Example #3
def main(args=None):
	parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
	parser.add_argument('--coco_path', help='Path to COCO directory')
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
	parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
	parser.add_argument('--img_path', help='Path to file to save val images')

	parser.add_argument('--model', help='Path to model (.pt) file.')

	parser = parser.parse_args(args)

	if parser.dataset == 'coco':
		dataset_val = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Resizer()]))
	elif parser.dataset == 'csv':
		dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))
	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
	dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

	retinanet = torch.load(parser.model)

	use_gpu = True

	if use_gpu:
		if torch.cuda.is_available():
			retinanet = retinanet.cuda()

	if torch.cuda.is_available():
		retinanet = torch.nn.DataParallel(retinanet).cuda()
	else:
		retinanet = torch.nn.DataParallel(retinanet)

	retinanet.eval()

	unnormalize = UnNormalizer()

	def draw_caption(image, box, caption):

		b = np.array(box).astype(int)
		cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
		cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

	for idx, data in enumerate(dataloader_val):

		with torch.no_grad():
			st = time.time()
			if torch.cuda.is_available():
				scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
			else:
				scores, classification, transformed_anchors = retinanet(data['img'].float())
			print('Elapsed time: {}'.format(time.time()-st))
			idxs = np.where(scores.cpu()>0.5)
			img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

			img[img<0] = 0
			img[img>255] = 255

			img = np.transpose(img, (1, 2, 0))

			img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

			for j in range(idxs[0].shape[0]):
				bbox = transformed_anchors[idxs[0][j], :]
				x1 = int(bbox[0])
				y1 = int(bbox[1])
				x2 = int(bbox[2])
				y2 = int(bbox[3])
				label_name = dataset_val.labels[int(classification[idxs[0][j]])]
				draw_caption(img, (x1, y1, x2, y2), label_name)

				cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
				print(label_name)

			cv2.imwrite(parser.img_path + str(idx) + ".jpeg", img)
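A hypothetical invocation of this script (the flag names come from the parser above; all paths are placeholders):

# Hypothetical invocation; every path is a placeholder.
main(['--dataset', 'csv',
      '--csv_val', 'val_annotations.csv',
      '--csv_classes', 'classes.csv',
      '--img_path', 'out/',
      '--model', 'model_final.pt'])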
Example #4
def evaluate_coco_and_save_image(dataset,
                                 model,
                                 save_ckpt_path,
                                 epoch_num,
                                 threshold=0.05):
    model.eval()

    unnormalize = UnNormalizer()

    save_ep_image_dir = os.path.join(save_ckpt_path, "ep(%s)" % epoch_num)
    os.makedirs(save_ep_image_dir)

    with torch.no_grad():

        # start collecting results
        results = []
        image_ids = []

        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            scores, labels, boxes = model(data['img'].permute(
                2, 0, 1).cuda().float().unsqueeze(dim=0))
            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()

            # visualize image
            idxs = np.where(scores > 0.5)
            # UnNormalizer expects a CHW tensor, so permute before un-normalizing,
            # then convert back to HWC for OpenCV.
            img = data['img'][:, :, :3].permute(2, 0, 1)
            img = np.array(255 * unnormalize(img)).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = boxes[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset.labels[int(labels[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
                print(label_name)

            cv2.imwrite(
                os.path.join(save_ep_image_dir, str(index)) + '.jpg', img)

            # correct boxes for image scale
            boxes /= scale

            if boxes.shape[0] > 0:
                # change to (x, y, w, h) (MS COCO standard)
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]

                # compute predicted labels and scores
                # for box, score, label in zip(boxes[0], scores[0], labels[0]):
                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]

                    # scores are sorted, so we can break
                    if score < threshold:
                        break

                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset.image_ids[index],
                        'category_id': dataset.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }

                    # append detection to results
                    results.append(image_result)

            # append image to list of processed images
            image_ids.append(dataset.image_ids[index])

            # print progress
            print('{}/{}'.format(index, len(dataset)), end='\r')

        if not len(results):
            model.train()
            return

        # write output
        json.dump(results,
                  open('{}_bbox_results.json'.format(dataset.set_name), 'w'),
                  indent=4)

        # load results in COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(
            dataset.set_name))

        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        model.train()

        return
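draw_caption is not defined in this snippet; for completeness, here is the definition that Examples #3 and #10 use:

def draw_caption(image, box, caption):
    # Draw the label twice (thick black, then thin white) for a readable outline.
    b = np.array(box).astype(int)
    cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
    cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)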
def main(args=None):
    parser = argparse.ArgumentParser(
        description=
        'Simple script for visualizing results from a RetinaNet network using the csv dataset.'
    )

    parser.add_argument(
        '--csv',
        help='Path to file containing annotations (optional, see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--model_path', help='Path to model (.pt) file.')
    parser.add_argument('--configfile', help='Path to the config file.')
    parser.add_argument('--out_path',
                        help='Path to the folder where to save the images.')

    parser = parser.parse_args(args)
    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:
        maxside = int(configs['TRAINING']['maxside'])
        minside = int(configs['TRAINING']['minside'])
    except Exception as e:
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        sys.exit()

    if parser.csv is None:
        dataset_eval = None
        print('No validation annotations provided.')
    else:
        dataset_eval = CSVDataset(train_file=parser.csv,
                                  class_list=parser.csv_classes,
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(min_side=minside,
                                              max_side=maxside)
                                  ]))

    dataloader_val = None
    if dataset_eval is not None:
        sampler_val = AspectRatioBasedSampler(dataset_eval,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_eval,
                                    num_workers=1,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    retinanet = load_model(parser.model_path, parser.configfile)

    unnormalize = UnNormalizer()

    if not os.path.exists(parser.out_path):
        os.makedirs(parser.out_path, exist_ok=True)

    if dataloader_val is None:
        return

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()
            if torch.cuda.is_available():
                scores, classification, transformed_anchors = retinanet(
                    data['img'].cuda().float())
            else:
                scores, classification, transformed_anchors = retinanet(
                    data['img'].float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_eval.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            cv2.imwrite(os.path.join(parser.out_path, f'image_{idx}.png'), img)
Example #6
print('training..')

for epoch in range(epochs):
    # Call train function
    train_one_epoch(epoch, train_data_loader)

    # Call valid function
    valid_one_epoch(epoch, valid_data_loader)


# Eval

### Sample Results
retinanet.eval()
unnormalize = UnNormalizer()

for iter_num, data in enumerate(test_data_loader):

    if iter_num > 10:
        break

    # Getting Predictions (inference only, so run without gradient tracking)
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())

    idxs = np.where(scores.cpu() > 0.5)
    img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

    img[img < 0] = 0
    img[img > 255] = 255
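    # The snippet is truncated here; a sketch of the usual continuation,
    # following the same pattern as Examples #3 and #8
    # (the output filename is a placeholder):
    img = np.transpose(img, (1, 2, 0))  # CHW -> HWC
    img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR)
    for j in range(idxs[0].shape[0]):
        bbox = transformed_anchors[idxs[0][j], :]
        x1, y1, x2, y2 = (int(bbox[k]) for k in range(4))
        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('sample_{}.jpg'.format(iter_num), img)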
Example #7
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--data_dir', help='Path to imgs (.jpg) file.')
    parser.add_argument('--num_totest', help='How many imgs to test.')

    parser = parser.parse_args(args)

    dataset_train = CSVDataset(train_file='./foreign_object_dataset/train_standard_annotation.csv',
                               class_list='./foreign_object_dataset/class4_classlist.csv',
                               transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, 1, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.eval()
    unnormalize = UnNormalizer()

    FODlabels = ['nail', 'clipper_B', 'clipper_Y', 'Lstick', 'butt', 'foreign']
    os.makedirs('./visualizeResult', exist_ok=True)  # cv2.imwrite fails silently if the directory is missing

    for iter_num, data in enumerate(dataloader_train):
        with torch.no_grad():
            st = time.time()

            if torch.cuda.is_available():
                scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            else:
                scores, classification, transformed_anchors = retinanet(data['img'].float())

            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img<0] = 0
            img[img>255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = FODlabels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
            cv2.imwrite('./visualizeResult/visualize' + data['filename'][0].split('/')[-1], img)
            if iter_num > int(parser.num_totest):
                break
Example #8
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--output',
                        help='Output directory of images with boxes.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='train2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    if os.path.exists(parser.output):
        shutil.rmtree(parser.output)
    os.makedirs(os.path.join(parser.output, "pass"))
    os.makedirs(os.path.join(parser.output, "fail"))

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    matched = 0
    not_matched = 0
    failed = []
    passed = []
    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(
                data['img'].cuda().float())
            elapsed = time.time() - st  # inference time (not printed in this example)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            bb_found = []
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])

                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                bb_found.append([label_name, x1, y1, x2, y2])

            bb_groups_filtered = group_and_filter_boxes(bb_found)
            draw_boxes(img, bb_groups_filtered)

            _, r, _ = data['annot'].shape
            labels_truth = []
            for i in range(r):
                labels_truth.append([int(data['annot'][0, i, -1])] +
                                    [int(d) for d in data['annot'][0, i, :-1]])

            bb_group_truth = group_by_y(labels_truth)
            strings_found = '-'.join(
                [''.join([str(c[0]) for c in l]) for l in bb_groups_filtered])
            strings_truth = '-'.join(
                [''.join([str(c[0]) for c in l]) for l in bb_group_truth])
            img_name = Path(data['image_name'][0]).name
            if strings_truth == strings_found:
                result = 'pass'
                matched += 1
                passed.append((strings_found, strings_truth, img_name))
            else:
                result = 'fail'
                not_matched += 1
                failed.append((strings_found, strings_truth, img_name))
            total = matched + not_matched
            print("{:15s}: Found: {}, Truth: {}, {}, {}/{}={:5.2f}".format(
                result, strings_found, strings_truth, img_name, matched, total,
                matched / float(total)))

            cv2.imwrite(os.path.join(parser.output, result, img_name), img)
    write_results(os.path.join(parser.output, "fail.log"), failed)
    write_results(os.path.join(parser.output, "pass.log"), passed)
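group_and_filter_boxes, group_by_y, draw_boxes and write_results are project-local helpers not shown in this snippet. A minimal sketch of write_results, assuming each entry is the (strings_found, strings_truth, img_name) tuple built above:

def write_results(path, entries):
    # Hypothetical helper: one "found truth image_name" line per entry.
    with open(path, 'w') as f:
        for found, truth, img_name in entries:
            f.write('{} {} {}\n'.format(found, truth, img_name))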
Example #9
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--data_dir', help='Path to imgs (.jpg) file.')
    parser.add_argument('--num_totest', help='How many imgs to test.')

    parser = parser.parse_args(args)

    imglist= sorted(glob.glob(os.path.join(parser.data_dir, '*.jpg')))
    random.shuffle(imglist)
    imglist=imglist[:int(parser.num_totest)]

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.eval()
    unnormalize = UnNormalizer()

    trans = transforms.ToTensor()
    FODlabels = ['nail', 'clipper_B', 'clipper_Y', 'Lstick', 'butt', 'foreign']
    os.makedirs('./visualizeResult', exist_ok=True)  # cv2.imwrite fails silently if the directory is missing

    for idx,path_name in enumerate(imglist):
        with torch.no_grad():
            st = time.time()
            img = cv2.imread(path_name)
            img = cv2.resize(img, (1056, 608))
            # Note: ToTensor only scales to [0, 1]; no Normalizer is applied here.
            img_tensor = trans(img).unsqueeze(0)

            if torch.cuda.is_available():
                scores, classification, transformed_anchors = retinanet(img_tensor.cuda().float())
            else:
                scores, classification, transformed_anchors = retinanet(img_tensor.float())
            print('Elapsed time: {}'.format(time.time()-st))
            idxs = np.where(scores.cpu()>0.05)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = FODlabels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
            cv2.imwrite('./visualizeResult/visualize'+str(idx)+'.jpg', img)
Example #10
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--imagesavepath',
                        help='path to save detection images')

    parser = parser.parse_args(args)

    if parser.imagesavepath:
        os.makedirs(parser.imagesavepath, exist_ok=True)
    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='train2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        #dataset_val = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))	# raises an error: there is no --csv_train argument
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        # b[1]-20 keeps the label from running past the top edge of the image
        cv2.putText(image, caption, (b[0], b[1] - 10 if b[1] - 20 > 0 else 30),
                    cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10 if b[1] - 20 > 0 else 30),
                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            if torch.cuda.is_available():
                scores, classification, transformed_anchors = retinanet(
                    data['img'].cuda().float())
            else:
                scores, classification, transformed_anchors = retinanet(
                    data['img'].float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            image_path = data['image_path'][0]
            scale = data['scale'][0]

            # Re-read the original image and map the boxes back with 1/scale
            # instead of un-normalizing the resized network input.
            img = cv2.imread(image_path)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0] / scale)
                y1 = int(bbox[1] / scale)
                x2 = int(bbox[2] / scale)
                y2 = int(bbox[3] / scale)
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                txt_draw = "%s %.2f" % (label_name, scores[idxs[0][j]])
                draw_caption(img, (x1, y1, x2, y2), txt_draw)

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

                print(label_name)
            if parser.imagesavepath:
                new_path = os.path.join(parser.imagesavepath,
                                        os.path.basename(image_path))
                cv2.imwrite(new_path, img)
            else:
                cv2.imshow('img', img)
                cv2.waitKey(0)
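None of these snippets include an entry-point guard; saved as a standalone script, each would typically end with:

if __name__ == '__main__':
    main()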