Example #1
                        default=300,
                        type=int,
                        help='Input dimensions for SSD')
    args = parser.parse_args()

    if 'VOC' in args.train_data:
        dataset = VOC(args.train_data, transform=Transform(args.ssd_size))
    else:
        dataset = SpaceNet(args.train_data, transform=Transform(args.ssd_size))

    args.checkpoint_dir = os.path.join(args.save_folder,
                                       'ssd_%s' % datetime.now().isoformat())
    args.means = (104, 117, 123)  # only VOC is supported for now
    args.num_classes = len(dataset.classes) + 1
    args.stepvalues = (20, 50, 70)
    args.start_iter = 0
    args.writer = SummaryWriter()

    os.makedirs(args.save_folder, exist_ok=True)

    default_type = 'torch.cuda.FloatTensor' if args.cuda else 'torch.FloatTensor'
    torch.set_default_tensor_type(default_type)

    net = Retina(dataset.classes, args.ssd_size)

    if args.cuda:
        net = net.cuda()

    load_checkpoint(net, args)
    train(net, dataset, args)
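
The fragment above starts partway through the argument definitions, so the parser setup itself is missing. Below is a minimal sketch of what it implies, reconstructed only from the args attributes the body actually reads (train_data, save_folder, cuda, ssd_size) and shown at module level even though the fragment is indented inside an enclosing function; the flag names, defaults, and help strings here are assumptions, not the original source:

import argparse

# Hypothetical reconstruction -- inferred from the attributes used in the
# fragment above, not taken from the original project.
parser = argparse.ArgumentParser(description='Train an SSD/RetinaNet detector')
parser.add_argument('--train_data', required=True,
                    help='Path to a VOC or SpaceNet training set')
parser.add_argument('--save_folder', default='weights/',
                    help='Directory for checkpoints and TensorBoard logs')
parser.add_argument('--cuda', action='store_true',
                    help='Train on the GPU')
parser.add_argument('--ssd_size',
                    default=300,
                    type=int,
                    help='Input dimensions for SSD')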
Example #2
import hashlib
import json
import os
import time
from zipfile import ZipFile

import cv2
import numpy as np
import pandas
import torch
from PIL import Image
from torch.autograd import Variable
from torchvision import transforms

# NAME and the Retina network class are assumed to come from elsewhere in the
# project; a hedged usage sketch follows the class definition.
class RetinaNet:
    name = NAME

    @classmethod
    def mk_hash(cls, path):
        '''
        Create an MD5 hash from a model's weight file.
        Arguments:
            path : str - path to a RetinaNet checkpoint
        '''
        dirs = path.split('/')
        if 'retina_net' in dirs:
            dirs = dirs[dirs.index('retina_net'):]
            path = '/'.join(dirs)
        else:
            path = os.path.join('retina_net', path)

        md5 = hashlib.md5()
        md5.update(path.encode('utf-8'))
        return md5.hexdigest()

    @classmethod
    def zip_weights(cls, path, base_dir='./'):
        if os.path.splitext(path)[1] != '.pth':
            raise ValueError('Invalid checkpoint: expected a .pth file, got %r' % path)

        dirs = path.split('/')

        res = {
            'name' : 'RetinaNet',
            'instance' : '_'.join(dirs[-2:]),
            'id' : cls.mk_hash(path)
        }

        zipfile = os.path.join(base_dir, res['id'] + '.zip')

        if os.path.exists(zipfile):
            os.remove(zipfile)

        with ZipFile(zipfile, 'w') as z:
            z.write(path, os.path.join(res['id'], os.path.basename(path)))

        return zipfile

    def __init__(self, weights, classes=('building',), cuda=True):
        chkpnt = torch.load(weights)
        self.config = chkpnt['args']
        self.net = Retina(self.config).eval()
        self.net.load_state_dict(chkpnt['state_dict'])
        self.transform = transforms.Compose([
            transforms.Resize((self.config.model_input_size, self.config.model_input_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.cuda = cuda
        if self.cuda:
            # Only move the network and its anchor boxes to the GPU when CUDA is requested.
            self.net = self.net.cuda()
            self.net.anchors.anchors = self.net.anchors.anchors.cuda()
            torch.set_default_tensor_type('torch.cuda.FloatTensor')

    def predict_image(self, image, eval_mode=False):
        """
        Infer buildings for a single image.
        Inputs:
            image :: PIL.Image - should be in RGB format
        """

        t0 = time.time()
        img = self.transform(image)
        if self.cuda:
            img = img.cuda()

        out = self.net(Variable(img.unsqueeze(0), requires_grad=False)).squeeze().data.cpu().numpy()
        total_time = time.time() - t0
        
        out = out[1]  # keep class index 1 (building); index 0 is the background class

        out[:, (1, 3)] = np.clip(out[:, (1, 3)] * image.width, a_min=0, a_max=image.width)
        out[:, (2, 4)] = np.clip(out[:, (2, 4)] * image.height, a_min=0, a_max=image.height)

        out = out[out[:, 0] > 0]

        return pandas.DataFrame(out, columns=['score', 'x1', 'y1', 'x2', 'y2'])

    def predict_all(self, test_boxes_file, batch_size=8, data_dir=None):
        if data_dir is None:
            data_dir = os.path.dirname(test_boxes_file)

        with open(test_boxes_file) as f:
            annos = json.load(f)

        # cv2.imwrite silently fails when the output directory does not exist.
        os.makedirs('samples', exist_ok=True)

        for batch in range(0, len(annos), batch_size):
            images, sizes = [], []
            for i in range(min(batch_size, len(annos) - batch)):
                img = Image.open(os.path.join(data_dir, annos[batch + i]['image_path']))
                images.append(self.transform(img))
                sizes.append(torch.Tensor([img.width, img.height]))

            images = torch.stack(images)
            sizes = torch.stack(sizes)

            if self.cuda:
                images = images.cuda()
                sizes = sizes.cuda()

            out = self.net(Variable(images, requires_grad=False)).data

            # [w, h, w, h] per image, broadcast over classes and detections,
            # to rescale the normalized (x1, y1, x2, y2) columns back to pixels.
            hws = torch.cat([sizes, sizes], dim=1).view(-1, 1, 1, 4).expand(-1, out.shape[1], out.shape[2], -1)

            out[:, :, :, 1:] *= hws
            out = out[:, 1, :, :].cpu().numpy()  # keep class 1 (building); 0 is background

            for i, detections in enumerate(out):
                anno = annos[batch + i]
                pred = cv2.imread(os.path.join(data_dir, anno['image_path']))

                detections = detections[detections[:, 0] > 0]
                df = pandas.DataFrame(detections, columns=['score', 'x1', 'y1', 'x2', 'y2'])
                df['image_id'] = anno['image_path']

                truth = pred.copy()

                for box in df[['x1', 'y1', 'x2', 'y2']].values.round().astype(int):
                    cv2.rectangle(pred, tuple(box[:2]), tuple(box[2:4]), (0,0,255))

                for r in anno['rects']:
                    box = list(map(lambda x: int(r[x]), ['x1', 'y1', 'x2', 'y2']))
                    cv2.rectangle(truth, tuple(box[:2]), tuple(box[2:]), (0, 0, 255))

                data = np.concatenate([pred, truth], axis=1)
                cv2.imwrite('samples/image_%d.jpg' % (batch + i), data)

                yield df
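
A hedged usage sketch for the class above; the checkpoint and image paths are placeholders, and a GPU is assumed to be available (cuda=True):

# Hypothetical usage -- the file paths below are placeholders, not assets from
# the original project.
from PIL import Image

model = RetinaNet('retina_net/run_01/checkpoint.pth', cuda=True)

# Single image: returns a DataFrame with columns score, x1, y1, x2, y2
# in pixel coordinates of the input image.
img = Image.open('tile.png').convert('RGB')
detections = model.predict_image(img)
print(detections.head())

# Whole test set: predict_all is a generator that yields one DataFrame per image
# and writes side-by-side prediction/ground-truth previews under samples/.
for df in model.predict_all('data/test_boxes.json', batch_size=8):
    print(df['image_id'].iloc[0], 'detections:', len(df))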