Python DataEncoder примеры использования

Язык программирования: Python

Пространство имен/Пакет: encoder

Класс/Тип: DataEncoder

Примеров на hotexamples.com: 32

Python DataEncoder - найдено 32 примера. Это лучшие примеры кода на Python для encoder.DataEncoder, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

DataEncoder(30)

_get_anchor_boxes(2)

class_threshold(1)

Пример #1

Показать файл

Файл: datagen.py Проект: hopstone/pytorch-retinanet

    def __init__(self, root, list_file, train, transform, input_size):
        '''Read the index file and cache per-image boxes and labels.

        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root, self.train = root, train
        self.transform, self.input_size = transform, input_size

        self.fnames, self.boxes, self.labels = [], [], []

        self.encoder = DataEncoder()

        with open(list_file) as index_file:
            records = index_file.readlines()
        self.num_samples = len(records)

        for record in records:
            fields = record.strip().split()
            self.fnames.append(fields[0])

            # After the file name the line is consumed in complete groups of
            # five fields: xmin ymin xmax ymax class. A trailing partial group
            # is ignored.
            group_count = (len(fields) - 1) // 5
            coords, classes = [], []
            for start in range(1, 1 + 5 * group_count, 5):
                xmin, ymin, xmax, ymax, cls = fields[start:start + 5]
                coords.append(
                    [float(xmin), float(ymin), float(xmax), float(ymax)])
                classes.append(int(cls))
            self.boxes.append(torch.Tensor(coords))
            self.labels.append(torch.LongTensor(classes))

Пример #2

Показать файл

Файл: datagen.py Проект: hopstone/pytorch-retinanet

class ListDataset(data.Dataset):
    '''Detection dataset driven by a plain-text index file.

    Every index line holds an image file name followed by groups of five
    fields: xmin ymin xmax ymax class.
    '''

    def __init__(self, root, list_file, train, transform, input_size):
        '''Read the index file and cache per-image boxes and labels.

        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root, self.train = root, train
        self.transform, self.input_size = transform, input_size

        self.fnames, self.boxes, self.labels = [], [], []

        self.encoder = DataEncoder()

        with open(list_file) as index_file:
            records = index_file.readlines()
        self.num_samples = len(records)

        for record in records:
            fields = record.strip().split()
            self.fnames.append(fields[0])

            # Only complete five-field groups after the file name are used.
            group_count = (len(fields) - 1) // 5
            coords, classes = [], []
            for start in range(1, 1 + 5 * group_count, 5):
                xmin, ymin, xmax, ymax, cls = fields[start:start + 5]
                coords.append(
                    [float(xmin), float(ymin), float(xmax), float(ymax)])
                classes.append(int(cls))
            self.boxes.append(torch.Tensor(coords))
            self.labels.append(torch.LongTensor(classes))

    def __getitem__(self, idx):
        '''Load one image together with its boxes and labels.

        Args:
          idx: (int) image index.

        Returns:
          img: the output of `self.transform` applied to the augmented image.
          boxes: (tensor) box coordinates after augmentation.
          labels: (tensor) class labels as read from the index file.
        '''
        image_path = os.path.join(self.root, self.fnames[idx])
        img = Image.open(image_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Work on a copy so the cached boxes are never modified.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        if not self.train:
            # Evaluation: resize, then center crop to a square.
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))
        else:
            # Training: random flip and crop, then resize to a square.
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))

        return self.transform(img), boxes, labels

    def collate_fn(self, batch):
        '''Stack images into one tensor and encode the targets.

        Args:
          batch: (list) of (image, boxes, labels) samples.

        Returns:
          image batch sized [N, 3, input_size, input_size], stacked location
          targets, stacked class targets.
        '''
        side = self.input_size
        inputs = torch.zeros(len(batch), 3, side, side)

        loc_targets, cls_targets = [], []
        for slot, sample in enumerate(batch):
            inputs[slot] = sample[0]
            loc_target, cls_target = self.encoder.encode(
                sample[1], sample[2], input_size=(side, side))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        '''Return the number of lines read from the index file.'''
        return self.num_samples

Пример #3

Показать файл

Файл: main2_eval.py Проект: akira-l/face_raw

def _face_feature(im_name):
    '''Return the embedding of an image concatenated with that of its mirror.

    The image is opened as RGB, passed through the module-level `alignment`
    and `transform` callables, and fed to `id_net` together with its
    horizontally flipped copy.

    Args:
      im_name: (str) path to the image file.

    Returns:
      (tensor) 1-D CPU tensor: both `id_net` outputs concatenated along dim 1,
      first batch row.
    '''
    img = Image.open(im_name).convert('RGB')
    img = alignment(img)
    img, img_ = transform(img), transform(F.hflip(img))
    # NOTE(review): `volatile=True` is the legacy way to disable autograd;
    # newer PyTorch releases expect `torch.no_grad()` instead -- confirm the
    # targeted version before changing it.
    img = Variable(img.unsqueeze(0).cuda(), volatile=True)
    img_ = Variable(img_.unsqueeze(0).cuda(), volatile=True)
    return torch.cat((id_net(img), id_net(img_)), 1).data.cpu()[0]


def test_eval():
    '''Evaluate `id_net` by nearest-neighbour identification and print accuracy.

    One image per identity (the first occurrence in the CSV) forms the
    gallery. The first 400 remaining images are the validation queries. Each
    query is assigned the identity of the gallery feature with the highest
    cosine similarity, and the fraction of correct assignments is printed.

    Raises:
      ValueError: if some identity in range(2874) has no image in the CSV.
    '''
    num_ids = 2874
    num_valid = 400

    id_net.eval()
    ids_list = list(range(num_ids))
    root = "./../face_a/train"
    list_file = "./../face_a/train.csv"

    # Close the CSV handle deterministically instead of leaking it.
    with open(list_file, 'r') as csv_file:
        file_list = list(csv.reader(csv_file))

    fnames = [os.path.join(root, row[0]) for row in file_list]
    ids = [int(row[1]) for row in file_list]

    # Move the first image of every identity out of the pool into the gallery.
    im_name_list = []
    for id_counter in ids_list:
        seq_num = ids.index(id_counter)
        im_name_list.append(fnames.pop(seq_num))
        ids.pop(seq_num)

    im_name_valid = fnames[:num_valid]
    ids_valid = ids[:num_valid]

    # 1024 is the length of the concatenated feature stored per gallery row.
    eval_list_feature = torch.zeros(len(ids_list), 1024)
    for i, name in enumerate(im_name_list):
        print(i)
        eval_list_feature[i, :] = _face_feature(name)

    id_ = []
    for name in im_name_valid:
        f2 = _face_feature(name)
        dis = []
        for gallery_counter in range(eval_list_feature.size(0)):
            f1 = eval_list_feature[gallery_counter, :]
            cos_dis = f1.dot(f2) / (f1.norm() * f2.norm() + 1e-5)
            dis.append(float(cos_dis))
        # Keep predictions as ints: the ground-truth ids are ints, so storing
        # them as strings made every comparison fail (accuracy always 0).
        id_.append(ids_list[dis.index(max(dis))])

    acc_counter = sum(
        1 for predicted, actual in zip(id_, ids_valid) if predicted == actual)
    # Divide by the real number of queries (400 when enough images exist);
    # max() avoids a division by zero when there are none.
    print(acc_counter / float(max(len(ids_valid), 1)))

Пример #4

Показать файл

Файл: tracing.py Проект: sptj/rough_retinanet

# Load the test image, resize it, and keep an untransformed copy for drawing.
image = Image.open('IMG_3321.JPG').convert('RGB')

image = image.resize((1280, 960))
img = image.copy()
# Convert to a tensor and normalize each channel with the given mean / std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
print(image.size)
image = transform(image)

# forward
loc_preds = traced_script_module(image.unsqueeze(0).cuda())
# Reduce the model output to the flat index of its largest value.
loc_preds = loc_preds.argmax()
print(loc_preds)
encoder = DataEncoder()
# Anchor table for a 1280x960 input; it is indexed by the argmax below.
ref_table = encoder._get_anchor_boxes(torch.Tensor([1280, 960]))

boxes = [ref_table[loc_preds]]
box = boxes[0]
print(boxes)
# Rewrite the box in place. Presumably it starts as (cx, cy, w, h) and ends as
# (xmin, ymin, xmax, ymax) -- confirm against `_get_anchor_boxes`.
# Statement order matters: box[2] / box[3] are computed from the already
# updated box[0] / box[1].
# NOTE(review): `box` comes from indexing `ref_table`; if that indexing
# returns a view, these writes also change `ref_table` -- confirm.
box[0] = (box[0] - box[2] / 2)
box[1] = (box[1] - box[3] / 2)
box[2] = (box[2] + box[0])
box[3] = (box[3] + box[1])

print(ref_table[215999])

# Draw the selected box on the untransformed copy of the image.
draw = ImageDraw.Draw(img)

draw.rectangle(list(box), outline='red')

Пример #5

Показать файл

Файл: train.py Проект: xw-hu/retinanet.pytorch

    transforms.ToTensor(),
    transforms.Normalize(cfg.mean, cfg.std)
]
# When a scale is configured, rescaling becomes the first training transform.
# NOTE(review): `transforms.Scale` was deprecated in favour of
# `transforms.Resize` in later torchvision releases -- confirm the version.
if cfg.scale is not None:
    train_transform_list.insert(0, transforms.Scale(cfg.scale))
train_transform = transforms.Compose(train_transform_list)
# Validation only converts to a tensor and normalizes.
val_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(cfg.mean, cfg.std)])

# Train and validation sets differ only in the image-set file and transform;
# each one gets its own DataEncoder instance.
trainset = VocLikeDataset(image_dir=cfg.image_dir,
                          annotation_dir=cfg.annotation_dir,
                          imageset_fn=cfg.train_imageset_fn,
                          image_ext=cfg.image_ext,
                          classes=cfg.classes,
                          encoder=DataEncoder(),
                          transform=train_transform)
valset = VocLikeDataset(image_dir=cfg.image_dir,
                        annotation_dir=cfg.annotation_dir,
                        imageset_fn=cfg.val_imageset_fn,
                        image_ext=cfg.image_ext,
                        classes=cfg.classes,
                        encoder=DataEncoder(),
                        transform=val_transform)
# Batches are assembled by the dataset's own collate_fn.
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=cfg.batch_size,
                                          shuffle=True,
                                          num_workers=cfg.num_workers,
                                          collate_fn=trainset.collate_fn)
valloader = torch.utils.data.DataLoader(valset,
                                        batch_size=cfg.batch_size,

Пример #6

Показать файл

Файл: datasets.py Проект: smahliivaza/hackathon

class BottleLoader(Dataset):
    '''Dataset of ``.jpg`` images paired with JSON bounding-box annotations.

    For every ``<name>.jpg`` in the directory the annotation file
    ``<name><json_suffix>.json`` is expected next to it. Each annotation file
    holds a list of groups; a group has an ``id`` (its label) and a ``data``
    list of objects carrying a ``boundingBox``.
    '''

    def __init__(self,
                 dir,
                 encoder,
                 json_suffix='',
                 transform=None,
                 val=False):
        '''Index the images and build the label-to-index mapping.

        Args:
          dir: (str) directory holding the images and annotation files.
          encoder: object whose `encode(boxes, labels, input_size=...)` is
            called by `collate_fn`.
          json_suffix: (str) text inserted between the image stem and
            ``.json`` in annotation file names.
          transform: optional callable applied to each example dict.
          val: (bool) when true, `collate_fn` also returns the image sizes.
        '''
        self.dir = dir
        # Use the encoder supplied by the caller. The original also built a
        # throwaway DataEncoder() here that was overwritten straight away.
        self.encoder = encoder
        self.json_suffix = json_suffix
        self.transform = transform
        self.val = val

        # Only names that END in '.jpg' are images (a substring test also
        # matched e.g. 'x.jpg.txt'); sorting makes the sample order
        # reproducible, since listdir returns entries in arbitrary order.
        extension = '.jpg'
        prefixes = [
            path.join(self.dir, name[:-len(extension)])
            for name in sorted(listdir(self.dir))
            if name.endswith(extension)
        ]

        self.impath = [f'{prefix}.jpg' for prefix in prefixes]
        self.annotations = [
            f'{prefix}{self.json_suffix}.json' for prefix in prefixes
        ]

        labelset = set()
        for annotation_path in self.annotations:
            with open(annotation_path, 'r') as f:
                groups = json.load(f)
            labelset.update(group['id'] for group in groups)
        # Enumerate the labels in a fixed order: set iteration order is not
        # guaranteed to be stable between runs, which made the label indices
        # irreproducible. Sorting by (type name, value) also copes with ids of
        # mixed types.
        ordered_labels = sorted(
            labelset, key=lambda label: (type(label).__name__, label))
        self.label_index = {
            label: index for index, label in enumerate(ordered_labels)
        }

    def annotate(self, fname, imsize):
        '''Build one BoundingBox per annotated object in a JSON file.

        Args:
          fname: (str) path to the annotation JSON file.
          imsize: sequence whose first two items are forwarded to BoundingBox.

        Returns:
          (list) of BoundingBox objects, each constructed from
          (X, Y + Height, X + Width, Y, imsize[0], imsize[1], label index).
        '''
        with open(fname, 'r') as f:
            groups = json.load(f)
        boxes = []
        for group in groups:
            label = self.label_index[group['id']]
            for obj in group['data']:
                bounds = obj['boundingBox']
                boxes.append(
                    BoundingBox(
                        bounds['X'],
                        bounds['Y'] + bounds['Height'],
                        bounds['X'] + bounds['Width'],
                        bounds['Y'], imsize[0], imsize[1],
                        label))
        return boxes

    # A second, earlier `__getitem__` used to sit here. The definition below
    # shadowed it and it referenced undefined names, so it could never run;
    # it has been removed.
    def __getitem__(self, index):
        '''Return the example at `index`, passed through `transform` if set.

        Args:
          index: (int) sample index.

        Returns:
          dict with keys 'image' (PIL image) and 'boxes' (list from
          `annotate`), or whatever `self.transform` returns for that dict.
        '''
        impath = self.impath[index]
        annotation = self.annotations[index]
        image = Image.open(impath)
        boxes = self.annotate(annotation, image.size)
        example = {'image': image, 'boxes': boxes}
        if self.transform:
            example = self.transform(example)
        return example

    def __len__(self):
        '''Return the number of indexed images.'''
        return len(self.impath)

    def collate_fn(self, batch):
        '''Zero-pad images to a common size and encode the targets.

        Args:
          batch: (list) of example dicts with 'image', 'boxes' and 'labels'.
            NOTE(review): `__getitem__` itself adds no 'labels' key;
            presumably the transform does -- confirm.

        Returns:
          (inputs, loc_targets, cls_targets), or, when `self.val` is true,
          (inputs, img_sizes, loc_targets, cls_targets).
        '''
        imgs = [example['image'] for example in batch]
        boxes = [example['boxes'] for example in batch]
        labels = [example['labels'] for example in batch]
        img_sizes = [img.size()[1:] for img in imgs]

        max_h = max(im.size(1) for im in imgs)
        max_w = max(im.size(2) for im in imgs)
        inputs = torch.zeros(len(imgs), 3, max_h, max_w)

        loc_targets = []
        cls_targets = []
        for i, im in enumerate(imgs):
            imh, imw = im.size(1), im.size(2)
            # Copy into the top-left corner; the rest stays zero padding.
            inputs[i, :, :imh, :imw] = im

            loc_target, cls_target = self.encoder.encode(boxes[i],
                                                         labels[i],
                                                         input_size=(max_w,
                                                                     max_h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        if not self.val:
            return inputs, torch.stack(loc_targets), torch.stack(cls_targets)
        return inputs, img_sizes, torch.stack(loc_targets), torch.stack(
            cls_targets)

Пример #7

Показать файл

Файл: datagen.py Проект: Boosting/pytorch-retinanet

class ListDataset(data.Dataset):
    '''Detection dataset that resizes by the shorter side and pads per batch.

    Every index line holds an image file name, two fields that are skipped
    here (presumably the image width and height -- confirm against the index
    format), and then groups of five fields: xmin ymin xmax ymax class.
    '''

    def __init__(self, root, list_file, train, transform, input_size,
                 max_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) image shorter side size.
          max_size: (int) maximum image longer side size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size
        self.max_size = max_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
        self.num_samples = len(lines)

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            # Box groups start at field 3; only complete groups are used.
            num_boxes = (len(splited) - 3) // 5
            box = []
            label = []
            for start in range(3, 3 + 5 * num_boxes, 5):
                xmin, ymin, xmax, ymax, c = splited[start:start + 5]
                box.append(
                    [float(xmin),
                     float(ymin),
                     float(xmax),
                     float(ymax)])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: the output of `self.transform` applied to the resized image.
          boxes: (tensor) box coordinates, flipped and scaled like the image.
          labels: (tensor) class labels as read from the index file.
        '''
        # Load image and bbox locations.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        # Clone first: the flip and the rescale below write in place. Without
        # the copy they modified the cached boxes, so the boxes were scaled
        # (and possibly flipped) again each time the sample was fetched.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        # Data augmentation while training.
        if self.train:
            img, boxes = self.random_flip(img, boxes)

        img, im_scale = self.resize(img)
        boxes *= im_scale
        img = self.transform(img)
        return img, boxes, labels

    def resize(self, img):
        '''Resize the image shorter side to input_size.

        Args:
          img: (PIL.Image) image.

        Returns:
          (PIL.Image) resized image.
          (float) image scale.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        im_size_min = min(img.size)
        im_size_max = max(img.size)
        im_scale = float(self.input_size) / float(im_size_min)
        # Limit the longer side to max_size.
        if round(im_scale * im_size_max) > self.max_size:
            im_scale = float(self.max_size) / float(im_size_max)
        w = int(img.width * im_scale)
        h = int(img.height * im_scale)
        return img.resize((w, h)), im_scale

    def random_flip(self, img, boxes):
        '''Randomly flip the image and adjust the bbox locations.

        For bbox (xmin, ymin, xmax, ymax), the flipped bbox is:
        (w-xmax, ymin, w-xmin, ymax).

        Note: `boxes` is modified in place when the flip happens, so callers
        must pass a tensor they own.

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) bbox locations, sized [#obj, 4].

        Returns:
          img: (PIL.Image) randomly flipped image.
          boxes: (tensor) randomly flipped bbox locations, sized [#obj, 4].
        '''
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            w = img.width
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
        return img, boxes

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Images in a batch can differ in size, so each one is copied into the
        top-left corner of a zero tensor as large as the biggest image.

        Args:
          batch: (list) of (image, boxes, labels) samples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        # Plain Python ints: safe to use as tensor dimensions on any
        # PyTorch version.
        max_h = max(im.size(1) for im in imgs)
        max_w = max(im.size(2) for im in imgs)
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, max_h, max_w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            im = imgs[i]
            imh, imw = im.size(1), im.size(2)
            inputs[i, :, :imh, :imw] = im

            # Encode data.
            # NOTE(review): input_size is passed as (height, width) here;
            # confirm this matches the order DataEncoder.encode expects.
            loc_target, cls_target = self.data_encoder.encode(
                boxes[i], labels[i], input_size=(max_h, max_w))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        '''Return the number of lines read from the index file.'''
        return self.num_samples

Пример #8

Показать файл

class ListDataset(data.Dataset):
	'''Detection dataset that returns grayscale images with encoded targets.

	Every index line holds an image file name, the number of objects, and
	then five fields per object: xmin ymin xmax ymax class.
	'''
	img_size = InputImgSize

	def __init__(self, root, list_file, train, transform):
		'''Read the index file and cache per-image boxes and labels.

		Args:
		  root: (str) directory to images.
		  list_file: (str) path to index file.
		  train: (boolean) train or test.
		  transform: ([transforms]) image transforms.
		'''
		self.root, self.train, self.transform = root, train, transform

		self.fnames, self.boxes, self.labels = [], [], []

		self.data_encoder = DataEncoder()

		with open(list_file) as index_file:
			records = index_file.readlines()
		self.num_samples = len(records)

		for record in records:
			fields = record.strip().split()
			self.fnames.append(fields[0])

			# The object count is read from the line itself.
			coords, classes = [], []
			for obj in range(int(fields[1])):
				base = 2 + 5*obj
				xmin, ymin, xmax, ymax, cls = (fields[base + k] for k in range(5))
				coords.append([float(xmin), float(ymin), float(xmax), float(ymax)])
				classes.append(int(cls))
			self.boxes.append(torch.Tensor(coords))
			self.labels.append(torch.LongTensor(classes))

	def __getitem__(self, idx):
		'''Load an image and encode its bbox locations and class labels.

		Args:
		  idx: (int) image index.

		Returns:
		  img: the output of `self.transform` applied to the resized image.
		  loc_target: location targets returned by the encoder.
		  conf_target: label targets returned by the encoder.
		'''
		image_path = os.path.join(self.root, self.fnames[idx])
		# The image is converted to single-channel ('L') mode.
		gray = Image.open(image_path).convert('L')

		# Work on a copy so the cached boxes are never modified.
		boxes = self.boxes[idx].clone()
		labels = self.labels[idx]

		# Divide the coordinates by the original image width and height.
		width, height = gray.size
		divisor = torch.Tensor([width, height, width, height]).expand_as(boxes)
		boxes /= divisor

		side = self.img_size
		img = self.transform(gray.resize((side, side)))

		loc_target, conf_target = self.data_encoder.encode(boxes, labels)
		return img, loc_target, conf_target

	def random_crop(self, img, boxes, labels):
		'''Randomly crop the image and adjust the bbox locations.

		A minimum IoU is drawn at random; `None` means "return the inputs
		unchanged". Otherwise up to 100 random windows are tried before a new
		minimum IoU is drawn.

		Args:
		  img: (PIL.Image) image.
		  boxes: (tensor) bbox locations, sized [#obj, 4].
		  labels: (tensor) bbox labels, sized [#obj,].

		Returns:
		  img: (PIL.Image) cropped image.
		  selected_boxes: (tensor) selected bbox locations.
		  labels: (tensor) selected bbox labels.
		'''
		imw, imh = img.size
		iou_choices = [None, 0.1, 0.3, 0.5, 0.7, 0.9]
		while True:
			min_iou = random.choice(iou_choices)
			if min_iou is None:
				return img, boxes, labels

			for _ in range(100):
				crop_w = random.randrange(int(0.1*imw), imw)
				crop_h = random.randrange(int(0.1*imh), imh)

				# Reject windows with an aspect ratio beyond 2:1.
				if crop_h > 2*crop_w or crop_w > 2*crop_h:
					continue

				left = random.randrange(imw - crop_w)
				top = random.randrange(imh - crop_h)
				right, bottom = left + crop_w, top + crop_h
				roi = torch.Tensor([[left, top, right, bottom]])

				# Keep only the boxes whose centre lies strictly inside the window.
				centers = (boxes[:,:2] + boxes[:,2:]) / 2  # [N,2]
				window = roi.expand(len(centers), 4)  # [N,4]
				inside = (centers > window[:,:2]) & (centers < window[:,2:])  # [N,2]
				keep = inside[:,0] & inside[:,1]  # [N,]
				if not keep.any():
					continue

				selected_boxes = boxes.index_select(0, keep.nonzero().squeeze(1))

				overlap = self.data_encoder.iou(selected_boxes, roi)
				if overlap.min() < min_iou:
					continue

				img = img.crop((left, top, right, bottom))
				# Shift into window coordinates and clamp to the window.
				selected_boxes[:,0].add_(-left).clamp_(min=0, max=crop_w)
				selected_boxes[:,1].add_(-top).clamp_(min=0, max=crop_h)
				selected_boxes[:,2].add_(-left).clamp_(min=0, max=crop_w)
				selected_boxes[:,3].add_(-top).clamp_(min=0, max=crop_h)
				return img, selected_boxes, labels[keep]

	def __len__(self):
		'''Return the number of lines read from the index file.'''
		return self.num_samples

Пример #9

Показать файл

Файл: datagen.py Проект: lizhe960118/find-star

class ListDataset(data.Dataset):
    '''Detection dataset whose images are stored as three single-band files.

    Every index line holds an image name followed by groups of five fields:
    xmin ymin xmax ymax class.
    '''

    def __init__(self, root, list_file, train, transform, input_size):
        '''Read the index file and cache per-image boxes and labels.

        Args:
          root: (str) directory to images. ".data"
          list_file: (str) path to index file. '.data/find_star_split/find_star_train_bbx_gt.txt'
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size. (800 * 800)
        '''
        self.root, self.train = root, train
        self.transform, self.input_size = transform, input_size

        self.fnames = []  # stores the image names
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        with open(list_file) as index_file:
            records = index_file.readlines()
        self.num_samples = len(records)

        for record in records:
            fields = record.strip().split()
            self.fnames.append(fields[0])

            # Only complete five-field groups after the name are used.
            group_count = (len(fields) - 1) // 5
            coords, classes = [], []
            for start in range(1, 1 + 5 * group_count, 5):
                xmin, ymin, xmax, ymax, cls = fields[start:start + 5]
                coords.append(
                    [float(xmin), float(ymin), float(xmax), float(ymax)])
                classes.append(int(cls))
            self.boxes.append(torch.Tensor(coords))
            self.labels.append(torch.LongTensor(classes))

    def __getitem__(self, idx):
        '''Load one image together with its boxes and labels.

        Args:
          idx: (int) image index.

        Returns:
          img: the output of `self.transform` applied to the augmented image.
          boxes: (tensor) box coordinates after augmentation.
          labels: (tensor) class labels as read from the index file.
        '''
        fname = self.fnames[idx]

        # Train and test images share one layout:
        # <root>/<first two characters of the name>/<name>.
        image_path = self.root + '/' + fname[:2] + '/' + fname

        # The three files '_a', '_b', '_c' become the bands of one RGB image.
        bands = tuple(
            Image.open(image_path + suffix)
            for suffix in ('_a.jpg', '_b.jpg', '_c.jpg'))
        img = Image.merge('RGB', bands)

        # Work on a copy so the cached boxes are never modified.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        if not self.train:
            # Evaluation: resize, then center crop to a square.
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))
        else:
            # Training: random flip and crop, then resize to a square.
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))

        return self.transform(img), boxes, labels

    def collate_fn(self, batch):
        '''Stack images into one tensor and encode the targets.

        Args:
          batch: (list) of (image, boxes, labels) samples.

        Returns:
          image batch sized [N, 3, input_size, input_size], stacked location
          targets, stacked class targets.
        '''
        side = self.input_size
        inputs = torch.zeros(len(batch), 3, side, side)

        loc_targets, cls_targets = [], []
        for slot, sample in enumerate(batch):
            inputs[slot] = sample[0]
            loc_target, cls_target = self.encoder.encode(
                sample[1], sample[2], input_size=(side, side))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        '''Return the number of lines read from the index file.'''
        return self.num_samples

Пример #10

Показать файл

Файл: dataset_acn.py Проект: zineos/lightDSFD

class ListDataset(data.Dataset):
    '''Detection dataset read from a text index file, with crop/colour augmentation.

    Every non-blank line of the index file has the form
    ``fname num_faces [x y w h c] * num_faces``. Boxes are stored as
    (xmin, ymin, xmax, ymax) float tensors and ``c`` as the integer label.
    '''

    def __init__(self,
                 list_file,
                 root=None,
                 train=True,
                 transform=None,
                 image_size=96,
                 small_threshold=5,
                 big_threshold=60,
                 setmin=6,
                 setmax=50,
                 fm_size=None,
                 ac_size=None,
                 ac_density=None,
                 stride=4,
                 offset=12):
        '''Read the index file and build the target encoder.

        Args:
          list_file: (str) path to the index file.
          root: (str) prefix that is concatenated with each file name.
          train: (bool) enable the training-time augmentation.
          transform: iterable of callables applied to the image in order
            (``None`` means no transform).
          image_size: (int) side length of the square network input.
          small_threshold: boxes must be strictly larger than this on both
            sides to be kept after augmentation.
          big_threshold: boxes must be strictly smaller than this on both
            sides to be kept after augmentation.
          setmin: minimum side length of a box eligible as crop target; also
            the lower end of the rescale range.
          setmax: side length above which the image is scaled down.
          fm_size, ac_size, ac_density, stride, offset: forwarded unchanged
            to ``DataEncoder``.
        '''
        print('data init')
        self.image_size = image_size
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        # Original author's note: img_48:8,45,10,40  img_36:8,36,10,35
        self.small_threshold = float(small_threshold)
        self.big_threshold = float(big_threshold)
        self.data_encoder = DataEncoder(img_size=image_size,
                                        fm_size=fm_size,
                                        ac_size=ac_size,
                                        ac_density=ac_density,
                                        stride=stride,
                                        offset=offset)
        self.setmin = setmin
        self.setmax = setmax

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            if not splited:
                # A blank (e.g. trailing) line carries no sample.
                continue
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                # The file stores (x, y, w, h); keep corner form internally.
                box.append([x, y, x + w, y + h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def _size_mask(self, boxes):
        '''Return a mask of boxes whose width and height both lie strictly
        between ``small_threshold`` and ``big_threshold``.'''
        boxwh = boxes[:, 2:] - boxes[:, :2]
        return ((boxwh[:, 0] > self.small_threshold) &
                (boxwh[:, 1] > self.small_threshold) &
                (boxwh[:, 0] < self.big_threshold) &
                (boxwh[:, 1] < self.big_threshold))

    def __getitem__(self, idx):
        '''Load one sample, augment it when training, and encode its targets.

        If the image cannot be read, or none of its boxes passes the
        size / aspect-ratio / center filter, another sample is drawn at
        random, so the result does not necessarily belong to ``idx``.

        Args:
          idx: (int) sample index.

        Returns:
          img: the image after every callable in ``self.transform`` ran.
          loc_target, conf_target: output of ``self.data_encoder.encode``.
        '''
        while True:
            fname = self.fnames[idx]
            # NOTE(review): root and fname are concatenated, not joined, so
            # root is expected to end with a path separator.
            img = cv2.imread(os.path.join(self.root + fname))
            if img is None:
                idx = random.randrange(0, self.num_samples)
                continue
            imh, imw, _ = img.shape
            boxes = self.boxes[idx].clone()
            labels = self.labels[idx].clone()
            boxwh = boxes[:, 2:] - boxes[:, :2]
            center = (boxes[:, :2] + boxes[:, 2:]) / 2.
            ratio = boxwh.max(1)[0] / boxwh.min(1)[0]
            # Usable boxes: big enough, not too elongated, center in image.
            mask = ((boxwh[:, 0] >= self.setmin) &
                    (boxwh[:, 1] >= self.setmin) &
                    (ratio < float(self.setmax) / self.setmin) &
                    (center[:, 0] > 0) & (center[:, 0] < imw - 1) &
                    (center[:, 1] > 0) & (center[:, 1] < imh - 1))
            if mask.any():
                break
            else:
                idx = random.randrange(0, self.num_samples)
        if self.train:
            # Rejection-sample one usable box; the crop is built around it.
            while True:
                bbox_idx = random.randint(0, boxwh.size(0) - 1)
                if mask[bbox_idx]:
                    break
            if max(boxwh[bbox_idx][0], boxwh[bbox_idx][1]) > self.setmax:
                # Target box too large: shrink the whole image by a random
                # factor that brings the box into [setmin, setmax].
                oh, ow, _ = img.shape
                fct_min = float(self.setmin / min(boxwh[bbox_idx][0],
                                                  boxwh[bbox_idx][1]))
                fct_max = float(self.setmax / max(boxwh[bbox_idx][0],
                                                  boxwh[bbox_idx][1]))
                factor = random.uniform(fct_min, fct_max)
                img = cv2.resize(img, (0, 0), fx=factor, fy=factor)
                h, w, _ = img.shape
                # Scale boxes by the size ratio actually realised by cv2.
                boxes *= torch.Tensor([
                    float(w) / ow,
                    float(h) / oh,
                    float(w) / ow,
                    float(h) / oh
                ]).expand_as(boxes)
                new_center = (boxes[:, :2] + boxes[:, 2:]) / 2
                tmp = (new_center[:, 0] > 0) & (new_center[:, 0] < w) & (
                    new_center[:, 1] > 0) & (new_center[:, 1] < h)
                if not tmp.any():
                    print('center: %s' % (center,))
                    print('%s %s' % (imw, imh))
                    print('new_center: %s' % (new_center,))
                    print('%s %s' % (w, h))
                assert tmp.any()

            else:
                h, w, _ = img.shape
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                tmp = (center[:, 0] > 0) & (center[:, 0] < w - 1) & (
                    center[:, 1] > 0) & (center[:, 1] < h - 1)
                if not tmp.any():
                    print('center: %s' % (center,))
                    print('%s %s' % (w, h))
                assert tmp.any()

            new_mask = self._size_mask(boxes)
            if not new_mask.any():
                print(boxes)
            assert new_mask.any()

            # Bring the image to image_size x image_size: pad when it is too
            # small, crop around the target box when it is large enough.
            if max(h, w) < self.image_size:
                img, boxes, labels = self.supple_filter(img, boxes, labels)
            elif h >= self.image_size and w >= self.image_size:
                img, boxes, labels = self.random_crop(img, boxes, labels,
                                                      bbox_idx)
            else:
                img, boxes, labels = self.supple(img, boxes, labels)
                img, boxes, labels = self.random_crop(img, boxes, labels,
                                                      bbox_idx)
            # Photometric distortions, contrast either second or last.
            if random.random() < 0.5:
                img = self.random_bright(img)
                img = self.random_contrast(img)
                img = self.random_saturation(img)
                img = self.random_hue(img)
            else:
                img = self.random_bright(img)
                img = self.random_saturation(img)
                img = self.random_hue(img)
                img = self.random_contrast(img)
            img, boxes = self.random_flip(img, boxes)

        h, w, _ = img.shape
        assert (h == w and h == self.image_size)

        boxes_wh = boxes[:, 2:] - boxes[:, :2]
        if ((boxes_wh[:, 0] == 0) | (boxes_wh[:, 1] == 0)).any():
            print(boxes)

        # Normalise box coordinates to [0, 1] relative to the image size.
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        # transform defaults to None; treat that as "no transforms".
        for t in self.transform or ():
            img = t(img)
        loc_target, conf_target = self.data_encoder.encode(idx, boxes, labels)

        return img, loc_target, conf_target

    def random_getim(self):
        '''Return the image, a copy of the boxes and the labels of a random sample.'''
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    def __len__(self):
        '''Return the number of samples read from the index file.'''
        return self.num_samples

    def random_flip(self, im, boxes):
        '''Mirror the image left-right with probability 0.5.

        Args:
          im: (ndarray) image, HxWxC.
          boxes: (tensor) boxes sized [#obj, 4]; modified in place on flip.

        Returns:
          the (possibly flipped) image and the boxes.
        '''
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    def visual(self, im, boxes, idx):
        '''Draw the boxes on ``im`` (in place) and write it to a debug path.'''
        save_path = '/home/michael/data/tmp/wider_acn/%d.jpg' % idx
        for box in boxes:
            x1 = int(box[0])
            x2 = int(box[2])
            y1 = int(box[1])
            y2 = int(box[3])
            cv2.rectangle(im, (x1, y1 + 2), (x2, y2), (0, 255, 0), 2)
        cv2.imwrite(save_path, im)

    def supple(self, im, boxes, labels):
        '''Zero-pad the image on the bottom/right up to ``image_size``.

        Boxes and labels are returned unchanged.
        '''
        h, w, _ = im.shape
        im = cv2.copyMakeBorder(im,
                                0,
                                max(0, self.image_size - h),
                                0,
                                max(0, self.image_size - w),
                                cv2.BORDER_CONSTANT,
                                value=0)
        return im, boxes, labels

    def supple_filter(self, im, boxes, labels):
        '''Zero-pad like ``supple`` and keep only boxes passing the size filter.

        Raises:
          AssertionError: if no box passes the filter.
        '''
        im, _, _ = self.supple(im, boxes, labels)
        mask = self._size_mask(boxes)
        if not mask.any():
            print(boxes)
        assert mask.any()
        keep = mask.nonzero().squeeze(1)
        return im, boxes.index_select(0, keep), labels.index_select(0, keep)

    def random_crop(self, im, boxes, labels, bbox_idx):
        '''Crop an ``image_size`` square that contains the target box.

        Boxes whose center falls outside the crop, or whose size is outside
        (small_threshold, big_threshold), are dropped together with their
        labels.

        Args:
          im: (ndarray) image, HxWxC.
          boxes: (tensor) boxes sized [#obj, 4].
          labels: (tensor) labels sized [#obj].
          bbox_idx: (int) index of the box the crop must contain.

        Returns:
          cropped image, remaining boxes (in crop coordinates), remaining
          labels.

        Raises:
          AssertionError: if no box survives or the crop is not square.
        '''
        imh, imw, _ = im.shape
        w = self.image_size
        h = w
        tgt_box = boxes[bbox_idx]
        # Choose the crop origin so that the target box lies inside the crop.
        if tgt_box[0] <= 0 or imw == w:
            x = 0
        elif tgt_box[2] >= imw:
            x = imw - 1 - w
        else:
            x_min = int(max(0, tgt_box[2] - w))
            x_max = int(min(tgt_box[0], imw - w))
            x = random.randint(x_min, x_max)
        if tgt_box[1] <= 0 or imh == h:
            y = 0
        elif tgt_box[3] >= imh:
            y = imh - 1 - h
        else:
            y_min = int(max(0, tgt_box[3] - h))
            y_max = int(min(tgt_box[1], imh - h))
            y = random.randint(y_min, y_max)
        roi = torch.Tensor([[x, y, x + w, y + h]])
        center = (boxes[:, :2] + boxes[:, 2:]) / 2
        roi2 = roi.expand(len(center), 4)
        mask = (center > roi2[:, :2]) & (center < roi2[:, 2:] + 1)
        mask = mask[:, 0] & mask[:, 1]
        if not mask.any():
            print('roi: %s' % (roi,))
            print('center: %s' % (center,))
            print('box: %s' % (boxes,))
            print('img: %s %s' % (imw, imh))
        assert mask.any()

        in_roi = mask.nonzero().squeeze(1)
        selected_boxes = boxes.index_select(0, in_roi)
        # Filter the labels with the same index so they stay aligned with
        # the boxes through the second (size) filter below.
        selected_labels = labels.index_select(0, in_roi)
        img = im[y:y + h, x:x + w, :]
        tmph, tmpw, _ = img.shape
        if tmph != tmpw:
            print('%s %s %s %s %s %s %s %s' % (tgt_box[0], tgt_box[2], x, y,
                                               imw, imh, tmph, tmpw))
        assert tmph == tmpw
        # Shift the boxes into crop coordinates (not clamped to the crop).
        selected_boxes[:, 0].add_(-x)
        selected_boxes[:, 1].add_(-y)
        selected_boxes[:, 2].add_(-x)
        selected_boxes[:, 3].add_(-y)
        mask = self._size_mask(selected_boxes)
        if not mask.any():
            print(selected_boxes)
            print('boxes: %s' % (boxes,))
            print('roi: %s' % (roi,))
            print('center: %s' % (center,))
            print('idx: %s' % (bbox_idx,))
            print('img: %s %s' % (imw, imh))
            cv2.imwrite('wrong.jpg', img)
        assert mask.any()

        keep = mask.nonzero().squeeze(1)
        return (img, selected_boxes.index_select(0, keep),
                selected_labels.index_select(0, keep))

    def random_bright(self, im, delta=32):
        '''With probability 0.5 add a random offset in [-delta, delta) to every pixel.'''
        if random.random() > 0.5:
            shift = random.randrange(-delta, delta)
            # Widen first: adding to a uint8 array would wrap around (or
            # raise for negative offsets on NumPy 2) before the clip.
            im = im.astype(np.int32) + shift
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def random_contrast(self, im):
        '''With probability 0.5 multiply every pixel by a factor in [0.5, 1.5].'''
        if random.random() > 0.5:
            alpha = random.uniform(0.5, 1.5)
            im = im * alpha
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def random_saturation(self, im):
        '''With probability 0.5 scale the HSV saturation channel by a factor in [0.5, 1.5].'''
        if random.random() > 0.5:
            alpha = random.uniform(0.5, 1.5)
            hsv_im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            hsv_im = hsv_im * [1.0, alpha, 1.0]
            hsv_im = hsv_im.clip(min=0, max=255).astype(np.uint8)
            im = cv2.cvtColor(hsv_im, cv2.COLOR_HSV2BGR)
        return im

    def random_hue(self, im, delta=18):
        '''With probability 0.5 shift the HSV hue channel by an offset in [-delta, delta).

        The shifted hue is clipped to [0, 179] rather than wrapped around.
        '''
        if random.random() > 0.5:
            alpha = random.randrange(-delta, delta)
            hsv_im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            hsv_im = hsv_im + [alpha, 0, 0]
            hsv_im = hsv_im.clip(min=0, max=179).astype(np.uint8)
            im = cv2.cvtColor(hsv_im, cv2.COLOR_HSV2BGR)
        return im

    def testGet(self, idx):
        '''Debug helper: dump the raw and annotated image, then return the
        transformed image with normalised boxes and labels (no encoding).'''
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255))
        cv2.imwrite(fname, img)

        if self.train:
            # random_crop needs a target box; pick one at random.
            bbox_idx = random.randint(0, boxes.size(0) - 1)
            img, boxes, labels = self.random_crop(img, boxes, labels,
                                                  bbox_idx)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h, w, _ = img.shape
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)

        img = cv2.resize(img, (self.image_size, self.image_size))
        for t in self.transform or ():
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels

Пример #11

Показать файл

Файл: train.py Проект: kapitsa2811/RetinaTextBoxespp

# Training-script fragment: wraps an existing ``net`` for multi-GPU use, builds
# the optimizer / encoder / summary writer and starts the epoch loop.
# ``net``, ``cur_lr``, ``args``, ``start_epoch``, ``iteration`` and
# ``trainloader`` are defined outside this fragment.
print("gpu available : ", torch.cuda.is_available())
print("num_gpus : ", torch.cuda.device_count())

# Set data parallel training (device ids 0-3 are hard-coded)
net = torch.nn.DataParallel(net, device_ids=[0,1,2,3])
net.cuda()

# Training
print("==>training start...")
net.train()
# Freeze BN layer for pre-trained backbone
# (freeze_bn lives on the wrapped model, hence ``net.module``)
net.module.freeze_bn()
# Set optimizer -- SGD or Adam
optimizer = optim.SGD(net.parameters(), lr=cur_lr, momentum=0.9, weight_decay=1e-4) # alternative: optim.Adam(net.parameters(), lr=cur_lr)
# Encode anchor to each feature maps
encoder = DataEncoder(cls_thresh=0.5, nms_thresh=0.2)
# Tensorboard visualize recorder
writer = SummaryWriter(logdir=args.logdir)
# NOTE(review): presumably the lowest loss seen so far and a flag telling
# whether a checkpoint should be written; their use is not visible here.
lossest = 1
save_lossest = False

t0 = time.time()
# The epoch count is effectively unbounded; the loop stops once ``iteration``
# exceeds ``args.max_iter``.
for epoch in range(start_epoch, 10000):
    if iteration > args.max_iter:
        break

    for inputs, loc_targets, cls_targets in trainloader:
        # prepare data and cls & loc label
        inputs = Variable(inputs.cuda())
        loc_targets = Variable(loc_targets.cuda())
        cls_targets = Variable(cls_targets.cuda())

Пример #12

Показать файл

class ListDataset(data.Dataset):
    '''Text-detection dataset assembled from one or more corpora.

    Supported corpora (selected by substring of ``dataset``): SynthText,
    ICDAR2015, MLT and ICDAR2013. Every sample stores an image path, an
    [#boxes, 8] float32 array of quadrilaterals (x0, y0, ..., x3, y3) and an
    array of class ids (always 1).
    '''

    def __init__(self,
                 root,
                 dataset,
                 train,
                 transform,
                 input_size,
                 multi_scale=False):
        '''
        Args:
          root: (str) DB root directory.
          dataset: (str) Dataset name(dir); may name several corpora.
          train: (boolean) train or test.
          transform: callable; ``transform(size=...)`` must return a callable
            taking (image, boxes, labels).
          input_size: (int) model input size.
          multi_scale: (bool) use multi-scale training or not.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []
        # Kept in sync with len(self.fnames) by every loader.
        self.num_samples = 0

        self.multi_scale = multi_scale
        # Sizes used for training steps 1 and 2. Step 3 used
        # [960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280].
        self.MULTI_SCALES = [
            608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960
        ]

        self.encoder = DataEncoder()

        if "SynthText" in dataset:
            self.get_SynthText()
        if "ICDAR2015" in dataset:
            self.get_ICDAR2015()
        if "MLT" in dataset:
            self.get_MLT()
        if "ICDAR2013" in dataset:
            self.get_ICDAR2013()

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) dataset index.

        Returns:
          dict with keys "image" (RGB ndarray), "boxes" (copy of the stored
          quadrilaterals) and "labels".
        '''
        # Stored file names already start with self.root (see the loaders),
        # so joining root again would break relative roots.
        fname = self.fnames[idx]

        img = cv2.imread(fname)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        boxes = self.boxes[idx].copy()
        labels = self.labels[idx]

        return {"image": img, "boxes": boxes, "labels": labels}

    def collate_fn(self, batch):
        '''bbox encode and make batch

        With ``multi_scale`` enabled one input size is drawn at random from
        ``MULTI_SCALES`` for the whole batch.

        Args:
          batch: (dict list) images, boxes and labels

        Returns:
          batch_images, batch_loc, batch_cls
        '''
        size = self.input_size
        if self.multi_scale:
            size = random.choice(self.MULTI_SCALES)

        inputs = torch.zeros(len(batch), 3, size, size)
        loc_targets = []
        cls_targets = []

        for n, sample in enumerate(batch):
            img, boxes, labels = self.transform(size=size)(sample['image'],
                                                           sample['boxes'],
                                                           sample['labels'])
            inputs[n] = img
            loc_target, cls_target = self.encoder.encode(
                boxes, labels, input_size=(size, size))

            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        '''Return the number of loaded samples.'''
        return self.num_samples

    def _add_sample(self, img_file, quads):
        '''Store one image with its quadrilaterals; every box gets class 1.'''
        self.fnames.append(img_file)
        self.boxes.append(np.array(quads, dtype=np.float32))
        self.labels.append(np.array([1] * len(quads)))

    def get_SynthText(self):
        '''Load the SynthText annotations from ``SynthText/train/gt.mat``.'''
        import scipy.io as sio
        data_dir = os.path.join(self.root, 'SynthText/train/')

        gt = sio.loadmat(data_dir + 'gt.mat')
        dataset_size = gt['imnames'].shape[1]
        img_files = gt['imnames'][0]
        labels = gt['wordBB'][0]

        print("Training on SynthText : ", dataset_size)

        for i in range(dataset_size):
            img_file = data_dir + str(img_files[i][0])
            label = labels[i]

            # label[0] holds the x and label[1] the y coordinates of the four
            # corners; a third axis, when present, enumerates the boxes.
            if label.ndim == 3:
                quads = [[label[c][k][j] for k in range(4) for c in range(2)]
                         for j in range(label.shape[2])]
            else:
                quads = [[label[c][k] for k in range(4) for c in range(2)]]

            self._add_sample(img_file, quads)
        self.num_samples = len(self.fnames)

    @staticmethod
    def _parse_csv_quad(label, n_fields):
        '''Parse one comma-separated ground-truth line.

        The first eight fields are the quad coordinates and field
        ``n_fields - 1`` is the transcription. Returns ``None`` for lines
        whose transcription contains "###".
        '''
        fields = label.split(",")[:n_fields]
        if len(fields) != n_fields:
            raise ValueError("expected %d fields, got %d: %r" %
                             (n_fields, len(fields), label))
        if "###" in fields[-1]:
            return None
        try:
            first = int(fields[0])
        except ValueError:
            # The first token of a file may carry a one-character prefix
            # (e.g. a byte-order mark); retry without it.
            first = int(fields[0][1:])
        return [first] + [int(p) for p in fields[1:8]]

    @staticmethod
    def _parse_rect_quad(label):
        '''Parse a space-separated "xmin ymin xmax ymax ..." line into a
        quadrilateral listed from the top-left corner, clockwise.'''
        _xmin, _ymin, _xmax, _ymax = [int(p) for p in label.split(" ")[:4]]
        return [_xmin, _ymin, _xmax, _ymin, _xmax, _ymax, _xmin, _ymax]

    def _load_annotated_dir(self, sub_dir, title, parse_line):
        '''Load every ``<mode>/<name>.jpg`` + ``<mode>/gt_<name>.txt`` pair.

        Args:
          sub_dir: (str) corpus directory below ``self.root`` (trailing "/").
          title: (str) corpus name used in the progress message.
          parse_line: callable turning one annotation line into a list of
            eight coordinates, or ``None`` to skip the line.
        '''
        data_dir = os.path.join(self.root, sub_dir)
        mode = 'train' if self.train else 'test'

        # List the directory that is actually read below (train or test).
        dataset_list = os.listdir(data_dir + mode)
        dataset_list = [l[:-4] for l in dataset_list if "jpg" in l]

        print(mode, "ing on %s : " % title, len(dataset_list))

        for name in dataset_list:
            img_file = data_dir + "%s/%s.jpg" % (mode, name)
            with open(data_dir + "%s/gt_%s.txt" % (mode, name)) as f:
                label_lines = f.readlines()

            quads = []
            for label in label_lines:
                if not label.strip():
                    continue
                quad = parse_line(label)
                if quad is not None:
                    quads.append(quad)

            # Images without any usable box are left out of the dataset.
            if not quads:
                continue
            self._add_sample(img_file, quads)
        self.num_samples = len(self.fnames)

    def get_ICDAR2015(self):
        '''Load ICDAR2015 (lines: 8 coordinates, transcription).'''
        self._load_annotated_dir(
            'ICDAR2015_Incidental/', 'ICDAR2015',
            lambda label: self._parse_csv_quad(label, 9))

    def get_MLT(self):
        '''Load MLT (lines: 8 coordinates, language, transcription).'''
        self._load_annotated_dir(
            'MLT/', 'MLT', lambda label: self._parse_csv_quad(label, 10))

    def get_ICDAR2013(self):
        '''Load ICDAR2013 (lines: xmin ymin xmax ymax ...).'''
        self._load_annotated_dir('ICDAR2013_FOCUSED/', 'ICDAR2013',
                                 self._parse_rect_quad)

Пример #13

Показать файл

def prediction(bin_of_images, checkpoint_dir, minimum_idx, result_dir):
    '''Run the detector on every image and write boxes, overlay and mask.

    For each input image three files are written to ``result_dir``:
    ``<name>.result`` (one tab-separated line per box: four coordinates and
    the label), the image itself with the boxes drawn in red, and
    ``<name>.png`` holding channel 1 of the soft-maxed mask prediction.

    Args:
      bin_of_images: iterable of image file paths ("/"-separated).
      checkpoint_dir: (str) directory containing ``ckpt-<minimum_idx>.pth``.
      minimum_idx: index of the checkpoint to load.
      result_dir: (str) existing directory receiving the output files.
    '''
    print('Loading model..')

    ckpt_path = checkpoint_dir + "/ckpt-" + str(minimum_idx) + ".pth"
    if torch.cuda.is_available():
        load_pth = torch.load(ckpt_path)
    else:
        # Without CUDA keep every tensor on the storage it is loaded into.
        load_pth = torch.load(ckpt_path,
                              map_location=lambda storage, loc: storage)

    valid_loss = load_pth['loss']
    print("valid loss : " + str(valid_loss))

    num_classes = load_pth['num_classes']
    num_batch = load_pth['batch']
    num_crops = load_pth['crops']
    print("num. batch : " + str(num_batch))
    print("num. crops : " + str(num_crops))

    net = load_sstdnet(num_classes=num_classes, using_pretrained=False)
    net.load_state_dict(load_pth['net'])
    net.eval()

    transform = transforms.Compose([transforms.ToTensor()])

    for img_file in bin_of_images:
        img = Image.open(img_file)
        w = img.width
        h = img.height

        print('Predicting : ' + img_file)
        x = transform(img)
        x = x.unsqueeze(0)
        x = Variable(x, volatile=True)
        loc_preds, cls_preds, mask_pred = net(x)

        encoder = DataEncoder()
        boxes, labels = encoder.decode(loc_preds.data.squeeze(),
                                       cls_preds.data.squeeze(), (w, h))

        draw = ImageDraw.Draw(img)

        img_file_name = img_file.split("/")[-1]
        txt_file_name = img_file_name.replace(".jpg", ".result")

        # The context manager closes the file even if drawing/writing fails.
        with open(result_dir + "/" + txt_file_name, 'w') as result_txt:
            for result_idx in range(len(boxes)):
                box = boxes[result_idx]
                draw.rectangle(list(box), outline='red')
                fields = [str(box[k]) for k in range(4)]
                fields.append(str(labels[result_idx]))
                result_txt.write("\t".join(fields) + "\n")

        img.save(result_dir + "/" + img_file_name)

        # mask_pred is indexed with four axes below, so the class axis is 1;
        # name it explicitly instead of relying on the implicit choice.
        mask_pred = F.softmax(mask_pred, dim=1)
        mask_data = mask_pred.data.numpy()
        mask_data = mask_data[:, 1:2, :, :]
        mask_data = np.squeeze(mask_data)
        mask_img = Image.fromarray(np.uint8(mask_data * 255.), 'L')
        mask_img.save(result_dir + "//" +
                      img_file_name.replace(".jpg", ".png"))

Пример #14

Показать файл

class ListDataset(data.Dataset):
    '''Vehicle/pedestrian detection dataset read from a CSV-style list file.

    Every non-blank line of the list file is
    ``fname,class_name,xmin,ymin,xmax,ymax``; lines sharing a file name are
    grouped into one sample.
    '''
    classes = [
        "articulated_truck", "bicycle", "bus", "car", "motorcycle",
        'motorized_vehicle', "non-motorized_vehicle", "pedestrian",
        "pickup_truck", "single_unit_truck", "work_van"
    ]
    n_class = len(classes)

    def __init__(self, root, list_file, train, transform, input_size,
                 max_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) side length images are resized to.
          max_size: (int) maximum image longer side size (stored, but not
            used by the current ``resize``).

        Raises:
          ValueError: if a line names a class that is not in ``classes``.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size
        self.max_size = max_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)
        datas = defaultdict(lambda: {'box': [], 'label': []})
        for line in lines:
            stripped = line.strip()
            if not stripped:
                # Skip blank (e.g. trailing) lines instead of failing to unpack.
                continue
            fname, c, xmin, ymin, xmax, ymax = stripped.split(',')
            # list.index raises ValueError for an unknown class name.
            lab = self.classes.index(c)
            datas[fname]['box'].append(
                [float(xmin),
                 float(ymin),
                 float(xmax),
                 float(ymax)])
            datas[fname]['label'].append(lab)

        for name, vals in datas.items():
            self.fnames.append(name + '.jpg')
            self.boxes.append(torch.Tensor(vals['box']))
            self.labels.append(torch.LongTensor(vals['label']))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) object boxes after augmentation and resizing.
          labels: (tensor) class labels.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        # Work on a copy: random_flip / scale_jitter modify boxes in place
        # and must not alter the stored annotations.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        # Data augmentation while training.
        if self.train:
            img, boxes = self.random_flip(img, boxes)
            img, boxes = self.scale_jitter(img, boxes)

        img, boxes = self.resize(img, boxes)
        img = self.transform(img)
        return img, boxes, labels

    def resize(self, img, boxes):
        '''Resize the image to input_size x input_size and scale the boxes.

        The aspect ratio is not preserved; width and height are scaled
        independently.

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) object boxes, sized [#obj, 4].

        Returns:
          (PIL.Image) resized image.
          (tensor) resized object boxes.
        '''
        w = h = self.input_size
        ws = 1.0 * w / img.width
        hs = 1.0 * h / img.height

        scale = torch.Tensor([ws, hs, ws, hs])
        return img.resize((w, h)), scale * boxes

    def random_flip(self, img, boxes):
        '''Randomly flip the image and adjust the boxes.

        For box (xmin, ymin, xmax, ymax), the flipped box is:
        (w-xmax, ymin, w-xmin, ymax).

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) object boxes, sized [#obj, 4]; modified in place.

        Returns:
          img: (PIL.Image) randomly flipped image.
          boxes: (tensor) randomly flipped boxes, sized [#obj, 4].
        '''
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            w = img.width
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
        return img, boxes

    def scale_jitter(self, img, boxes):
        '''Scale image width and height independently by factors in [3/4,4/3].

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) object boxes, sized [#obj, 4]; modified in place.

        Returns:
          img: (PIL.Image) scaled image.
          boxes: (tensor) scaled object boxes, sized [#obj, 4].
        '''
        imw, imh = img.size
        sw = random.uniform(3 / 4., 4 / 3.)
        sh = random.uniform(3 / 4., 4 / 3.)
        w = int(imw * sw)
        h = int(imh * sh)
        img = img.resize((w, h))
        # Even columns are x coordinates, odd columns are y coordinates.
        boxes[:, ::2] *= sw
        boxes[:, 1::2] *= sh
        return img, boxes

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Images may differ in size, so each one is copied into the top-left
        corner of a zero tensor as large as the biggest image of the batch.

        Args:
          batch: (list) of (image, boxes, labels) samples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        max_h = max([im.size(1) for im in imgs])
        max_w = max([im.size(2) for im in imgs])
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, max_h, max_w)

        loc_targets = []
        cls_targets = []
        for i, im in enumerate(imgs):
            imh, imw = im.size(1), im.size(2)
            inputs[i, :, :imh, :imw] = im

            # Encode data.
            loc_target, cls_target = self.data_encoder.encode(
                boxes[i],
                labels[i],
                input_size=(max_w, max_h),
                train=self.train)
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        '''Return the number of distinct images.'''
        return len(self.fnames)

Пример #15

Показать файл

Файл: draw_box.py Проект: Jaekyumkim/spml_retinanet_fixedsize

def main():
    '''Draw ground-truth and predicted boxes on the first 100 test images.

    Parses the command line, loads the RetinaNet checkpoint
    ``./<weight_path>/retina_<epoch>.pth``, runs every listed image (resized
    to 600x600) through the network and writes an annotated copy of the
    original image to ``<weight_path>/test_img/val_epoch_<epoch>/``.

    Raises:
        ValueError: if ``--data`` or ``--loss_fn`` is not a supported value.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', '-data', type=str, default='VOC')
    parser.add_argument('--loss_fn', '-loss', type=str, default='sigmoid')
    parser.add_argument('--epoch', '-e', type=str, default='None')
    # --debug is still accepted for command-line compatibility; it only ever
    # toggled a worker count that this script never used.
    parser.add_argument('--debug', '-d', type=str, default='False')
    parser.add_argument('--weight_path', '-w', type=str, default='None')
    args = parser.parse_args()

    if args.loss_fn not in ('sigmoid', 'softmax'):
        raise ValueError('unsupported --loss_fn: {}'.format(args.loss_fn))

    scale = 600  # side length the image is resized to before inference
    use_cuda = torch.cuda.is_available()
    save_path = args.weight_path

    # Create the whole output tree once, tolerating existing directories.
    out_dir = save_path + '/test_img/val_epoch_{}'.format(args.epoch)
    os.makedirs(out_dir, exist_ok=True)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    if args.data == "VOC":
        test_root = '/media/NAS/dataset/PASCALVOC/VOCdevkit/07+12/test.txt'
        voc_label = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
                     'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
                     'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
                     'sofa', 'train', 'tvmonitor']
        if args.loss_fn == 'softmax':
            # The softmax head reserves index 0 for the background class.
            voc_label = ['background'] + voc_label
        num_classes = len(voc_label)
    elif args.data == "COCO":
        test_root = '/media/NAS/dataset/COCO/minival2014/test.txt'
        num_classes = 80 if args.loss_fn == 'sigmoid' else 81
        # No COCO name table is available here; fall back to the numeric
        # class index so the drawing code below has a caption to use.
        voc_label = [str(i) for i in range(num_classes)]
    else:
        raise ValueError('unsupported --data: {}'.format(args.data))

    # `device` is published as a module-level global, as before.
    global device
    device = torch.device("cuda" if use_cuda else "cpu")

    print('Loading model..')
    weights = './{}/retina_{}.pth'.format(args.weight_path, args.epoch)

    model = RetinaNet(num_classes)

    # map_location lets a checkpoint saved on GPU be opened on a CPU host.
    checkpoint = torch.load(weights, map_location=device)
    if use_cuda:
        model = torch.nn.DataParallel(model).to(device)
    # NOTE(review): on CPU the model is not wrapped in DataParallel, so a
    # checkpoint whose keys carry a 'module.' prefix may not load - confirm.
    model.load_state_dict(checkpoint['state_dict'])
    print('\nTest')

    with open(test_root, 'r') as file:
        lines = file.readlines()

    encoder = DataEncoder(args.loss_fn)
    model.eval()
    for img_idx in lines[:100]:
        img_path = img_idx.rstrip()
        labelpath = img_path.replace('images', 'labels') \
                            .replace('JPEGImages', 'labels') \
                            .replace('.jpg', '.txt') \
                            .replace('.png', '.txt')
        img = Image.open(img_path).convert('RGB')
        label = load_label(labelpath, img)

        # Batch of one: [1, 3, scale, scale].
        data = transform(img.resize((scale, scale))).unsqueeze(0)
        inputs = data.to(device)

        # Inference only: skip autograd bookkeeping, and feed the tensor that
        # already lives on `device` (a hard .cuda() call breaks CPU runs).
        with torch.no_grad():
            loc_preds_split, cls_preds_split = model(inputs)
            loc_preds_nms, cls_preds_nms, score = encoder.decode(
                loc_preds_split,
                cls_preds_split,
                data.shape,
                data[0].shape,
                0)

        # NOTE(review): the file name is taken as the last 10 characters of
        # the path (e.g. '000001.jpg'); longer base names get truncated.
        image_id = img_path[-10:]

        if score.shape[0] != 0:
            box_preds = loc_preds_nms.cpu().detach().numpy().astype(int).tolist()
            category_preds = cls_preds_nms.cpu().detach().numpy().astype(int)
            # Scores are kept as strings; the caption shows their first
            # four characters.
            score_preds = score.cpu().detach().numpy().astype(str).tolist()
        else:
            box_preds = []
            category_preds = []
            score_preds = []

        new_img = cv2.imread(img_path)
        img_h, img_w = new_img.shape[0], new_img.shape[1]

        # Ground truth: rows are read as (class, xmin, ymin, xmax, ymax).
        for i in range(int(label.shape[0])):
            row = label[i]
            coor_min = (int(row[1]), int(row[2]))
            coor_max = (int(row[3]), int(row[4]))
            cls = int(row[0])
            cv2.rectangle(new_img, coor_min, coor_max, (250, 0, 0), 2)
            cv2.putText(new_img, voc_label[cls] + ' | ' + 'GT',
                        (coor_min[0]+5, coor_min[1]-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.2, (255, 255, 255), 1,
                        cv2.LINE_AA)

        # Predictions live in the resized (scale x scale) frame; clamp
        # negatives to zero, then map back onto the original image.
        for idx, box_pred in enumerate(box_preds):
            xmin, ymin, xmax, ymax = (max(0, int(float(v)))
                                      for v in box_pred[:4])
            box_pred_min = (int(xmin*img_w/scale), int(ymin*img_h/scale))
            box_pred_max = (int(xmax*img_w/scale), int(ymax*img_h/scale))
            cls_name = voc_label[int(category_preds[idx])]
            conf = score_preds[idx][:4]
            cv2.rectangle(new_img, box_pred_min, box_pred_max, (0, 250, 0), 2)
            cv2.putText(new_img, cls_name + ' | ' + conf,
                        (box_pred_min[0]+5, box_pred_min[1]-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1,
                        cv2.LINE_AA)

        new_path = out_dir + '/' + image_id
        cv2.imwrite(new_path, new_img)
        print(image_id)

Пример #16

Показать файл

Файл: scan_level_test.py Проект: yangyin2016/pyramid-detection-3D

def iter_scan(scan,
              scan_array,
              patient_df,
              net,
              cube_size=64,
              stride=50,
              iou=0.01):
    """Slide a cube over a 3-D scan, detect in each cube and score vs. GT.

    The volume is walked with step ``stride`` along z, y and x. Each
    ``cube_size``-sided window (zero-padded at the borders) is run through
    ``net``; the decoded AIS and MIA detections are shifted into scan
    coordinates, merged with NMS and matched against the ground truth.

    Args:
        scan: identifier written to the ``scan_id`` column of the result.
        scan_array: 3-D array indexed as (z, y, x).
        patient_df: annotation table passed to ``annotation``.
        net: network called on each cube; must return ``(locs, clss)``.
        cube_size: edge length of the cube fed to the network.
        stride: step between consecutive cube origins.
        iou: unused; kept so existing callers keep working.

    Returns:
        ``(ais_count, mia_count, scan_df)`` where ``scan_df`` has the columns
        ``scan_id, z, y, x, iou`` with one row per kept detection.
    """
    columns = ["scan_id", "z", "y", "x", "iou"]
    start_time = time.time()
    gt_boxes, gt_labels = annotation(patient_df)
    ais_gt_boxes, mia_gt_boxes = split_class(gt_boxes, gt_labels)

    # One uninitialised placeholder row per accumulator so torch.cat always
    # has a first operand; the row is dropped with [1:] before use.
    ais_locs = torch.FloatTensor(1, 6)
    ais_probs = torch.FloatTensor(1)
    mia_locs = torch.FloatTensor(1, 6)
    mia_probs = torch.FloatTensor(1)

    # Built once instead of once per cube.
    encoder = DataEncoder()
    cube_shape = [cube_size, cube_size, cube_size]
    depth, height, width = (scan_array.shape[0], scan_array.shape[1],
                            scan_array.shape[2])

    for z in range(0, depth, stride):
        zmax = min(z + cube_size, depth)
        for y in range(0, height, stride):
            ymax = min(y + cube_size, height)
            for x in range(0, width, stride):
                xmax = min(x + cube_size, width)
                start_coord = torch.FloatTensor([z, y, x])

                # Zero-padded cube; border windows only fill a corner.
                cube_sample = np.zeros(cube_shape, dtype=np.float32)
                cube_sample[:zmax - z, :ymax - y, :xmax - x] = \
                    scan_array[z:zmax, y:ymax, x:xmax]
                # Add batch and channel axes: [1, 1, D, H, W].
                cube_sample = cube_sample[np.newaxis, np.newaxis]

                input_cube = Variable(torch.from_numpy(cube_sample).cuda())
                locs, clss = net(input_cube)
                locs = locs.data.cpu().squeeze()
                clss = clss.data.cpu().squeeze()
                (ais_boxes, ais_scores, _, mia_boxes, mia_scores,
                 _) = encoder.decode(locs, clss, cube_shape)

                # An int in place of the boxes is treated as "nothing found".
                if not isinstance(ais_boxes, int):
                    ais_boxes = calc_scan_coord(ais_boxes, start_coord)
                    ais_locs = torch.cat([ais_locs, ais_boxes], 0)
                    ais_probs = torch.cat([ais_probs, ais_scores], 0)

                if not isinstance(mia_boxes, int):
                    mia_boxes = calc_scan_coord(mia_boxes, start_coord)
                    mia_locs = torch.cat([mia_locs, mia_boxes], 0)
                    mia_probs = torch.cat([mia_probs, mia_scores], 0)

    print(time.time() - start_time)

    # One-row frames are collected and concatenated once at the end;
    # DataFrame.append was removed in pandas 2.0.
    rows = []

    if not isinstance(ais_gt_boxes, int):
        ais_locs = ais_locs[1:, :]
        ais_probs = ais_probs[1:]
        ais_keep = box_nms(ais_locs, ais_probs)
        ais_locs = ais_locs[ais_keep]
        ais_probs = ais_probs[ais_keep]
        ais_count, best_ious = find_best_pred(ais_gt_boxes, ais_locs)
        ais_locs = change_box_order(ais_locs, "zyxzyx2zyxdhw")
        for i in range(ais_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": ais_locs[i, 0],
                "y": ais_locs[i, 1],
                "x": ais_locs[i, 2],
                "iou": best_ious[i]
            }
            rows.append(pd.DataFrame(data=insert, index=["0"]))
    else:
        ais_count = np.zeros(3)

    if not isinstance(mia_gt_boxes, int):
        mia_locs = mia_locs[1:, :]
        mia_probs = mia_probs[1:]
        mia_keep = box_nms(mia_locs, mia_probs)
        mia_locs = mia_locs[mia_keep]
        mia_probs = mia_probs[mia_keep]
        mia_count, best_ious = find_best_pred(mia_gt_boxes, mia_locs)
        # NOTE(review): unlike the AIS branch, MIA boxes are not passed
        # through change_box_order before z/y/x are recorded - confirm
        # whether that asymmetry is intended.
        for i in range(mia_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": mia_locs[i, 0],
                "y": mia_locs[i, 1],
                "x": mia_locs[i, 2],
                "iou": best_ious[i]
            }
            rows.append(pd.DataFrame(data=insert, index=["0"]))
    else:
        mia_count = np.zeros(3)

    if rows:
        scan_df = pd.concat(rows, ignore_index=True)[columns]
    else:
        scan_df = pd.DataFrame(columns=columns)

    return ais_count, mia_count, scan_df

Пример #17

Показать файл

Файл: SSD_Core.py Проект: phantanphuc/LVTN

class SSD_Core:
    """SSD300 inference wrapper that draws detections onto an image.

    Class names come from ``./label.txt`` (first space-separated token of
    each line) and the network weights from ``args.resuming_model``.
    """

    def __init__(self):
        # One entry per line of the label file.
        with open('./label.txt') as f:
            self.dictindex = [
                line.replace('\n', '').split(' ')[0] for line in f
            ]

        # Load model
        self.net = SSD300()
        checkpoint = torch.load(args.resuming_model)
        self.net.load_state_dict(checkpoint['net'])
        self.net.eval()

        self.data_encoder = DataEncoder()

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ])

    def generatePrediction(self, imgpath, outname):
        """Detect objects in one image, save an annotated copy, describe it.

        Args:
            imgpath: path of the image to analyse.
            outname: file name used for the annotated copy in ``./temp/``.

        Returns:
            A string ``'null <count>'`` followed, for every detection, by
            ``' <x1> <y1> <x2> <y2> <label - 1>'`` with the coordinates
            scaled by ``InputImgSize``.
        """
        # Load test image (converted to single-channel 'L' mode).
        img = Image.open(imgpath).convert('L')
        net_input = self.transform(img.resize((InputImgSize, InputImgSize)))

        # Forward
        loc, conf = self.net(Variable(net_input[None, :, :, :], volatile=True))

        # Decode
        # NOTE(review): softmax is called without an explicit `dim`, so the
        # axis is chosen implicitly by PyTorch - confirm and pin it.
        boxes, labels, scores = self.data_encoder.decode(
            loc.data.squeeze(0),
            F.softmax(conf.squeeze(0)).data)

        draw = ImageDraw.Draw(img)

        # Multiplying yields a fresh array, so the in-place rescaling of
        # `boxes` below does not leak into the reported coordinates.
        boxes_np = boxes.numpy() * InputImgSize
        labels_np = labels.numpy()

        parts = ['null ' + str(len(boxes))]
        font = None  # loaded lazily, once, on the first detection
        for i in range(len(boxes)):
            fields = [str(int(v)) for v in boxes_np[i][:4]]
            fields.append(str(int(labels_np[i][0]) - 1))
            parts.append(' '.join(fields))

            # Scale the box to the size of the original image.
            boxes[i][::2] *= img.width
            boxes[i][1::2] *= img.height
            draw.rectangle(list(boxes[i]), outline='red')

            if font is None:
                font = ImageFont.truetype("./font/arial.ttf")
            draw.text((boxes[i][0], boxes[i][1]),
                      self.dictindex[labels_np[i, 0] - 1],
                      font=font)

        img.save('./temp/' + outname)

        return ' '.join(parts)

Пример #18

Показать файл

Файл: imgdataset.py Проект: chicm/detect

class ImageDataset(data.Dataset):
    """Detection dataset serving ``<img_id>.jpg`` files read with OpenCV.

    When ``has_label`` is true, ``bbox_dict[img_id]`` must hold
    ``(label, box)`` pairs for every id; the boxes are multiplied by the
    configured input size.
    """

    def __init__(self, img_ids, img_dir, bbox_dict, has_label=True):
        self.input_size = settings.IMG_SZ
        self.img_ids = img_ids
        self.img_dir = img_dir
        self.num = len(img_ids)
        self.bbox_dict = bbox_dict
        self.has_label = has_label
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        if not has_label:
            return

        for img_id in img_ids:
            if img_id not in bbox_dict:
                raise ValueError('No bbox: {}'.format(img_id))
            entries = bbox_dict[img_id]
            box = [entry[1] for entry in entries]
            label = [entry[0] for entry in entries]
            self.boxes.append(torch.Tensor(box) * self.input_size)
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, index):
        """Return ``(img, boxes, labels)``, or ``[img]`` without labels."""
        fn = os.path.join(self.img_dir, '{}.jpg'.format(self.img_ids[index]))
        img = self.transform(cv2.imread(fn))

        if not self.has_label:
            return [img]
        return img, self.boxes[index], self.labels[index]

    def __len__(self):
        """Return the number of image ids."""
        return self.num

    def collate_fn(self, batch):
        """Stack images and, when labels exist, encode the targets.

        Args:
          batch: (list) of samples as produced by ``__getitem__``.

        Returns:
          images, stacked loc_targets, stacked cls_targets when labels are
          available; otherwise only the images.
        """
        side = self.input_size
        inputs = torch.zeros(len(batch), 3, side, side)

        loc_targets, cls_targets = [], []
        for slot, sample in enumerate(batch):
            inputs[slot] = sample[0]
            if self.has_label:
                loc_target, cls_target = self.encoder.encode(
                    sample[1], sample[2], input_size=(side, side))
                loc_targets.append(loc_target)
                cls_targets.append(cls_target)

        if not self.has_label:
            return inputs
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

Пример #19

Показать файл

Файл: datagen.py Проект: loozy5331/pytorch-retinanet

class ListDataset(data.Dataset):
    '''Detection dataset driven by a whitespace-separated index file.

    Every line of the index file reads
    ``fname xmin ymin xmax ymax class [xmin ymin xmax ymax class ...]``.
    '''

    def __init__(self, root, list_file, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
        self.num_samples = len(lines)

        for line in lines:
            tokens = line.strip().split()
            self.fnames.append(tokens[0])

            # After the file name, every object takes five tokens.
            box, label = [], []
            for k in range((len(tokens) - 1) // 5):
                xmin, ymin, xmax, ymax, c = tokens[1 + 5 * k:6 + 5 * k]
                box.append(
                    [float(xmin), float(ymin), float(xmax), float(ymax)])
                label.append(int(c))

            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, idx):
        '''Load and preprocess one image together with its boxes.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) transformed image.
          boxes: (tensor) boxes adjusted to the preprocessed image.
          labels: (tensor) class labels.
        '''
        img = Image.open(os.path.join(self.root, self.fnames[idx]))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Work on a copy so the cached boxes are never modified.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        side = self.input_size
        target = (side, side)

        if not self.train:
            # Evaluation: resize, then crop the centre.
            img, boxes = resize(img, boxes, side)
            img, boxes = center_crop(img, boxes, target)
        else:
            # Training: random crop, then resize (random_flip is not applied).
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, target)

        return self.transform(img), boxes, labels

    def collate_fn(self, batch):
        '''Stack images and encode targets.

        Args:
          batch: (list) of samples indexed as (image, boxes, labels).

        Returns:
          images, stacked loc_targets, stacked cls_targets.
        '''
        side = self.input_size
        inputs = torch.zeros(len(batch), 3, side, side)

        loc_targets, cls_targets = [], []
        for slot, sample in enumerate(batch):
            inputs[slot] = sample[0]
            loc_target, cls_target = self.encoder.encode(
                sample[1], sample[2], input_size=(side, side))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)

        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        '''Return the number of lines read from the index file.'''
        return self.num_samples

Пример #20

Показать файл

def main():
    """Train a DSOD detector, evaluating and checkpointing along the way.

    Parses the command line, builds the train/test loaders, creates the
    network (resumed from ``./checkpoint/ckpt_<NNN>.pth`` or freshly
    initialised), then runs ``train`` and ``test`` for every epoch and saves
    a checkpoint whenever the epoch number is a multiple of ten.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSz', type=int, default=1, help='batch size')
    parser.add_argument('--nEpochs', type=int, default=300,
                        help='number of epoch to end training')
    parser.add_argument('--lr', type=float, default=1e-5, help='learning rate')
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--wd', type=float, default=5e-4, help='weight decay')
    parser.add_argument('--opt', type=str, default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    parser.add_argument('--resume', '-r', action='store_true',
                        help='resume from checkpoint')
    parser.add_argument('--resume_from', type=int, default=220,
                        help='resume from which checkpoint')
    parser.add_argument('--visdom', '-v', action='store_true',
                        help='use visdom for training visualization')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()
    best_loss = float('inf')  # best test loss
    start_epoch = 0  # overwritten when resuming

    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Scale((300, 300)),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.Scale((300, 300)),
        transforms.ToTensor(),
        normalize,
    ])

    # Data
    loader_kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
    trainset = ListDataset(root=cfg.img_root, list_file=cfg.label_train,
                           train=True, transform=train_transform)
    train_loader = DataLoader(trainset, batch_size=args.batchSz,
                              shuffle=True, **loader_kwargs)
    testset = ListDataset(root=cfg.img_root, list_file=cfg.label_test,
                          train=False, transform=test_transform)
    test_loader = DataLoader(testset, batch_size=args.batchSz,
                             shuffle=False, **loader_kwargs)

    # Model
    net = DSOD(growthRate=48, reduction=1)
    if not args.resume:
        print('==> Initializing weight...')

        def init_weights(m):
            # Only Conv2d weights are re-initialised; biases are untouched.
            if isinstance(m, nn.Conv2d):
                init.xavier_uniform(m.weight.data)

        net.apply(init_weights)
    else:
        print('==> Resuming from checkpoint...')
        checkpoint = torch.load(
            './checkpoint/ckpt_{:03d}.pth'.format(args.resume_from))
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['loss']
        start_epoch = checkpoint['epoch']+1
        print('Previours_epoch: {}, best_loss: {}'.format(start_epoch-1,
                                                          best_loss))

    param_count = sum(p.data.nelement() for p in net.parameters())
    print(' + Number of params: {}'.format(param_count))
    if use_cuda:
        net = net.cuda()

    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=args.lr,
                              momentum=args.momentum, weight_decay=args.wd)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=args.wd)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=args.wd)

    criterion = MultiBoxLoss()

    if use_cuda:
        net.cuda()
        cudnn.benchmark = True

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

        def loss_plot(title):
            # Empty three-series line plot for Loc / Conf / total loss.
            return viz.line(
                X=torch.zeros((1,)).cpu(),
                Y=torch.zeros((1, 3)).cpu(),
                opts=dict(
                    xlabel='Epoch',
                    ylabel='Loss',
                    title=title,
                    legend=['Loc Loss', 'Conf Loss', 'Loss']
                )
            )

        training_plot = loss_plot('Epoch DSOD Training Loss')
        testing_plot = loss_plot('Epoch DSOD Testing Loss')

    with open(cfg.label_test) as f:
        test_lines = f.readlines()

        data_encoder = DataEncoder()
        if args.visdom:
            testing_image = viz.image(
                np.ones((3, 300, 300)),
                opts=dict(caption='Random Testing Image'))

    # TODO: save training data on log file

    for epoch in range(start_epoch, start_epoch+args.nEpochs+1):
        adjust_opt(args.opt, optimizer, epoch)
        # NOTE(review): viz=None is passed even when visdom is enabled -
        # confirm whether the Visdom handle was meant to be forwarded.
        train(epoch, net, train_loader, optimizer, criterion, use_cuda,
              args.visdom, viz=None)
        test(epoch, net, test_loader, optimizer, criterion, use_cuda,
             args.visdom, viz=None)

        if epoch % 10 != 0:
            continue

        # NOTE(review): test_loss is never assigned in this function;
        # presumably a module-level global updated by test() - verify.
        state = {
            'net': net.state_dict(),
            'loss': test_loss,
            'epoch': epoch
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt_{:03d}.pth'.format(epoch))

Пример #21

Показать файл

Файл: vessel.py Проект: nasir6/sar-vessel-detection-deeplearning

class VESSELBboxDataset:
    """SAR-Ship-Dataset images with single-class ('ship') box annotations.

    Images are read from ``<data_dir>/JPEGImages/<id>.jpg`` and boxes from
    ``<data_dir>/ground-truth/<id>.txt``.
    """

    def __init__(self, split='trainval'):
        """Collect image ids and keep the slice belonging to ``split``.

        Args:
            split (str): ``'trainval'`` keeps the first 40000 ids; any other
                value keeps the remainder.
        """
        data_dir = "/media/nasir/Drive1/datasets/SAR/SAR-Ship-Dataset"
        paths = glob.glob(f'{data_dir}/JPEGImages/*.jpg')

        # NOTE(review): glob returns files in arbitrary order, so split
        # membership depends on the file system - consider sorting.
        ids = [os.path.splitext(os.path.basename(x))[0] for x in paths]
        self.ids = ids[:40000] if split == 'trainval' else ids[40000:]
        self.input_size = 256
        self.encoder = DataEncoder()

        self.data_dir = data_dir
        self.label_names = ['ship']
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

    def __len__(self):
        """Return the number of ids in the selected split."""
        return len(self.ids)

    def str2int(self, a):
        """Convert every element of ``a`` to ``int``."""
        return [int(x) for x in a]

    def extract_boxes(self, fname):
        """Read one annotation file into a list of 4-integer boxes.

        The last four space-separated fields of every line are parsed as
        integers. Blank lines are skipped rather than failing on ``int('')``.

        Args:
            fname (str): path of the annotation file.

        Returns:
            list of lists of int, one per non-blank line.
        """
        with open(fname) as f:
            stripped = (line.strip() for line in f)
            return [self.str2int(line.split(' ')[-4:])
                    for line in stripped if line]

    def __getitem__(self, i):
        """Returns the i-th example.

        Args:
            i (int): The index of the example.

        Returns:
            dict with ``'img'`` (the transformed RGB image) and ``'annot'``
            (the array built by ``load_annotations``).

        """
        id_ = self.ids[i]
        anno_file = os.path.join(self.data_dir, 'ground-truth', id_ + '.txt')
        img_file = os.path.join(self.data_dir, 'JPEGImages', id_ + '.jpg')

        img = Image.open(img_file).convert('RGB')
        img = self.transform(img)
        annot = self.load_annotations(self.extract_boxes(anno_file))
        return {'img': img, 'annot': annot}

    def load_annotations(self, bboxes):
        """Build an ``(N, 5)`` annotation array from 4-value boxes.

        Columns 2 and 3 of each input box are added to columns 0 and 1, so
        boxes given as (start, start, extent, extent) come out as two
        corners; column 4 is the class id, always 0.

        Args:
            bboxes: sequence of 4-value boxes.

        Returns:
            float array of shape ``(len(bboxes), 5)``.
        """
        # Allocate once instead of growing the array with np.append.
        annotations = np.zeros((len(bboxes), 5))
        if len(bboxes) == 0:
            return annotations

        for row, box in zip(annotations, bboxes):
            row[:4] = box

        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations

    def collate_fn(self, batch):
        '''Stack images and encode targets.

        NOTE(review): samples are indexed here as (image, boxes, labels),
        whereas __getitem__ returns a dict with 'img' and 'annot' - confirm
        which sample layout the data loader actually feeds in.

        Args:
          batch: (list) of samples indexed as (image, boxes, labels).

        Returns:
          images, stacked loc_targets, stacked cls_targets.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        h = w = self.input_size
        inputs = torch.zeros(len(imgs), 3, h, w)

        loc_targets = []
        cls_targets = []
        for i, img in enumerate(imgs):
            inputs[i] = img
            loc_target, cls_target = self.encoder.encode(
                boxes[i], labels[i], input_size=(w, h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

Пример #22

Показать файл

class jsonDataset(data.Dataset):
    '''Detection dataset indexed from a JSON ground-truth file.

    The constructor validates every ground-truth box and stores, per sample,
    the image path, a float32 box tensor and an int64 label tensor. When
    num_crops > 0 each image is expanded into fixed-size crops placed around
    its boxes and the crop offset is stored next to the sample.
    '''

    def __init__(self,
                 path,
                 classes,
                 transform,
                 input_image_size,
                 num_crops,
                 fpn_level,
                 is_norm_reg_target,
                 radius,
                 view_image=False,
                 min_cols=1,
                 min_rows=1):
        '''Read the ground truth and build the sample index.

        Args:
          path: (str) path to the ground-truth JSON file.
          classes: (sequence of str) foreground class names; class i gets
            label i + 1 because label 0 is the background.
          transform: callable used by __getitem__ as
            transform(image=..., bboxes=...).
          input_image_size: model input size, forwarded to DataEncoder and
            indexed as (rows, cols) wherever crops are involved.
          num_crops: (int) crop placements tried per box; <= 0 keeps whole
            images instead of crops.
          fpn_level: forwarded to DataEncoder.
          is_norm_reg_target: forwarded to DataEncoder.
          radius: passed to DataEncoder.encode by collate_fn.
          view_image: (bool) when True, __getitem__ writes a debug image.
          min_cols: minimum accepted box width (xmax - xmin).
          min_rows: minimum accepted box height (ymax - ymin).

        Raises:
          ValueError: if the collected boxes, labels and image paths do not
            have the same length.
        '''
        self.path = path
        self.classes = classes
        self.transform = transform
        self.input_size = input_image_size
        self.num_crops = num_crops
        self.view_img = view_image
        self.fpn_level = fpn_level
        self.is_norm_reg_target = is_norm_reg_target
        self.radius = radius

        self.fnames = list()
        self.offsets = list()
        self.boxes = list()
        self.labels = list()

        self.num_classes = len(self.classes)

        # Label 0 is the background class, so class i is mapped to i + 1.
        self.label_map = dict()
        self.class_idx_map = dict()
        for idx, class_name in enumerate(self.classes):
            self.label_map[class_name] = idx + 1
            self.class_idx_map[idx + 1] = class_name

        self.data_encoder = DataEncoder(
            image_size=self.input_size,
            num_classes=self.num_classes + 1,
            fpn_level=self.fpn_level,
            is_norm_reg_target=self.is_norm_reg_target)

        # The context manager closes the file even when json.load raises.
        with open(self.path, 'r') as fp_read:
            gt_dict = json.load(fp_read)

        all_boxes = list()
        all_labels = list()
        all_img_path = list()
        # (rows, cols) of every kept image, remembered so that the crop pass
        # below does not have to decode each image a second time.
        all_img_shapes = list()

        # read gt files
        for gt_key in gt_dict:
            gt_data = gt_dict[gt_key][0]

            img = cv2.imread(gt_data['image_path'])
            img_rows = img.shape[0]
            img_cols = img.shape[1]

            box, label = self._collect_valid_boxes(
                gt_data, img_rows, img_cols, min_cols, min_rows)

            if len(box) == 0 or len(label) == 0:
                print('none of object exist in the image: ' +
                      gt_data['image_path'])
                continue

            all_boxes.append(box)
            all_labels.append(label)
            all_img_path.append(gt_data['image_path'])
            all_img_shapes.append((img_rows, img_cols))

        if not (len(all_boxes) == len(all_labels) == len(all_img_path)):
            print('num. of boxes: ' + str(len(all_boxes)))
            print('num. of labels: ' + str(len(all_labels)))
            print('num. of paths: ' + str(len(all_img_path)))
            raise ValueError(
                'num. of elements are different(all boxes, all_labels, all_img_path)'
            )

        if num_crops <= 0:
            for img_path, img_boxes, img_labels in zip(
                    all_img_path, all_boxes, all_labels):
                self.fnames.append(img_path)
                self.boxes.append(
                    torch.tensor(img_boxes, dtype=torch.float32))
                self.labels.append(
                    torch.tensor(img_labels, dtype=torch.int64))
        else:
            for img_path, img_shape, img_boxes, img_labels in zip(
                    all_img_path, all_img_shapes, all_boxes, all_labels):
                offsets, crop_boxes, crop_labels = self._do_crop(
                    ori_img_rows=img_shape[0],
                    ori_img_cols=img_shape[1],
                    target_img_size=self.input_size,
                    boxes=img_boxes,
                    labels=img_labels)

                # One sample per accepted crop of this image.
                for offset, in_boxes, in_labels in zip(
                        offsets, crop_boxes, crop_labels):
                    self.fnames.append(img_path)
                    self.offsets.append(offset)
                    self.boxes.append(
                        torch.tensor(in_boxes, dtype=torch.float32))
                    self.labels.append(
                        torch.tensor(in_labels, dtype=torch.int64))

        self.num_samples = len(self.fnames)

    def _collect_valid_boxes(self, gt_data, img_rows, img_cols, min_cols,
                             min_rows):
        '''Return the boxes and labels of one image that pass validation.

        A box is skipped (with a printed reason) when it has a negative
        top-left coordinate, exceeds the image, is narrower than min_cols,
        is shorter than min_rows, or names a class missing from
        self.label_map.

        Args:
          gt_data: (dict) with 'image_path', 'boxes' and 'labels' entries.
          img_rows: image height in pixels.
          img_cols: image width in pixels.
          min_cols: minimum accepted box width.
          min_rows: minimum accepted box height.

        Returns:
          (box, label): list of [xmin, ymin, xmax, ymax] floats and the list
          of matching integer labels.
        '''
        box = list()
        label = list()
        image_path = gt_data['image_path']

        for iter_box in range(0, len(gt_data['labels'])):
            raw_box = gt_data['boxes'][iter_box]
            xmin = raw_box[0]
            ymin = raw_box[1]
            xmax = raw_box[2]
            ymax = raw_box[3]
            rows = ymax - ymin
            cols = xmax - xmin

            if xmin < 0 or ymin < 0:
                print('negative coordinate: [xmin: ' + str(xmin) +
                      ', ymin: ' + str(ymin) + ']')
                print(image_path)
                continue

            if xmax > img_cols or ymax > img_rows:
                print('over maximum size: [xmax: ' + str(xmax) +
                      ', ymax: ' + str(ymax) + ']')
                print(image_path)
                continue

            box_text = (': [' + str(xmin) + ', ' + str(ymin) + ', ' +
                        str(xmax) + ', ' + str(ymax) + '] ' +
                        str(image_path))
            if cols < min_cols:
                print('cols is lower than ' + str(min_cols) + box_text)
                continue
            if rows < min_rows:
                print('rows is lower than ' + str(min_rows) + box_text)
                continue

            class_name = gt_data['labels'][iter_box][0]
            if class_name not in self.label_map:
                print('weired class name: ' + class_name)
                print(image_path)
                continue

            box.append(
                [float(xmin),
                 float(ymin),
                 float(xmax),
                 float(ymax)])
            label.append(int(self.label_map[class_name]))

        return box, label

    def __getitem__(self, idx):
        '''Load one sample, crop it when crops are enabled, and transform it.

        Args:
          idx: (int) sample index.

        Returns:
          (img, boxes, labels, fname): the transformed image, a float32
          tensor of boxes, an int64 tensor of labels and the image path.

        Raises:
          ValueError: if the stored crop offset is negative or a stored box
            lies outside of the crop.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        boxes = self.boxes[idx]
        labels = self.labels[idx]
        img = cv2.imread(fname)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.num_crops > 0:
            offset = self.offsets[idx]
            # (left, top, right, bottom); input_size is (rows, cols).
            crop_rect = (int(offset[0]), int(offset[1]),
                         int(offset[0] + self.input_size[1]),
                         int(offset[1] + self.input_size[0]))

            if offset[0] < 0 or offset[1] < 0:
                raise ValueError("negative offset!")
            for box in boxes:
                if box[0] < 0 or box[1] < 0 or box[2] > self.input_size[
                        1] or box[3] > self.input_size[0]:
                    raise ValueError("negative box coordinate!")

            img = img[crop_rect[1]:crop_rect[3], crop_rect[0]:crop_rect[2]]

        # The label rides along as the last element of each box so that the
        # transform keeps boxes and labels aligned.
        bboxes = [
            bbox.tolist() + [label.item()]
            for bbox, label in zip(boxes, labels)
        ]
        augmented = self.transform(image=img, bboxes=bboxes)
        img = augmented['image']
        boxes = [list(bbox) for bbox in augmented['bboxes']]
        labels = [bbox.pop() for bbox in boxes]

        if self.view_img is True:
            # Convert the CHW tensor to an HWC uint8 array *before* handing
            # it to OpenCV: cv2.cvtColor was previously called on the tensor
            # itself, ahead of the .numpy() call.
            np_img = img.numpy()
            np_img = np.transpose(np_img, (1, 2, 0))
            np_img = np.uint8(np_img * 255)
            np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
            np_img = np.ascontiguousarray(np_img)
            for idx_box, box in enumerate(boxes):
                cv2.rectangle(np_img, (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])), (0, 255, 0))
                class_idx = labels[idx_box]
                text_size = cv2.getTextSize(self.class_idx_map[class_idx],
                                            cv2.FONT_HERSHEY_PLAIN, 1, 1)
                cv2.putText(np_img, self.class_idx_map[class_idx],
                            (int(box[0]), int(box[1]) - text_size[1]),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

            # Make sure the debug directory exists before writing into it.
            os.makedirs("crop_test", exist_ok=True)
            cv2.imwrite(os.path.join("crop_test", str(idx) + ".jpg"), np_img)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        return img, boxes, labels, fname

    def __len__(self):
        '''Return the number of indexed samples.'''
        return self.num_samples

    def _do_crop(self, ori_img_rows, ori_img_cols, target_img_size, boxes,
                 labels):
        '''Place up to self.num_crops crops around every box of one image.

        Five placements exist (the box near the top-left, top-right,
        bottom-left or bottom-right corner of the crop, or near its center).
        They are tried in one random order that is shared by all boxes. A
        crop is kept only if it lies inside the image and contains at least
        one box.

        Args:
          ori_img_rows: image height in pixels.
          ori_img_cols: image width in pixels.
          target_img_size: crop size indexed as (rows, cols).
          boxes: list of [xmin, ymin, xmax, ymax] boxes of the image.
          labels: list of labels, one per box.

        Returns:
          (output_offsets, output_boxes, output_labels): per kept crop, its
          [offset_x, offset_y], the boxes inside it in crop coordinates, and
          their labels.
        '''
        if len(boxes) != len(labels):
            print("error occur: Random crop")

        # 0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right, 4 center.
        rand_indices = [0, 1, 2, 3, 4]
        np.random.shuffle(rand_indices)

        crop_rows = target_img_size[0]
        crop_cols = target_img_size[1]

        output_offsets = []
        output_boxes = []
        output_labels = []

        for box in boxes:
            # box coordinate from 1. not 0.
            xmin = box[0]
            ymin = box[1]
            xmax = box[2]
            ymax = box[3]

            width = (xmax - xmin) + 1
            height = (ymax - ymin) + 1

            if width < 0 or height < 0:
                print("negative width/height")
                continue

            for iter_crop in range(0, self.num_crops, 1):
                if iter_crop >= len(rand_indices):
                    # Only five placements exist. Indexing past them used to
                    # raise IndexError, so report it and stop for this box.
                    print("exceed possible crop num")
                    break

                margin = np.random.randint(16, 128, size=1)[0]
                offset_x, offset_y = self._crop_offset(
                    rand_indices[iter_crop], xmin, ymin, width, height,
                    crop_rows, crop_cols, margin)

                # The crop has to lie completely inside the image.
                if (offset_x + crop_cols > ori_img_cols - 1
                        or offset_y + crop_rows > ori_img_rows - 1):
                    continue
                if offset_x < 0 or offset_y < 0:
                    continue

                crop_rect = [offset_x, offset_y, crop_cols, crop_rows]
                in_boxes, in_labels = self._find_boxes_in_crop(
                    crop_rect, boxes, labels)

                if len(in_boxes) == 0:
                    continue

                output_offsets.append([offset_x, offset_y])
                output_boxes.append(in_boxes)
                output_labels.append(in_labels)

        return output_offsets, output_boxes, output_labels

    def _crop_offset(self, placement, xmin, ymin, width, height, crop_rows,
                     crop_cols, margin):
        '''Return the (offset_x, offset_y) of a crop for one placement.

        Args:
          placement: 0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right,
            any other value center.
          xmin, ymin: top-left corner of the box.
          width, height: box size.
          crop_rows, crop_cols: crop size.
          margin: distance kept between the box and the crop border.
        '''
        if placement == 0:
            return xmin - 1 - margin, ymin - 1 - margin
        if placement == 1:
            return (xmin - (crop_cols - width) - 1 + margin,
                    ymin - 1 - margin)
        if placement == 2:
            return (xmin - 1 - margin,
                    ymin - (crop_rows - height) - 1 + margin)
        if placement == 3:
            return (xmin - (crop_cols - width) - 1 + margin,
                    ymin - (crop_rows - height) - 1 + margin)

        # center
        # NOTE(review): randint's upper bound is exclusive, so the direction
        # is only ever -1 or 0 and the crop never shifts the other way.
        # Confirm whether randint(-1, 2) was intended.
        rand_direction = np.random.randint(-1, 1, size=1)[0]
        shift = rand_direction * margin
        return ((xmin - ((crop_cols - width) / 2) - 1) + shift,
                (ymin - ((crop_rows - height) / 2) - 1) + shift)

    def _find_boxes_in_crop(self, crop_rect, boxes, labels):
        '''Return the boxes (in crop coordinates) and labels kept by a crop.

        Args:
          crop_rect: crop as [minx, miny, width, height].
          boxes: list of [xmin, ymin, xmax, ymax] boxes of the image.
          labels: list of labels, one per box.

        Returns:
          (boxes_in_crop, labels_in_crop) for the boxes accepted by
          _find_box_in_crop.
        '''
        if len(boxes) != len(labels):
            print("error occur: Random crop")

        boxes_in_crop = []
        labels_in_crop = []
        for idx, box in enumerate(boxes):
            box_in_crop, label, is_contain = self._find_box_in_crop(
                crop_rect, box, labels[idx])

            if is_contain is True:
                boxes_in_crop.append(box_in_crop)
                labels_in_crop.append(label)

        return boxes_in_crop, labels_in_crop

    def _find_box_in_crop(self, rect, box, label):
        '''Translate one box into crop coordinates and clip it to the crop.

        Args:
          rect: crop as [minx, miny, width, height].
          box: box as [minx, miny, maxx, maxy] in image coordinates.
          label: label of the box, returned unchanged.

        Returns:
          (box_in_rect, label, is_contain). box_in_rect is the clipped
          [minx, miny, maxx, maxy] in crop coordinates, or [] when the box is
          rejected; is_contain tells which of the two happened.
        '''
        rect_minx = rect[0]
        rect_miny = rect[1]
        rect_width = rect[2]
        rect_height = rect[3]

        box_minx = box[0]
        box_miny = box[1]
        box_maxx = box[2]
        box_maxy = box[3]
        box_width = (box_maxx - box_minx) + 1
        box_height = (box_maxy - box_miny) + 1

        # A box may stick out of the crop by less than this share of its own
        # size (truncated to whole pixels). The thresholds are negative
        # because they are compared with signed distances to the crop border.
        occlusion_ratio = 0.3
        occlusion_width = int(box_width * occlusion_ratio) * -1
        occlusion_height = int(box_height * occlusion_ratio) * -1

        box_in_crop_minx = box_minx - rect_minx
        if box_in_crop_minx <= occlusion_width or box_in_crop_minx >= rect_width:
            return [], label, False

        box_in_crop_miny = box_miny - rect_miny
        if box_in_crop_miny <= occlusion_height or box_in_crop_miny >= rect_height:
            return [], label, False

        box_in_crop_maxx = box_maxx - rect_minx
        if rect_width - box_in_crop_maxx <= occlusion_width or box_in_crop_maxx <= 0:
            return [], label, False

        box_in_crop_maxy = box_maxy - rect_miny
        if rect_height - box_in_crop_maxy <= occlusion_height or box_in_crop_maxy <= 0:
            return [], label, False

        # Clip the accepted box to the crop.
        if box_in_crop_minx < 0:
            box_in_crop_minx = 0
        if box_in_crop_miny < 0:
            box_in_crop_miny = 0
        if rect_width - box_in_crop_maxx < 0:
            box_in_crop_maxx = rect_width - 1
        if rect_height - box_in_crop_maxy < 0:
            box_in_crop_maxy = rect_height - 1

        box_in_rect = [
            box_in_crop_minx, box_in_crop_miny, box_in_crop_maxx,
            box_in_crop_maxy
        ]
        return box_in_rect, label, True

    def collate_fn(self, batch):
        '''Pad the images of a batch and encode the targets of every sample.

        Args:
          batch: (list) of (img, boxes, labels, path) samples as returned by
            __getitem__.

        Returns:
          (inputs, loc_targets, cls_targets, center_targets, paths): the
          zero-padded image batch, the three stacked outputs of
          self.data_encoder.encode, and the list of image paths.

        Raises:
          ValueError: if self.input_size is neither an int nor a tuple.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]
        paths = [x[3] for x in batch]

        num_imgs = len(imgs)

        if isinstance(self.input_size, int) is True:
            inputs = torch.zeros(
                [num_imgs, 3, self.input_size, self.input_size],
                dtype=torch.float32)
        elif isinstance(self.input_size, tuple) is True:
            inputs = torch.zeros(
                [num_imgs, 3, self.input_size[0], self.input_size[1]],
                dtype=torch.float32)
        else:
            raise ValueError('input size should be int or tuple of ints')

        loc_targets = list()
        cls_targets = list()
        center_targets = list()

        for i in range(num_imgs):
            im = imgs[i]
            imh, imw = im.size(1), im.size(2)
            # Images smaller than the input size stay zero-padded on the
            # right and at the bottom.
            inputs[i, :, :imh, :imw] = im

            # Encode data.
            loc_target, cls_target, center_target = self.data_encoder.encode(
                boxes[i], labels[i], radius=self.radius)

            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
            center_targets.append(center_target)

        return inputs, \
               torch.stack(loc_targets, dim=0), \
               torch.stack(cls_targets, dim=0), \
               torch.stack(center_targets, dim=0), \
               paths

Пример #23

Показать файл

# Demo: load a trained RetinaNet, run it on one image and draw the decoded
# boxes on top of that image.
print('Loading model..')
net = RetinaNet()
net.load_state_dict(torch.load('./checkpoint/params.pth'))
net.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

print('Loading image..')
img = Image.open('./image/000001.jpg')
w = h = 600
img = img.resize((w, h))

print('Predicting..')
x = transform(img)
x = x.unsqueeze(0)  # add the batch dimension
# Variable(x, volatile=True) no longer disables autograd (the flag was
# removed and is ignored with a warning); torch.no_grad() is its replacement
# for inference.
with torch.no_grad():
    loc_preds, cls_preds = net(x)

print('Decoding..')
encoder = DataEncoder()
boxes, labels = encoder.decode(loc_preds.data.squeeze(),
                               cls_preds.data.squeeze(), (w, h))

draw = ImageDraw.Draw(img)
for box in boxes:
    draw.rectangle(list(box), outline='red')
img.show()

Пример #24

Показать файл

    def __init__(self,
                 path,
                 classes,
                 transform,
                 input_image_size,
                 num_crops,
                 fpn_level,
                 is_norm_reg_target,
                 radius,
                 view_image=False,
                 min_cols=1,
                 min_rows=1):
        '''Read the ground truth and build the sample index.

        Args:
          path: (str) path to the ground-truth JSON file.
          classes: (sequence of str) foreground class names; class i gets
            label i + 1 because label 0 is the background.
          transform: stored as self.transform.
          input_image_size: stored as self.input_size and forwarded to
            DataEncoder and to self._do_crop.
          num_crops: (int) <= 0 keeps whole images; otherwise every image is
            expanded into the crops returned by self._do_crop.
          fpn_level: forwarded to DataEncoder.
          is_norm_reg_target: forwarded to DataEncoder.
          radius: stored as self.radius.
          view_image: (bool) stored as self.view_img.
          min_cols: minimum accepted box width (xmax - xmin).
          min_rows: minimum accepted box height (ymax - ymin).

        Raises:
          ValueError: if the collected boxes, labels and image paths do not
            have the same length.
        '''
        self.path = path
        self.classes = classes
        self.transform = transform
        self.input_size = input_image_size
        self.num_crops = num_crops
        self.view_img = view_image
        self.fpn_level = fpn_level
        self.is_norm_reg_target = is_norm_reg_target
        self.radius = radius

        self.fnames = list()
        self.offsets = list()
        self.boxes = list()
        self.labels = list()

        self.num_classes = len(self.classes)

        # Label 0 is the background class, so class i is mapped to i + 1.
        self.label_map = dict()
        self.class_idx_map = dict()
        for idx, class_name in enumerate(self.classes):
            self.label_map[class_name] = idx + 1
            self.class_idx_map[idx + 1] = class_name

        self.data_encoder = DataEncoder(
            image_size=self.input_size,
            num_classes=self.num_classes + 1,
            fpn_level=self.fpn_level,
            is_norm_reg_target=self.is_norm_reg_target)

        # The context manager closes the file even when json.load raises.
        with open(self.path, 'r') as fp_read:
            gt_dict = json.load(fp_read)

        all_boxes = list()
        all_labels = list()
        all_img_path = list()
        # (rows, cols) of every kept image, remembered so that the crop pass
        # below does not have to decode each image a second time.
        all_img_shapes = list()

        # read gt files
        for gt_key in gt_dict:
            gt_data = gt_dict[gt_key][0]

            img = cv2.imread(gt_data['image_path'])
            img_rows = img.shape[0]
            img_cols = img.shape[1]

            box, label = self._collect_valid_boxes(
                gt_data, img_rows, img_cols, min_cols, min_rows)

            if len(box) == 0 or len(label) == 0:
                print('none of object exist in the image: ' +
                      gt_data['image_path'])
                continue

            all_boxes.append(box)
            all_labels.append(label)
            all_img_path.append(gt_data['image_path'])
            all_img_shapes.append((img_rows, img_cols))

        if not (len(all_boxes) == len(all_labels) == len(all_img_path)):
            print('num. of boxes: ' + str(len(all_boxes)))
            print('num. of labels: ' + str(len(all_labels)))
            print('num. of paths: ' + str(len(all_img_path)))
            raise ValueError(
                'num. of elements are different(all boxes, all_labels, all_img_path)'
            )

        if num_crops <= 0:
            for img_path, img_boxes, img_labels in zip(
                    all_img_path, all_boxes, all_labels):
                self.fnames.append(img_path)
                self.boxes.append(
                    torch.tensor(img_boxes, dtype=torch.float32))
                self.labels.append(
                    torch.tensor(img_labels, dtype=torch.int64))
        else:
            for img_path, img_shape, img_boxes, img_labels in zip(
                    all_img_path, all_img_shapes, all_boxes, all_labels):
                offsets, crop_boxes, crop_labels = self._do_crop(
                    ori_img_rows=img_shape[0],
                    ori_img_cols=img_shape[1],
                    target_img_size=self.input_size,
                    boxes=img_boxes,
                    labels=img_labels)

                # One sample per crop returned for this image.
                for offset, in_boxes, in_labels in zip(
                        offsets, crop_boxes, crop_labels):
                    self.fnames.append(img_path)
                    self.offsets.append(offset)
                    self.boxes.append(
                        torch.tensor(in_boxes, dtype=torch.float32))
                    self.labels.append(
                        torch.tensor(in_labels, dtype=torch.int64))

        self.num_samples = len(self.fnames)

    def _collect_valid_boxes(self, gt_data, img_rows, img_cols, min_cols,
                             min_rows):
        '''Return the boxes and labels of one image that pass validation.

        A box is skipped (with a printed reason) when it has a negative
        top-left coordinate, exceeds the image, is narrower than min_cols,
        is shorter than min_rows, or names a class missing from
        self.label_map.

        Args:
          gt_data: (dict) with 'image_path', 'boxes' and 'labels' entries.
          img_rows: image height in pixels.
          img_cols: image width in pixels.
          min_cols: minimum accepted box width.
          min_rows: minimum accepted box height.

        Returns:
          (box, label): list of [xmin, ymin, xmax, ymax] floats and the list
          of matching integer labels.
        '''
        box = list()
        label = list()
        image_path = gt_data['image_path']

        for iter_box in range(0, len(gt_data['labels'])):
            raw_box = gt_data['boxes'][iter_box]
            xmin = raw_box[0]
            ymin = raw_box[1]
            xmax = raw_box[2]
            ymax = raw_box[3]
            rows = ymax - ymin
            cols = xmax - xmin

            if xmin < 0 or ymin < 0:
                print('negative coordinate: [xmin: ' + str(xmin) +
                      ', ymin: ' + str(ymin) + ']')
                print(image_path)
                continue

            if xmax > img_cols or ymax > img_rows:
                print('over maximum size: [xmax: ' + str(xmax) +
                      ', ymax: ' + str(ymax) + ']')
                print(image_path)
                continue

            box_text = (': [' + str(xmin) + ', ' + str(ymin) + ', ' +
                        str(xmax) + ', ' + str(ymax) + '] ' +
                        str(image_path))
            if cols < min_cols:
                print('cols is lower than ' + str(min_cols) + box_text)
                continue
            if rows < min_rows:
                print('rows is lower than ' + str(min_rows) + box_text)
                continue

            class_name = gt_data['labels'][iter_box][0]
            if class_name not in self.label_map:
                print('weired class name: ' + class_name)
                print(image_path)
                continue

            box.append(
                [float(xmin),
                 float(ymin),
                 float(xmax),
                 float(ymax)])
            label.append(int(self.label_map[class_name]))

        return box, label

Пример #25

Показать файл

class ListDataset(data.Dataset):
    '''Detection dataset built from a directory of per-image annotation files.

    Every annotation file holds one object per line as comma-separated
    fields; fields 0-3 are read as xmin, ymin, width, height and field 5 as
    the class label.
    '''
    img_size = 300

    def __init__(self, root, list_file, train, transform):
        '''
        Args:
          root: (str) directory of the images.
          list_file: (str) directory of the annotation files.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
        '''
        self.root = root
        self.train = train
        self.transform = transform

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.data_encoder = DataEncoder()
        self.num_samples = 0

        for name in os.listdir(list_file):
            self.num_samples += 1
            self.fnames.append(name)
            box, labels = self._read_annotation_file(
                os.path.join(list_file, name))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(labels))

    @staticmethod
    def _read_annotation_file(path):
        '''Parse one annotation file.

        Blank lines are skipped, so the last object is kept whether or not
        the file ends with a newline.

        Args:
          path: (str) path to the annotation file.

        Returns:
          (box, labels): list of [xmin, ymin, xmax, ymax] floats and the
          list of matching integer labels.
        '''
        box = []
        labels = []
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                fields = line.split(",")
                xmin = float(fields[0])
                ymin = float(fields[1])
                w = float(fields[2])
                h = float(fields[3])

                # xmax comes from the width; it used to be xmin + h.
                box.append([xmin, ymin, xmin + w, ymin + h])
                labels.append(int(fields[5]))
        return box, labels

    def __getitem__(self, idx):
        '''Load an image, and encode its bbox locations and class labels.
        Args:
          idx: (int) image index.
        Returns:
          img: (tensor) image tensor.
          loc_target: (tensor) location targets from self.data_encoder.encode.
          conf_target: (tensor) label targets from self.data_encoder.encode.
        '''
        # Load image and bbox locations.
        fname = self.fnames[idx]
        # The image shares the annotation file's name, with a 4-character
        # extension replaced by ".jpg".
        img = cv2.imread(os.path.join(self.root, fname[:-4] + ".jpg"))
        # Clone so that the augmentations never modify the stored boxes.
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        # Data augmentation while training.
        if self.train:
            img, boxes = self.random_flip(img, boxes)
            img, boxes, labels = self.random_crop(img, boxes, labels)

        # Scale bbox locations to [0,1].
        w, h = img.shape[1], img.shape[0]
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = self.transform(img)

        # Encode loc & conf targets.
        loc_target, conf_target = self.data_encoder.encode(boxes, labels)
        return img, loc_target, conf_target

    def random_flip(self, img, boxes):
        '''Randomly flip the image and adjust the bbox locations.
        For bbox (xmin, ymin, xmax, ymax), the flipped bbox is:
        (w-xmax, ymin, w-xmin, ymax).
        Args:
          img: (ndarray) image.
          boxes: (tensor) bbox locations, sized [#obj, 4]; modified in place
            when the image is flipped.
        Returns:
          img: (ndarray) randomly flipped image.
          boxes: (tensor) randomly flipped bbox locations, sized [#obj, 4].
        '''
        if random.random() < 0.5:
            img = cv2.flip(img, 1)
            w = img.shape[1]
            # Compute both columns before writing either one back.
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
        return img, boxes

    def random_crop(self, img, boxes, labels):
        '''Randomly crop the image and adjust the bbox locations.
        For more details, see 'Chapter2.2: Data augmentation' of the paper.
        Args:
          img: (ndarray) image.
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) bbox labels, sized [#obj,].
        Returns:
          img: (ndarray) cropped image.
          selected_boxes: (tensor) selected bbox locations.
          labels: (tensor) selected bbox labels.
        '''
        imw, imh = img.shape[1], img.shape[0]
        while True:
            # None means "keep the whole image"; otherwise it is the minimum
            # IoU every selected box must have with the crop.
            min_iou = random.choice([None, 0.1, 0.3, 0.5, 0.7, 0.9])
            if min_iou is None:
                return img, boxes, labels

            # Try up to 100 random crops before drawing a new min_iou.
            for _ in range(100):
                w = random.randrange(int(0.1 * imw), imw)
                h = random.randrange(int(0.1 * imh), imh)

                # Reject empty crops and aspect ratios outside [1/2, 2].
                if h > 2 * w or w > 2 * h or h < 1 or w < 1:
                    continue

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2  # [N,2]
                roi2 = roi.expand(len(center), 4)  # [N,4]

                # Keep the boxes whose center lies strictly inside the crop.
                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])  # [N,2]
                mask = mask[:, 0] & mask[:, 1]  # [N,]

                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))

                iou = self.data_encoder.iou(selected_boxes, roi)
                if iou.min() < min_iou:
                    continue
                img = img[y:y + h, x:x + w, :]

                # Move the boxes into crop coordinates and clip them to it.
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)

                return img, selected_boxes, labels[mask]

    def __len__(self):
        '''Return the number of annotation files found by the constructor.'''
        return self.num_samples

Пример #26

Показать файл

Файл: datagen.py Проект: Gmy12138/RetinaNet

class ListDataset(data.Dataset):
    '''Detection dataset built from the files globbed under ``root``.

    Every path matching ``<root>/*.*`` is treated as a label file. The image
    path for a label file is derived by replacing "labels" with "image" and
    ".txt" with ".jpg" in the label path.
    '''

    def __init__(self, root, train, transform, input_size):
        '''
        Args:
          root: (str) directory that is globbed for label files.
          train: (boolean) apply random flip / random crop when True.
          transform: (callable) applied to the image after resizing.
          input_size: (int) side length of the square model input.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        # These three lists are created here but not filled by this class.
        self.fnames, self.boxes, self.labels = [], [], []

        self.encoder = DataEncoder()

        # Sorted so that indices map to files in a stable order.
        label_files = glob.glob("%s/*.*" % self.root)
        label_files.sort()
        self._labpath = label_files

        self._imgpath = []
        for label_file in self._labpath:
            image_file = label_file.replace("labels", "image")
            self._imgpath.append(image_file.replace(".txt", ".jpg"))

    def __getitem__(self, index):
        '''Load one image together with its boxes and labels.

        Args:
          index: (int) sample index.

        Returns:
          img: result of ``self.transform`` applied to the resized image.
          boxes: box locations as returned by ``resize``.
          labels: (tensor) class labels, taken from column 0 of the label file.
          fname: (str) image file name up to its first '.'.
        '''
        img_path = self._imgpath[index].rstrip()
        # Last path component, truncated at the first dot.
        fname = img_path.rsplit('/', 1)[-1].partition('.')[0]

        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Each label row has 5 values: column 0 is the label, 1..4 the box.
        label_path = self._labpath[index].rstrip()
        targets = np.loadtxt(label_path).reshape(-1, 5)
        boxes = torch.Tensor(targets[:, 1:])
        labels = torch.LongTensor(targets[:, 0])

        # Random augmentation is training-only; resizing always happens.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
        side = self.input_size
        img, boxes = resize(img, boxes, (side, side))

        return self.transform(img), boxes, labels, fname

    def collate_fn(self, batch):
        '''Stack images into one batch tensor and encode the targets.

        Args:
          batch: (list) of (img, boxes, labels, fname) tuples as produced by
            ``__getitem__``.

        Returns:
          inputs: (tensor) images, sized [#imgs, 3, input_size, input_size].
          loc_targets: stacked location targets from ``self.encoder.encode``.
          cls_targets: stacked class targets from ``self.encoder.encode``.
          fname: (list) file names, in batch order.
        '''
        imgs, boxes, labels, fname = ([sample[field] for sample in batch]
                                      for field in range(4))

        side = self.input_size
        inputs = torch.zeros(len(imgs), 3, side, side)

        loc_targets, cls_targets = [], []
        for slot, (image, sample_boxes, sample_labels) in enumerate(
                zip(imgs, boxes, labels)):
            inputs[slot] = image
            encoded_loc, encoded_cls = self.encoder.encode(
                sample_boxes, sample_labels, input_size=(side, side))
            loc_targets.append(encoded_loc)
            cls_targets.append(encoded_cls)

        stacked_loc = torch.stack(loc_targets)
        stacked_cls = torch.stack(cls_targets)
        return inputs, stacked_loc, stacked_cls, fname

    def __len__(self):
        '''Return the number of label files found under ``root``.'''
        return len(self._labpath)

Пример #27

Показать файл

Файл: main2_eval.py Проект: akira-l/face_raw

from torch.autograd import Variable
from encoder import DataEncoder
import arcface_loss2
from cosface_loss import MarginCosineProduct

# Module-level setup for the evaluation script: identity network, landmark
# detector and input preprocessing are all created at import time.
cudnn.benchmark = True

# Identity network with a 2874-way output, wrapped in DataParallel on GPU 0.
id_net = Idnet(classnum=2874)
id_net = torch.nn.DataParallel(id_net, device_ids=[0])
# The weights are loaded into the DataParallel wrapper, so the checkpoint keys
# presumably carry the "module." prefix -- TODO confirm against the saved file.
id_net.load_state_dict(torch.load("./arcface_id_net-data_addition-epoch-20-acc0.pth"))
id_net.cuda()
# NOTE(review): id_net.eval() is not called in the visible part of this script.

#net.load_state_dict(torch.load("./trained model/originalFAN_model.pth"))
#net.eval()
coder = DataEncoder()

# dlib face detector plus the 5-point landmark shape predictor.
detector = dlib.get_frontal_face_detector()
predicter_path = "./model/shape_predictor_5_face_landmarks.dat"
sp = dlib.shape_predictor(predicter_path)


# Convert to a tensor, then normalize each channel with the given
# (mean, std) triples.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225))
])

def KFold(n=6000, n_folds=10):
    folds = []
    base = list(range(n))
    for i in range(n_folds):

Пример #28

Показать файл

def train():
    '''Train a RetinaNet detector and log progress to TensorBoard.

    Reads all settings from ``parse_args()``. The function:
      * requires CUDA and ``args.focal_loss`` (both enforced with asserts);
      * creates ``args.save_folder`` and ``args.logdir`` if they are missing;
      * loads pretrained weights and, if ``args.resume`` is set, the 'net'
        entry of that checkpoint;
      * runs SGD until ``iteration`` exceeds ``args.max_iter``;
      * every 20 iterations logs losses and one decoded sample image;
      * every ``args.save_interval`` iterations writes a checkpoint;
      * optionally launches ``eval.py`` in a subprocess and logs its score.

    Returns nothing.
    '''
    args = parse_args()

    assert torch.cuda.is_available(), 'Error: CUDA not found!'
    # NOTE(review): this assert makes the OHEM branch below unreachable
    # unless assertions are disabled (python -O).
    assert args.focal_loss, "OHEM + ce_loss is not working... :("

    # os.mkdir creates a single level only; parent directories must exist.
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    if not os.path.exists(args.logdir):
        os.mkdir(args.logdir)

    ###########################################################################
    # Data
    ###########################################################################

    print('==> Preparing data..')
    # NOTE(review): the dataset root is a hard-coded, machine-specific path.
    trainset = ListDataset(root='/mnt/9C5E1A4D5E1A2116/datasets/',
                           dataset=args.dataset,
                           train=True,
                           transform=Augmentation_traininig,
                           input_size=args.input_size,
                           multi_scale=args.multi_scale)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              collate_fn=trainset.collate_fn)

    ###########################################################################

    # Training detail options.
    # Iterations at which the learning rate is adjusted; SynthText uses a
    # longer schedule than every other dataset.
    stepvalues = (10000, 20000, 30000, 40000, 50000) if args.dataset in ["SynthText"] \
        else (2000, 4000, 6000, 8000, 10000)
    best_loss = float('inf')  # best test loss
    start_epoch = 0  # start from epoch 0 or last epoch
    iteration = 0
    cur_lr = args.lr
    # Used below to undo input normalization for display. Despite its name,
    # `var` is applied as a per-channel multiplier (infer_img *= var).
    mean = (0.485, 0.456, 0.406)
    var = (0.229, 0.224, 0.225)
    step_index = 0
    pEval = None  # handle of the running evaluation subprocess, if any

    ###########################################################################
    # Model
    ###########################################################################

    # set model (focal_loss vs OHEM_CE loss)
    if args.focal_loss:
        imagenet_pretrain = 'weights/retinanet_se50.pth'
        criterion = FocalLoss()
        num_classes = 1
    else:
        imagenet_pretrain = 'weights/retinanet_se50_OHEM.pth'
        criterion = OHEM_loss()
        num_classes = 2

    net = RetinaNet(num_classes)

    # Restore model weights
    net.load_state_dict(torch.load(imagenet_pretrain))

    if args.resume:
        print('==> Resuming from checkpoint..', args.resume)
        checkpoint = torch.load(args.resume)
        # Only the network weights are restored; epoch, iteration, learning
        # rate, step index and optimizer state all start fresh (see the
        # disabled lines below).
        net.load_state_dict(checkpoint['net'])
        #start_epoch = checkpoint['epoch']
        #iteration = checkpoint['iteration']
        #cur_lr = checkpoint['lr']
        #step_index = checkpoint['step_index']
        # optimizer.load_state_dict(state["optimizer"])

    print("multi_scale : ", args.multi_scale)
    print("input_size : ", args.input_size)
    print("stepvalues : ", stepvalues)
    print("start_epoch : ", start_epoch)
    print("iteration : ", iteration)
    print("cur_lr : ", cur_lr)
    print("step_index : ", step_index)
    print("num_gpus : ", torch.cuda.device_count())

    # Data parallelism for multi-gpu training
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    net.cuda()

    # Put model in training mode and freeze batch norm.
    net.train()
    net.module.freeze_bn()  # you must freeze batchnorm

    ###########################################################################
    # Optimizer
    ###########################################################################

    optimizer = optim.SGD(net.parameters(),
                          lr=cur_lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    #optimizer = optim.Adam(net.parameters(), lr=cur_lr)

    ###########################################################################
    # Utils
    ###########################################################################

    encoder = DataEncoder()
    writer = SummaryWriter(log_dir=args.logdir)

    ###########################################################################
    # Training loop
    ###########################################################################

    t0 = time.time()
    # The epoch bound of 10000 is only an upper limit; training actually
    # stops once `iteration` exceeds args.max_iter.
    for epoch in range(start_epoch, 10000):
        if iteration > args.max_iter:
            break

        for inputs, loc_targets, cls_targets in trainloader:
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)

            loc_loss, cls_loss = criterion(loc_preds, loc_targets, cls_preds,
                                           cls_targets)
            loss = loc_loss + cls_loss
            loss.backward()
            optimizer.step()

            # Periodic console + TensorBoard logging.
            if iteration % 20 == 0:
                t1 = time.time()

                # The % operator binds tighter than +, so it formats only the
                # last string literal, which holds all four placeholders.
                print(
                    'iter ' + repr(iteration) + ' (epoch ' + repr(epoch) +
                    ') || loss: %.4f || l loc_loss: %.4f || l cls_loss: %.4f (Time : %.1f)'
                    % (loss.sum().item(), loc_loss.sum().item(),
                       cls_loss.sum().item(), (t1 - t0)))
                # t0 = time.time()

                writer.add_scalar('loc_loss', loc_loss.sum().item(), iteration)
                writer.add_scalar('cls_loss', cls_loss.sum().item(), iteration)
                writer.add_scalar('loss', loss.sum().item(), iteration)

                # show inference image in tensorboard
                # CHW -> HWC, then undo normalization and rescale to 0..255.
                infer_img = np.transpose(inputs[0].cpu().numpy(), (1, 2, 0))
                infer_img *= var
                infer_img += mean
                infer_img *= 255.
                infer_img = np.clip(infer_img, 0, 255)
                infer_img = infer_img.astype(np.uint8)
                h, w, _ = infer_img.shape

                boxes, labels, scores = encoder.decode(loc_preds[0],
                                                       cls_preds[0], (w, h))
                # Reshaped to one 4-point polygon per box for cv2.polylines
                # (presumably quadrilateral corners -- confirm with decode()).
                boxes = boxes.reshape(-1, 4, 2).astype(np.int32)

                if boxes.shape[0] != 0:
                    # infer_img = infer_img/np.float32(255)

                    # print(boxes)
                    # print(
                    #     f"infer_img prior to cv2.polylines - dtype: {infer_img.dtype}, shape: {infer_img.shape}, min: {infer_img.min()}, max: {infer_img.max()}")
                    # print(
                    #     f"boxes prior to cv2.polylines - dtype: {boxes.dtype}, shape: {boxes.shape}, min: {boxes.min()}, max: {boxes.max()}")
                    infer_img = cv2.polylines(infer_img.copy(), boxes, True,
                                              (0, 255, 0), 4)

                # print(
                #     f"infer_img - dtype: {infer_img.dtype}, shape: {infer_img.shape}, min: {infer_img.min()}, max: {infer_img.max()}")

                writer.add_image('image',
                                 infer_img,
                                 iteration,
                                 dataformats="HWC")
                writer.add_scalar('input_size', h, iteration)
                writer.add_scalar('learning_rate', cur_lr, iteration)

                # Restart the timer so the printed time covers one logging
                # interval, including the logging work above.
                t0 = time.time()

            # Periodic checkpoint; iteration 0 is deliberately skipped.
            if iteration % args.save_interval == 0 and iteration > 0:
                print('Saving state, iter : ', iteration)
                state = {
                    'net': net.module.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    'iteration': iteration,
                    'epoch': epoch,
                    'lr': cur_lr,
                    'step_index': step_index
                }
                # Plain string concatenation: args.save_folder must end with
                # a path separator for the file to land inside the folder.
                model_file = args.save_folder + \
                    'ckpt_' + repr(iteration) + '.pth'
                torch.save(state, model_file)

            if iteration in stepvalues:
                step_index += 1
                cur_lr = adjust_learning_rate(cur_lr, optimizer, args.gamma,
                                              step_index)

            # Leaves only the batch loop; the check at the top of the epoch
            # loop then ends training.
            if iteration > args.max_iter:
                break

            if args.evaluation and iteration % args.eval_step == 0:
                try:
                    if pEval is None:
                        print("Evaluation started at iteration {} on IC15...".
                              format(iteration))
                        # NOTE(review): the command points at
                        # ckpt_<iteration>.pth, which this function writes only
                        # when iteration is a positive multiple of
                        # args.save_interval. At iteration 0, or when eval_step
                        # and save_interval do not line up, that file is not
                        # produced here -- confirm the intended configuration.
                        eval_cmd = "CUDA_VISIBLE_DEVICES=" + str(args.eval_device) + \
                            " python eval.py" + \
                            " --tune_from=" + args.save_folder + 'ckpt_' + repr(iteration) + '.pth' + \
                            " --input_size=1024" + \
                            " --output_zip=result_temp1"

                        # NOTE(review): shell=True with a command assembled
                        # from argument values; acceptable only while those
                        # values are trusted.
                        pEval = Popen(eval_cmd,
                                      shell=True,
                                      stdout=PIPE,
                                      stderr=PIPE)

                    elif pEval.poll() is not None:
                        # The subprocess has finished: collect its output.
                        (scorestring, stderrdata) = pEval.communicate()

                        # Positional parse of the evaluator's stdout: the text
                        # after the third ':' up to the next ',' or '}'.
                        # Any change in that output format lands in the
                        # except branch below.
                        hmean = float(
                            str(scorestring).strip().split(":")[3].split(",")
                            [0].split("}")[0].strip())

                        # Logged at the current iteration, i.e. when the
                        # result was collected, not when evaluation started.
                        writer.add_scalar('test_hmean', hmean, iteration)

                        print("test_hmean for {}-th iter : {:.4f}".format(
                            iteration, hmean))

                        # pEval cannot be None here (poll() was just called
                        # on it), so this guard is always true.
                        if pEval is not None:
                            pEval.kill()
                        pEval = None

                except Exception as e:
                    # Best-effort: an evaluation failure is reported and the
                    # subprocess discarded, but training continues.
                    print("exception happened in evaluation ", e)
                    if pEval is not None:
                        pEval.kill()
                    pEval = None

            iteration += 1

Пример #29

Показать файл

 img = Image.open(image_path).convert('RGB')
 img1 = img.resize((300, 300))
 transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225))
 ])
 img1 = transform(img1)
 if use_cuda:
     img1 = img1.cuda()
 loc, conf = net(Variable(img1[None, :, :, :],
                          volatile=True))  # Forward
 loc = loc.cpu()
 conf = conf.cpu()
 #print(loc, conf)
 data_encoder = DataEncoder()  # Decode
 boxes, labels, scores = data_encoder.decode(
     loc.data.squeeze(0),
     F.softmax(conf.squeeze(0)).data)
 draw = ImageDraw.Draw(img)
 #draw.rectangle(list(box), outline='blue')
 #draw.rectangle(ground_truth_box, outline='blue')
 fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 40)
 #img.show()
 for box in boxes:
     box[::2] *= img.width
     box[1::2] *= img.height
     box = list(box)
     x1_org = image[1]
     y1_org = image[2]
     x2_org = image[3]

Пример #30

Показать файл

                    default='ICDAR2015',
                    type=str,
                    help='evaluation dataset')

# Evaluation script setup: parse options, restore the detector and prepare
# the input image list and the output archive.
args = parser.parse_args()

net = RetinaNet()
net = net.cuda()

# load checkpoint
checkpoint = torch.load(args.tune_from)

# Only the 'net' entry of the checkpoint is used.
net.load_state_dict(checkpoint['net'])
net.eval()

encoder = DataEncoder(args.cls_thresh, args.nms_thresh)

# test image path & list
# NOTE(review): hard-coded absolute paths; any dataset value other than
# "ICDAR2015" selects the ICDAR2013 directory.
img_dir = "/root/DB/ICDAR2015_Incidental/test/" if args.dataset in [
    "ICDAR2015"
] else "/root/DB/ICDAR2013_FOCUSED/test/"
# Substring test: keeps every entry whose name contains "jpg" anywhere,
# not only names ending in ".jpg".
val_list = [im for im in os.listdir(img_dir) if "jpg" in im]

# args.output_zip is used here as a directory name (relative to the current
# working directory) ...
if not os.path.exists(args.output_zip):
    os.mkdir(args.output_zip)

# save results dir & zip
eval_dir = "/root/Detector/ocr_evaluation/code/icdar/4_incidental_scene_text/1_TextLocalization/1_IoU/" if args.dataset in ["ICDAR2015"] \
           else "/root/Detector/ocr_evaluation/code/icdar/2_focused_scene_text/1_TextLocalization/1_ICDAR2013/"
# ... and here the same value names the archive file created under eval_dir.
result_zip = zipfile.ZipFile(eval_dir + args.output_zip, 'w')

Пример #31

Показать файл

Файл: demo.py Проект: HotaekHan/FCOS

# Demo script setup: build the network and the matching encoder, restore the
# best checkpoint and prepare the index -> class-name lookup.
num_classes = len(target_classes)

net = load_model(num_classes=num_classes,
                 fpn_level=5,
                 basenet=config['params']['base'],
                 is_pretrained_base=False,
                 is_norm_reg_target=config['params']['norm_reg_target'],
                 centerness_with_loc=config['params']['centerness_on_reg'],
                 is_train=False)
net = net.to(device)
net.eval()

# The encoder is given one class more than the network
# (presumably a background class -- TODO confirm in DataEncoder).
data_encoder = DataEncoder(
    image_size=img_size,
    num_classes=num_classes + 1,
    fpn_level=5,
    is_norm_reg_target=config['params']['norm_reg_target'])

ckpt = torch.load(os.path.join(config['model']['exp_path'], 'best.pth'),
                  map_location=device)
weights = utils._load_weights(ckpt['net'])
# strict=False tolerates key mismatches between checkpoint and model; the
# value returned by load_state_dict is printed so they can be inspected.
missing_keys = net.load_state_dict(weights, strict=False)
print(missing_keys)

# Class indices in this map are 1-based: index idx + 1 maps to
# target_classes[idx].
class_idx_map = dict()
for idx in range(0, num_classes):
    class_idx_map[idx + 1] = target_classes[idx]

img_paths = list()
for (path, _, files) in os.walk(opt.imgs):

Пример #32

Показать файл

Файл: dataset.py Проект: Gmy12138/RetinaNet

class ListDataset(data.Dataset):
    '''Detection dataset built from the files globbed under ``root``.

    Every path matching ``<root>/*.*`` is treated as a label file. The image
    path for a label file is derived by replacing "labels" with "image" and
    ".txt" with ".jpg" in the label path. Images are read with OpenCV.
    '''

    def __init__(self, root, train, transform, input_size):
        '''
        Args:
          root: (str) directory that is globbed for label files.
          train: (boolean) use SSDAugmentation when True, BaseTransform
            otherwise.
          transform: ([transforms]) stored on the instance; not applied in
            ``__getitem__``.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        # Created here but not filled by this class.
        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        # Sorted so that indices map to files in a stable order.
        self._labpath = sorted(glob.glob("%s/*.*" % self.root))
        self._imgpath = [
            path.replace("labels", "image").replace(".txt", ".jpg")
            for path in self._labpath
        ]

    def __getitem__(self, index):
        '''Load one image together with its boxes and labels.

        Args:
          index: (int) sample index.

        Returns:
          img: (tensor) image, channels first, divided by 255 and then by
            the per-channel constants (0.229, 0.224, 0.225).
          boxes: (tensor) box locations scaled to the augmented image size.
          labels: (tensor) class labels.
          fname: (str) image file name up to its first '.'.

        Raises:
          OSError: if the image cannot be read.
        '''
        img_path = self._imgpath[index].rstrip()
        fname = img_path.split('/')[-1].split('.')[0]

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising; fail here with the offending path rather
            # than with an AttributeError on `img.shape`.
            raise OSError("failed to read image: %s" % img_path)
        h, w, _ = img.shape

        # Each label row has 5 values: column 0 is the label, 1..4 the box.
        label_path = self._labpath[index].rstrip()
        targets = np.loadtxt(label_path).reshape(-1, 5)

        # Box columns 1..4 are divided by width, height, width, height so the
        # augmentation receives coordinates relative to the image size.
        for col, extent in zip((1, 2, 3, 4), (w, h, w, h)):
            targets[:, col] = targets[:, col] / extent

        size = self.input_size

        # Training and evaluation differ only in the augmentation pipeline.
        if self.train:
            augmentation = SSDAugmentation(size=size)
        else:
            augmentation = BaseTransform(size=size)
        img, boxe, labels = augmentation(img, targets[:, 1:], targets[:, 0])

        # to rgb (reverse the channel order), then HWC -> CHW
        img = img[:, :, (2, 1, 0)]
        img = torch.from_numpy(img).permute(2, 0, 1)
        img = img / 255
        _, h1, w1 = img.shape
        # NOTE(review): only a per-channel division is applied here; any mean
        # subtraction presumably happens inside the augmentation -- confirm.
        for channel, std in enumerate((0.229, 0.224, 0.225)):
            img[channel, :, :] = img[channel, :, :] / std

        # Scale the relative boxes back to pixels of the augmented image.
        for col, extent in enumerate((w1, h1, w1, h1)):
            boxe[:, col] = extent * boxe[:, col]

        boxes = torch.Tensor(boxe)
        labels = torch.LongTensor(labels)
        # img = self.transform(img)
        return img, boxes, labels, fname

    def collate_fn(self, batch):
        '''Stack images into one batch tensor and encode the targets.

        Args:
          batch: (list) of (img, boxes, labels, fname) tuples as produced by
            ``__getitem__``.

        Returns:
          inputs: (tensor) images, sized [#imgs, 3, input_size, input_size].
          loc_targets: stacked location targets from ``self.encoder.encode``.
          cls_targets: stacked class targets from ``self.encoder.encode``.
          fname: (list) file names, in batch order.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]
        fname = [x[3] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i],
                                                         labels[i],
                                                         input_size=(w, h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(
            cls_targets), fname

    def __len__(self):
        '''Return the number of label files found under ``root``.'''
        return len(self._labpath)