def __getitem__(self, idx):
        if self.use_M == True:
            image = self.images[idx]
        else:
            image, _ = self.datainfo.__getitem__(idx)

        if self.aug == True:
            self.rv = random.random()
        else:
            self.rv = 1

        if self.rv < 0.9:
            # augmenation of img and masks
            angle = random.randrange(-15, 15)

            # trans img with masks
            self.data_trans = mytransforms.Compose([mytransforms.ToPILImage(),
                                                    mytransforms.Rotation(angle),
                                                    mytransforms.ColorJitter(brightness=random.random(),
                                                                             contrast=random.random(),
                                                                             saturation=random.random(),
                                                                             hue=random.random() / 2),
                                                    mytransforms.ToTensor(),
                                                    ])
            self.mask_trans = mytransforms.Compose([mytransforms.ToPILImage(),
                                                    mytransforms.Rotation(angle),
                                                    mytransforms.ToTensor(),
                                                    ])

            if self.binary == True:
                image = perturbator.comb_binary_rec(image, [self.H, self.W])
                #image = comb_black_rec(image, [self.H, self.W])
            image = self.data_trans(image)

            mask = torch.empty(self.mask_num, image.shape[1], image.shape[2], dtype=torch.float)
            if self.use_M == True:
                for k in range(0, self.mask_num):
                    X = self.images[idx + (self.data_num * (1 + k))]
                    mask[k] = self.mask_trans(X)
            else:
                for k in range(0, self.mask_num):
                    X, _ = self.datainfo.__getitem__(idx + (self.data_num * (1 + k)))
                    mask[k] = self.mask_trans(X)
        else:
            mask = torch.empty(self.mask_num, image.shape[1], image.shape[2], dtype=torch.float)
            if self.use_M == True:
                for k in range(0, self.mask_num):
                    X = self.images[idx + (self.data_num * (1 + k))]
                    mask[k] = X
            else:
                for k in range(0, self.mask_num):
                    X, _ = self.datainfo.__getitem__(idx + (self.data_num * (1 + k)))
                    mask[k] = X
        mask = torch.pow(mask, self.pow_n)
        mask = mask / mask.max()

        return [image, mask]
# Example #2
0
def _squeeze_keep_detections(arr):
    """Drop singleton axes of ``arr`` except the leading (per-detection) axis."""
    if arr.ndim == 0:
        return arr
    kept = tuple(dim for dim in arr.shape[1:] if dim != 1)
    return arr.reshape((arr.shape[0],) + kept)


def get_prediction(model, img_path, cat_names, threshold):
    """Run ``model`` on one image and return its detections above a score.

    Parameters:
        - model     - the model to be used; called with a one-element list
                      holding the image tensor
        - img_path  - path of the input image
        - cat_names - lookup from predicted label id to category name
        - threshold - minimum score; detections are kept up to and including
                      the last one whose score exceeds it

    Method:
        - the image is opened from ``img_path`` and converted to a tensor
          (moved to the GPU when the module-level ``use_cuda`` flag is set)
        - the image is passed through the model
        - if the output has ``'masks'`` they are binarised at 0.5; otherwise,
          if it has ``'keypoints'``, those are returned in their place
        - masks, boxes, class names and label ids are truncated to the kept
          detections

    Returns:
        ``(masks, pred_boxes, pred_class, pred_id)``.  ``masks`` is a NumPy
        array whose first axis indexes detections, or ``None`` when the model
        returns neither masks nor keypoints.  ``pred_boxes`` is a list of
        ``[(x0, y0), (x1, y1)]`` corner pairs taken from the box coordinates.
        When nothing exceeds ``threshold`` every result is empty instead of
        an ``IndexError`` being raised.
    """
    img = Image.open(img_path)
    transform = T.Compose([T.ToTensor()])
    img = transform(img)
    if use_cuda:
        img = img.cuda()
    pred = model([img])[0]

    scores = pred['scores'].detach().cpu().numpy()
    # Position of the last score above the threshold.  Using positions rather
    # than `list.index` keeps detections that share a score with an earlier one.
    above = [pos for pos, score in enumerate(scores) if score > threshold]
    num_keep = above[-1] + 1 if above else 0

    masks = None
    if 'masks' in pred:
        masks = (pred['masks'] > 0.5).detach().cpu().numpy()
    elif 'keypoints' in pred:
        masks = pred['keypoints'].detach().cpu().numpy()
    if masks is not None:
        # A bare squeeze() would also remove the detection axis when exactly
        # one detection is returned, so the slice below would cut image rows.
        masks = _squeeze_keep_detections(masks)[:num_keep]

    labels = pred['labels'].cpu().numpy()[:num_keep]
    boxes = pred['boxes'].detach().cpu().numpy()[:num_keep]

    pred_class = [cat_names[label] for label in labels]
    pred_id = list(labels)
    pred_boxes = [[(box[0], box[1]), (box[2], box[3])] for box in boxes]
    return masks, pred_boxes, pred_class, pred_id
# Example #3
0
def train_RCNN(model, path2data, path2json, weight_path=None):
    """Train ``model`` on a COCO-style dataset and checkpoint it every epoch.

    The dataset is shuffled once; all samples but one are used for training
    and the remaining one is held out in a test loader (evaluation itself is
    currently disabled).  Training uses SGD and, when ``weight_path`` is
    given, resumes model and optimizer state from that checkpoint.

    The number of epochs and the checkpoint destination are read from the
    names ``num_epochs`` and ``save_weights_to``, which are not defined in
    this function and must exist at module scope.

    Args:
        model: Detection model to train; wrapped in ``nn.DataParallel`` when
            more than one GPU is visible.
        path2data: Image location passed to ``coco_utils.get_coco``.
        path2json: Annotation file passed to ``coco_utils.get_coco``.
        weight_path: Optional checkpoint holding ``'model_state_dict'``,
            ``'optimizer_state_dict'`` and ``'epoch'``.
    """
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # get coco style dataset
    full_dataset = coco_utils.get_coco(path2data, path2json, T.ToTensor())

    # Split into train and test.  Both subsets must index the FULL dataset:
    # the permutation holds indices of the full dataset, so building the test
    # subset on top of the training subset would pick a wrong or out-of-range
    # sample.
    indices = torch.randperm(len(full_dataset)).tolist()
    dataset = torch.utils.data.Subset(full_dataset, indices[:-1])
    dataset_test = torch.utils.data.Subset(full_dataset, indices[-1:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=True,
                                              collate_fn=myutils.collate_fn)

    # Kept ready for the evaluation step that is disabled below.
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test,
        batch_size=1,
        shuffle=False,
        collate_fn=myutils.collate_fn)

    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    # Learning rate scheduler; constructed but not stepped while the
    # `lr_scheduler.step()` call in the loop stays disabled.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    # resume model/optimizer state when a checkpoint was supplied
    start_epoch = 0
    if weight_path is not None:
        checkpoint = torch.load(weight_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']

    for epoch in range(num_epochs):
        # train for one epoch, printing every 100 iterations
        train_one_epoch(model,
                        optimizer,
                        data_loader,
                        device,
                        epoch + start_epoch,
                        print_freq=100)
        # update the learning rate
        #lr_scheduler.step()
        # evaluate on the test dataset
        # Find a way around the broken pytorch nograd keypoint evaluation
        # evaluate(model, data_loader_test, device=device)

        # Checkpoint after every epoch.  Record the number of epochs actually
        # completed so an interrupted run resumes from the right place; after
        # the final epoch this equals num_epochs + start_epoch.
        torch.save(
            {
                'epoch': epoch + start_epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, save_weights_to)
    def __init__(self,
                 input_root,
                 target_root='',
                 counts_file='',
                 coords_file='',
                 processing_file='',
                 train=True,
                 patch_size=(256, 256),
                 patch_stride=128,
                 prescale=0.0,
                 generate_target=True,
                 target_type='density',
                 per_image_norm=False,
                 num_logits=0,
                 transform=None,
                 target_transform=None):
        """Index the input images, their patches and any optional metadata.

        Args:
            input_root: Location searched by ``find_inputs`` for ``.jpg`` files.
            target_root: Optional location of ``.npz`` targets; used only when
                the path exists.
            counts_file: Optional CSV of per-category counts.  Its columns are
                renamed through ``CATEGORY_MAP`` and ``train_id`` is dropped;
                rows are matched to inputs by row index.
            coords_file: Optional CSV with ``filename``, ``x_coord``,
                ``y_coord`` and ``category`` columns.
            processing_file: Optional CSV with ``filename``, ``xmin``,
                ``ymin``, ``xmax``, ``ymax``, ``width`` and ``height`` columns.
                When the file does not exist, ``find_inputs`` is asked to
                extract the extra per-image info instead.
            train: Training mode; requires targets to have been found.
            patch_size: Stored as ``self.patch_size``.
            patch_stride: Stored as ``self.patch_stride``.
            prescale: Scale applied to image info and coordinates; ``1.0`` is
                treated the same as ``0.0`` (disabled).
            generate_target: Stored flag: generate targets on the fly instead
                of loading them.
            target_type: Must be one of ``TARGET_TYPES``.
            per_image_norm: Normalise each image individually instead of with
                the fixed dataset mean/std.
            num_logits: When non-zero in training mode, ``target_type`` must be
                ``'countception'``.
            transform: Input transform; a default pipeline is built when None.
            target_transform: Stored as ``self.target_transform``.

        Raises:
            RuntimeError: If no inputs are found, or ``target_root`` exists
                but contains no targets.
        """
        extract_extra = not os.path.exists(processing_file)
        input_ids, input_infos = find_inputs(input_root,
                                             types=['.jpg'],
                                             extract_extra=extract_extra)
        if len(input_ids) == 0:
            raise RuntimeError("Found 0 images in : " + input_root)
        self.input_index = input_ids

        # One independent list per input; `[[]] * n` would alias a single list.
        self.patch_index = [[] for _ in input_ids]
        self.patch_count = 0
        self.patch_size = patch_size
        self.patch_stride = patch_stride
        self.prescale = prescale if prescale != 1.0 else 0.0
        assert target_type in TARGET_TYPES
        self.target_type = target_type
        self.num_logits = num_logits
        if train and num_logits:
            assert target_type == 'countception'
        self.generate_target = generate_target  # generate on the fly instead of loading

        self.data_by_id = dict()
        for index, (k, v) in enumerate(zip(input_ids, input_infos)):
            # Patch info can only be computed here when the image size was
            # already extracted by find_inputs.
            if 'width' in v:
                if self.prescale:
                    v = self._apply_prescale(v, self.prescale)
                patch_info = self._calc_patch_info(v)
                num_patches = patch_info['num']
                self.patch_index[index] = list(range(num_patches))
                self.patch_count += num_patches
                v['patches'] = patch_info
            v['index'] = index
            self.data_by_id[k] = v

        self.has_targets = False
        if os.path.exists(target_root):
            targets = find_targets(target_root, input_ids, types=['.npz'])
            if len(targets):
                for k, v in targets.items():
                    self.data_by_id[k]['target'] = v
                self.has_targets = True
            else:
                raise RuntimeError("Found 0 targets in : " + target_root)

        if train:
            assert self.has_targets
        self.train = train

        if counts_file:
            counts_df = pd.read_csv(counts_file).rename(columns=CATEGORY_MAP)
            # Explicit `axis=`: the positional form is gone from current pandas.
            counts_df.drop(['train_id'], axis=1, inplace=True)
            for k, v in counts_df.to_dict(orient='index').items():
                if k in self.data_by_id:
                    d = self.data_by_id[k]
                    d['counts_by_cat'] = v
                    d['count'] = sum(v.values())

        if processing_file:
            process_df = pd.read_csv(processing_file, index_col=False)
            cols = ['xmin', 'ymin', 'xmax', 'ymax', 'width', 'height']
            process_df[cols] = process_df[cols].astype(int)
            process_df['train_id'] = process_df.filename.map(
                lambda x: int(os.path.splitext(x)[0]))
            process_df.set_index(['train_id'], inplace=True)
            for k, v in process_df[cols].to_dict(orient='index').items():
                if k in self.data_by_id:
                    d = self.data_by_id[k]
                    if self.prescale:
                        v = self._apply_prescale(v, self.prescale)
                    patch_info = self._calc_patch_info(v)
                    num_patches = patch_info['num']
                    self.patch_index[d['index']] = list(range(num_patches))
                    self.patch_count += num_patches
                    v['patches'] = patch_info
                    d.update(v)

        if coords_file:
            coords_df = pd.read_csv(coords_file, index_col=False)
            coords_df.x_coord = coords_df.x_coord.astype('int')
            coords_df.y_coord = coords_df.y_coord.astype('int')
            coords_df.category = coords_df.category.replace(CATEGORY_MAP)
            # Grouping by a scalar key keeps the group names plain filenames
            # on every pandas version (a one-element list yields tuple keys on
            # newer releases, which would break os.path.splitext).
            for filename, group in coords_df.groupby('filename'):
                # `.values` replaces the removed DataFrame.as_matrix().
                coords = group[['x_coord', 'y_coord', 'category']].values
                coords = coords[coords[:, 0].argsort()]
                fid = int(os.path.splitext(filename)[0])
                if fid in self.data_by_id:
                    d = self.data_by_id[fid]
                    if self.prescale:
                        coords[:, :2] = np.rint(coords[:, :2] * self.prescale)
                    # NOTE(review): 'xmin'/'ymin' are only set from
                    # processing_file above - confirm it is always supplied
                    # together with coords_file.
                    xy_offset = np.array([d['xmin'], d['ymin']])
                    coords[:, :2] = coords[:, :2] + xy_offset
                    d['coords'] = coords

        self.dataset_mean = [0.43632373, 0.46022959, 0.4618598]
        self.dataset_std = [0.17749958, 0.16631233, 0.16272708]
        if transform is None:
            tfs = []
            if per_image_norm:
                tfs.append(mytransforms.NormalizeImg())
            tfs.append(mytransforms.ToTensor())
            if self.train:
                tfs.append(mytransforms.ColorJitter())
            if not per_image_norm:
                tfs.append(
                    transforms.Normalize(self.dataset_mean, self.dataset_std))
            self.transform = transforms.Compose(tfs)
        else:
            # Previously a caller-supplied transform was silently dropped and
            # `self.transform` was left unset.
            self.transform = transform
        self.target_transform = target_transform
        self.ttime = utils.AverageMeter()
import torch.utils.data as data
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from Libs.pytorch_utils.engine import train_one_epoch, evaluate
from Libs.pytorch_utils import utils
from torchvision.models.detection.rpn import AnchorGenerator

from create_detection_dataset import porpoise_dataset

# Run configuration.
IMG_RESIZE = 800
BATCH_SIZE = 8
NUM_WORKERS = 0
DATA_PATH = "porpoise_detection_data"
TRAIN_SPLIT = 0.1

# Per-channel normalisation statistics used by both pipelines below.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: tensor conversion and resize, then random flips, colour
# and noise perturbations, and finally normalisation.
TRANSFORM_TRAIN = T.Compose([
    T.ToTensor(),
    T.Resize(IMG_RESIZE),
    T.RandomVerticalFlip(0.5),
    T.RandomHorizontalFlip(0.5),
    T.RandomColor(0.4, 0.2, 0.3, 0.1),
    T.AddRandomNoise(0.02, 0.5),
    #T.ShowImg(),
    T.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD))
])

# Validation pipeline: the same conversion, resize and normalisation, with no
# random perturbation.
TRANSFORM_VAL = T.Compose([
    T.ToTensor(),
    T.Resize(IMG_RESIZE),
    T.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD))
])
# Example #6
0
    def __init__(
            self,
            input_root,
            target_file='',
            multi_label=True,
            train=True,
            train_fold=False,
            fold=0,
            img_type='.png',
            img_size=(512, 512),
            test_aug=0,
            transform=None):
        """Index the input images and, when available, their label targets.

        Args:
            input_root: Location searched by ``find_inputs`` for images.
            target_file: Optional CSV with ``Id``, ``fold`` and one column per
                label returned by ``get_labels()``.  Without it the dataset is
                inference-only and ``train`` must be False.
            multi_label: Keep the per-label target matrix; when False each row
                is reduced to the index of its largest entry.
            train: Training mode (enables colour jitter, disables test-time
                augmentation).
            train_fold: With ``train`` False, still select the training rows
                (``fold`` column != ``fold``) instead of the held-out fold.
            fold: Fold id that separates training from held-out rows.
            img_type: Image extension; only ``'.png'`` is accepted.
            img_size: Stored as ``self.img_size``.
            test_aug: Passed to ``get_test_aug`` when not training.
            transform: Input transform; a default pipeline is built when None.

        Raises:
            RuntimeError: If no images are found under ``input_root``.
        """
        assert img_type in ['.png']
        inputs = find_inputs(input_root, types=[img_type])
        if len(inputs) == 0:
            raise RuntimeError("Found 0 images in : " + input_root)

        if target_file:
            target_df = pd.read_csv(target_file)
            if train or train_fold:
                target_df = target_df[target_df['fold'] != fold]
            else:
                target_df = target_df[target_df['fold'] == fold]
            # Non-inplace drop with an explicit axis: the positional axis is
            # gone from current pandas, and inplace edits of a filtered slice
            # trigger chained-assignment warnings.
            target_df = target_df.drop(['fold'], axis=1)

            input_dict = dict(inputs)
            # Keep only rows whose image exists; copy so the new column is
            # written to an independent frame.
            target_df = target_df[target_df.Id.map(lambda x: x in input_dict)].copy()
            target_df['filename'] = target_df.Id.map(lambda x: input_dict[x])
            self.inputs = target_df['Id'].apply(lambda x: os.path.join(input_root, x)).tolist()

            labels = get_labels()
            # `.values` on the selected columns replaces the removed
            # DataFrame.as_matrix(columns=...).
            self.target_array = target_df[list(labels)].values.astype(np.float32)
            if not multi_label:
                self.target_array = np.argmax(self.target_array, axis=1)

            self.target_array = torch.from_numpy(self.target_array)
        else:
            assert not train
            inputs = sorted(inputs, key=lambda x: natural_key(x[0]))
            self.target_array = None
            self.inputs = [x[1] for x in inputs]

        self.train = train
        # The '.jpg' settings are unreachable while the assert above only
        # admits '.png'; they are kept in case that restriction is lifted.
        if img_type == '.jpg':
            self.dataset_mean = [0.31535792, 0.34446435, 0.30275137]
            self.dataset_std = [0.05338271, 0.04247036, 0.03543708]
            jitter = dict(brightness=0.01, contrast=0.01, saturation=0.01)
        else:
            # For png
            self.dataset_mean = [0.0804419, 0.05262986, 0.05474701, 0.08270896]
            self.dataset_std = [0.13000701, 0.08796628, 0.1386317, 0.12718021]
            jitter = dict(brightness=0.2, contrast=0.2, saturation=0.2)

        self.img_size = img_size
        self.img_type = img_type
        if not train:
            self.test_aug = get_test_aug(test_aug)
        else:
            self.test_aug = []

        if transform is None:
            tfs = [mytransforms.ToTensor()]
            if self.train:
                tfs.append(mytransforms.ColorJitter(**jitter))
            tfs.append(transforms.Normalize(self.dataset_mean, self.dataset_std))
            self.transform = transforms.Compose(tfs)
        else:
            # Previously a caller-supplied transform was silently dropped and
            # `self.transform` was left unset.
            self.transform = transform
# Example #7
0
    def __init__(
            self,
            input_root,
            target_file='',
            tags_type='all',
            multi_label=True,
            train=True,
            train_fold=False,
            fold=0,
            img_type='.jpg',
            img_size=(256, 256),
            test_aug=0,
            transform=None):
        """Index the input images and, when available, their tag targets.

        Args:
            input_root: Location searched by ``find_inputs`` for images.
            target_file: Optional CSV with ``image_name``, ``fold`` and one
                column per tag returned by ``get_tags(tags_type)``.  Without it
                the dataset is inference-only and ``train`` must be False.
            tags_type: Selects the tag set via ``get_tags``.
            multi_label: Keep the per-tag target matrix; when False each row
                is reduced to the index of its largest entry.
            train: Training mode (enables colour jitter, disables test-time
                augmentation).
            train_fold: With ``train`` False, still select the training rows
                (``fold`` column != ``fold``) instead of the held-out fold.
            fold: Fold id that separates training from held-out rows.
            img_type: Image extension, ``'.jpg'`` or ``'.tif'``.
            img_size: Stored as ``self.img_size``.
            test_aug: Passed to ``get_test_aug`` when not training.
            transform: Input transform; a default pipeline is built when None.

        Raises:
            RuntimeError: If no images are found under ``input_root``.
        """
        assert img_type in ['.jpg', '.tif']
        inputs = find_inputs(input_root, types=[img_type])
        if len(inputs) == 0:
            raise RuntimeError("Found 0 images in : " + input_root)

        if target_file:
            target_df = pd.read_csv(target_file)
            if train or train_fold:
                target_df = target_df[target_df['fold'] != fold]
            else:
                target_df = target_df[target_df['fold'] == fold]
            # Non-inplace drop with an explicit axis: the positional axis is
            # gone from current pandas, and inplace edits of a filtered slice
            # trigger chained-assignment warnings.
            target_df = target_df.drop(['fold'], axis=1)

            input_dict = dict(inputs)
            print(len(input_dict), len(target_df.index))
            # Keep only rows whose image exists; copy so the new column is
            # written to an independent frame.
            target_df = target_df[target_df.image_name.map(lambda x: x in input_dict)].copy()
            target_df['filename'] = target_df.image_name.map(lambda x: input_dict[x])
            self.inputs = target_df['filename'].tolist()

            tags = get_tags(tags_type)
            # `.values` on the selected columns replaces the removed
            # DataFrame.as_matrix(columns=...).
            self.target_array = target_df[list(tags)].values.astype(np.float32)
            if not multi_label:
                self.target_array = np.argmax(self.target_array, axis=1)

            self.target_array = torch.from_numpy(self.target_array)
        else:
            assert not train
            inputs = sorted(inputs, key=lambda x: natural_key(x[0]))
            self.target_array = None
            self.inputs = [x[1] for x in inputs]

        self.tags_type = tags_type
        self.train = train
        if img_type == '.jpg':
            self.dataset_mean = [0.31535792, 0.34446435, 0.30275137]
            self.dataset_std = [0.05338271, 0.04247036, 0.03543708]
            jitter = dict(brightness=0.01, contrast=0.01, saturation=0.01)
        else:
            # For IR,R,G
            self.dataset_mean = [6398.84897763/2**16, 4988.75696302/2**16, 4270.74552695/2**16] # NRG
            self.dataset_std = [858.46477922/2**16, 399.06597519/2**16, 408.51461036/2**16] # NRG
            jitter = dict(brightness=0.2, contrast=0.2, saturation=0.2)

        self.img_size = img_size
        self.img_type = img_type
        if not train:
            self.test_aug = get_test_aug(test_aug)
        else:
            self.test_aug = []

        if transform is None:
            tfs = [mytransforms.ToTensor()]
            if self.train:
                tfs.append(mytransforms.ColorJitter(**jitter))
            tfs.append(transforms.Normalize(self.dataset_mean, self.dataset_std))
            self.transform = transforms.Compose(tfs)
        else:
            # Previously a caller-supplied transform was silently dropped and
            # `self.transform` was left unset.
            self.transform = transform