Exemplo n.º 1
0
def seg_learner(tiny_seg_databunch, seg_classes):
    """Build a U-Net learner over *tiny_seg_databunch*.

    Uses a ResNet-18 backbone, 1e-2 weight decay, and an objective
    metric derived from *seg_classes*.
    """
    objective = get_objective_fct(seg_classes)
    learner = unet_learner(tiny_seg_databunch, models.resnet18,
                           wd=1e-2, metrics=objective)
    return learner
Exemplo n.º 2
0
 def create_model_from_zip(self, weights_fn):
     """Create a 2d U-net model from a zipped weights file and store it as
     an instance attribute (``self.model``).

     The archive is expected to contain saved fastai weights plus a
     ``<stem>_codes.json`` file listing the label classes, which is loaded
     into ``self.codes``.

     Args:
         weights_fn (pathlib.Path): Path to the ``.zip`` of model weights.
     """
     weights_fn = weights_fn.resolve()
     logger.info(
         f"Unzipping the model weights and label classes from {weights_fn}")
     output_dir = self.root_dir / "extracted_model_files"
     output_dir.mkdir(exist_ok=True)
     with ZipFile(weights_fn, mode='r') as zf:
         zf.extractall(output_dir)
     # Load in the label classes from the json file
     with open(output_dir / f"{weights_fn.stem}_codes.json") as jf:
         self.codes = json.load(jf)
     # Dummy files and dataset are required so fastai can construct a
     # learner before the real weights are loaded.
     self.create_dummy_files()
     self.create_dummy_dataset()
     logger.info("Creating 2d U-net model for prediction.")
     self.model = unet_learner(self.data,
                               models.resnet34,
                               model_dir=output_dir)
     logger.info("Loading in the saved weights.")
     # load() takes the weights name without a path or extension.
     self.model.load(weights_fn.stem)
     # Remove the restriction on the model prediction size
     self.model.data.single_ds.tfmargs['size'] = None
Exemplo n.º 3
0
def seg_learner(tiny_seg_databunch, seg_classes):
    """Build a U-Net learner over *tiny_seg_databunch*.

    Uses a ResNet-18 backbone, 1e-2 weight decay, and a ratio-correct
    metric derived from *seg_classes*.
    """
    ratio_metric = get_ratio_correct_metric(seg_classes)
    learner = unet_learner(tiny_seg_databunch, models.resnet18,
                           wd=1e-2, metrics=ratio_metric)
    return learner
Exemplo n.º 4
0
 def create_learner_gen(data):
     """Create the generator learner: a weight-normalised, self-attending
     U-Net (ResNet-34 encoder) trained with a flattened L1 loss.
     """
     unet_kwargs = dict(
         blur=True,
         norm_type=fai.NormType.Weight,
         self_attention=True,
         y_range=(-3., 3.),
         loss_func=l1_loss_flat,
         metrics=[l1_loss_flat],
         wd=1e-2,
     )
     return fv.unet_learner(data, fv.models.resnet34, **unet_kwargs)
Exemplo n.º 5
0
def train_and_eval():
    """Train a U-Net segmentation model and evaluate it on the test set.

    All configuration (paths, codes, input size, hyper-parameters) is read
    from the module-level ``cf`` config object.
    """
    print("Creating databunch and learner...")
    data = create_databunch(cf.PATH_IMG, cf.PATH_LBL, cf.CODES, cf.INPUT_SIZE,
                            cf.BATCH_SIZE)
    learner = unet_learner(data, models.resnet34, metrics=dice)
    print("Training model...")
    train_model(learner, cf.FREEZE_LAYER, cf.EPOCHS, cf.LEARNING_RATE,
                cf.WEIGHT_DECAY, cf.SAVE_MODEL)
    print("Evaluating model...")
    # Renamed from `eval` to avoid shadowing the builtin.
    results = eval_model(cf.PATH_TO_TESTING, cf.CODES, cf.INPUT_SIZE,
                         cf.BATCH_SIZE, learner)
    print(f'Loss = {results[0]}, Accuracy = {results[1]}')
    # Bug fix: the two fragments previously concatenated without a space,
    # printing "...your model!Please find...".
    print("You have successfully trained and evaluated your model! "
          "Please find it in the appropriate directory.")
Exemplo n.º 6
0
def get_learner(data, metrics=None, model_dir='models'):
    """Construct a self-attending ResNet-34 U-Net learner with a
    generalized-dice loss; returns a mixed-precision learner when a GPU
    is in use.

    Args:
        data: Databunch to train on.
        metrics: Optional list of metrics (defaults to none).
        model_dir: Directory for saved model weights.
    """
    learner = unet_learner(
        data,
        models.resnet34,
        metrics=metrics if metrics is not None else [],
        self_attention=True,
        loss_func=generalized_dice_loss,
        wd=1e-7,
        model_dir=model_dir,
    )
    return learner.to_fp16() if USE_GPU else learner
Exemplo n.º 7
0
 def create_model(self):
     """Create a 2d U-net model for training and store it as an instance
     attribute (``self.model``).

     The learner uses the metrics, weight decay and loss function set on
     this instance, appends its history to 'unet_training_history', and
     keeps the best model (maximising ``self.monitor``) under the name
     "best_unet_model".
     """
     logger.info("Creating 2d U-net model for training.")
     self.model = unet_learner(self.data,
                               models.resnet34,
                               metrics=self.metrics,
                               wd=self.weight_decay,
                               loss_func=self.loss_func,
                               callback_fns=[
                                   partial(CSVLogger,
                                           filename='unet_training_history',
                                           append=True),
                                   partial(SaveModelCallback,
                                           monitor=self.monitor,
                                           mode='max',
                                           name="best_unet_model")
                               ])
Exemplo n.º 8
0
def get_learn(data, model, name, weighted, cut):
    """Build a mixed-precision U-Net learner with a cross-entropy loss
    that ignores the background class (index 0).

    Args:
        data: Databunch to train on.
        model: Encoder architecture.
        name: Sub-directory (under "models") for saved weights.
        weighted: If True, weight the loss with per-class weights derived
            from the data.
        cut: Where to cut the encoder body.

    Returns:
        The configured fp16 learner.
    """
    metrics = get_metrics()
    learn = unet_learner(
        data,
        model,
        split_on=_resnet_split,
        cut=cut,
        metrics=metrics,
        path="models",
        model_dir=name,
        wd=1e-2,
    )
    # Bug fix: fastai's Learner reads `loss_func`; the previous assignment
    # to `loss_fn` created an unused attribute, so the default loss was
    # silently used instead of CrossEntropyFlat.
    if weighted:
        weights = get_loss_weights(data, learn)
        learn.loss_func = CrossEntropyFlat(weight=Tensor(weights).cuda(),
                                           ignore_index=0)
    else:
        learn.loss_func = CrossEntropyFlat(ignore_index=0)
    learn = learn.to_fp16()
    return learn
Exemplo n.º 9
0
    def init_model(self):
        """
        Initialize learner with parameters set in constructor.

        Builds the augmentation pipeline, a labelled (optionally
        train/valid-split) source list, a normalized databunch, and finally
        a U-net learner with a resnet18 backbone stored on ``self.learner``.
        """
        defaults.device = torch.device(self.device)
        # Network input size: source shape scaled down by the configured
        # resize factor.
        size = self.src_shape // self.train_params['resize_factor']
        tfms = get_transforms(do_flip=True,
                              flip_vert=False,
                              max_lighting=0.8,
                              p_affine=0,
                              p_lighting=0.5)

        # Drop the first training transform; keep validation transforms.
        tfms = (tfms[0][1:], tfms[1])

        get_label = partial(get_label_from_image_name, self.labels_path)

        if self.valid_func:
            # Split train/valid using the user-supplied predicate.
            src = (SegItemListCustom.from_folder(
                self.videos_path, ignore_empty=True,
                recurse=True).filter_by_func(
                    self.filtered_by).split_by_valid_func(
                        self.valid_func).label_from_func(
                            get_label,
                            classes=np.array(['non-screen', 'screen'])))

        else:
            # No validation split: train on everything.
            src = (SegItemListCustom.from_folder(
                self.videos_path, ignore_empty=True,
                recurse=True).filter_by_func(
                    self.filtered_by).split_none().label_from_func(
                        get_label, classes=np.array(['non-screen', 'screen'])))

        LOGGER.info("Creating databunch with transformations")
        data = (src.transform(tfms, size=size, tfm_y=True).databunch(
            bs=self.train_params['batch_size']).normalize(imagenet_stats))

        LOGGER.info("Creating unet-learner with resnet18 backbone.")
        self.learner = unet_learner(data,
                                    models.resnet18,
                                    metrics=[acc, dice, iou_sem_seg])
Exemplo n.º 10
0

def bce_logits_floatify(input, target, reduction='mean'):
    """Binary cross-entropy on raw logits, casting *target* to float.

    fastai passes integer mask targets, while BCE-with-logits requires
    float targets of the same dtype as the input.
    """
    float_target = target.float()
    return F.binary_cross_entropy_with_logits(input, float_target,
                                              reduction=reduction)


def dice_metric(pred, targs, threshold=0):
    """Dice score of thresholded predictions against binary targets.

    Predictions are binarised at *threshold*; the denominator is smoothed
    with +1.0 so an empty prediction/target pair yields 0 rather than NaN.
    """
    hard_pred = (pred > threshold).float()
    float_targs = targs.float()  # make sure target is float too
    intersection = (hard_pred * float_targs).sum()
    union = (hard_pred + float_targs).sum()
    return 2.0 * intersection / (union + 1.0)


if __name__ == '__main__':
    torch.cuda.empty_cache()

    # 80/20 split of the cloud dataset.
    cloud_ds = CloudDataset('train')
    train, test = cloud_ds.split(train_ratio=.8, test_ratio=.2)
    print(len(train), len(test))

    # NOTE(review): the train split is passed as both train and valid —
    # presumably deliberate for this smoke run; confirm.
    data = DataBunch.create(train, train, bs=2, num_workers=1)

    learn = unet_learner(data=data,
                         arch=models.resnet18,
                         loss_func=bce_logits_floatify,
                         metrics=[dice_metric])

    # Inspect the first encoder layer and the final head layer.
    print(learn.model[0][0])
    print(learn.model[-1][-1])
    learn.fit(epochs=20)
Exemplo n.º 11
0
# Sanity-check a validation batch before training.
# NOTE(review): show_batch renders the figure; wrapping it in print()
# presumably just prints its return value — confirm this is intentional.
print(data.show_batch(2, figsize=(10, 7), ds_type=vision.DatasetType.Valid))

# Map each class name to its integer id, and look up the 'Void' id so the
# accuracy metric can ignore unlabeled pixels.
object2id = {value: key for key, value in enumerate(objects_in_image)}
void_index = object2id['Void']


def camvid_accuracy(inputs, target):
    """Per-pixel accuracy over all pixels not labelled 'Void'."""
    target = target.squeeze(1)
    keep = target != void_index
    hits = inputs.argmax(dim=1)[keep] == target[keep]
    return hits.float().mean()


# Build a ResNet-34 U-Net scored with the void-aware accuracy metric.
learner = vision.unet_learner(data,
                              vision.models.resnet34,
                              metrics=camvid_accuracy,
                              wd=WD)
# Plot the LR-finder curve to sanity-check the chosen learning rate.
learner.lr_find()
learner.recorder.plot()
# Stage 1: fine-tune the decoder head only (encoder still frozen).
learner.fit_one_cycle(EPOCHS_FINETUNE,
                      max_lr=slice(LR),
                      pct_start=PCT_START_FINETUNE)
learner.save('stage-1-34-unet')
# Show results
learner.show_results(rows=3, figsize=(8, 9))

# After warming up, start to train all network
learner.unfreeze()
learner.fit_one_cycle(EPOCHS,
                      max_lr=slice(LR / 400, LR / 4),
Exemplo n.º 12
0
def train_unet(epochs=5, batch_size=1, lr=0.1, val_percent=0.1):
    """Train a ResNet-34 U-net on the 2D-Zernike PSF mask dataset.

    Runs two phases of a flat-then-annealed LR schedule (frozen encoder,
    then unfrozen at lr/5), saving/exporting the model after each phase.

    Args:
        epochs: Epochs per training phase.
        batch_size: Batch size; reduced by one third when running off-grid.
        lr: Peak learning rate of the first phase.
        val_percent: Fraction of files held out for validation.
    """
    print("Start script")
    if args.isgrid is False:
        filename = "/media/adrian/E2B45A26B459FD8B/psfmaskmoving_zernike2d_128_n_1_s_0_p_0_b_0__noise_1_2dzernike_test/"
        batch_size = int(batch_size // 1.5)
    else:
        filename = "/idiap/temp/ashajkofci/psfmaskmoving_zernike2d_128_n_1_s_0_p_0_b_0__noise_1_2dzernike_train/"

        batch_size = batch_size
    # Bug fix: was `os.getcwd() + 'data'`, which yields ".../cwddata"
    # (missing path separator; compare the '/data/' paths used below).
    os.environ['TORCH_HOME'] = os.path.join(os.getcwd(), 'data')

    # Collect input images, drop the "_mask" label images, and sort by the
    # numeric id embedded in the file name.
    all_files_list = glob.glob(filename + "*/*.png")
    print('{} files found in {}'.format(len(all_files_list), filename))

    all_files_list = [x for x in all_files_list if "_mask" not in x]
    print('{} files found'.format(len(all_files_list)))

    all_files_list = sorted(all_files_list, key=lambda name: int(name[-13:-4]))
    print('{} files found'.format(len(all_files_list)))

    print('{} files found'.format(len(all_files_list)))
    print("Convert to Dataframe")
    df = pd.DataFrame(all_files_list)

    print("Create transforms")
    print("Create data")

    src = (MyImageImageList.from_df(df,
                                    path='/').split_by_rand_pct(val_percent))

    print("Creating dataloaders")

    data_gen = get_data(bs=batch_size, size=224, src=src)

    print("Creating learner")
    optar = partial(Ranger, betas=(0.95, 0.99), eps=1e-6)

    selfattention = False
    modelname = 'resnet34unetanneal'
    learn = unet_learner(data_gen,
                         model_resnet34,
                         pretrained=True,
                         self_attention=selfattention,
                         norm_type=NormType.Weight,
                         loss_func=loss_with_flag,
                         y_range=(0., 1.0))

    # Bug fix: same missing-separator bug as TORCH_HOME above.
    learn.model_dir = os.path.join(os.getcwd(), 'data')
    learn.opt_func = optar
    print("Summary...")
    dt_string = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
    name = f'{dt_string}_PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_N_{args.network}_ATT_{selfattention}_MODEL_{modelname}'
    mycallback = partial(TensorboardLogger, path='runs', run_name=name)
    learn.callback_fns.append(mycallback)
    # Drop the final layer before summarising/training.
    learn.model.layers = learn.model.layers[:-1]
    print(learn.summary())

    # Phase 1: frozen encoder, flat-then-annealed LR schedule.
    flattenAnneal(learn, lr, epochs, 0.7)

    torch.save(learn.model,
               os.getcwd() + '/data/' + name + '_TORCH_INTERMEDIATE.pth')
    learn.export(os.getcwd() + '/data/' + name + '_INTERMEDIATE_EXPORT.pth')

    # Phase 2: unfreeze and fine-tune at a fifth of the learning rate,
    # logging to a separate TensorBoard run.
    learn.unfreeze()
    flattenAnneal(learn, lr / 5, epochs, 0.7)
    mycallback = partial(TensorboardLogger,
                         path='runs',
                         run_name=name + '_UNFREEZE')
    learn.callback_fns[-1] = mycallback

    torch.save(learn.model, os.getcwd() + '/data/' + name + '_TORCH.pth')
    learn.export(os.getcwd() + '/data/' + name + '_EXPORT.pth')
Exemplo n.º 13
0
def train_unet(epochs=5, batch_size=1, lr=0.1, val_percent=0.1):
    """Train a ResNet-34 U-net on the movement-generator dataset.

    Runs one-cycle training with a frozen encoder, then unfreezes and
    fine-tunes with discriminative learning rates, saving intermediate and
    final models under 'data/'.

    Args:
        epochs: Epochs per phase (frozen, then unfrozen).
        batch_size: Batch size; reduced by one third when running off-grid.
        lr: Peak learning rate for the one-cycle schedules.
        val_percent: Fraction of files held out for validation.
    """
    print("Start script")
    if args.isgrid is False:
        filename = "/media/adrian/OMENDATA/data/movementgenerator_data_multiple2/"
        batch_size = int(batch_size//1.5)
    else:
        filename = "/idiap/temp/ashajkofci/movementgenerator_data_multiple2/"

        batch_size = batch_size
    os.environ['TORCH_HOME'] = os.getcwd()+'/data'


    # Collect images and sort by the numeric id embedded in the file name.
    all_files_list = glob.glob(filename + "*/*.png")
    print('{} files found in {}'.format(len(all_files_list), filename))

    all_files_list = sorted(all_files_list, key=lambda name: int(name[-13:-4]))
    print('{} files found'.format(len(all_files_list)))

    print("Convert to Dataframe")
    df = pd.DataFrame(all_files_list)

    print("Create transforms")
    print("Create data")

    class MyImageList(ImageList):
        # Label images: multi-channel arrays stored as compressed .npz.
        def open(self, fn):
            image = np.load(fn)['arr_0']
            # HWC -> CHW for torch.
            image = np.transpose(image, (2, 0, 1))
            # Per-channel rescaling. NOTE(review): channel 2 is left
            # unscaled — confirm this is intentional.
            image[1] /= 128.0
            image[0] /= 128.0
            image[3] /= 5.0
            image = torch.Tensor(image)
            #print('{} {} {}'.format(image.min(), image.max(), image.mean()))

            image = Image(image)
            return image

    class MyImageImageList(ImageImageList):
        # Inputs are grayscale .png images; labels open via MyImageList.
        _label_cls = MyImageList

        def open(self, fn):
            return Image(grayloader(fn))

    def get_data(bs, size):
        # Labels come from '<name>_mask.npy.npz'; light augmentation on
        # inputs only, then random crops applied to input and target.
        data = (src.label_from_func(lambda x: str(x).replace('.png', '_mask.npy.npz'))
                .transform(get_transforms(do_flip = False, max_zoom=1.0, max_warp=0.0, max_rotate=0, max_lighting=0.3), tfm_y=False)
                .transform([rand_crop(), rand_crop()], tfm_y=True, size= size)
                .databunch(bs=bs).normalize(imagenet_stats, do_y=False))

        data.c = 4
        return data

    src = (MyImageImageList.from_df(df, path='/')
            .split_by_rand_pct(val_percent))

    print("Creating dataloaders")

    data_gen = get_data(bs=batch_size, size=448)

    #dataset = DatasetFromFolder(filename, loader = grayloader,  transform=transform, target_transform=transform)

    #n_val = int(len(dataset) * val_percent)
    #n_train = len(dataset) - n_val
    #train, val = rs(dataset, [n_train, n_val])
    #data = ImageDataBunch.create(train, val, bs=batch_size, num_workers=4)
    #data.c = 2
    #data.normalize(imagenet_stats)
    #data_gen.show_batch(2)
    #plt.show()
    print("Creating learner")
    optar = partial(DiffGrad, version=1, betas=(.95, .999), eps=1e-6)




    selfattention=False
    modelname='resnet34'

    learn = unet_learner(data_gen, model_resnet34, pretrained=True, loss_func = MSELossFlat(), self_attention=False)

    learn.model_dir = os.getcwd()+'/data'
    learn.opt_func = optar
    print("Summary...")
    dt_string = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
    name =f'{dt_string}_PROJ_{args.nbgrid}_LR_{lr}_BS_{batch_size}_N_{args.network}_ATT_{selfattention}_MODEL_{modelname}'
    mycallback = partial(TensorboardLogger, path='runs', run_name=name, run_type='unet')
    learn.callback_fns.append(mycallback)
    # Checkpoint on every metric improvement.
    learn.callback_fns.append(partial(SaveModelCallback,every='improvement', name='{}/{}.pth'.format(dir_checkpoint, name)))
    #learn.model.layers = learn.model.layers[:-1]
    print(learn.summary())
    #learn.lr_find(stop_div = False, num_it=200)
    #learn.recorder.plot(suggestion=True)
    #plt.show()

    # Phase 1: frozen encoder.
    learn.fit_one_cycle(epochs, max_lr = lr)
    torch.save(learn.model, 'data/'+name+'_TORCH_INTERMEDIATE.pth')
    # Phase 2: unfreeze and fine-tune with discriminative LRs.
    learn.unfreeze()
    learn.fit_one_cycle(epochs, max_lr = slice(lr/50,lr/5))
    learn.save(name+'_FINAL')
    torch.save(learn.model, 'data/'+name+'_TORCH.pth')
Exemplo n.º 14
0
    def train(self, tmp_dir):
        """Train a model.

        Syncs previous training output and chip zips from cloud storage,
        builds a fastai segmentation databunch (optionally oversampled),
        trains a U-net with precision/recall/F-beta metrics, and syncs
        checkpoints and logs back to the cloud.

        Args:
            tmp_dir: Local scratch directory for downloads and outputs.
        """
        self.print_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
            # '<split>-img/<name>' -> '<split>-labels/<name>'
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        # Reserve index 0 for 'nodata' unless the class map already uses it.
        if 0 not in class_map.get_keys():
            classes = ['nodata'] + classes
        num_workers = 0 if self.train_opts.debug else 4

        train_img_dir = self.subset_training_data(chip_dir)

        def get_data(train_sampler=None):
            data = (SegmentationItemList.from_folder(chip_dir).split_by_folder(
                train=train_img_dir, valid='val-img').label_from_func(
                    get_label_path, classes=classes).transform(
                        get_transforms(flip_vert=self.train_opts.flip_vert),
                        size=size,
                        tfm_y=True).databunch(bs=self.train_opts.batch_sz,
                                              num_workers=num_workers,
                                              train_sampler=train_sampler))
            return data

        data = get_data()
        oversample = self.train_opts.oversample
        if oversample:
            # Rebuild the databunch with a weighted sampler that boosts
            # rare classes to the requested proportion.
            sampler = get_weighted_sampler(data.train_ds,
                                           oversample['rare_class_ids'],
                                           oversample['rare_target_prop'])
            data = get_data(train_sampler=sampler)

        if self.train_opts.debug:
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        # Setup learner; metrics ignore the 'nodata' class (index 0).
        ignore_idx = 0
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted',
                  clas_idx=1,
                  beta=1,
                  ignore_idx=ignore_idx)
        ]
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = unet_learner(data,
                             model_arch,
                             metrics=metrics,
                             wd=self.train_opts.weight_decay,
                             bottle=True,
                             path=train_dir)
        learn.unfreeze()

        if self.train_opts.fp16 and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            print('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            # strict=False tolerates mismatched keys in the state dict.
            learn.model.load_state_dict(torch.load(
                pretrained_path, map_location=learn.data.device),
                                        strict=False)

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            # No explicit LR: use the steepest-gradient point from lr_find.
            if lr is None:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                print('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
    def _learn(self,
               data_path: Path,
               params: Tuple[Any],
               stop_early: bool,
               learner_type="cnn") -> Tuple[Learner, Time]:
        """
        Given a set of permutations, create a learner to train and validate on
        the dataset.
        Args:
            data_path (Path): The location of the data to use
            params (Tuple[Any]): The set of parameters to train and validate on
            stop_early (bool): Whether or not to stop early if the evaluation
            metric does not improve
            learner_type (str): "cnn" for classification, "unet" for
            segmentation
        Returns:
            Tuple[Learner, Time]: Learn object from Fastai and the duration in
            seconds it took.
        """
        start = time.time()
        params = self._param_tuple_to_dict(params)

        transform = params["transform"]
        im_size = params["im_size"]
        epochs = params["epochs"]
        batch_size = params["batch_size"]
        architecture = params["architecture"]
        dropout = params["dropout"]
        learning_rate = params["learning_rate"]
        discriminative_lr = params["discriminative_lr"]
        training_schedule = params["training_schedule"]
        one_cycle_policy = params["one_cycle_policy"]
        weight_decay = params["weight_decay"]

        callbacks = list()
        if stop_early:
            callbacks.append(ParameterSweeper._early_stopping_callback())

        # Initialize CNN learner
        if learner_type == "cnn":
            data = self._get_data_bunch_imagelist(data_path, transform,
                                                  im_size, batch_size)
            learn = cnn_learner(
                data,
                architecture.value,
                metrics=accuracy,
                ps=dropout,
                callback_fns=callbacks,
            )

        # Initialize UNet learner
        elif learner_type == "unet":
            classes = read_classes(os.path.join(data_path, "classes.txt"))
            data = self._get_data_bunch_segmentationitemlist(
                data_path, transform, im_size, batch_size, classes)
            metric = get_objective_fct(classes)
            metric.__name__ = "ratio_correct"
            learn = unet_learner(
                data,
                architecture.value,
                wd=1e-2,
                metrics=metric,
                callback_fns=callbacks,
            )

        else:
            print(f"Mode learner_type={learner_type} not supported.")

        head_learning_rate = learning_rate
        body_learning_rate = (slice(learning_rate, 3e-3)
                              if discriminative_lr else learning_rate)

        # Bug fix: `wd` was declared as `wd=float`, i.e. the *type* `float`
        # as a default value rather than a type annotation. Every call site
        # passes wd positionally, so `wd: float` is equivalent and correct.
        def fit(learn: Learner,
                e: int,
                lr: Union[slice, float],
                wd: float) -> partial:
            """ Returns a partial func for either fit_one_cycle or fit
            depending on <one_cycle_policy> """
            return (partial(learn.fit_one_cycle, cyc_len=e, max_lr=lr, wd=wd)
                    if one_cycle_policy else partial(
                        learn.fit, epochs=e, lr=lr, wd=wd))

        if training_schedule is TrainingSchedule.head_only:
            if discriminative_lr:
                raise Exception(
                    "Cannot run discriminative_lr if training schedule is head_only."
                )
            else:
                # Without discriminative_lr, body_learning_rate equals
                # head_learning_rate here.
                fit(learn, epochs, body_learning_rate, weight_decay)()

        elif training_schedule is TrainingSchedule.body_only:
            learn.unfreeze()
            fit(learn, epochs, body_learning_rate, weight_decay)()

        elif training_schedule is TrainingSchedule.head_first_then_body:
            # Train the head for a quarter of the epochs, then fine-tune
            # the whole network for the remainder.
            head_epochs = epochs // 4
            fit(learn, head_epochs, head_learning_rate, weight_decay)()
            learn.unfreeze()
            fit(learn, epochs - head_epochs, body_learning_rate,
                weight_decay)()

        end = time.time()
        duration = end - start

        return learn, duration
    def train(self, tmp_dir):
        """Train a model.

        Reads hyperparameters from ``self.config``, downloads chips and any
        previous training state from cloud storage, trains a fastai U-net,
        supports resuming from a saved learner, and syncs logs, checkpoints
        and the exported model back to the cloud.

        Args:
            tmp_dir: Local scratch directory for downloads and outputs.
        """
        # Setup hyperparams.
        bs = int(self.config.get('bs', 8))
        wd = self.config.get('wd', 1e-2)
        lr = self.config.get('lr', 2e-3)
        num_epochs = int(self.config.get('num_epochs', 10))
        model_arch = self.config.get('model_arch', 'resnet50')
        model_arch = getattr(models, model_arch)
        fp16 = self.config.get('fp16', False)
        sync_interval = self.config.get('sync_interval', 1)
        debug = self.config.get('debug', False)

        chip_uri = self.config['chip_uri']
        train_uri = self.config['train_uri']

        # Sync output of previous training run from cloud.
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
            # '<split>-img/<name>' -> '<split>-labels/<name>'
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        # Index 0 is reserved for the 'nodata' class.
        classes = ['nodata'] + self.task_config.class_map.get_class_names()
        data = (SegmentationItemList.from_folder(chip_dir).split_by_folder(
            train='train-img', valid='val-img').label_from_func(
                get_label_path,
                classes=classes).transform(get_transforms(),
                                           size=size,
                                           tfm_y=True).databunch(bs=bs))
        print(data)

        if debug:
            # We make debug chips during the run-time of the train command
            # rather than the chip command
            # because this is a better test (see "visualize just before the net"
            # in https://karpathy.github.io/2019/04/25/recipe/), and because
            # it's more convenient since we have the databunch here.
            # TODO make color map based on colors in class_map
            # TODO get rid of white frame
            # TODO zip them
            def _make_debug_chips(split):
                debug_chips_dir = join(train_uri,
                                       '{}-debug-chips'.format(split))
                make_dir(debug_chips_dir)
                ds = data.train_ds if split == 'train' else data.valid_ds
                for i, (x, y) in enumerate(ds):
                    x.show(y=y)
                    plt.savefig(join(debug_chips_dir, '{}.png'.format(i)))
                    plt.close()

            _make_debug_chips('train')
            _make_debug_chips('val')

        # Setup learner.
        metrics = [semseg_acc]
        learn = unet_learner(data,
                             model_arch,
                             metrics=metrics,
                             wd=wd,
                             bottle=True)
        learn.unfreeze()

        if fp16 and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup ability to resume training if model exists.
        # This hack won't properly set the learning as a function of epochs
        # when resuming.
        learner_path = join(train_dir, 'learner.pth')
        log_path = join(train_dir, 'log')

        start_epoch = 0
        if isfile(learner_path):
            print('Loading saved model...')
            # Resume one epoch past the last one recorded in the CSV log.
            start_epoch = get_last_epoch(str(log_path) + '.csv') + 1
            if start_epoch >= num_epochs:
                print('Training is already done. If you would like to re-train'
                      ', delete the previous results of training in '
                      '{}.'.format(train_uri))
                return

            # load() takes the path without the '.pth' extension.
            learn.load(learner_path[:-4])
            print('Resuming from epoch {}'.format(start_epoch))
            print(
                'Note: fastai does not support a start_epoch, so epoch 0 below '
                'corresponds to {}'.format(start_epoch))
        epochs_left = num_epochs - start_epoch

        # Setup callbacks and train model.
        callbacks = [
            SaveModelCallback(learn, name=learner_path[:-4]),
            MyCSVLogger(learn, filename=log_path, start_epoch=start_epoch),
            SyncCallback(train_dir, train_uri, sync_interval)
        ]
        learn.fit(epochs_left, lr, callbacks=callbacks)

        # Export model for inference
        model_uri = self.config['model_uri']
        model_path = get_local_path(model_uri, tmp_dir)
        learn.export(model_path)

        # Sync output to cloud.
        sync_to_dir(train_dir, train_uri)
Exemplo n.º 17
0
import torchvision
from fastai.vision import ImageDataBunch, cnn_learner, unet_learner, SegmentationItemList, imagenet_stats

# Fixture generation: export a minimal classification learner and a minimal
# segmentation learner (e.g. for use in tests).
data = ImageDataBunch.from_csv('fixtures/classification').normalize(
    imagenet_stats)
learner = cnn_learner(data, torchvision.models.resnet34)
learner.export()

# Segmentation masks are looked up by image stem; three classes, no
# train/valid split.
data = (SegmentationItemList.from_folder(
    'fixtures/segmentation/images').split_none().label_from_func(
        lambda x: f'fixtures/segmentation/masks/{x.stem}.jpg',
        classes=[0, 1, 2]).databunch().normalize(imagenet_stats))
learner = unet_learner(data, torchvision.models.resnet50)
learner.export('../export.pkl')
Exemplo n.º 18
0
# Base reconstruction loss: plain L1 over all elements.
base_loss = fv.F.l1_loss


def custom_loss(input, target):
    """L1 loss with a 100x extra penalty on elements whose target lies
    strictly inside (0, 1)."""
    in_range = (target > 0) * (target < 1)
    overall = base_loss(input, target)
    penalty = base_loss(input[in_range], target[in_range])
    return overall + penalty * 100


# Weight-normalised U-Net trained from scratch with the custom L1 loss.
wd = 1e-3
learn = fv.unet_learner(
    data,
    arch,
    wd=wd,
    loss_func=custom_loss,
    blur=False,
    norm_type=fv.NormType.Weight,
    pretrained=False,
)

# Reclaim memory before training.
fv.gc.collect()

# %%

# learn.lr_find()
# learn.recorder.plot()

#%%

lr = 1e-3
Exemplo n.º 19
0
def train(test, s3_data, batch):
    """Train a segmentation model using fastai and PyTorch on the Camvid dataset.

    This will write to a CSV log after each epoch, sync output to S3, and resume training
    from a checkpoint. Note: This is an adaptation of
    https://github.com/fastai/course-v3/blob/master/nbs/dl1/lesson3-camvid-tiramisu.ipynb
    and uses the Camvid Tiramisu-subset dataset described in the fast.ai course at
    half-resolution. This takes about a minute to get to around 90% accuracy on a
    p3.2xlarge.
    """
    if batch:
        run_on_batch()

    # Setup hyperparams.
    bs = 8
    wd = 1e-2
    lr = 2e-3
    num_epochs = 10
    sample_pct = 1.0
    model_arch = models.resnet34
    fp16 = False
    sync_interval = 20  # Don't sync during training for such a small job.
    seed = 1234

    if test:
        bs = 1
        num_epochs = 2
        sample_pct = 0.01
        model_arch = models.resnet18

    # Setup paths.
    data_uri = Path('/opt/data/camvid/CamVid')
    train_uri = Path('/opt/data/camvid/train')
    data_dir = data_uri
    train_dir = train_uri
    if s3_data:
        temp_dir_obj = tempfile.TemporaryDirectory()
        data_uri = 's3://raster-vision-lf-dev/camvid/CamVid.zip'
        train_uri = 's3://raster-vision-lf-dev/camvid/train'
        train_dir = Path(temp_dir_obj.name) / 'train'
        data_dir = Path(temp_dir_obj.name) / 'data'
    make_dir(train_dir)
    make_dir(data_dir)

    # Retrieve data and remote training directory.
    if s3_data:
        print('Downloading data...')
        data_zip = Path(temp_dir_obj.name) / 'CamVid.zip'
        s3_utils.copy_from(data_uri, str(data_zip))
        zip_ref = zipfile.ZipFile(data_zip, 'r')
        zip_ref.extractall(data_dir)
        zip_ref.close()
        data_dir = data_dir / 'CamVid'

        if s3_utils.list_paths(train_uri):
            print('Syncing train dir...')
            s3_utils.sync_to_dir(train_uri, str(train_dir))

    # Setup data loader.
    def get_y_fn(x):
        return Path(str(x.parent) + 'annot') / x.name

    fnames = get_image_files(data_dir / 'val')
    img = open_image(fnames[0])

    src_size = np.array(img.data.shape[1:])
    size = src_size // 2

    data = (SegmentationItemList.from_folder(data_dir).use_partial_data(
        sample_pct, seed).split_by_folder(valid='val').label_from_func(
            get_y_fn, classes=codes).transform(
                get_transforms(), size=size,
                tfm_y=True).databunch(bs=bs).normalize(imagenet_stats))

    # Setup metrics, callbacks, and then train model.
    metrics = [acc_camvid]
    model_path = train_dir / 'stage-1'
    log_path = train_dir / 'log'
    learn = unet_learner(data, model_arch, metrics=metrics, wd=wd, bottle=True)
    learn.unfreeze()
    if fp16 and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to adjust this
        # for other models.
        learn = learn.to_fp16(loss_scale=256)

    start_epoch = 1
    if os.path.isfile(str(model_path) + '.pth'):
        print('Loading saved model...')
        start_epoch = get_last_epoch(str(log_path) + '.csv') + 1
        if start_epoch > num_epochs:
            print(
                'Training already done. If you would like to re-train, delete '
                'previous results of training in {}.'.format(train_dir))
            exit()

        learn.load(model_path)
        print('Resuming from epoch {}'.format(start_epoch))
        print('Note: fastai does not support a start_epoch, so epoch 1 below '
              'corresponds to {}'.format(start_epoch))

    callbacks = [
        SaveModelCallback(learn, name=model_path),
        MyCSVLogger(learn, filename=log_path, start_epoch=start_epoch)
    ]
    if s3_data:
        callbacks.append(S3SyncCallback(train_dir, train_uri, sync_interval))

    epochs_left = num_epochs - start_epoch + 1
    lrs = slice(lr / 100, lr)
    learn.fit_one_cycle(epochs_left, lrs, pct_start=0.8, callbacks=callbacks)

    if s3_data:
        s3_utils.sync_from_dir(train_dir, train_uri)
Exemplo n.º 20
0
    src = (SegmentationItemList.from_df(
        unique_images, DATA / ('train_images' + str(SUFFIX)),
        cols='im_id').split_by_idx(valid_index).label_from_func(get_y_fn,
                                                                classes=codes))

    transforms = get_transforms(max_warp=0, max_rotate=0)
    data = (src.transform(get_transforms(),
                          tfm_y=True,
                          size=training_image_size,
                          resize_method=ResizeMethod.PAD,
                          padding_mode="zeros").databunch(
                              bs=batch_size).normalize(imagenet_stats))

    learn = unet_learner(data,
                         models.resnet34,
                         pretrained=True,
                         metrics=[multiclass_dice, dice_50],
                         loss_func=BCEDiceLoss(),
                         model_dir=DATA)

    learn.fit_one_cycle(10, 1e-3)
    learn.unfreeze()
    learn.fit_one_cycle(60, slice(1e-6, 1e-3))
    valid_dice_score = learn.recorder.metrics[-1]
    print("DEBUG", valid_dice_score)
    all_dice_scores.append(valid_dice_score)

    #Save model
    filename = MODEL_NAME + '_' + str(currentFold)
    print(filename)
    learn.export()
Exemplo n.º 21
0
    def train(self, tmp_dir):
        """Train a model.

        This downloads any previous output saved to the train_uri,
        starts training (or resumes from a checkpoint), periodically
        syncs contents of train_dir to train_uri and after training finishes.

        Args:
            tmp_dir: (str) path to temp directory
        """
        self.log_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        if 0 not in class_map.get_keys():
            classes = ['nodata'] + classes
        num_workers = 0 if self.train_opts.debug else 4

        data = (SegmentationItemList.from_folder(chip_dir)
                .split_by_folder(train='train-img', valid='val-img'))
        train_count = None
        if self.train_opts.train_count is not None:
            train_count = min(len(data.train), self.train_opts.train_count)
        elif self.train_opts.train_prop != 1.0:
            train_count = int(round(self.train_opts.train_prop * len(data.train)))
        train_items = data.train.items
        if train_count is not None:
            train_inds = np.random.permutation(np.arange(len(data.train)))[0:train_count]
            train_items = train_items[train_inds]
        items = np.concatenate([train_items, data.valid.items])

        data = (SegmentationItemList(items, chip_dir)
                .split_by_folder(train='train-img', valid='val-img')
                .label_from_func(get_label_path, classes=classes)
                .transform(get_transforms(flip_vert=self.train_opts.flip_vert),
                           size=size, tfm_y=True)
                .databunch(bs=self.train_opts.batch_sz,
                           num_workers=num_workers))
        print(data)

        # Setup learner.
        ignore_idx = 0
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)]
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = unet_learner(
            data, model_arch, metrics=metrics, wd=self.train_opts.weight_decay,
            bottle=True, path=train_dir)
        learn.unfreeze()

        if self.train_opts.mixed_prec and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            print('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            learn.model.load_state_dict(
                torch.load(pretrained_path, map_location=learn.data.device),
                strict=False)

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        oversample = self.train_opts.oversample
        if oversample:
            weights = get_oversampling_weights(
                data.train_ds, oversample['rare_class_ids'],
                oversample['rare_target_prop'])
            oversample_callback = OverSamplingCallback(learn, weights=weights)
            callbacks.append(oversample_callback)

        if self.train_opts.debug:
            if oversample:
                oversample_callback.on_train_begin()
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        if self.train_opts.log_tensorboard:
            callbacks.append(TensorboardLogger(learn, 'run'))

        if self.train_opts.run_tensorboard:
            log.info('Starting tensorboard process')
            log_dir = join(train_dir, 'logs', 'run')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(log_dir)])
            terminate_at_exit(tensorboard_process)

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            if lr is None:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                print('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        if self.train_opts.run_tensorboard:
            tensorboard_process.terminate()

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)