def create_coco_data():
    """ Create an empty databunch for COCO dataset."""
    import tempfile

    # Inference-only bunch: no train/val transforms are required.
    transforms = ([], [])
    mapping = coco_class_mapping()

    # Anchor an empty item list at a throwaway temp path, then label it with
    # the full COCO class set so the databunch carries the right metadata
    # even though it holds no images.
    item_list = ImageList(
        [], path=tempfile.NamedTemporaryFile().name, ignore_empty=True
    ).split_none()
    data = item_list.label_const(
        0,
        label_cls=ObjectDetectionCategoryList,
        classes=list(mapping.values())
    ).transform(transforms).databunch()

    data.class_mapping = mapping
    data.classes = list(mapping.values())
    data._is_empty = False
    data._is_coco = True
    data.resize_to = 416
    data.chip_size = 416
    return data
def from_model(cls, emd_path, data=None):
    """
    Creates a YOLOv3 Object Detector from an Esri Model Definition (EMD) file.

    ===================== ===========================================
    **Argument**            **Description**
    --------------------- -------------------------------------------
    emd_path                Required string. Path to Esri Model Definition
                            file.
    --------------------- -------------------------------------------
    data                    Required fastai Databunch or None. Returned data
                            object from `prepare_data` function or None for
                            inferencing.
    ===================== ===========================================

    :returns: `YOLOv3` Object
    """
    if not HAS_FASTAI:
        _raise_fastai_import_error(import_exception=import_exception)

    emd_path = Path(emd_path)
    # Fix: close the EMD file handle promptly — the previous
    # `json.load(open(emd_path))` left it open until garbage collection.
    with open(emd_path) as f:
        emd = json.load(f)

    model_file = Path(emd['ModelFile'])
    chip_size = emd["ImageWidth"]

    # Model file paths in the EMD may be relative to the EMD's folder.
    if not model_file.is_absolute():
        model_file = emd_path.parent / model_file

    class_mapping = {i['Value']: i['Name'] for i in emd['Classes']}

    # `resize_to` may be stored as a [width, height] list; normalize to tuple.
    resize_to = emd.get('resize_to')
    if isinstance(resize_to, list):
        resize_to = (resize_to[0], resize_to[1])

    data_passed = True
    # Create an image databunch for when loading the model using emd
    # (without training data).
    if data is None:
        data_passed = False
        train_tfms = []
        val_tfms = []
        ds_tfms = (train_tfms, val_tfms)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            sd = ImageList([], path=emd_path.parent.parent).split_by_idx([])
            data = sd.label_const(
                0,
                label_cls=ObjectDetectionCategoryList,
                classes=list(class_mapping.values())
            ).transform(ds_tfms).databunch().normalize(imagenet_stats)

        data.chip_size = chip_size
        data.class_mapping = class_mapping
        data.classes = ['background'] + list(class_mapping.values())
        data = get_multispectral_data_params_from_emd(data, emd)
        # Add 1 for background class
        data.c += 1
        data._is_empty = True
        data.emd_path = emd_path
        data.emd = emd

    data.resize_to = resize_to
    ret = cls(data, **emd['ModelParameters'], pretrained_path=model_file)

    if not data_passed:
        # Keep the learner's inference dataset class list in sync with the
        # (background-prefixed) class list built above.
        ret.learn.data.single_ds.classes = ret._data.classes
        ret.learn.data.single_ds.y.classes = ret._data.classes

    return ret
def train(self, tmp_dir):
    """Train a model.

    This downloads any previous output saved to the train_uri,
    starts training (or resumes from a checkpoint), periodically
    syncs contents of train_dir to train_uri and after training
    finishes.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    # Debug mode runs the loader in-process for easier breakpoints.
    num_workers = 0 if self.train_opts.debug else 4
    tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

    # First pass: enumerate chips by folder purely to know the train/val
    # membership; the real (labelled, transformed) bunch is built below.
    data = (ImageList.from_folder(chip_dir).split_by_folder(
        train='train', valid='val'))

    # Optionally subsample the training set, either by an absolute count
    # (train_count) or by a proportion (train_prop); train_count wins when
    # both are set.
    train_count = None
    if self.train_opts.train_count is not None:
        train_count = min(len(data.train), self.train_opts.train_count)
    elif self.train_opts.train_prop != 1.0:
        train_count = int(
            round(self.train_opts.train_prop * len(data.train)))
    train_items = data.train.items
    if train_count is not None:
        # NOTE(review): np.random is not seeded here, so the subsample is
        # non-deterministic across runs — confirm that is intended.
        train_inds = np.random.permutation(np.arange(len(
            data.train)))[0:train_count]
        train_items = train_items[train_inds]
    items = np.concatenate([train_items, data.valid.items])

    # Second pass: rebuild the list from the (possibly subsampled) items,
    # re-split by folder, label, transform, and batch it.
    data = ImageList(items, chip_dir) \
        .split_by_folder(train='train', valid='val') \
        .label_from_folder(classes=classes) \
        .transform(tfms, size=size) \
        .databunch(bs=self.train_opts.batch_size,
                   num_workers=num_workers)
    log.info(str(data))

    if self.train_opts.debug:
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    # Setup learner.
    ignore_idx = -1
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1,
              ignore_idx=ignore_idx)
    ]
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = cnn_learner(data, model_arch, metrics=metrics,
                        wd=self.train_opts.weight_decay, path=train_dir)
    # Train the whole network, not just the head.
    learn.unfreeze()

    if self.train_opts.mixed_prec and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        log.info('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        # Replace the learner's model with the pretrained checkpoint's
        # 'model' entry, mapped onto the learner's device.
        learn.model = torch.load(
            pretrained_path,
            map_location=learn.data.device)['model']

    # Save every epoch so that resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]
    if self.train_opts.log_tensorboard:
        callbacks.append(TensorboardLogger(learn, 'run'))

    if self.train_opts.run_tensorboard:
        log.info('Starting tensorboard process')
        log_dir = join(train_dir, 'logs', 'run')
        tensorboard_process = Popen(
            ['tensorboard', '--logdir={}'.format(log_dir)])
        terminate_at_exit(tensorboard_process)

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        # When no lr is configured, use fastai's lr_find suggestion.
        if lr is None:
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            log.info('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    if self.train_opts.run_tensorboard:
        tensorboard_process.terminate()

    # Since model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def get_train_imagelist(self, validate_ratio=0.2):
    """Build a fastai ImageList from the valid training images, randomly
    split into train/validation at `validate_ratio` and labelled from the
    parent folder names."""
    from fastai.vision import ImageList

    # Keep only the images whose info record is flagged as valid.
    paths = []
    for info in self.get_train_image_info().values():
        if info['valid']:
            paths.append(info['path'])

    image_list = ImageList(paths)
    split = image_list.split_by_rand_pct(validate_ratio)
    return split.label_from_folder()
def from_emd(cls, data, emd_path):
    """
    Creates a Single Shot Detector from an Esri Model Definition (EMD) file.

    ===================== ===========================================
    **Argument**            **Description**
    --------------------- -------------------------------------------
    data                    Required fastai Databunch or None. Returned data
                            object from `prepare_data` function or None for
                            inferencing.
    --------------------- -------------------------------------------
    emd_path                Required string. Path to Esri Model Definition
                            file.
    ===================== ===========================================

    :returns: `SingleShotDetector` Object
    """
    emd_path = Path(emd_path)
    # Fix: close the EMD file handle promptly — the previous
    # `json.load(open(emd_path))` left it open until garbage collection.
    with open(emd_path) as f:
        emd = json.load(f)

    model_file = Path(emd['ModelFile'])
    backbone = emd.get('backbone', 'resnet34')
    ssd_version = int(emd.get('SSDVersion', 1))
    chip_size = emd["ImageWidth"]

    # Model file paths in the EMD may be relative to the EMD's folder.
    if not model_file.is_absolute():
        model_file = emd_path.parent / model_file

    class_mapping = {i['Value']: i['Name'] for i in emd['Classes']}

    # `resize_to` may be stored as a [width, height] list; normalize to tuple.
    resize_to = emd.get('resize_to')
    if isinstance(resize_to, list):
        resize_to = (resize_to[0], resize_to[1])

    data_passed = True
    # Create an image databunch for when loading the model using emd
    # (without training data).
    if data is None:
        data_passed = False
        train_tfms = []
        val_tfms = []
        ds_tfms = (train_tfms, val_tfms)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            # NOTE(review): TemporaryDirectory() is removed as soon as the
            # object is garbage collected, so this path may not exist on
            # disk. Harmless for an empty ImageList, but worth confirming.
            sd = ImageList(
                [], path=tempfile.TemporaryDirectory().name).split_by_idx([])
            tempdata = sd.label_const(
                0,
                label_cls=SSDObjectCategoryList,
                classes=list(class_mapping.values())
            ).transform(ds_tfms).databunch().normalize(imagenet_stats)

        tempdata.chip_size = chip_size
        tempdata.class_mapping = class_mapping
        tempdata.classes = ['background'] + list(class_mapping.values())
        data = tempdata
        data.c += 1  # Add 1 for background class

    data.resize_to = resize_to

    ssd = cls(
        data,
        emd['Grids'],
        emd['Zooms'],
        emd['Ratios'],
        pretrained_path=str(model_file),
        backbone=backbone,
        ssd_version=ssd_version)

    if not data_passed:
        # Keep the learner's inference dataset class list in sync with the
        # (background-prefixed) class list built above.
        ssd.learn.data.single_ds.classes = ssd._data.classes
        ssd.learn.data.single_ds.y.classes = ssd._data.classes

    return ssd