def create_coco_data():
    """Build an item-less databunch that carries the COCO class metadata.

    The databunch is created from an ``ImageList`` with no items, labelled
    with a constant ``0`` and the class names returned by
    ``coco_class_mapping()``.  Several attributes are then attached to it
    (class mapping, class names, chip/resize size of 416 and two flags).

    Returns:
        The databunch with the extra attributes set.
    """
    import tempfile

    mapping = coco_class_mapping()
    no_tfms = ([], [])  # (train transforms, validation transforms)

    # The temp file only supplies a path string for the ImageList.
    placeholder_path = tempfile.NamedTemporaryFile().name
    split = ImageList([], path=placeholder_path,
                      ignore_empty=True).split_none()
    labelled = split.label_const(0,
                                 label_cls=ObjectDetectionCategoryList,
                                 classes=list(mapping.values()))
    data = labelled.transform(no_tfms).databunch()

    data.class_mapping = mapping
    data.classes = list(mapping.values())
    # NOTE(review): flagged as not empty although the list has no items --
    # confirm this is what consumers of `_is_empty` expect.
    data._is_empty = False
    data._is_coco = True
    data.resize_to = 416
    data.chip_size = 416
    return data
def test_from_df():
    """Check that ``ImageList.from_df`` never fails with a ``TypeError``.

    Any other exception raised for the dummy path is tolerated.
    """
    this_tests(ItemList.from_df)
    frame = pd.DataFrame(["123.png"], columns=["name"])
    try:
        ImageList.from_df(path="dummy_path", df=frame)
    except Exception as err:
        # Only a TypeError counts as a test failure.
        raised_type_error = isinstance(err, TypeError)
        assert not raised_type_error
def test_set_random_seed(tiny_ic_data_path):
    """Two databunches built after the same seed must yield equal labels.

    The label tensor (element 1) of the first batch is compared.
    """

    def seeded_first_batch():
        # Re-seed, rebuild the whole pipeline, then draw a single batch.
        set_random_seed(1)
        bunch = (
            ImageList.from_folder(tiny_ic_data_path)
            .split_by_rand_pct()
            .label_from_folder()
            .transform()
            .databunch(bs=5)
            .normalize()
        )
        return bunch.one_batch()

    first_batch = seeded_first_batch()
    second_batch = seeded_first_batch()
    assert first_batch[1].tolist() == second_batch[1].tolist()
def test_model_to_learner(tmp):
    """Exercise ``model_to_learner`` with a pretrained ResNet-18.

    Checks that the learner exposes 1000 classes, that a downloaded cup
    image is classified as "coffee_mug", and that ``predict()`` and
    ``get_preds()`` give numerically close outputs for that image.
    """
    arch = models.resnet18

    # The wrapped model should look like an ImageNet ResNet trainer.
    learn = model_to_learner(arch(pretrained=True))
    assert len(learn.data.classes) == 1000
    assert isinstance(learn.model, models.ResNet)

    # Download one simple image and predict on it.
    IM_URL = "https://cvbp.blob.core.windows.net/public/images/cvbp_cup.jpg"
    imagefile = os.path.join(tmp, "cvbp_cup.jpg")
    urllib.request.urlretrieve(IM_URL, imagefile)
    image = open_image(imagefile, convert_mode="RGB")
    category, ind, predict_output = learn.predict(image)
    assert learn.data.classes[ind] == str(category) == "coffee_mug"

    # Build a one-image databunch to compare .predict() with .get_preds().
    # label_const() is used because label_empty hits a fastai bug:
    # https://github.com/fastai/fastai/issues/1908
    labelled = ImageList.from_folder(tmp).split_none().label_const()
    one_data = (
        labelled
        .transform(tfms=None, size=IMAGENET_IM_SIZE)
        .databunch(bs=1)
        .normalize(imagenet_stats)
    )
    learn.data.train_dl = one_data.train_dl
    get_preds_output = learn.get_preds(ds_type=DatasetType.Train)

    # get_preds() produces a batch (list) output, so take its first entry.
    from_get_preds = np.array(get_preds_output[0].tolist()[0])
    from_predict = np.array(predict_output.tolist())
    assert np.all(
        np.isclose(from_get_preds, from_predict, rtol=1e-05, atol=1e-08))
def get_data(train_sampler=None):
    """Build the databunch for the chips found under ``chip_dir``.

    Reads ``chip_dir``, ``tfms``, ``size``, ``num_workers`` and ``self``
    from the enclosing scope.

    Args:
        train_sampler: forwarded unchanged to ``databunch``.

    Returns:
        The databunch split into 'train' and 'val' folders.
    """
    split = ImageList.from_folder(chip_dir).split_by_folder(train='train',
                                                            valid='val')
    prepared = split.label_from_folder().transform(tfms, size=size)
    return prepared.databunch(bs=self.train_opts.batch_sz,
                              num_workers=num_workers,
                              train_sampler=train_sampler)
def get_data_from_folder(path: Union[Path, str],
                         bs: int,
                         img_size: int,
                         tfms: Transform = None,
                         extensions: List[str] = None) -> ImageDataBunch:
    """Create a normalized DataBunch from a folder-structured dataset.

    The folder is split with ``split_by_folder()`` and labelled with
    ``label_from_folder()``; images are resized to ``img_size`` and the
    result is normalized with ``imagenet_stats``.

    Args:
        path : path to the folder holding the data
        bs : batch size
        img_size : size the images are resized to
        tfms : transformations to apply; ``get_transforms()`` when None
        extensions : file extensions to pick up from ``path``;
            ``[".jpg"]`` when None

    Returns:
        data : the resulting Fastai DataBunch
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if extensions is None:
        extensions = [".jpg"]
    if tfms is None:
        tfms = get_transforms()

    data = (
        ImageList.from_folder(path, extensions=extensions)
        .split_by_folder()
        .label_from_folder()
        .transform(tfms, size=img_size)
        # num_workers=0 is kept from the original implementation.
        .databunch(bs=bs, num_workers=0)
        .normalize(imagenet_stats)
    )
    return data
def preprocess(dicom_paths):
    """Return an ImageList for the ``.png`` files named after the inputs.

    Args:
        dicom_paths: iterable of path stems; ``.png`` is appended to each.

    Returns:
        ``ImageList`` built from a one-column ('name') DataFrame, relative
        to the current directory.
    """
    png_names = [f'{path}.png' for path in dicom_paths]
    frame = pd.DataFrame(png_names, columns=['name'])
    # NOTE(review): the loaded stats are never used in this function; the
    # call is kept so a missing 'normal_stats' file still raises here.
    train_data_stats = torch.load('normal_stats')
    return ImageList.from_df(frame, '.')
def prediction(self, directorio, num_batch=8):
    """Predict a class for every image in a folder and keep the features.

    Uses the module-level ``learn`` object.  The folder is attached as the
    learner's test set, a ``SaveFeatures`` hook is placed on a layer chosen
    by ``self.arquitecture``, and predictions are computed.

    Sets:
        self.predictions_dict: file name -> predicted class name.
        self.features: ``sf.features.squeeze()`` from the hook.
        self.labels: the distinct predicted class names.

    Args:
        directorio: folder containing the images to classify.
        num_batch: passed to ``get_preds`` as ``n_batch``.

    Raises:
        ValueError: if ``self.arquitecture`` is not 'resnet18' or 'effB4'.
    """
    # Fail early: previously an unknown architecture left `sf` unbound and
    # only crashed after inference had already run.
    if self.arquitecture not in ('resnet18', 'effB4'):
        raise ValueError(
            f"Unsupported architecture: {self.arquitecture!r}")

    data = ImageList.from_folder(
        directorio)  # build the ImageList from the folder
    learn.data.add_test(data)  # add data to the test set of learn
    learn.to_fp32()  # make sure the learner runs in full (FP32) precision

    # Hook the layer whose output is kept as the feature vector.
    if self.arquitecture == 'resnet18':
        sf = SaveFeatures(learn.model[1][4])
    else:  # 'effB4'
        sf = SaveFeatures(learn.model._avg_pooling)

    # get the probabilities of images
    preds, _ = learn.get_preds(ds_type=DatasetType.Test, n_batch=num_batch)
    # Integer class indexes: position of the highest probability per image.
    y_pred = preds.argmax(dim=1).tolist()

    self.predictions_dict = {
        n.name: learn.data.classes[y]
        for n, y in zip(data.items, y_pred)
    }
    self.features = sf.features.squeeze()
    self.labels = list(set(self.predictions_dict.values()))
def main(ensemble, tta, output):
    """Run inference on the test folder and write the scores to a CSV.

    Args:
        ensemble: when truthy, load three learners and predict through an
            ``Ensemble``; otherwise use the single 'se_resnext101' learner.
        tta: whether to use test time augmentation.
        output: destination passed to ``DataFrame.to_csv``.
    """
    print("Loading test data.")
    test_dir = os.path.join(DATA_DIR, TEST_FOLDER)
    test_imgs = ImageList.from_folder(path=test_dir)

    def _load(name):
        # Announce and load one exported learner with the test images.
        print(f"Loading {name}")
        return load_learner(SAVED_DIR, f'{name}.pkl', test=test_imgs)

    if ensemble:
        learners = [
            _load(name) for name in ['dpn92', 'inceptionv4', 'se_resnext101']
        ]
        print("Initializing ensemble.")
        combined = Ensemble(learners)
        print("Performing inference...")
        preds = combined.predict(tta)
        # Class and image names are read from the first learner.
        reference = learners[0]
    else:
        reference = _load('se_resnext101')
        print("Performing inference...")
        infer = reference.TTA if tta else reference.get_preds
        preds, _ = infer(ds_type=DatasetType.Test)
    print("Predictions done.")

    classes = reference.data.classes
    img_names = [item.name for item in reference.data.test_ds.items]

    # One column per class, with the image name inserted as first column.
    df = pd.DataFrame(np.array(preds), columns=classes)
    df.insert(0, 'img_name', img_names)
    df.to_csv(output, index=False)
    print(f"Predictions saved to {output}")
def get_learner(model_path, model_file, test_path, test_file):
    """Load an exported learner with a CSV-described test set attached.

    :param model_path: Directory containing the exported .pkl file.
    :param model_file: Name of the .pkl file to load from ``model_path``.
    :param test_path: Directory where the test data is located.
    :param test_file: CSV file listing the test images; the images are
        looked up in the 'test' sub-folder.
    :return: The loaded learner.
    """
    test_images = ImageList.from_csv(test_path, test_file, folder='test')
    return load_learner(model_path, file=model_file, test=test_images)
def get_data(data_path: PathOrStr,
             bs: int = 16,
             img_size: int = 160,
             pct_partial: float = 1.0,
             num_workers: int = 0,
             seed: int = 42) -> ImageDataBunch:
    """
    Build an unshuffled, ImageNet-normalized data object from a folder.

    Thin wrapper around fastai's Data Block API: the images are collected,
    optionally sub-sampled, labelled from their folder names, resized and
    packed into a databunch.

    :param data_path: path to data in Imagenet-style folder structure.
    :param bs: batch size; lowered to the number of images if larger
    :param img_size: target image size
    :param pct_partial: proportion of all data to use
    :param num_workers: number of workers passed to the databunch
    :param seed: seed passed to ``use_partial_data``
    :return: data object with ``img_size`` stored on it

    .. note:: more on Data Block API here: https://docs.fast.ai/data_block.html

    .. note:: Imagenet-style directory structure:
        https://docs.fast.ai/vision.data.html#ImageDataBunch.from_folder

    .. note:: ``split_none()`` is used, so every image ends up in the
        training set.
    """
    images = ImageList.from_folder(data_path)
    subset = images.use_partial_data(pct_partial, seed=seed)
    labelled = subset.split_none().label_from_folder()
    label_lists = labelled.transform(size=img_size)

    # A batch size above the image count makes fastai warn and fail when
    # showing a batch, so cap it.
    n_images = len(label_lists.train)
    if bs > n_images:
        print(
            f"Too few images. Decreasing batch size from {bs} to {n_images}.")
        bs = n_images

    data = label_lists.databunch(bs=bs, num_workers=num_workers)
    data = data.normalize(imagenet_stats)

    # Keep the image order stable so individual images are easy to find.
    data.train_dl = data.train_dl.new(shuffle=False)
    # The data object needs to know its image size.
    data.img_size = img_size
    return data
def upload_file():
    """Save an uploaded image, run the model and render the results.

    For POST requests the uploaded file is stored, every image found in
    ``upload_path`` is scored, and 'result.html' is rendered with one
    entry per image: [path, label, percentage, top-3 message].  Label and
    percentage are "NA" when the top score is below ``threshold``.  An
    error string is returned when no image is found.  Other request
    methods return nothing.
    """
    if request.method != 'POST':
        # NOTE(review): non-POST requests get no response body, as before.
        return None

    image = request.files['file']
    filename = secure_filename(image.filename)
    # saving file in upload path
    image.save(Path(app.config["IMAGE_UPLOADS"]+"/"+ filename))

    # loading images from upload path
    img_list_loader = ImageList.from_folder(upload_path)
    image_paths = img_list_loader.items

    # Checking if valid images are uploaded
    if len(image_paths) == 0:
        return "ERROR: Invalid image. Go back to upload new image"

    # loading model and running inference
    load_model = load_learner(model, test=img_list_loader)
    all_preds, _ = load_model.get_preds(ds_type=DatasetType.Test)

    # Processing results for UI
    my_dict = {}
    for index, (probs, img_src) in enumerate(zip(all_preds, image_paths)):
        top3_return_msg, top_pred = print_top_3_pred(probs)
        top_percent = np.round(probs[top_pred].numpy()*100, 2)
        if top_percent < threshold:
            custom_msg = "NA"
            Prediction_percent = "NA"
        else:
            custom_msg = str(get_label(int(top_pred)))
            Prediction_percent = str("{:.2f}%".format(top_percent))
        my_dict[index] = [
            img_src, custom_msg, Prediction_percent, top3_return_msg
        ]
    return render_template('result.html', mydict=my_dict)
def getdata(self, bs=32, num_workers=16, noise=True, blur=True, basic=True):
    """Build, store and return the databunch used during training.

    The three split files (test.txt, val.txt, train.txt under
    ``self.root``) are concatenated, in that order, into ``list.txt``.
    The databunch is then created from that file, with images resized to
    224x224px and normalized.

    Parameters
    ----------
    bs : int, optional
        Batch size, by default 32
    num_workers : int, optional
        Num of process used for fetching data, by default 16
    noise : bool, optional
        Whether to add noisy patches as augmentation, by default True
    blur : bool, optional
        Whether to add blur augmentation, by default True
    basic : bool, optional
        Whether to do basic augmentation like rotation, flipping, etc.,
        by default True

    Returns
    -------
    databunch
        The databunch, also stored in ``self.data``.
    """
    print("Going through the data..")
    filenames = [self.root / (x + ".txt") for x in ("test", "val", "train")]

    # Both handles are context-managed so they are closed even if
    # reading or writing fails part-way through.
    with open(self.root / "list.txt", "w") as fout, \
            fileinput.input(filenames) as fin:
        fout.writelines(fin)

    self.data = (
        (
            ImageList.from_csv(
                path=self.root, folder="images", csv_name="list.txt", delimiter=" "
            )
        )
        # The first 22169 rows are split off by index.
        # NOTE(review): presumably the test+val rows, which come first in
        # list.txt -- confirm the count against the split files.
        .split_by_idx(list(range(22169)))
        .label_from_df()
        .transform(self.transforms(noise, blur, basic), size=224)
        .databunch(bs=bs, num_workers=num_workers)
    ).normalize()
    return self.data
def __init__(self, learn_name, tta, exp_name):
    """Load a saved Learner, predict on the test set and log the results.

    Args:
        learn_name (str): Name of the saved Learner file; loaded as
            f'{learn_name}.pkl' from SAVED_DIR.
        tta (boolean): Whether to perform test time augmentation.
        exp_name (str): Experiment name for logging.
    """
    self.exp_name = exp_name

    # Test images are listed in a CSV and attached when the learner loads.
    test_imgs = ImageList.from_csv(path=DATA_DIR,
                                   folder=TEST_FOLDER,
                                   csv_name=TEST_DF_NAME,
                                   cols=IMG_COL)
    self.learn = load_learner(path=SAVED_DIR,
                              file=f'{learn_name}.pkl',
                              test=test_imgs)
    self.classes = self.learn.data.classes

    # Ground truth labels.
    self._init_labels()

    # Probability scores, with or without test time augmentation.
    predict = self.learn.TTA if tta else self.learn.get_preds
    self.y_prob, _ = predict(ds_type=DatasetType.Test)

    # Predicted label = index of the highest score per row.
    self.y_pred = np.argmax(self.y_prob, axis=1)

    self._init_metrics()
    self._log_info()
def __init__(self):
    """Prepare the labelled, split source image list in ``self.src``.

    The training CSV is read to compute split indices (0.2 is passed to
    ``get_indices_split`` together with the class column), the transforms
    are initialised, and the image list is split and labelled.  No
    transformations are applied to ``self.src`` here.
    """
    csv_path = os.path.join(DATA_DIR, TRAIN_DF_NAME)
    df = pd.read_csv(csv_path)

    # Stratified split indices.
    train_idx, val_idx = get_indices_split(df, CLASS_COL, 0.2)

    # Augmentation/transformation setup.
    self._init_tfms()

    images = ImageList.from_csv(path=DATA_DIR,
                                csv_name=TRAIN_DF_NAME,
                                folder=TRAIN_FOLDER,
                                cols=IMG_COL)
    split = images.split_by_idxs(train_idx, val_idx)
    self.src = split.label_from_df(CLASS_COL)
def _get_data_bunch(path: Union[Path, str], transform: bool, im_size: int,
                    bs: int) -> ImageDataBunch:
    """
    Create ImageDataBunch and return it.

    The data is split randomly (33% validation), labelled from folder
    names and normalized with ``imagenet_stats``.

    TODO in future version is to allow users to pass in their own image
    bunch or their own Transformation objects (instead of using fastai's
    <get_transforms>)

    Args:
        path (Union[Path, str]): path to data to create databunch with
        transform (bool): a flag to set fastai default transformations
            (get_transforms())
        im_size (int): image size of databunch
        bs (int): batch size of databunch

    Returns:
        ImageDataBunch
    """
    # `type(path) is Path` was never true (concrete paths are PosixPath /
    # WindowsPath), so the value was always re-wrapped; do that directly.
    path = Path(path)
    tfms = get_transforms() if transform else None
    return (
        ImageList.from_folder(path)
        .split_by_rand_pct(valid_pct=0.33)
        .label_from_folder()
        .transform(tfms=tfms, size=im_size)
        .databunch(bs=bs)
        .normalize(imagenet_stats)
    )
from fastai.callbacks import SaveModelCallback
#from fastai.vision import *
from fastai.train import ClassificationInterpretation, DatasetType, load_learner
from fastai.vision import get_transforms, ImageList, cnn_learner, accuracy, jitter, open_image, learner
from torchvision import models as tv_models
from matplotlib import pyplot as plt
from pathlib import Path

# Dataset location and basic settings.
base_path = Path('data', 'dataset-15')
dataset_path = base_path
img_size = 224
bs = 128
arch = tv_models.resnext50_32x4d

# Flips in both directions; warping and zooming are switched off.
tfms = get_transforms(do_flip=True, flip_vert=True, max_warp=0.0, max_zoom=1.0)

# Training images live in 'images', validation images in
# 'testset-15-cropped'.
data = (
    ImageList.from_folder(dataset_path)
    .split_by_folder(train='images', valid='testset-15-cropped')
    .label_from_folder()
    .transform(tfms)
    .databunch(bs=bs)
    .normalize()
)
# Replace the validation loader with a shuffled copy.
data.valid_dl = data.valid_dl.new(shuffle=True)

# plot one image with transformations
# example_img = open_image('/home/hoth/Desktop/lego-brick-recognition/data/datasets/train-15/images/3008/3008_0.jpg')
# example_img.apply_tfms(tfms[0], size=224).show(figsize=(10, 10))
# plt.show()

(base_path / 'classification').mkdir(exist_ok=True)

# Save one example batch per split as SVG.
for split_type, figure_name in (
        (DatasetType.Train, 'batch_example_train.svg'),
        (DatasetType.Valid, 'batch_example_valid.svg'),
):
    data.show_batch(rows=10, ds_type=split_type)
    plt.savefig(base_path / 'classification' / figure_name)
# local modules
print(f"Fast.ai version = {fastai.__version__}")
which_processor()

# Training configuration.
EPOCHS = 10
LEARNING_RATE = 1e-4
IM_SIZE = 300
BATCH_SIZE = 16
ARCHITECTURE = models.resnet18

path = Path('/app/classifier_data/')

# Random 80/20 split with a fixed seed, labels taken from folder names.
data = (
    ImageList.from_folder(path)
    .split_by_rand_pct(valid_pct=0.2, seed=10)
    .label_from_folder()
    .transform(size=IM_SIZE)
    .databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
    .normalize(imagenet_stats)
)
print(f'number of classes: {data.c}')
print(data.classes)

learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy],
    callback_fns=[partial(TrainMetricsRecorder, show_graph=True)],
)
# All layers are trained (no frozen body).
learn.unfreeze()
learn.fit(EPOCHS, LEARNING_RATE)
learn.export(file=Path("/app/classifier_model.pkl"))

# The second value returned by validate() is kept as the accuracy.
_, validation_accuracy = learn.validate(learn.data.valid_dl,
                                        metrics=[accuracy])
optar = partial(Ranger)

# In[25]:

CV = 1
seed = CV
bs = 20

# Augmentations: flips in both directions plus small zoom, lighting,
# warp and rotation changes.
tfms = get_transforms(
    flip_vert=True,
    do_flip=True,
    max_zoom=1.05,
    max_lighting=0.2,
    max_warp=0.05,
    max_rotate=5.,
)

# Image ids come from 'ImageId', float targets from 'Detected'; the
# train/validation split is read from the DataFrame.
data = (
    ImageList.from_df(df=image_df,
                      path=DATA_BASE_PATH / 'train_images',
                      cols='ImageId')
    .split_from_df()
    .label_from_df(cols='Detected', label_cls=FloatList)
    .transform(tfms)
    .databunch(bs=bs, num_workers=4)
    .normalize(IMAGE_STATS_GLOBAL2)
)

from fastai.vision.models import resnet50
from models.efficientnet import EfficientNet

# making model
arch = 'efficientnet-b0'
model_name = f'{arch}-v1'

# Parameters for the entire model (stem, all blocks, and head)
md_ef = EfficientNet.from_pretrained(arch, num_classes=1, dropout_rate=0.5)
# md_ef = resnet50(pretrained=False, num_classes=1)
def get_train_imagelist(self, validate_ratio=0.2):
    """Return a randomly split, folder-labelled list of the valid images.

    Only entries of ``self.get_train_image_info()`` whose 'valid' field is
    truthy are used.

    Args:
        validate_ratio: passed to ``split_by_rand_pct``.
    """
    from fastai.vision import ImageList

    image_info = self.get_train_image_info()
    usable_paths = [
        entry['path'] for entry in image_info.values() if entry['valid']
    ]
    split = ImageList(usable_paths).split_by_rand_pct(validate_ratio)
    return split.label_from_folder()
def train(self, tmp_dir):
    """Train a model.

    Steps, in order: sync any previous output from ``train_uri`` into a
    local train directory, unzip the chip archives, build the databunch
    (optionally using only a subset of the training chips), create the
    learner, then fit with callbacks that save, log, export and sync.
    After training a 'done!' marker is written and the train directory is
    synced back to ``train_uri``.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    # Debug mode loads data in the main process (no workers).
    num_workers = 0 if self.train_opts.debug else 4
    tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

    # First pass: split by folder only to learn how many training chips
    # there are and to get at their paths.
    data = (ImageList.from_folder(chip_dir).split_by_folder(train='train',
                                                            valid='val'))

    # An absolute train_count takes precedence over the train_prop
    # fraction; None means "use every training chip".
    train_count = None
    if self.train_opts.train_count is not None:
        train_count = min(len(data.train), self.train_opts.train_count)
    elif self.train_opts.train_prop != 1.0:
        train_count = int(
            round(self.train_opts.train_prop * len(data.train)))
    train_items = data.train.items
    if train_count is not None:
        # Random subset: shuffle the indices and keep the first train_count.
        train_inds = np.random.permutation(np.arange(len(
            data.train)))[0:train_count]
        train_items = train_items[train_inds]
    # Validation chips are always kept in full.
    items = np.concatenate([train_items, data.valid.items])

    # Second pass: build the real databunch from the selected items.
    data = ImageList(items, chip_dir) \
        .split_by_folder(train='train', valid='val') \
        .label_from_folder(classes=classes) \
        .transform(tfms, size=size) \
        .databunch(bs=self.train_opts.batch_size, num_workers=num_workers)
    log.info(str(data))

    if self.train_opts.debug:
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    # Setup learner.
    ignore_idx = -1
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)
    ]
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = cnn_learner(data,
                        model_arch,
                        metrics=metrics,
                        wd=self.train_opts.weight_decay,
                        path=train_dir)
    learn.unfreeze()

    if self.train_opts.mixed_prec and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        log.info('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        # The whole model object is replaced by the 'model' entry of the
        # loaded file.
        learn.model = torch.load(pretrained_path,
                                 map_location=learn.data.device)['model']

    # Save every epoch so that resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]

    if self.train_opts.log_tensorboard:
        callbacks.append(TensorboardLogger(learn, 'run'))

    if self.train_opts.run_tensorboard:
        log.info('Starting tensorboard process')
        log_dir = join(train_dir, 'logs', 'run')
        tensorboard_process = Popen(
            ['tensorboard', '--logdir={}'.format(log_dir)])
        terminate_at_exit(tensorboard_process)

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        if lr is None:
            # No learning rate configured: take the one suggested by the
            # learning-rate finder.
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            log.info('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    # Only defined (and started) when run_tensorboard is set above.
    if self.train_opts.run_tensorboard:
        tensorboard_process.terminate()

    # Since model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
## helpful way to initially get folders
# import split_folders
# split_folders.ratio('<path>', output='<path>/split', seed=1337, ratio=(.8, .2)) # uses default values
# sys.exit()

path = Path("data/CNN/-released/split")

################################################################################
# fastai uses databunches
################################################################################
# 10% of the 'train' folder is held out for validation (fixed seed);
# images are squished to 150px and padded with zeros.
data = (
    ImageList.from_folder(path / "train")
    .split_by_rand_pct(0.1, seed=33)
    .label_from_folder()
    # .add_test_folder('..'/path/'test')
    .transform(
        get_transforms(do_flip=True, flip_vert=True),
        size=150,
        resize_method=ResizeMethod.SQUISH,
        padding_mode="zeros",
    )
    .databunch(bs=64)
    .normalize(imagenet_stats)
)

# ## turn this on for regular testing
# option_name = 'CNN__original'
# data_test = (ImageList.from_folder(path)
#     .split_by_folder(train='train', valid='test')
#     .label_from_folder()
#     .transform(get_transforms(do_flip=True,flip_vert=True),size=150,resize_method=ResizeMethod.SQUISH,padding_mode='zeros')
#     .databunch(bs=64)
#     .normalize(imagenet_stats))

## turn this on for test_suite
from fastprogress.fastprogress import force_console_behavior
import fastprogress

# Disable progress bars and route fastai's training bars to the console.
fastprogress.fastprogress.NO_BAR = True
master_bar, progress_bar = force_console_behavior()
fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar, progress_bar


def get_file(aString):
    """Return the part of ``aString`` after its last '/'."""
    return str(aString.split('/')[-1])


image_folder = 'images/'
path = untar_data(URLs.DOGS)
learn = load_learner(path, test=ImageList.from_folder(image_folder), bs = 1)
preds,y = learn.get_preds(ds_type=DatasetType.Test, )

# First column of the prediction matrix, stored as 'prob_dog'.
predList = list(preds.numpy()[:,0])

# Take the file names from the test set itself so that each name lines up
# with its prediction.  A separate directory listing may be ordered
# differently and may contain entries that are not part of the test set.
f_names = [item.name for item in learn.data.test_ds.items]
pred_df = pd.DataFrame(list(zip(f_names,predList)), columns = ['f_name','prob_dog'])

# Join the predictions onto the download registry by file name.
registry = 'registry/downloaded_files.csv'
regDF = pd.read_csv(registry)
regDF['f_name'] = regDF.file.apply(get_file)
out_df = pd.merge(regDF,pred_df, on = ['f_name'])
from model_utils import get_data, create_gen_learner

# Loading Paths for Model Load
path = Path('')  # Path to data folder to load your model
path_lr = path / ''  # Path to model weights

# Loading Paths to Inference
path_t = Path('')  # Path to undamaged files
dmgpath = Path('')  # Path to damage templates
inf_path = Path('')  # Directory for files to be inferenced

# Creating gen and Loading saved Weights
src = ImageImageList.from_folder(path_lr).split_by_rand_pct(0.1, seed=42)
data_gen = get_data(1, 500, src, path_lr)
learn_gen = create_gen_learner(data_gen).load('')  # LOAD MODEL HERE
test_list = ImageList.from_folder(inf_path)

# Starting Streamlit App
st.markdown('# **ML for Photo Repair**')
st.markdown('### Choose an Image and Damage Template:')
st.markdown('Click **Generate** to Create a damaged photo')

# Undamaged images: any file name containing '.png' or '.jpg'.
names = []
for filename in os.listdir(path_t):
    if '.png' in filename or '.jpg' in filename:
        names.append(filename)

# Damage templates: file names containing '.png' only.
dmgnames = []
for filename in os.listdir(dmgpath):
    if '.png' not in filename:
        continue
    dmgnames.append(filename)
def from_model(cls, emd_path, data=None):
    """
    Creates a YOLOv3 Object Detector from an Esri Model Definition (EMD) file.

    =====================   ===========================================
    **Argument**            **Description**
    ---------------------   -------------------------------------------
    emd_path                Required string. Path to Esri Model
                            Definition file.
    ---------------------   -------------------------------------------
    data                    Optional fastai Databunch. Returned data
                            object from `prepare_data` function, or
                            None (default) for inferencing.
    =====================   ===========================================

    :returns: `YOLOv3` Object
    """
    if not HAS_FASTAI:
        _raise_fastai_import_error(import_exception=import_exception)

    emd_path = Path(emd_path)
    # Context manager so the EMD file handle is closed after parsing.
    with open(emd_path) as emd_file:
        emd = json.load(emd_file)

    model_file = Path(emd['ModelFile'])
    chip_size = emd["ImageWidth"]

    # A relative model path is resolved against the EMD file's folder.
    if not model_file.is_absolute():
        model_file = emd_path.parent / model_file

    class_mapping = {i['Value']: i['Name'] for i in emd['Classes']}

    resize_to = emd.get('resize_to')
    if isinstance(resize_to, list):
        resize_to = (resize_to[0], resize_to[1])

    data_passed = True
    # Create an image databunch for when loading the model using emd
    # (without training data)
    if data is None:
        data_passed = False
        train_tfms = []
        val_tfms = []
        ds_tfms = (train_tfms, val_tfms)
        with warnings.catch_warnings():
            # UserWarnings are silenced while the item-less databunch is
            # being built.
            warnings.simplefilter("ignore", UserWarning)
            sd = ImageList([], path=emd_path.parent.parent).split_by_idx([])
            data = sd.label_const(
                0,
                label_cls=ObjectDetectionCategoryList,
                classes=list(class_mapping.values())).transform(
                    ds_tfms).databunch().normalize(imagenet_stats)

        data.chip_size = chip_size
        data.class_mapping = class_mapping
        data.classes = ['background'] + list(class_mapping.values())
        data = get_multispectral_data_params_from_emd(data, emd)
        # Add 1 for background class
        data.c += 1
        data._is_empty = True
        data.emd_path = emd_path
        data.emd = emd

    data.resize_to = resize_to
    ret = cls(data, **emd['ModelParameters'], pretrained_path=model_file)

    if not data_passed:
        # Propagate the class list (with 'background') to the learner's
        # single-item dataset.
        ret.learn.data.single_ds.classes = ret._data.classes
        ret.learn.data.single_ds.y.classes = ret._data.classes

    return ret
# File names as a column vector.
filenames = train['fname'].values.reshape(-1, 1)

# Prediction buffers with 80 columns each.
oof_preds = np.zeros((len(train), 80))
test_preds = np.zeros((len(test), 80))

# Light augmentation only: no flips, rotation, zoom or warp.
tfms = get_transforms(
    do_flip=False,
    max_rotate=0,
    max_lighting=0.1,
    max_zoom=0,
    max_warp=0.,
)

df = pd.read_csv(CSV_TRN_MERGED)
cols = list(df.columns[1:])
i = 0
# NOTE(review): every row index is passed as validation index.
val_index = range(len(train))

#Our clasifier stuff
src = (
    ImageList.from_csv(WORK/'image',
                       Path('../../')/DATA/'train_merged.csv',
                       folder='trn_merged2',
                       suffix='.jpg')
    .split_by_idx(val_index)
    # Every column after the first is a label column.
    .label_from_df(cols=list(df.columns[1:]))
)
#.label_from_df(label_delim=','))

data = src.transform(tfms, size=128).databunch(bs=64).normalize()

f_score = partial(fbeta, thresh=0.2)
learn = cnn_learner(
    data,
    models.xresnet50,
    pretrained=False,
    metrics=[f_score],
).mixup(stack_y=False)
learn.fit_one_cycle(125, 1e-2)

all_preds = list(custom_tta(learn))
stacked = torch.stack(all_preds)
new_preds = []
def create_covidx_databunch(self):
    """Build the COVIDx databunch plus a separate labelled test list.

    Reads the train and test split files for ``self.version`` from
    ``DATA_DIR / "COVIDx"``, marks the test split as validation data and
    builds a databunch from both.  The COVIDNet test file
    ("test_COVIDx4.txt") is loaded separately, checked to be disjoint
    from the train split and contained in the test split, and attached
    as the databunch's test set.

    Sets ``self.train_df`` and ``self.test_df``.

    Returns:
        (data, test): the normalized databunch and the labelled lists
        built from the COVIDNet test file.
    """
    bs = self.bs
    data_path = DATA_DIR / "COVIDx"
    assert data_path.exists()

    # train set
    train_df_path = data_path / f"train_split_{self.version}.txt"
    LOGGER.info(f'Reading train_df from {train_df_path}')
    self.train_df = (pd.read_csv(train_df_path,
                                 header=None,
                                 delimiter=" ",
                                 index_col=0,
                                 names=["name", "label",
                                        "dataset"]).reset_index(drop=True))
    self.train_df["name"] = ["train/" + f for f in self.train_df["name"]]
    self.train_df["is_valid"] = False

    # validation set (the test split is used for validation)
    test_df_path = data_path / f"test_split_{self.version}.txt"
    LOGGER.info(f'Reading test_df from {test_df_path}')
    # test_df_path already includes data_path; joining it again would
    # duplicate the directory prefix whenever DATA_DIR is relative.
    self.test_df = (pd.read_csv(test_df_path,
                                header=None,
                                delimiter=" ",
                                index_col=0,
                                names=["name", "label",
                                       "dataset"]).reset_index(drop=True))
    self.test_df["name"] = ["test/" + f for f in self.test_df["name"]]
    self.test_df["is_valid"] = True

    # merge
    data_df = pd.concat([self.train_df, self.test_df]).reset_index(drop=True)
    data_df = data_df.drop("dataset", axis=1)

    # import covidnet test set which is included in testset
    covidnet_test_df = (pd.read_csv(data_path / "test_COVIDx4.txt",
                                    header=None,
                                    delimiter=" ",
                                    index_col=0,
                                    names=["name", "label"
                                           ]).reset_index(drop=True))
    covidnet_test_df["name"] = [
        "test/" + f for f in covidnet_test_df["name"]
    ]

    # sanity check: no overlap with train, fully contained in test
    a = set(covidnet_test_df.name)
    b = set(self.train_df.name)
    c = set(self.test_df.name)
    assert a.intersection(b) == set()
    assert a.intersection(c) == a

    # create fastai databunch
    tfms = get_dataaug_transformations()
    np.random.seed(42)
    src = (ImageList.from_df(
        data_df,
        data_path).split_from_df().label_from_df().transform(tfms, size=224))
    test = (ImageList.from_df(
        covidnet_test_df,
        data_path).split_none().label_from_df().transform(None, size=224))
    data = (src.databunch(bs=bs).normalize(imagenet_stats))
    data.add_test(test.train.x)

    # Log the class proportions of each split so they can be compared.
    train_counts = np.unique(data.train_ds.y.items, return_counts=True)
    LOGGER.info(
        f'prop in train set: {train_counts[1]/ train_counts[1].sum()}')
    valid_counts = np.unique(data.valid_ds.y.items, return_counts=True)
    LOGGER.info(
        f'prop in valid set: {valid_counts[1]/ valid_counts[1].sum()}')
    test_counts = np.unique(test.y.items, return_counts=True)
    LOGGER.info(
        f'prop in test set: {test_counts[1]/ test_counts[1].sum()}')
    return data, test
def test_wrong_order():
    """Labelling before splitting must raise the "isn't split" error."""
    this_tests('na')
    path = untar_data(URLs.MNIST_TINY)
    expected_message = "Your data isn't split*"
    with pytest.raises(Exception, match=expected_message):
        unsplit = ImageList.from_folder(path)
        # label_from_folder() comes before split_by_folder() on purpose.
        unsplit.label_from_folder().split_by_folder()
def test_wrong_order():
    """Labelling before splitting must raise the "isn't split" error."""
    this_tests('na')
    path = untar_data(URLs.MNIST_TINY)
    expected_message = "Your data isn't split*"
    with pytest.raises(Exception, match=expected_message):
        unsplit = ImageList.from_folder(path)
        # label_from_folder() comes before split_by_folder() on purpose.
        unsplit.label_from_folder().split_by_folder()
df = pd.read_csv(LABELS) nunique = list(df.nunique())[1:-1] print(nunique) df.head() range(fold * len(df) // nfolds, (fold + 1) * len(df) // nfolds) # + stats = ([0.0692], [0.2051]) data = (ImageList.from_df( df, path='.', folder=TRAIN, suffix='.png', cols='image_id', convert_mode='L').split_by_idx( range(fold * len(df) // nfolds, (fold + 1) * len(df) // nfolds)).label_from_df(cols=[ 'grapheme_root', 'vowel_diacritic', 'consonant_diacritic' ]).transform( transform.get_transforms(do_flip=False, max_warp=0.1), size=sz, padding_mode='zeros').databunch(bs=bs)).normalize(stats) data.show_batch() # + class Head(nn.Module): def __init__(self, nc, n, ps=0.5): super().__init__() layers = [AdaptiveConcatPool2d(), Mish(), Flatten()] + \
from fastai.core import Path
from fastai.vision import load_learner, defaults, ImageList,DatasetType
import torch as torch
import os
import yaml

pic_name = 'GK_RDR_PG3_2'

# Project root, image folder and model location.
mp = Path('/Users/nicholasbangs/Notebooks/personal/greek_reader_master')
im_path = mp/'lgi_data'/'gk_letter_imgs'/'ω'
defaults.device = torch.device('cpu')
model_path = mp/'models'
model_name = 'rn_34.pkl'

# Load the exported learner with the image folder attached as test set.
test_images = ImageList.from_folder(im_path)
model = load_learner(model_path, model_name, test=test_images)
preds,y = model.get_preds(ds_type=DatasetType.Test)
classes = model.data.classes

# One zip of (class, score) pairs per prediction row.
zipped = [zip(classes, p) for p in preds]
# Keep the highest scoring (class, score) pair of every row.
sorted_preds = [
    sorted(z, key=lambda pair: pair[1], reverse=True)[0] for z in zipped
]
print(sorted_preds)