def test_set_random_seed(tiny_ic_data_path):
    """Seeding twice with the same value must produce the same first batch."""

    def _batch_after_seeding(seed):
        # Re-seed, rebuild the complete pipeline (random split included)
        # and draw a single batch from it.
        set_random_seed(seed)
        items = ImageList.from_folder(tiny_ic_data_path)
        labelled = items.split_by_rand_pct().label_from_folder()
        bunch = labelled.transform().databunch(bs=5).normalize()
        return bunch.one_batch()

    first_batch = _batch_after_seeding(1)
    second_batch = _batch_after_seeding(1)

    # Only element 1 of each batch is compared (presumably the targets).
    assert first_batch[1].tolist() == second_batch[1].tolist()
def test_model_to_learner(tmp):
    """model_to_learner wraps an ImageNet ResNet whose predictions are consistent.

    Checks that the learner exposes 1000 classes, recognises a downloaded
    sample image, and that ``predict`` and ``get_preds`` agree on that image.
    """
    # Wrap a pretrained ResNet-18 and inspect the resulting learner.
    architecture = models.resnet18
    learn = model_to_learner(architecture(pretrained=True))
    assert len(learn.data.classes) == 1000  # ImageNet classes
    assert isinstance(learn.model, models.ResNet)

    # Fetch one very simple image and make sure it is classified correctly.
    IM_URL = "https://cvbp.blob.core.windows.net/public/images/cvbp_cup.jpg"
    imagefile = os.path.join(tmp, "cvbp_cup.jpg")
    urllib.request.urlretrieve(IM_URL, imagefile)
    image = open_image(imagefile, convert_mode="RGB")
    category, ind, predict_output = learn.predict(image)
    assert learn.data.classes[ind] == str(category) == "coffee_mug"

    # Build a one-image databunch from the same folder.
    # label_const() is used rather than label_empty() because of a fastai bug:
    # https://github.com/fastai/fastai/issues/1908
    one_data = (
        ImageList.from_folder(tmp)
        .split_none()
        .label_const()
        .transform(tfms=None, size=IMAGENET_IM_SIZE)
        .databunch(bs=1)
        .normalize(imagenet_stats)
    )
    learn.data.train_dl = one_data.train_dl
    get_preds_output = learn.get_preds(ds_type=DatasetType.Train)

    # get_preds() yields a batch, so its first row is the single image.
    batch_row = np.array(get_preds_output[0].tolist()[0])
    single_row = np.array(predict_output.tolist())
    assert np.all(np.isclose(batch_row, single_row, rtol=1e-05, atol=1e-08))
def prediction(self, directorio, num_batch=8):
    """Classify every image found in a folder and capture intermediate features.

    The images are attached to ``learn`` as its test set, predictions are
    computed, and the results are stored on the instance:

    * ``self.predictions_dict`` -- maps each image file name to its predicted class
    * ``self.features`` -- squeezed output captured by the ``SaveFeatures`` hook
    * ``self.labels`` -- the distinct predicted classes

    NOTE(review): ``learn`` is not a parameter or attribute here; it is
    resolved from an enclosing/global scope that is not visible in this block.

    Args:
        directorio: folder containing the images to classify.
        num_batch: forwarded to ``learn.get_preds`` as ``n_batch``.

    Raises:
        ValueError: if ``self.arquitecture`` is neither 'resnet18' nor 'effB4'.
    """
    # Validate before doing any work. Previously an unknown architecture left
    # `sf` unbound and the method failed with UnboundLocalError only after the
    # whole inference pass had already run.
    if self.arquitecture not in ('resnet18', 'effB4'):
        raise ValueError(
            f"Unsupported architecture {self.arquitecture!r}; "
            "expected 'resnet18' or 'effB4'")

    data = ImageList.from_folder(directorio)  # build the ImageList from the folder
    learn.data.add_test(data)  # attach the images as the test set of learn
    learn.to_fp32()  # make sure the learner runs in full (FP32) precision

    # Hook the layer whose output is kept as the feature vector.
    if self.arquitecture == 'resnet18':
        sf = SaveFeatures(learn.model[1][4])
    else:  # 'effB4'
        sf = SaveFeatures(learn.model._avg_pooling)

    # Per-class scores for the test images.
    preds, _ = learn.get_preds(ds_type=DatasetType.Test, n_batch=num_batch)
    # Integer class index with the highest score for each image.
    y_pred = preds.argmax(dim=1).tolist()

    self.predictions_dict = {
        n.name: learn.data.classes[y]
        for n, y in zip(data.items, y_pred)
    }
    self.features = sf.features.squeeze()
    self.labels = list(set(self.predictions_dict.values()))
def get_data_from_folder(path: Union[Path, str],
                         bs: int,
                         img_size: int,
                         tfms: Transform = None,
                         extensions: List[str] = None) -> ImageDataBunch:
    """Build a fastai DataBunch from an Imagenet-style train/valid/test folder tree.

    Args:
        path : path to folder with data in train/valid/test folder structure
        bs : batch size
        img_size : resize to img_size for training
        tfms : transformations to apply; ``get_transforms()`` is used when None
        extensions : file extensions to grab from the folder path;
            ``[".jpg"]`` is used when None

    Returns:
        data : the data organized in a fastai DataBunch, normalized with
            ``imagenet_stats``
    """
    if tfms is None:
        tfms = get_transforms()
    # A None sentinel replaces the former mutable default list, so the default
    # can never be shared (and accidentally modified) across calls.
    if extensions is None:
        extensions = [".jpg"]

    data = (
        ImageList.from_folder(path, extensions=extensions)
        .split_by_folder()
        .label_from_folder()
        .transform(tfms, size=img_size)
        .databunch(bs=bs, num_workers=0)
        .normalize(imagenet_stats)
    )
    return data
def get_data(train_sampler=None):
    """Build the chip databunch, optionally with a custom training sampler.

    Relies on ``chip_dir``, ``tfms``, ``size``, ``num_workers`` and ``self``
    from the enclosing scope.
    """
    folder_split = ImageList.from_folder(chip_dir).split_by_folder(
        train='train', valid='val')
    prepared = folder_split.label_from_folder().transform(tfms, size=size)
    return prepared.databunch(
        bs=self.train_opts.batch_sz,
        num_workers=num_workers,
        train_sampler=train_sampler,
    )
def main(ensemble, tta, output):
    """Run inference on the test folder and write the predictions to a CSV file.

    Args:
        ensemble: when truthy, predict with an ensemble of three saved
            learners; otherwise use the single 'se_resnext101' learner.
        tta: passed to ``Ensemble.predict`` for the ensemble; for the single
            learner it selects ``TTA`` over ``get_preds``.
        output: destination of the CSV file.
    """
    print("Loading test data.")
    test_imgs = ImageList.from_folder(path=os.path.join(DATA_DIR, TEST_FOLDER), )

    if ensemble:
        # Load every member of the ensemble with the test images attached.
        learners = []
        for name in ['dpn92', 'inceptionv4', 'se_resnext101']:
            print(f"Loading {name}")
            learners.append(load_learner(SAVED_DIR, f'{name}.pkl', test=test_imgs))

        print("Initializing ensemble.")
        combined = Ensemble(learners)

        print("Performing inference...")
        preds = combined.predict(tta)
        print("Predictions done.")

        # Classes and image names are read from the first member.
        reference = learners[0]
    else:
        learner_name = 'se_resnext101'
        print(f"Loading {learner_name}")
        reference = load_learner(SAVED_DIR, f'{learner_name}.pkl', test=test_imgs)

        print("Performing inference...")
        run_inference = reference.TTA if tta else reference.get_preds
        preds, _ = run_inference(ds_type=DatasetType.Test)
        print("Predictions done.")

    classes = reference.data.classes
    img_names = [item.name for item in reference.data.test_ds.items]

    # One column per class, with the image name inserted as the first column.
    df = pd.DataFrame(np.array(preds), columns=classes)
    df.insert(0, 'img_name', img_names)
    df.to_csv(output, index=False)
    print(f"Predictions saved to {output}")
def get_data(data_path: PathOrStr,
             bs: int = 16,
             img_size: int = 160,
             pct_partial: float = 1.0,
             num_workers: int = 0,
             seed: int = 42) -> ImageDataBunch:
    """
    Create a data object from an Imagenet-style directory structure.

    Thin wrapper around fastai's Data Block API that bundles loading,
    sub-sampling, labelling, resizing, batching and normalization.

    :param data_path: path to data in Imagenet-style folder structure.
    :param bs: batch size; lowered to the number of images when fewer are available
    :param img_size: target image size
    :param pct_partial: proportion of all data to use
    :param num_workers: number of workers used to parallelize data transformations when feeding into the model
    :param seed: seed forwarded to ``use_partial_data``
    :return: data object containing data set and data loader (in PyTorch sense)

    .. note:: more on Data Block API here: https://docs.fast.ai/data_block.html

    .. note:: Imagenet-style directory structure: https://docs.fast.ai/vision.data.html#ImageDataBunch.from_folder

    .. note:: the original author reports crashes with `num_workers` other than 0 on a laptop;
              ideally it should equal the number of CPU cores

    .. note:: all of the data will be used as training set, even images in `valid` folder
    """
    images = ImageList.from_folder(data_path)                  # ImageList
    subset = images.use_partial_data(pct_partial, seed=seed)   # ImageList
    unsplit = subset.split_none()                              # ItemLists
    labelled = unsplit.label_from_folder()                     # LabelLists
    label_lists: LabelLists = labelled.transform(size=img_size)

    # With fewer images than the batch size fastai warns and later fails when
    # showing a batch, so shrink the batch size to fit.
    n_images = len(label_lists.train)
    if n_images < bs:
        print(f"Too few images. Decreasing batch size from {bs} to {n_images}.")
        bs = n_images

    bunch = label_lists.databunch(bs=bs, num_workers=num_workers)
    data: ImageDataBunch = bunch.normalize(imagenet_stats)

    # Keep the images in a fixed order so a given image is easy to find again.
    data.train_dl = data.train_dl.new(shuffle=False)
    # The data object needs to know its image size.
    data.img_size = img_size
    return data
def upload_file():
    """Handle an image upload: save the file, run inference, render the results.

    For a POST request the uploaded file is stored, every image found in
    ``upload_path`` is classified, and ``result.html`` is rendered with one
    entry per image: ``[image path, label, confidence, top-3 message]``.
    Label and confidence are "NA" when the confidence is below ``threshold``.
    Any other request method falls through and returns None.
    """
    if request.method != 'POST':
        return None

    # Store the uploaded file in the upload directory.
    image = request.files['file']
    filename = secure_filename(image.filename)
    image.save(Path(app.config["IMAGE_UPLOADS"]+"/"+ filename))

    # Collect the images from the upload path; none found means nothing valid.
    img_list_loader = ImageList.from_folder(upload_path)
    if len(img_list_loader.items) == 0:
        return "ERROR: Invalid image. Go back to upload new image"

    # Load the model with the uploaded images as test set and run inference.
    learner = load_learner(model, test=img_list_loader)
    all_preds, _ = learner.get_preds(ds_type=DatasetType.Test)

    # Shape the results for the UI.
    my_dict = {}
    for index, (probs, img_src) in enumerate(zip(all_preds, img_list_loader.items)):
        top3_return_msg, top_pred = print_top_3_pred(probs)
        confidence = np.round(probs[top_pred].numpy()*100, 2)
        if confidence < threshold:
            custom_msg = "NA"
            prediction_percent = "NA"
        else:
            custom_msg = str(get_label(int(top_pred)))
            prediction_percent = str("{:.2f}%".format(confidence))
        my_dict[index] = [img_src, custom_msg, prediction_percent, top3_return_msg]

    return render_template('result.html', mydict=my_dict)
def _get_data_bunch(path: Union[Path, str], transform: bool, im_size: int,
                    bs: int) -> ImageDataBunch:
    """
    Create ImageDataBunch and return it. TODO in future version is to allow
    users to pass in their own image bunch or their own Transformation
    objects (instead of using fastai's <get_transforms>)

    The data is split randomly with 33% held out for validation, labelled
    from the folder names and normalized with ``imagenet_stats``.

    Args:
        path (Union[Path, str]): path to data to create databunch with
        transform (bool): a flag to set fastai default transformations
            (get_transforms())
        im_size (int): image size of databunch
        bs (int): batch size of databunch

    Returns:
        ImageDataBunch
    """
    # `type(path) is Path` was never true for real paths (they are PosixPath /
    # WindowsPath instances), so the conversion always ran anyway; Path()
    # accepts both str and Path, which makes the check unnecessary.
    path = Path(path)
    tfms = get_transforms() if transform else None
    return (
        ImageList.from_folder(path)
        .split_by_rand_pct(valid_pct=0.33)
        .label_from_folder()
        .transform(tfms=tfms, size=im_size)
        .databunch(bs=bs)
        .normalize(imagenet_stats)
    )
from model_utils import get_data, create_gen_learner

# --- Paths used to rebuild the model -------------------------------------
path = Path('')      # Path to data folder to load your model
path_lr = path / ''  # Path to model weights

# --- Paths used for inference --------------------------------------------
path_t = Path('')    # Path to undamaged files
dmgpath = Path('')   # Path to damage templates
inf_path = Path('')  # Directory for files to be inferenced

# Recreate the generator learner and load the saved weights into it.
src = ImageImageList.from_folder(path_lr).split_by_rand_pct(0.1, seed=42)
data_gen = get_data(1, 500, src, path_lr)
learn_gen = create_gen_learner(data_gen).load('')  # LOAD MODEL HERE
test_list = ImageList.from_folder(inf_path)

# --- Streamlit app -------------------------------------------------------
st.markdown('# **ML for Photo Repair**')
st.markdown('### Choose an Image and Damage Template:')
st.markdown('Click **Generate** to Create a damaged photo')

# Undamaged images: any file whose name contains '.png' or '.jpg'.
names = [
    filename for filename in os.listdir(path_t)
    if '.png' in filename or '.jpg' in filename
]

# Damage templates: only files whose name contains '.png'.
dmgnames = [filename for filename in os.listdir(dmgpath) if '.png' in filename]
from fastai.callbacks import SaveModelCallback
#from fastai.vision import *
from fastai.train import ClassificationInterpretation, DatasetType, load_learner
from fastai.vision import get_transforms, ImageList, cnn_learner, accuracy, jitter, open_image, learner
from torchvision import models as tv_models
from matplotlib import pyplot as plt
from pathlib import Path

# Dataset location and basic hyper-parameters.
base_path = Path('data', 'dataset-15')
dataset_path = base_path
img_size = 224
bs = 128
arch = tv_models.resnext50_32x4d

# Augmentations: horizontal and vertical flips, no warping, no zooming.
tfms = get_transforms(do_flip=True, flip_vert=True, max_warp=0.0, max_zoom=1.0)

# Training images live in 'images', validation images in 'testset-15-cropped'.
folder_split = ImageList.from_folder(dataset_path).split_by_folder(
    train='images', valid='testset-15-cropped')
data = (
    folder_split
    .label_from_folder()
    .transform(tfms)
    .databunch(bs=bs)
    .normalize()
)
# Shuffle the validation loader as well.
data.valid_dl = data.valid_dl.new(shuffle=True)

# plot one image with transformations
# example_img = open_image('/home/hoth/Desktop/lego-brick-recognition/data/datasets/train-15/images/3008/3008_0.jpg')
# example_img.apply_tfms(tfms[0], size=224).show(figsize=(10, 10))
# plt.show()

output_dir = base_path / 'classification'
output_dir.mkdir(exist_ok=True)

# Save an example batch of the training and of the validation set.
data.show_batch(rows=10, ds_type=DatasetType.Train)
plt.savefig(output_dir / 'batch_example_train.svg')
data.show_batch(rows=10, ds_type=DatasetType.Valid)
plt.savefig(output_dir / 'batch_example_valid.svg')
# local modules
print(f"Fast.ai version = {fastai.__version__}")
which_processor()

# Training configuration.
EPOCHS = 10
LEARNING_RATE = 1e-4
IM_SIZE = 300
BATCH_SIZE = 16
ARCHITECTURE = models.resnet18

# Build the databunch: 80/20 random split with a fixed seed, labels taken
# from folder names, images resized to IM_SIZE, ImageNet normalization.
path = Path('/app/classifier_data/')
labelled = (
    ImageList.from_folder(path)
    .split_by_rand_pct(valid_pct=0.2, seed=10)
    .label_from_folder()
)
data = (
    labelled
    .transform(size=IM_SIZE)
    .databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
    .normalize(imagenet_stats)
)
print(f'number of classes: {data.c}')
print(data.classes)

# Train all layers, then export the model.
learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy],
    callback_fns=[partial(TrainMetricsRecorder, show_graph=True)],
)
learn.unfreeze()
learn.fit(EPOCHS, LEARNING_RATE)
learn.export(file=Path("/app/classifier_model.pkl"))

# Evaluate on the validation set; the first returned value is discarded.
_, validation_accuracy = learn.validate(learn.data.valid_dl, metrics=[accuracy])
# Training configuration.
HEAD_LEARNING_RATE = 0.01
BODY_LEARNING_RATE = 0.0001
BATCH_SIZE = 32
IM_SIZE = 224
DROPOUT = 0
ARCHITECTURE = models.resnet50

# Desired embedding dimension. Higher dimensions slow down retrieval but often provide better accuracy.
EMBEDDING_DIM = 4096
assert EMBEDDING_DIM == 4096 or EMBEDDING_DIM <= 2048

# Load images into fast.ai's ImageDataBunch object
random.seed(642)
finetune_items = (
    ImageList.from_folder(DATA_FINETUNE_PATH)
    .split_by_rand_pct(valid_pct=0.05, seed=20)
    .label_from_folder()
)
data_finetune = (
    finetune_items
    .transform(tfms=fastai.vision.transform.get_transforms(), size=IM_SIZE)
    .databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
    .normalize(imagenet_stats)
)
print(
    f"Data for fine-tuning: {len(data_finetune.train_ds.x)} training images and {len(data_finetune.valid_ds.x)} validation images."
)

learn = cnn_learner(data_finetune, ARCHITECTURE, metrics=[], ps=DROPOUT)
print(learn.model[1])

# By default uses the 2048 dimensional pooling layer as implemented in the paper.
# Optionally can instead keep the 4096-dimensional pooling layer from the ResNet-50 model.
if EMBEDDING_DIM != 4096:
    modules = []
def test_wrong_order():
    """Labelling before splitting must fail with the "isn't split" error."""
    this_tests('na')
    path = untar_data(URLs.MNIST_TINY)
    with pytest.raises(Exception, match="Your data isn't split*"):
        # Deliberately wrong order: label first, split afterwards.
        unsplit_items = ImageList.from_folder(path)
        labelled_too_early = unsplit_items.label_from_folder()
        labelled_too_early.split_by_folder()
## helpful way to initially get folders
# import split_folders
# split_folders.ratio('<path>', output='<path>/split', seed=1337, ratio=(.8, .2)) # uses default values
# sys.exit()

path = Path("data/CNN/-released/split")

################################################################################
# fastai uses databunches
################################################################################
# 90/10 random split of the 'train' folder, labels taken from folder names.
train_items = ImageList.from_folder(path / "train")
labelled = train_items.split_by_rand_pct(0.1, seed=33).label_from_folder()
# .add_test_folder('..'/path/'test')

# Flip augmentations; images are squished to 150 px with zero padding.
augmented = labelled.transform(
    get_transforms(do_flip=True, flip_vert=True),
    size=150,
    resize_method=ResizeMethod.SQUISH,
    padding_mode="zeros",
)
data = augmented.databunch(bs=64).normalize(imagenet_stats)

# ## turn this on for regular testing
# option_name = 'CNN__original'
# data_test = (ImageList.from_folder(path)
#              .split_by_folder(train='train', valid='test')
#              .label_from_folder()
#              .transform(get_transforms(do_flip=True,flip_vert=True),size=150,resize_method=ResizeMethod.SQUISH,padding_mode='zeros')
#              .databunch(bs=64)
#              .normalize(imagenet_stats))

## turn this on for test_suite
import fastai
from fastai.vision import ImageList, ImageImageList
from pathlib import Path
from model_utils import get_data
from model_utils import create_gen_learner

# Data locations.
path = Path('../data/')
path_hr = path / 'preprocessed'
path_lr = path / 'processed'
path_test = path / 'test_imgs'

# Gather and select Data / Output size
bs, size = 1, 500
lr_images = ImageImageList.from_folder(path_lr)
src = lr_images.split_by_rand_pct(0.1, seed=42)
data_gen = get_data(bs, size, src, path_hr)

# Load model to inference from
gen_learner = create_gen_learner(data_gen)
learn_gen = gen_learner.load('')  # Input model to load

# Open file to be inferenced
test_list = ImageList.from_folder(path_test)
test_list.open(test_list.items[0])

# Inference and display output
first_image = test_list[0]
first_image.show(figsize=(7, 7),y=learn_gen.predict(test_list[0])[0])

# Save File if desired
#y = learn_gen.predict(test_list[8])[0]
#y.save(path_test/'inf1.png')
from fastai.core import Path
from fastai.vision import load_learner, defaults, ImageList,DatasetType
import torch
import os
import yaml

pic_name = 'GK_RDR_PG3_2'

# Project root, image folder and exported model.
mp = Path('/Users/nicholasbangs/Notebooks/personal/greek_reader_master')
im_path = mp/'lgi_data'/'gk_letter_imgs'/'ω'
model_path = mp/'models'
model_name = 'rn_34.pkl'

# Run everything on the CPU.
defaults.device = torch.device('cpu')

# Load the exported learner with the letter images attached as test set.
test_images = ImageList.from_folder(im_path)
model = load_learner(model_path, model_name, test=test_images)
preds,y = model.get_preds(ds_type=DatasetType.Test)
classes = model.data.classes

# Pair every class with its score, then keep the best pair for each image.
zipped = [zip(classes, p) for p in preds]
sorted_preds = [
    sorted(pairs, key=lambda pair: pair[1], reverse=True)[0]
    for pairs in zipped
]
print(sorted_preds)
def train(self, tmp_dir):
    """Train a model.

    This downloads any previous output saved to the train_uri,
    starts training (or resumes from a checkpoint), periodically
    syncs contents of train_dir to train_uri and after training finishes.

    Steps, in order: sync earlier output from ``train_uri``, unzip the chip
    archives into ``<tmp_dir>/chips``, build the databunch (optionally on a
    random subset of the training chips), create the learner, set up the
    callbacks, fit, write the 'done!' marker and sync the output back.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    # No worker processes in debug mode, four otherwise.
    num_workers = 0 if self.train_opts.debug else 4
    tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

    # First pass: only split by folder, so the training items can be counted
    # and sub-sampled before labels and transforms are applied.
    data = (ImageList.from_folder(chip_dir).split_by_folder(train='train',
                                                            valid='val'))

    # Number of training chips to keep; None means keep all of them.
    # An explicit train_count takes precedence over train_prop.
    train_count = None
    if self.train_opts.train_count is not None:
        train_count = min(len(data.train), self.train_opts.train_count)
    elif self.train_opts.train_prop != 1.0:
        train_count = int(
            round(self.train_opts.train_prop * len(data.train)))
    train_items = data.train.items
    if train_count is not None:
        # Random subset of the training items, drawn from numpy's global RNG.
        train_inds = np.random.permutation(np.arange(len(
            data.train)))[0:train_count]
        train_items = train_items[train_inds]
    # The validation items are always kept in full.
    items = np.concatenate([train_items, data.valid.items])

    # Second pass: rebuild the databunch from the selected items only.
    data = ImageList(items, chip_dir) \
        .split_by_folder(train='train', valid='val') \
        .label_from_folder(classes=classes) \
        .transform(tfms, size=size) \
        .databunch(bs=self.train_opts.batch_size,
                   num_workers=num_workers)
    log.info(str(data))

    if self.train_opts.debug:
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    # Setup learner.
    ignore_idx = -1
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)
    ]
    # The architecture is looked up by name on the `models` module.
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = cnn_learner(data,
                        model_arch,
                        metrics=metrics,
                        wd=self.train_opts.weight_decay,
                        path=train_dir)

    learn.unfreeze()

    # Mixed precision is only enabled when CUDA is actually available.
    if self.train_opts.mixed_prec and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        log.info('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        # NOTE(review): torch.load unpickles the downloaded file, so
        # pretrained_uri must point to a trusted source. The whole model
        # object is replaced by the 'model' entry of the loaded dict.
        learn.model = torch.load(pretrained_path,
                                 map_location=learn.data.device)['model']

    # Save every epoch so that resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]

    if self.train_opts.log_tensorboard:
        callbacks.append(TensorboardLogger(learn, 'run'))

    if self.train_opts.run_tensorboard:
        log.info('Starting tensorboard process')
        log_dir = join(train_dir, 'logs', 'run')
        tensorboard_process = Popen(
            ['tensorboard', '--logdir={}'.format(log_dir)])
        # Registered for termination at exit as well as after fitting below.
        terminate_at_exit(tensorboard_process)

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        if lr is None:
            # No learning rate configured: run the LR finder and take the
            # rate the recorder reports as min_grad_lr.
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            log.info('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    # tensorboard_process only exists when run_tensorboard is set, which is
    # the same condition that guarded its creation above.
    if self.train_opts.run_tensorboard:
        tensorboard_process.terminate()

    # Since model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
from fastprogress.fastprogress import force_console_behavior
import fastprogress

# Disable the progress bars and make fastai use the console versions.
fastprogress.fastprogress.NO_BAR = True
master_bar, progress_bar = force_console_behavior()
fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar, progress_bar


def get_file(aString):
    """Return the last '/'-separated component of *aString* as a str."""
    return str(aString.split('/')[-1])


image_folder = 'images/'
path = untar_data(URLs.DOGS)

# Keep a handle on the test ImageList: its item order is the order in which
# the predictions come back.
test_images = ImageList.from_folder(image_folder)
learn = load_learner(path, test=test_images, bs = 1)
preds,y = learn.get_preds(ds_type=DatasetType.Test, )
# Column 0 of the predictions is reported as 'prob_dog'.
predList = list(preds.numpy()[:,0])

# Take the file names from the same ImageList that produced the predictions.
# listdir() gives no ordering guarantee and also returns non-image entries,
# so zipping its result with the predictions could pair a probability with
# the wrong file.
f_names = [item.name for item in test_images.items]
pred_df = pd.DataFrame(list(zip(f_names,predList)), columns = ['f_name','prob_dog'])

# Join the predictions onto the download registry by file name.
registry = 'registry/downloaded_files.csv'
regDF = pd.read_csv(registry)
regDF['f_name'] = regDF.file.apply(get_file)
out_df = pd.merge(regDF,pred_df, on = ['f_name'])