def main(epochs=5, learning_rate=0.01):
    """Train a ResNet-18 on the MNIST sample with MLflow autologging enabled.

    Args:
        epochs: Number of epochs passed to ``learn.fit``.
        learning_rate: Learning rate passed to ``learn.fit``.
    """
    # Allow more than one OpenMP runtime to be loaded instead of erroring out.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Download the MNIST sample and wrap it in a normalized data bunch.
    mnist_dir = vis.untar_data(vis.URLs.MNIST_SAMPLE)
    augmentations = (vis.rand_pad(2, 28), [])
    bunch = vis.ImageDataBunch.from_folder(mnist_dir, ds_tfms=augmentations, bs=64)
    bunch.normalize(vis.imagenet_stats)

    learner = vis.cnn_learner(bunch, vis.models.resnet18, metrics=vis.accuracy)

    # Autologging has to be switched on before the run starts.
    mlflow.fastai.autolog()

    with mlflow.start_run() as active_run:
        learner.fit(epochs, learning_rate)

    # Report what autologging recorded for the finished run.
    finished_run = mlflow.get_run(run_id=active_run.info.run_id)
    print_auto_logged_info(finished_run)
def fit_model(data, model, pretrained, ps, cyc_len, lr_lower, lr_upper, path,
              filename=None, verbose=True):
    """Train a mixup CNN learner with the one-cycle policy.

    The best epoch (by ``accuracy``) is checkpointed under the name ``best``
    by ``SaveModelCallback`` during training.

    Args:
        data: Data bunch handed to ``cnn_learner``.
        model: Base architecture handed to ``cnn_learner``.
        pretrained: Forwarded to ``cnn_learner``.
        ps: Dropout setting forwarded to ``cnn_learner``.
        cyc_len: Number of epochs for ``fit_one_cycle``.
        lr_lower: Lower bound of the ``max_lr`` slice.
        lr_upper: Upper bound of the ``max_lr`` slice.
        path: Directory component of the final save location.
        filename: File name of the final save. When ``None`` (the default)
            the final save is skipped; previously this crashed inside
            ``os.path.join(path, None)``.
        verbose: Print the saved path when a final save happens.

    Returns:
        The learner's ``recorder``.
    """
    learner = cnn_learner(
        data, model, metrics=accuracy, pretrained=pretrained, ps=ps
    ).mixup()

    # Checkpoint whenever the monitored accuracy improves.
    checkpoint_best = callbacks.SaveModelCallback(
        learner, every='improvement', monitor='accuracy', name='best')
    learner.fit_one_cycle(
        cyc_len,
        max_lr=slice(lr_lower, lr_upper),
        callbacks=[checkpoint_best],
    )

    # Only write the final model when the caller named a target file.
    if filename is not None:
        output_path = learner.save(os.path.join(path, filename), return_path=True)
        if verbose:
            print("Model saved at", output_path)

    return learner.recorder
def main(epochs=5, learning_rate=0.01):
    """Train a ResNet-18 on the MNIST sample, log it to MLflow and list its artifacts.

    Args:
        epochs: Number of epochs passed to ``learn.fit``.
        learning_rate: Learning rate passed to ``learn.fit``.
    """
    # Allow more than one OpenMP runtime to be loaded instead of erroring out.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Download the MNIST sample and wrap it in a normalized data bunch.
    mnist_dir = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        mnist_dir,
        ds_tfms=(rand_pad(2, 28), []),
        bs=64,
    )
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Train and log the model inside a single MLflow run.
    with mlflow.start_run() as active_run:
        learner.fit(epochs, learning_rate)
        mlflow.fastai.log_model(learner, 'model')

    # Collect the paths of everything stored under the run's "model" folder.
    logged_files = MlflowClient().list_artifacts(active_run.info.run_id, 'model')
    artifacts = []
    for entry in logged_files:
        artifacts.append(entry.path)
    print("artifacts: {}".format(artifacts))
def main(epochs=5, learning_rate=0.01):
    """Train a ResNet-18 on the MNIST sample, log it to MLflow and load it back.

    Args:
        epochs: Number of epochs passed to ``learn.fit``.
        learning_rate: Learning rate passed to ``learn.fit``.
    """
    # Allow more than one OpenMP runtime to be loaded instead of erroring out.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Download the MNIST sample and wrap it in a normalized data bunch.
    mnist_dir = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        mnist_dir,
        ds_tfms=(rand_pad(2, 28), []),
        bs=64,
    )
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Train and log the model inside a single MLflow run.
    with mlflow.start_run() as active_run:
        learner.fit(epochs, learning_rate)
        mlflow.fastai.log_model(learner, "model")

    # Reload the logged model through its run-relative URI.
    model_uri = "runs:/{}/model".format(active_run.info.run_id)
    loaded_model = mlflow.fastai.load_model(model_uri)

    # NOTE(review): the Ellipsis is a placeholder from the original snippet;
    # real input has to be supplied before predict() can return anything useful.
    predict_data = ...
    loaded_model.predict(predict_data)
def tiny_ic_databunch_valid_features(tiny_ic_databunch):
    """Compute DNN features for the validation split of the tiny fridge objects data."""
    learner = cnn_learner(tiny_ic_databunch, models.resnet18)
    # Layer whose output is used as the embedding.
    head = learner.model[1]
    embedding_layer = head[6]
    return compute_features_learner(
        tiny_ic_databunch,
        DatasetType.Valid,
        learner,
        embedding_layer,
    )
def createmodel(self, quantize=True):
    """Build the MobileNetV2 learner on ``self.data`` and store it in ``self.learn``.

    ``vision.learner.create_body`` is replaced by ``self.create_custom_body``
    before the learner is built. With ``quantize`` enabled the model is
    additionally prepared for quantization-aware training.

    Parameters
    ----------
    quantize : bool, optional
        Whether to prepare the model for quantization-aware training,
        by default True
    """
    print("Creating model..")

    # Module-level patch: stays in place after this method returns.
    vision.learner.create_body = self.create_custom_body

    tracked_metrics = [error_rate, FBeta(beta=1), Precision(), Recall(), AUROC()]
    self.learn = cnn_learner(
        self.data,
        models.mobilenet_v2,
        pretrained=True,
        metrics=tracked_metrics,
        split_on=custom_split,
        model_dir=self.model_dir,
    )

    if not quantize:
        return

    # Attach the default QAT config to the first sub-module, then prepare in place.
    network = self.learn.model
    network[0].qconfig = torch.quantization.default_qat_qconfig
    self.learn.model = torch.quantization.prepare_qat(network, inplace=True)
def authenticate():
    """Return True when any captured face matches one of the stored models.

    Every regular file in the models directory except ``tmp.pth`` counts as a
    stored model; its name up to the first dot is passed to ``compare``.

    Changes from the previous version:
      * the learner is only built once at least one face has been captured,
        so an empty capture returns immediately;
      * the intermediate JPEG is written to a private temporary directory
        that is removed afterwards, instead of a fixed ``temp.jpeg`` left
        behind in the current working directory.

    Returns:
        bool: True on the first match, False when there are no faces or no
        face matches any model.
    """
    import tempfile

    models_dir = '/lib/Auth/RecFace/images/models/'
    root_models = [f for f in listdir(models_dir) if isfile(join(models_dir, f))]
    if 'tmp.pth' in root_models:
        root_models.remove('tmp.pth')

    imgs = getFaces.getFaces()
    # len() rather than truthiness: the container type returned by getFaces
    # is not visible here and may not support bool().
    if len(imgs) == 0:
        return False

    classes = ["Test", "Train"]
    data = ImageDataBunch.single_from_classes(
        '/lib/Auth/RecFace/images/', classes, ds_tfms=None, size=224)
    data.normalize(imagenet_stats)
    learn = cnn_learner(data, models.vgg16_bn)

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = join(tmp_dir, 'temp.jpeg')
        for img in imgs:
            # Round-trip through a JPEG file so open_image can load the face.
            resized = resize(img, (224, 224), interpolation=INTER_AREA)
            imwrite(tmp_path, resized)
            face = open_image(tmp_path)
            if any(compare(mod.split('.')[0], face, learn) for mod in root_models):
                return True
    return False
def main(epochs=5, learning_rate=0.01):
    """Train a ResNet-18 on the MNIST sample, log it and print the default conda env.

    Args:
        epochs: Number of epochs passed to ``learn.fit``.
        learning_rate: Learning rate passed to ``learn.fit``.
    """
    # Allow more than one OpenMP runtime to be loaded instead of erroring out.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Download the MNIST sample and wrap it in a normalized data bunch.
    mnist_dir = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        mnist_dir,
        ds_tfms=(rand_pad(2, 28), []),
        bs=64,
    )
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Train and log the model inside a single MLflow run.
    with mlflow.start_run() as active_run:
        learner.fit(epochs, learning_rate)
        mlflow.fastai.log_model(learner, "model")

    # Show the run id followed by MLflow's default conda environment for fastai.
    run_id = active_run.info.run_id
    print("run_id: {}".format(run_id))
    conda_env = mlflow.fastai.get_default_conda_env()
    print("conda environment: {}".format(conda_env))
def audio_cnn_learner(data: AudioDataBunch, base_arch: Callable, cut: Union[int, Callable] = None,
                      pretrained: bool = False, lin_ftrs: Optional[Collection[int]] = None,
                      ps: Floats = 0.5, custom_head: Optional[nn.Module] = None,
                      split_on: Optional[SplitFuncOrIdxList] = None, bn_final: bool = False,
                      init=nn.init.kaiming_normal_, concat_pool: bool = True,
                      padding_mode: str = 'zeros', adapt: Callable[[Tensor, int], Tensor] = None,
                      **kwargs: Any) -> Learner:
    '''Build a CNN learner for audio spectrograms.

    The learner comes from `cnn_learner`; its model is then passed to
    `adapt_model` together with the channel count reported by
    `data.output_info`, and the learner is returned unfrozen.
    '''
    learner = cnn_learner(
        data,
        base_arch,
        cut=cut,
        pretrained=pretrained,
        lin_ftrs=lin_ftrs,
        ps=ps,
        custom_head=custom_head,
        split_on=split_on,
        bn_final=bn_final,
        init=init,
        concat_pool=concat_pool,
        **kwargs,
    )

    n_channels = data.output_info.channels
    adapt_model(
        learner.model,
        n_channels,
        pretrained=pretrained,
        init=init,
        padding_mode=padding_mode,
        adapt=adapt,
    )

    # Unlike the vision default, the audio model is not left frozen.
    learner.unfreeze()
    return learner
def get_model(model_dir):
    """Rebuild the DenseNet-121 learner stored in ``model_dir``.

    The data bunch is restored empty from ``stage-5-data.pkl`` and the
    ``stage-5`` weights are loaded into a non-pretrained learner.
    """
    model_root = Path(model_dir)
    empty_data = vision.ImageDataBunch.load_empty(model_root, fname='stage-5-data.pkl')
    learner = vision.cnn_learner(
        empty_data,
        vision.models.densenet121,
        pretrained=False,
    )
    return learner.load('stage-5')
def test_compute_features_learner(tiny_ic_databunch):
    """compute_features_learner yields one 512-long feature per validation image."""
    learner = cnn_learner(tiny_ic_databunch, models.resnet18)
    embedding_layer = learner.model[1][6]

    features = compute_features_learner(
        tiny_ic_databunch,
        DatasetType.Valid,
        learner,
        embedding_layer,
    )

    valid_paths = tiny_ic_databunch.valid_ds.x.items
    assert len(features) == len(valid_paths)
    # Features are keyed by the image path rendered as a string.
    sample_key = str(valid_paths[1])
    assert len(features[sample_key]) == 512
def test_compute_features(tiny_ic_databunch):
    """compute_features yields one 512-long feature per validation image."""
    learner = cnn_learner(tiny_ic_databunch, models.resnet18)
    embedding_layer = learner.model[1][6]

    features = compute_features(
        tiny_ic_databunch.valid_ds,
        learner,
        embedding_layer,
    )

    valid_paths = tiny_ic_databunch.valid_ds.x.items
    assert len(features) == len(valid_paths)
    # Features are keyed by the image path rendered as a string.
    sample_key = str(valid_paths[1])
    assert len(features[sample_key]) == 512
def model_pred_scores(tiny_ic_databunch):
    """Fit a ResNet-18 for one epoch and return it with its prediction scores.

    Returns:
        tuple: ``(learner, scores)`` where ``scores`` is the first element of
        ``learner.get_preds()`` converted to nested Python lists.
    """
    learner = cnn_learner(tiny_ic_databunch, models.resnet18)
    learner.fit(1, 1e-4)

    prediction_outputs = learner.get_preds()
    scores = prediction_outputs[0].tolist()
    return learner, scores
def tune_lr(data, model, pretrained, ps, start_lr, end_lr, num_iters):
    """Run the learning-rate finder on a mixup CNN learner.

    Returns:
        tuple: ``(lrs, losses)`` as recorded by the learner's recorder.
    """
    base_learner = cnn_learner(
        data,
        model,
        metrics=accuracy,
        pretrained=pretrained,
        ps=ps,
    )
    learner = base_learner.mixup()

    # Sweep learning rates between start_lr and end_lr.
    learner.lr_find(start_lr=start_lr, end_lr=end_lr, num_it=num_iters)

    recorder = learner.recorder
    return recorder.lrs, recorder.losses
def __init__(self):
    """Load the data bunch, the trained ResNet-50 learner and the pickled LSH index.

    The LSH pickle is read inside a ``with`` block so the file handle is
    closed deterministically (it was previously left open).
    """
    self.data = load_data(path_web_cleaned_chicago, "databunch-lsh.pkl")
    self.arch = models.resnet50
    self.learn = cnn_learner(self.data, self.arch, metrics=error_rate)
    self.learn.load("tatrec-stage-2-1")

    # Hook a layer of the model head so its output can be read as features.
    self.sf = SaveFeatures(self.learn.model[1][5])

    # NOTE(review): pickle.load runs arbitrary code from the file; lsh.pkl is
    # assumed to be a trusted, locally produced artifact.
    with open(path_web_models_chicago + 'lsh.pkl', 'rb') as lsh_file:
        self.lsh = pickle.load(lsh_file)

    self.n_items = 5
    self.path_img_upload = path_web_upload
    self.distance_func = 'hamming'
def train(data):
    """Fine-tune a ResNet-18 for 20 one-cycle epochs with TensorBoard logging.

    Precision and recall are tracked as metrics and the weights are saved as
    ``finetune-epoch-20``. The leftover ``pdb.set_trace()`` breakpoint, which
    stalled any non-interactive run, has been removed; the learner is
    returned instead so callers can still inspect it.

    Args:
        data: Data bunch handed to ``cnn_learner``.

    Returns:
        The trained learner.
    """
    learn = cnn_learner(data, models.resnet18, metrics=[Precision(), Recall()])
    learn.callback_fns.append(
        partial(
            LearnerTensorboardWriter,
            base_dir=Path("data/tensorboard/camlytics_fastai"),
            name="deterministic-data",
        )
    )
    learn.fit_one_cycle(20, 1e-2)
    learn.save("finetune-epoch-20")
    return learn
def main(epochs):
    """Register a task, then train a ResNet-18 on the MNIST sample for ``epochs`` cycles."""
    Task.init(project_name="examples", task_name="fastai v1")

    mnist_dir = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        mnist_dir,
        ds_tfms=(rand_pad(2, 28), []),
        bs=64,
        num_workers=0,
    )
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Accuracy before any training; the value itself is not kept.
    untrained_outputs = learner.get_preds()
    accuracy(*untrained_outputs)

    learner.fit_one_cycle(epochs, 0.01)
def predict_tiles(data, model_type, trained_model, ps):
    """Load trained weights and predict on the validation set.

    Returns:
        tuple: ``(tile_scores, tile_targets)`` as returned by
        ``learner.get_preds`` for ``DatasetType.Valid``.
    """
    base_learner = cnn_learner(data, model_type, metrics=accuracy, ps=ps)
    learner = base_learner.mixup()

    # Restore the trained weights before scoring.
    learner.load(trained_model)

    # Compute prediction scores together with the true labels.
    validation_outputs = learner.get_preds(ds_type=DatasetType.Valid)
    tile_scores, tile_targets = validation_outputs
    return tile_scores, tile_targets
def plot_learning_rate(sample_size=300, image_size=224, load_learner=True):
    """Run the learning-rate finder and save the plot to ``learning_rate.png``.

    :param sample_size: forwarded to ``load_dataset``
    :param image_size: forwarded to ``load_dataset``
    :param load_learner: bool
     Reuse the saved learner (with the freshly loaded data attached) instead
     of building a new ResNet-50 learner
    """
    dataset = load_dataset(sample_size=sample_size, image_size=image_size)

    if not load_learner:
        learner = cnn_learner(dataset, models.resnet50, metrics=accuracy)
        # NOTE(review): the original layout was lost; the DataParallel wrap is
        # assumed to apply only to a freshly built learner - confirm.
        learner.model = torch.nn.DataParallel(learner.model)
    else:
        learner = load_saved_learner()
        learner.data = dataset

    learner.lr_find()
    figure = learner.recorder.plot(return_fig=True)
    figure.savefig('learning_rate.png', dpi=200)
def main(args):
    """Train a fastai CNN classifier on the OpenFire dataset and save a checkpoint.

    ``args`` supplies device, data path, resize, batch size, workers, model
    name, pretrained flag, weight decay, dropout, concat-pool, epochs,
    learning rate, div factor and checkpoint name.
    """
    # Pick a device when none was requested, then make it fastai's default.
    if args.device is None:
        args.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    defaults.device = torch.device(args.device)

    # Flatten both OpenFire splits into parallel lists; the test split is
    # used as the validation set.
    fnames, labels, is_valid = [], [], []
    for use_train_split in (True, False):
        split = OpenFire(
            root=args.data_path,
            train=use_train_split,
            download=True,
            valid_pct=0.2,
        )
        for sample in split.data:
            fnames.append(sample['path'])
            labels.append(sample['target'])
            is_valid.append(not use_train_split)

    frame = pd.DataFrame.from_dict(
        dict(name=fnames, label=labels, is_valid=is_valid))

    image_list = vision.ImageList.from_df(frame, path=args.data_path)
    labelled = image_list.split_from_df('is_valid').label_from_df(cols='label')
    transformed = labelled.transform(vision.get_transforms(), size=args.resize)
    bunch = transformed.databunch(bs=args.batch_size, num_workers=args.workers)
    data = bunch.normalize(vision.imagenet_stats)

    architecture = vision.models.__dict__[args.model]
    learner = vision.cnn_learner(
        data,
        architecture,
        pretrained=args.pretrained,
        wd=args.weight_decay,
        ps=args.dropout_prob,
        concat_pool=args.concat_pool,
        metrics=vision.error_rate,
    )

    learner.fit_one_cycle(
        args.epochs,
        max_lr=slice(args.lr),
        div_factor=args.div_factor,
    )
    learner.save(args.checkpoint)
def search(bot, update):
    """Classify the photo in the incoming message and reply with the label.

    The largest photo size is downloaded to ``testing.jpeg`` and classified.
    The predicted category is sent back when its probability is at least
    0.9, otherwise a "not sure" message is sent. Any failure during
    classification is reported to the user as text.

    Changes from the previous version:
      * class index and probability are read straight from the tensors
        instead of parsing their ``str()`` form, which is display-rounded
        and breaks when the repr carries extras such as ``device=...``;
      * the exception is converted to ``str`` before being sent, since the
        reply text has to be a string;
      * unused locals and the duplicated reply call are removed.

    Args:
        bot: Telegram bot used to fetch the photo file.
        update: Incoming update whose message carries the photo.
    """
    photo_file = bot.get_file(update.message.photo[-1].file_id)
    photo_file.download('testing.jpeg')
    try:
        path = "classes"
        np.random.seed(42)  # keeps the random validation split reproducible
        data = ImageDataBunch.from_folder(
            path,
            train='.',
            valid_pct=0.2,
            ds_tfms=get_transforms(),
            size=224,
            num_workers=4).normalize(imagenet_stats)
        learn = cnn_learner(data, models.resnet34,
                            metrics=error_rate).load("stage-1")
        learn.export()
        learn = load_learner("classes")

        cat, class_idx, probs = learn.predict(open_image("testing.jpeg"))
        confidence = float(probs[int(class_idx)])
        if confidence < 0.9:
            cat = "sry I am not sure "

        update.message.reply_text(
            '`' + str(cat) + '`',
            parse_mode=ParseMode.MARKDOWN,
            reply_to_message_id=update.message.message_id)
    except Exception as e:
        # Handler boundary: surface the failure to the user rather than
        # letting it vanish inside the dispatcher.
        update.message.reply_text(str(e))
def test_train_metrics_recorder(tiny_ic_databunch):
    """TrainMetricsRecorder tracks train/valid metrics for several learner setups.

    Covers a learner with two metrics, one without metrics, and one whose
    validation loader has been removed. The validation loader is restored in
    a ``finally`` block so a failing assertion or fit cannot leave the shared
    ``tiny_ic_databunch`` fixture without validation data for other tests.
    """
    model = models.resnet18
    lr = 1e-4
    epochs = 2

    def test_callback(learn):
        """Attach a TrainMetricsRecorder, train unfrozen, and return the recorder."""
        tmr = TrainMetricsRecorder(learn)
        learn.callbacks.append(tmr)
        learn.unfreeze()
        learn.fit(epochs, lr)
        return tmr

    # multiple metrics
    learn = cnn_learner(tiny_ic_databunch, model, metrics=[accuracy, error_rate])
    cb = test_callback(learn)
    assert len(cb.train_metrics) == len(cb.valid_metrics) == epochs
    assert len(cb.train_metrics[0]) == len(cb.valid_metrics[0]) == 2  # we used 2 metrics

    # no metrics
    learn = cnn_learner(tiny_ic_databunch, model)
    cb = test_callback(learn)
    assert len(cb.train_metrics) == len(cb.valid_metrics) == 0  # no metrics

    # no validation set
    learn = cnn_learner(tiny_ic_databunch, model, metrics=accuracy)
    valid_dl = learn.data.valid_dl
    learn.data.valid_dl = None
    try:
        cb = test_callback(learn)
        assert len(cb.train_metrics) == epochs
        assert len(cb.train_metrics[0]) == 1  # we used 1 metrics
        assert len(cb.valid_metrics) == 0  # no validation
    finally:
        # tiny_ic_databunch is used by other tests too, so always put the
        # validation data back.
        learn.data.valid_dl = valid_dl
def run_training_sample(sample_size=300, image_size=224, n_cycles=10, with_focal_loss=False,
                        with_oversampling=True, with_weighted_loss=True,
                        confusion_matrix_filename='train_confusion_matrix'):
    """
    Train a ResNet-50 classifier on a sample of the dataset, then save the
    learner and its classification interpretation.

    :param sample_size: number of images per class
     if the input file has less than the given number, only the existing ones are used
    :param image_size: Size of the image in image augmentation pre-processing
    :param n_cycles: epochs
    :param with_focal_loss: bool
     Use it if data is balanced; only takes effect when `with_weighted_loss` is False
    :param with_oversampling: bool
     Use oversampling for the minority class of COVID xrays to match the `sample_size`
    :param with_weighted_loss: bool
     Use weighted loss for unbalanced sample size in classes
    :param confusion_matrix_filename: forwarded to `_save_classification_interpert`
    :return:
    """
    dataset = load_dataset(sample_size=sample_size, image_size=image_size)

    callback_fns = [partial(OverSamplingCallback)] if with_oversampling else None
    learn = cnn_learner(dataset, models.resnet50, metrics=accuracy, callback_fns=callback_fns)
    learn.model = torch.nn.DataParallel(learn.model)

    # handle unbalanced data with weights
    # ['COVID-19', 'normal', 'pneumonia']
    if with_weighted_loss:
        # Every class weighs 1 except COVID-19, which weighs 5.
        class_weights = dict.fromkeys(learn.data.classes, 1)
        class_weights['COVID-19'] = 5
        weight_tensor = tensor(
            list(class_weights.values()),
            dtype=torch.float,
            device=fastai.torch_core.defaults.device,
        )
        learn.loss_func = CrossEntropyLoss(weight=weight_tensor, reduction='mean')
    elif with_focal_loss:
        learn.loss_func = FocalLoss()

    learn.fit_one_cycle(n_cycles)

    save_learner(
        learn,
        with_focal_loss=with_focal_loss,
        with_oversampling=with_oversampling,
        sample_size=sample_size,
        with_weighted_loss=with_weighted_loss,
    )
    _save_classification_interpert(learn, confusion_matrix_filename=confusion_matrix_filename)
def fine_tune_convnet(path: str):
    """
    Fine-tune a ResNet-34 in two stages and export it as "two-stage-model.pkl".

    Path must be laid out as a FastAI data bunch, ie Train/Test/Valid dirs
    with one subdirectory per class inside each of them.
    """
    augmentations = get_transforms(do_flip=False)
    bunch = ImageDataBunch.from_folder(path, ds_tfms=augmentations, size=64)
    learner = cnn_learner(bunch, models.resnet34, metrics=error_rate)

    # Stage 1: five cycles with the learner as created.
    learner.fit_one_cycle(5)
    learner.save('stage-1')

    # Stage 2: one cycle over the unfrozen model with a learning-rate range.
    learner.unfreeze()
    stage_two_lr = slice(1e-6, 1e-3)
    learner.fit_one_cycle(1, max_lr=stage_two_lr)
    learner.save('stage-2')

    learner.export(file="two-stage-model.pkl")
def __init__(self, data_path: str, **kwargs):
    """Build the data bunch and learner, then cache activations for the data.

    Args:
        data_path: Location of the image data; checked by ``_validate_path``.
        **kwargs: Forwarded to ``get_data``.
    """
    self._validate_path(data_path)
    self.data: ImageDataBunch = get_data(data_path, **kwargs)
    self.learner = cnn_learner(self.data, models.resnet18)

    # Second-to-last layer of the flattened model is the one that gets hooked.
    flat_layers = flatten_model(self.learner.model)
    self.last_layer: nn.Module = flat_layers[-2]

    # Precompute data activations as part of initialization
    # TODO refactor computation into a separate method?
    self.activations_list: List[Tensor] = []
    self.last_layer.register_forward_hook(self.hook)
    # Only the hook's side effect matters; the predictions are discarded.
    # Presumably self.hook fills activations_list - verify against hook().
    self.learner.get_preds(self.data.train_ds)
    self.data_activations = torch.cat(self.activations_list)

    # This will store activations for query image
    self.query_act = None
def __init__(self, picture_file):
    """Load the uploaded picture and a ResNet-50 learner with 'stage-2-rerun' weights.

    Args:
        picture_file: File name of the picture inside the app's
            ``static/images`` folder.
    """
    self.classes = [
        'Afghan_hound', 'African_hunting_dog', 'Airedale',
        'American_Staffordshire_terrier', 'Appenzeller', 'Australian_terrier',
        'Bedlington_terrier', 'Bernese_mountain_dog', 'Blenheim_spaniel',
        'Border_collie', 'Border_terrier', 'Boston_bull',
        'Bouvier_des_Flandres', 'Brabancon_griffon', 'Brittany_spaniel',
        'Cardigan', 'Chesapeake_Bay_retriever', 'Chihuahua',
        'Dandie_Dinmont', 'Doberman', 'English_foxhound',
        'English_setter', 'English_springer', 'EntleBucher',
        'Eskimo_dog', 'French_bulldog', 'German_shepherd',
        'German_short-haired_pointer', 'Gordon_setter', 'Great_Dane',
        'Great_Pyrenees', 'Greater_Swiss_Mountain_dog', 'Ibizan_hound',
        'Irish_setter', 'Irish_terrier', 'Irish_water_spaniel',
        'Irish_wolfhound', 'Italian_greyhound', 'Japanese_spaniel',
        'Kerry_blue_terrier', 'Labrador_retriever', 'Lakeland_terrier',
        'Leonberg', 'Lhasa', 'Maltese_dog',
        'Mexican_hairless', 'Newfoundland', 'Norfolk_terrier',
        'Norwegian_elkhound', 'Norwich_terrier', 'Old_English_sheepdog',
        'Pekinese', 'Pembroke', 'Pomeranian',
        'Rhodesian_ridgeback', 'Rottweiler', 'Saint_Bernard',
        'Saluki', 'Samoyed', 'Scotch_terrier',
        'Scottish_deerhound', 'Sealyham_terrier', 'Shetland_sheepdog',
        'Shih-Tzu', 'Siberian_husky', 'Staffordshire_bullterrier',
        'Sussex_spaniel', 'Tibetan_mastiff', 'Tibetan_terrier',
        'Walker_hound', 'Weimaraner', 'Welsh_springer_spaniel',
        'West_Highland_white_terrier', 'Yorkshire_terrier', 'affenpinscher',
        'basenji', 'basset', 'beagle',
        'black-and', 'bloodhound', 'bluetick',
        'borzoi', 'boxer', 'briard',
        'bull_mastiff', 'cairn', 'chow',
        'clumber', 'cocker_spaniel', 'collie',
        'curly-coated_retriever', 'dhole', 'dingo',
        'flat-coated_retriever', 'giant_schnauzer', 'golden_retriever',
        'groenendael', 'keeshond', 'kelpie',
        'komondor', 'kuvasz', 'malamute',
        'malinois', 'miniature_pinscher', 'miniature_poodle',
        'miniature_schnauzer', 'otterhound', 'papillon',
        'pug', 'redbone', 'schipperke',
        'silky_terrier', 'soft-coated_wheaten_terrier', 'standard_poodle',
        'standard_schnauzer', 'toy_poodle', 'toy_terrier',
        'vizsla', 'whippet', 'wire-haired_fox_terrier',
    ]

    # Split and re-join the name exactly as before (stem + extension).
    file_name = "".join(os.path.splitext(picture_file))
    img_path = os.path.join(app.root_path, 'static', 'images', file_name)
    self.img = open_image(img_path)

    # Data-less bunch that only carries classes, transforms and normalization.
    single_bunch = ImageDataBunch.single_from_classes(
        "./",
        self.classes,
        ds_tfms=get_transforms(),
        size=224,
    )
    self.data = single_bunch.normalize(imagenet_stats)

    self.learner = cnn_learner(self.data, models.resnet50)
    self.learner.load('stage-2-rerun')
def train_model(model_name, dataset_name, arch, img_size, epochs):
    """Train a classifier on a dataset folder, export it and return training stats.

    Args:
        model_name: Base name of the exported ``.pkl`` file.
        dataset_name: Folder under ``{BASE_DIR}datasets/`` holding the images.
        arch: Key into ``model_mapping`` selecting the architecture.
        img_size: Image size used by the data bunch.
        epochs: Number of one-cycle epochs.

    Returns:
        dict: ``metrics``, ``loss`` and ``lr`` from the learner's recorder,
        converted to plain Python numbers.
    """
    bunch = ImageDataBunch.from_folder(
        f'{BASE_DIR}datasets/{dataset_name}',
        valid_pct=0.2,
        ds_tfms=get_transforms(),
        size=img_size,
        num_workers=6,
        bs=BATCH_SIZE,
    )

    architecture = model_mapping[arch]
    learner = cnn_learner(bunch, architecture, metrics=[error_rate, accuracy])
    learner.fit_one_cycle(epochs)

    export_path = os.path.join(
        os.getcwd(), '..', 'static', 'models', model_name + '.pkl')
    learner.export(export_path)

    recorder = learner.recorder
    return {
        'metrics': [[value.item() for value in epoch] for epoch in recorder.metrics],
        'loss': [value.item() for value in recorder.losses],
        'lr': [value.item() for value in recorder.lrs],
    }
def main(epochs):
    """Register a task, then train a ResNet-18 on the MNIST sample with TensorBoard logging."""
    Task.init(project_name="examples", task_name="fastai with tensorboard callback")

    mnist_dir = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        mnist_dir,
        ds_tfms=(rand_pad(2, 28), []),
        bs=64,
        num_workers=0,
    )
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Write TensorBoard events for this learner under the project folder.
    tboard_path = Path("data/tensorboard/project1")
    tensorboard_writer = partial(
        LearnerTensorboardWriter, base_dir=tboard_path, name="run0")
    learner.callback_fns.append(tensorboard_writer)

    # Accuracy before any training; the value itself is not kept.
    untrained_outputs = learner.get_preds()
    accuracy(*untrained_outputs)

    learner.fit_one_cycle(epochs, 0.01)
def fit_resnet18(loader: str, storage: str, pretrained: bool = False, name: str = "res18.model") -> str:
    """Train a ResNet-18 for one cycle on a serialized loader and pickle the learner.

    Args:
        loader: Base64 text of a dill-serialized data bunch.
        storage: Directory the learner is written to.
        pretrained: Whether to start from pretrained weights. This flag was
            previously ignored (``False`` was hard-coded); it is now passed
            on to ``cnn_learner``. The default keeps the old behaviour.
        name: File name of the dumped learner.

    Returns:
        Path of the written file (``storage + "/" + name``).
    """
    import codecs

    import dill
    from fastai.vision import accuracy, cnn_learner, models

    # NOTE(review): dill.loads can execute arbitrary code embedded in the
    # payload; only call this with loaders produced by a trusted source.
    loader = dill.loads(codecs.decode(loader.encode(), "base64"))

    learner = cnn_learner(loader, models.resnet18, pretrained=pretrained, metrics=accuracy)
    learner.fit_one_cycle(1)

    outpath = storage + "/" + name
    with open(outpath, "wb") as fp:
        dill.dump(learner, fp)
    return outpath
def train(arch):
    """Train ``arch`` with mixup in two phases, checkpointing the lowest error rate.

    Phase one runs 15 one-cycle epochs and keeps the best weights as
    ``bestmodel``; those weights are reloaded, the model is unfrozen, and
    five more epochs run with a learning-rate range, keeping the best
    weights as ``bestmodel-unfreeze``.
    """
    train_data = get_data('train')
    learner = cnn_learner(train_data, arch, metrics=error_rate).mixup()

    # Phase 1: checkpoint on the lowest error rate.
    keep_best = SaveModelCallback(
        learner, monitor='error_rate', mode='min', name='bestmodel')
    learner.fit_one_cycle(15, callbacks=[keep_best])

    # Phase 2: continue from the best phase-1 weights with all layers trainable.
    learner.load('bestmodel')
    learner.unfreeze()
    keep_best_unfrozen = SaveModelCallback(
        learner, monitor='error_rate', mode='min', name='bestmodel-unfreeze')
    learner.fit_one_cycle(
        5,
        max_lr=slice(1e-6, 1e-4),
        callbacks=[keep_best_unfrozen],
    )