def main(epochs=5, learning_rate=0.01):
    """Train a fastai CNN on MNIST_SAMPLE, log it to MLflow, and list the
    artifacts recorded under the run's ``model`` path.

    :param epochs: number of training epochs.
    :param learning_rate: learning rate passed to ``Learner.fit``.
    """
    # Allow duplicate OpenMP runtimes to coexist (avoids a hard crash when
    # fastai/torch load libomp twice on some platforms).
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Fetch the MNIST sample dataset and build a normalized DataBunch.
    dataset_path = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(rand_pad(2, 28), []), bs=64)
    bunch.normalize(imagenet_stats)

    # Build the CNN learner on a ResNet-18 backbone.
    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Train inside an MLflow run and log the fitted model.
    with mlflow.start_run() as run:
        learner.fit(epochs, learning_rate)
        mlflow.fastai.log_model(learner, 'model')

        # Enumerate the artifacts that were just logged under "model".
        client = MlflowClient()
        logged = [f.path for f in client.list_artifacts(run.info.run_id, 'model')]
        print("artifacts: {}".format(logged))
def main(epochs=5, learning_rate=0.01):
    """Train a fastai CNN on MNIST_SAMPLE, log the model to MLflow, and
    print the default conda environment MLflow attaches to fastai models.

    :param epochs: number of training epochs.
    :param learning_rate: learning rate passed to ``Learner.fit``.
    """
    # Permit multiple OpenMP runtimes (works around libomp duplicate-load crash).
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Download MNIST sample data and build a normalized DataBunch.
    dataset_path = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(rand_pad(2, 28), []), bs=64)
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Fit and log inside a single MLflow run.
    with mlflow.start_run() as run:
        learner.fit(epochs, learning_rate)
        mlflow.fastai.log_model(learner, "model")

        # Report the run id and the default serving environment.
        print("run_id: {}".format(run.info.run_id))
        conda_env = mlflow.fastai.get_default_conda_env()
        print("conda environment: {}".format(conda_env))
def main(epochs=5, learning_rate=0.01):
    """Train a fastai CNN on MNIST_SAMPLE with MLflow autologging enabled,
    then print everything the autologger recorded for the run.

    :param epochs: number of training epochs.
    :param learning_rate: learning rate passed to ``Learner.fit``.
    """
    # Tolerate duplicate OpenMP runtimes (libomp can be loaded twice).
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Download the dataset and prepare a normalized DataBunch.
    dataset_path = vis.untar_data(vis.URLs.MNIST_SAMPLE)
    bunch = vis.ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(vis.rand_pad(2, 28), []), bs=64)
    bunch.normalize(vis.imagenet_stats)

    learner = vis.cnn_learner(bunch, vis.models.resnet18, metrics=vis.accuracy)

    # Let MLflow capture parameters, metrics, and the model automatically.
    mlflow.fastai.autolog()

    with mlflow.start_run() as run:
        learner.fit(epochs, learning_rate)

    # Show the auto-logged parameters, metrics, and artifacts.
    print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))
def main(epochs=5, learning_rate=0.01):
    """Train a fastai CNN on MNIST_SAMPLE, log it to MLflow, then reload the
    logged model from the run's artifact store for scoring.

    :param epochs: number of training epochs.
    :param learning_rate: learning rate passed to ``Learner.fit``.
    """
    # Avoid a crash when multiple OpenMP runtimes get loaded.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Acquire the dataset and assemble a normalized DataBunch.
    dataset_path = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(rand_pad(2, 28), []), bs=64)
    bunch.normalize(imagenet_stats)

    # Build the CNN learner.
    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    with mlflow.start_run() as run:
        learner.fit(epochs, learning_rate)
        mlflow.fastai.log_model(learner, "model")

        # Reload the logged model by its runs:/ URI and score with it.
        model_uri = "runs:/{}/model".format(run.info.run_id)
        loaded_model = mlflow.fastai.load_model(model_uri)
        # NOTE(review): `...` is a placeholder carried over from the example;
        # substitute real input before calling predict.
        predict_data = ...
        loaded_model.predict(predict_data)
def main(epochs):
    """Train a fastai v1 CNN on MNIST_SAMPLE while reporting to ClearML.

    :param epochs: number of one-cycle training epochs.
    """
    # Register this script as a ClearML task so metrics are captured.
    Task.init(project_name="examples", task_name="fastai v1")

    dataset_path = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(rand_pad(2, 28), []), bs=64, num_workers=0)
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)
    # Evaluate the untrained model once before fitting.
    accuracy(*learner.get_preds())
    learner.fit_one_cycle(epochs, 0.01)
def main(epochs):
    """Train a fastai CNN on MNIST_SAMPLE with a TensorBoard writer callback,
    reporting to ClearML.

    :param epochs: number of one-cycle training epochs.
    """
    # Register this script as a ClearML task so metrics are captured.
    Task.init(project_name="examples", task_name="fastai with tensorboard callback")

    dataset_path = untar_data(URLs.MNIST_SAMPLE)
    bunch = ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(rand_pad(2, 28), []), bs=64, num_workers=0)
    bunch.normalize(imagenet_stats)

    learner = cnn_learner(bunch, models.resnet18, metrics=accuracy)

    # Stream training stats to TensorBoard under data/tensorboard/project1.
    tboard_path = Path("data/tensorboard/project1")
    learner.callback_fns.append(
        partial(LearnerTensorboardWriter, base_dir=tboard_path, name="run0"))

    # Evaluate once before fitting, then train.
    accuracy(*learner.get_preds())
    learner.fit_one_cycle(epochs, 0.01)
def main():
    """Train a fastai CNN on MNIST_TINY with MLflow autologging, taking
    epochs and learning rate from the command line."""
    # Parse command-line arguments (epochs, lr).
    args = parse_args()

    # Download the tiny MNIST dataset and build a normalized DataBunch.
    dataset_path = vis.untar_data(vis.URLs.MNIST_TINY)
    bunch = vis.ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(vis.rand_pad(2, 28), []), bs=64)
    bunch.normalize(vis.imagenet_stats)

    learner = vis.cnn_learner(bunch, vis.models.resnet18, metrics=vis.accuracy)

    # MLflow records params/metrics/model automatically during fit.
    mlflow.fastai.autolog()
    learner.fit(args.epochs, args.lr)
def main():
    """Train a fastai CNN on MNIST_TINY with MLflow autologging against a
    remote tracking server, storing artifacts in an S3 bucket."""
    # Parse command-line arguments (epochs, lr).
    args = parse_args()

    # Point MLflow at the tracking server given by MLFLOW_TRACKING_URI.
    mlflow_tracking_uri = os.environ.get("MLFLOW_TRACKING_URI")
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    # Create a uniquely named experiment whose artifacts land in S3.
    expr_name = str(uuid.uuid1())
    s3_bucket = os.environ.get("AWS_S3_BUCKET")  # replace this value
    mlflow.create_experiment(expr_name, s3_bucket)
    mlflow.set_experiment(expr_name)

    # Experiment variables; artifact URI should print an s3:// bucket path.
    print("MLFlow Tracking Server URI: " + mlflow.get_tracking_uri())
    print("Artifact URI: " + mlflow.get_artifact_uri())

    # Download the tiny MNIST dataset and build a normalized DataBunch.
    dataset_path = vis.untar_data(vis.URLs.MNIST_TINY)
    bunch = vis.ImageDataBunch.from_folder(
        dataset_path, ds_tfms=(vis.rand_pad(2, 28), []), bs=64)
    bunch.normalize(vis.imagenet_stats)

    learner = vis.cnn_learner(bunch, vis.models.resnet18, metrics=vis.accuracy)

    # Autologging opens its own MLflow run, so no explicit session is needed:
    # with mlflow.start_run():
    mlflow.fastai.autolog()
    learner.fit(args.epochs, args.lr)
argument. $ python fastai_integration.py [--pruning] """ import argparse from functools import partial from fastai import vision import optuna from optuna.integration import FastAIPruningCallback BATCHSIZE = 128 EPOCHS = 10 path = vision.untar_data(vision.URLs.MNIST_SAMPLE) def objective(trial): # Data Augmentation apply_tfms = trial.suggest_categorical("apply_tfms", [True, False]) if apply_tfms: # MNIST is a hand-written digit dataset. Thus horizontal and vertical flipping are # disabled. However, the two flipping will be important when the dataset is CIFAR or # ImageNet. tfms = vision.get_transforms( do_flip=False, flip_vert=False, max_rotate=trial.suggest_int("max_rotate", -45, 45), max_zoom=trial.suggest_float("max_zoom", 1, 2), p_affine=trial.suggest_discrete_uniform("p_affine", 0.1, 1.0, 0.1),
# CamVid semantic-segmentation example: imports, device selection,
# hyperparameters, and dataset/label paths.
from fastai import vision, metrics
from fastai.callback import hooks
from fastai.utils import mem
import numpy as np
from os import path
import torch

# Fall back to CPU when CUDA is unavailable.
vision.defaults.device = vision.defaults.device if torch.cuda.is_available(
) else torch.device('cpu')

# Download data and get path
fastai_path = vision.untar_data(vision.URLs.CAMVID)
PATH = str(fastai_path)
print('CAMVID paths:')
print(fastai_path.ls())

# Training hyperparameters.
BATCH_SIZE = 64
WD = 1e-2  # weight decay
LR = 1e-4  # learning rate
# given the default of 0.3, it means that your LR is going up for 30% of your
# iterations and then decreasing over the last 70%
PCT_START_FINETUNE = 0.9
PCT_START = 0.8
EPOCHS_FINETUNE = 12
EPOCHS = 12

# Define images and label path
LABEL_PATH = path.sep.join([PATH, 'labels'])
IMAGE_PATH = path.sep.join([PATH, 'images'])

# Define paths of image and label
image_paths = vision.get_image_files(IMAGE_PATH)
label_paths = vision.get_image_files(LABEL_PATH)
class LearningRateSetter(LearnerCallback):
    # At each epoch start, call set_learning_rate on the learner so a
    # (possibly edited-on-disk) learning-rate value takes effect.
    def on_epoch_begin(self, **kwargs):
        set_learning_rate(self.learn)


# @reloading re-imports this function from its source file on every call, so
# the commented body below can be uncommented while training is running.
@reloading
def print_model_statistics(model):
    # Uncomment the following lines after during the training
    # to start printing statistics
    #
    # print('{: <28} {: <7} {: <7}'.format('NAME', ' MEAN', ' STDDEV'))
    # for name, param in model.named_parameters():
    #     mean = param.mean().item()
    #     std = param.std().item()
    #     print('{: <28} {: 6.4f} {: 6.4f}'.format(name, mean, std))
    pass


class ModelStatsPrinter(LearnerCallback):
    # At each epoch start, print per-parameter statistics (no-op until the
    # body of print_model_statistics is uncommented).
    def on_epoch_begin(self, **kwargs):
        print_model_statistics(self.learn.model)


# Train a small CNN on MNIST_SAMPLE with both callbacks attached.
path = untar_data(URLs.MNIST_SAMPLE)
data = ImageDataBunch.from_folder(path)
learn = cnn_learner(data, models.resnet18, metrics=accuracy,
                    callback_fns=[ModelStatsPrinter, LearningRateSetter])
learn.fit(10)
from os import listdir

# Force fastprogress into plain console output (no notebook progress bars) so
# logs stay readable outside notebooks.
from fastprogress.fastprogress import force_console_behavior
import fastprogress
fastprogress.fastprogress.NO_BAR = True
master_bar, progress_bar = force_console_behavior()
fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar, progress_bar


def get_file(aString):
    # Return the final '/'-separated component of a path string (file name).
    return str(aString.split('/')[-1])


image_folder = 'images/'
path = untar_data(URLs.DOGS)

# Load the exported learner and attach the images in `image_folder` as its
# test set, scoring one image per batch.
learn = load_learner(path, test=ImageList.from_folder(image_folder), bs=1)
preds, y = learn.get_preds(ds_type=DatasetType.Test, )
# Column 0 probability — presumably the "dog" class; verify class ordering.
predList = list(preds.numpy()[:, 0])

# Pair file names with predictions.
# NOTE(review): this assumes listdir order matches ImageList.from_folder
# order — confirm, otherwise predictions may be paired with the wrong files.
f_names = listdir(image_folder)
pred_df = pd.DataFrame(list(zip(f_names, predList)), columns=['f_name', 'prob_dog'])

# Join predictions onto the download registry by file name.
registry = 'registry/downloaded_files.csv'
regDF = pd.read_csv(registry)
regDF['f_name'] = regDF.file.apply(get_file)
out_df = pd.merge(regDF, pred_df, on=['f_name'])
def main(test, s3_data, batch, debug):
    """Train an object-detection model on PASCAL VOC 2007.

    :param test: if truthy, use a tiny configuration for a quick smoke test.
    :param s3_data: if truthy, sync the output directory to S3 afterwards.
    :param batch: if truthy, dispatch the job via run_on_batch.
    :param debug: passed through to run_on_batch.
    """
    # NOTE(review): this does not return after dispatching to batch — it looks
    # like training then also runs locally; confirm that is intended.
    if batch:
        run_on_batch(test, debug)

    # Setup options
    bs = 16
    size = 256
    num_workers = 4
    num_epochs = 100
    lr = 1e-4  # for size 256
    # Subtract 2 because there's no padding on final convolution
    grid_sz = 8 - 2
    if test:
        # Tiny configuration for smoke testing.
        bs = 8
        size = 128
        num_debug_images = 32
        num_workers = 0
        num_epochs = 1
        # for size 128
        grid_sz = 4 - 2

    # Setup data
    make_dir(output_dir)
    data_dir = untar_data(URLs.PASCAL_2007, dest='/opt/data/pascal2007/data')
    img_path = data_dir/'train/'
    trn_path = data_dir/'train.json'
    trn_images, trn_lbl_bbox = get_annotations(trn_path)
    val_path = data_dir/'valid.json'
    val_images, val_lbl_bbox = get_annotations(val_path)

    # Merge train+valid annotations and map image name -> (boxes, labels).
    images, lbl_bbox = trn_images+val_images, trn_lbl_bbox+val_lbl_bbox
    img2bbox = dict(zip(images, lbl_bbox))
    get_y_func = lambda o: img2bbox[o.name]

    # Class list comes from the COCO-style category section, sorted by id,
    # with an explicit 'background' class prepended.
    with open(trn_path) as f:
        d = json.load(f)
    classes = sorted(d['categories'], key=lambda x: x['id'])
    classes = [x['name'] for x in classes]
    classes = ['background'] + classes
    num_classes = len(classes)

    # Anchor box aspect ratios/sizes for the detection grid.
    anc_sizes = torch.tensor([
        [1, 1],
        [2, 2],
        [3, 3],
        [3, 1],
        [1, 3]], dtype=torch.float32)
    grid = ObjectDetectionGrid(grid_sz, anc_sizes, num_classes)
    score_thresh = 0.1
    iou_thresh = 0.8

    class MyObjectCategoryList(ObjectCategoryList):
        # Decode raw model output into (boxes, labels) for display/eval.
        def analyze_pred(self, pred):
            boxes, labels, _ = grid.get_preds(
                pred.unsqueeze(0), score_thresh=score_thresh,
                iou_thresh=iou_thresh)
            return (boxes[0], labels[0])

    class MyObjectItemList(ObjectItemList):
        # Use the decoding label class above.
        _label_cls = MyObjectCategoryList

    def get_data(bs, size, ):
        # Build the DataBunch: optionally truncate for tests, split on the
        # validation file list, label via img2bbox, and apply transforms.
        src = MyObjectItemList.from_folder(img_path)
        if test:
            src = src[0:num_debug_images]
        src = src.split_by_files(val_images)
        src = src.label_from_func(get_y_func, classes=classes)
        src = src.transform(get_transforms(), size=size, tfm_y=True)
        return src.databunch(path=data_dir, bs=bs, collate_fn=bb_pad_collate,
                             num_workers=num_workers)

    data = get_data(bs, size)
    print(data)
    plot_data(data, output_dir)

    # Setup model
    model = ObjectDetectionModel(grid)

    def loss(out, gt_boxes, gt_classes):
        # Encode ground truth onto the grid, then sum box + class losses.
        gt = model.grid.encode(gt_boxes, gt_classes)
        box_loss, class_loss = model.grid.compute_losses(out, gt)
        return box_loss + class_loss

    metrics = [F1(grid, score_thresh=score_thresh, iou_thresh=iou_thresh)]
    learn = Learner(data, model, metrics=metrics, loss_func=loss,
                    path=output_dir)
    callbacks = [
        CSVLogger(learn, filename='log')
    ]
    # model.freeze_body()
    learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    plot_preds(data, learn, output_dir)

    if s3_data:
        sync_to_dir(output_dir, output_uri)
# BIWI head-pose regression example: dataset download, hyperparameters, and a
# sample image with its annotated center point.
from fastai import vision
import numpy as np
from os import path

# Download data and get path
fastai_path = vision.untar_data(vision.URLs.BIWI_HEAD_POSE)
PATH = str(fastai_path)
print('BIWI_HEAD_POSE paths:')
print(fastai_path.ls())

# Training hyperparameters.
BATCH_SIZE = 64
WD = 1e-2  # weight decay
LR = 2e-2  # learning rate
# given the default of 0.3, it means that your LR is going up for 30% of your
# iterations and then decreasing over the last 70%
PCT_START_FINETUNE = 0.9
PCT_START = 0.8
EPOCHS = 5

# Default value from dataset: the RGB camera calibration matrix.
RGB_CAL = np.genfromtxt(path.sep.join([PATH, '01', 'rgb.cal']), skip_footer=6)
print('[INFO] RGB cal:')
print(RGB_CAL)

# define function to match between image path and text file path.
# E.g: image path: /root/.fastai/data/biwi_head_pose/01/frame_00003_rgb.jpg;
# text file name: /root/.fastai/data/biwi_head_pose/01/frame_00003_pose.txt
# (strips the trailing 'rgb.jpg' — 7 characters — and appends 'pose.txt')
image_path2text_file_name = lambda image_path: path.sep.join([f'{str(image_path)[:-7]}pose.txt'])

# Sample image
image_name = path.sep.join(['01', 'frame_00003_rgb.jpg'])
image_path = path.sep.join([PATH, image_name])
sample_image = vision.open_image(image_path)
sample_image.show(figsize=(6, 6))

# Load center point of this image from text file in dataset
center_pt = np.genfromtxt(image_path2text_file_name(image_path), skip_header=3)
def build_databunch(cfg, tmp_dir):
    """Build two object-detection DataBunches from the configured dataset.

    Supports PASCAL VOC 2007 and Penn-Fudan, either from a local directory or
    downloaded/extracted into ``tmp_dir`` when ``data_uri`` is an s3:// URI.

    :param cfg: experiment config (reads cfg.data.*, cfg.solver.batch_sz,
        cfg.overfit_mode, cfg.test_mode).
    :param tmp_dir: scratch directory for downloaded/extracted data.
    :return: tuple ``(partial_databunch, full_databunch)`` — the first built
        with ``full=False`` (filtered subset), the second with ``full=True``.
    """
    dataset = cfg.data.dataset
    # Presumably raises on unknown dataset names, which also guards the
    # if/elif below — confirm validate_dataset's behavior.
    validate_dataset(dataset)
    img_sz = cfg.data.img_sz
    batch_sz = cfg.solver.batch_sz
    num_workers = cfg.data.num_workers
    data_uri = cfg.data.data_uri

    if cfg.data.dataset == pascal2007:
        data_dir = data_uri
        if data_uri.startswith('s3://'):
            # Download via fastai instead of reading directly from S3.
            data_dir = join(tmp_dir, 'pascal2007-data')
            untar_data(URLs.PASCAL_2007, dest=data_dir)
            data_dir = join(data_dir, 'pascal_2007')
        trn_path = join(data_dir, 'train.json')
        trn_images, trn_lbl_bbox = get_annotations(trn_path)
        val_path = join(data_dir, 'valid.json')
        val_images, val_lbl_bbox = get_annotations(val_path)
        test_path = join(data_dir, 'test.json')
        test_images, test_lbl_bbox = get_annotations(test_path)

        # Map every image name (train+valid+test) to its (boxes, labels).
        images, lbl_bbox = trn_images + val_images + test_images, trn_lbl_bbox + val_lbl_bbox + test_lbl_bbox
        img2bbox = dict(zip(images, lbl_bbox))
        get_y_func = lambda o: img2bbox[o.name]
        ann_path = trn_path
        img_dir = data_dir
    elif cfg.data.dataset == penn_fudan:
        data_dir = data_uri
        if data_uri.startswith('s3://'):
            data_dir = join(tmp_dir, 'penn-fudan/data')
            zip_path = download_if_needed(data_uri, tmp_dir)
            unzip(zip_path, data_dir)
        ann_path = join(data_dir, 'coco.json')
        images, lbl_bbox = get_annotations(ann_path)
        img2bbox = dict(zip(images, lbl_bbox))
        get_y_func = lambda o: img2bbox[o.name]
        img_dir = join(data_dir, 'PNGImages')

    # Class list from the COCO-style categories, sorted by id, with an
    # explicit 'background' class prepended.
    with open(ann_path) as f:
        d = json.load(f)
    classes = sorted(d['categories'], key=lambda x: x['id'])
    classes = [x['name'] for x in classes]
    classes = ['background'] + classes

    def get_databunch(full=True):
        # Assemble a DataBunch; `full` controls whether the file filter below
        # restricts the item list.
        src = ObjectItemList.from_folder(img_dir, presort=True)
        if cfg.overfit_mode:
            # Don't use any validation set so training will run faster.
            src = src.split_by_idxs(np.arange(0, 4), [])
        elif cfg.test_mode:
            # Tiny identical train/valid splits for smoke testing.
            src = src.split_by_idxs(np.arange(0, 4), np.arange(0, 4))
        else:
            def file_filter(path):
                # Keep known train/valid images plus the first 500 test images.
                fn = basename(str(path))
                return fn in trn_images or fn in val_images or fn in test_images[
                    0:500]
            if not full:
                src = src.filter_by_func(file_filter)
            src = src.split_by_files(test_images)
        src = src.label_from_func(get_y_func, classes=classes)

        # Only augment (horizontal flip) when not overfitting on purpose.
        train_transforms, val_transforms = [], []
        if not cfg.overfit_mode:
            train_transforms = [flip_affine(p=0.5)]
        src = src.transform(tfms=[train_transforms, val_transforms],
                            size=img_sz, tfm_y=True,
                            resize_method=ResizeMethod.SQUISH)
        data = src.databunch(path=data_dir, bs=batch_sz,
                             collate_fn=bb_pad_collate,
                             num_workers=num_workers)
        data.normalize(imagenet_stats)
        data.classes = classes
        return data

    return get_databunch(full=False), get_databunch(full=True)
import fastai.vision as fv

# Download (if needed) and extract the MNIST_TINY dataset, returning its path.
path_data = fv.untar_data(fv.URLs.MNIST_TINY)
# It is unlikely we will actively use untar_data in other ways,
# but we can elaborate on how this function behaves:
# 1. once it has downloaded a dataset, it will not re-download it;
# 2. but if `dest` is removed, `fname` (the tgz file) will be un-tarred into `dest`;
# 3. if force_download is set or the dataset is corrupted, the dataset is
#    removed and re-downloaded.
# Also see the official docs on untar_data.