Example #1
def create_coco_data():
    """ Create an empty databunch for COCO dataset."""

    train_tfms = []
    val_tfms = []
    ds_tfms = (train_tfms, val_tfms)

    class_mapping = coco_class_mapping()

    import tempfile
    sd = ImageList([],
                   path=tempfile.NamedTemporaryFile().name,
                   ignore_empty=True).split_none()
    data = sd.label_const(
        0,
        label_cls=ObjectDetectionCategoryList,
        classes=list(class_mapping.values())).transform(ds_tfms).databunch()

    data.class_mapping = class_mapping
    data.classes = list(class_mapping.values())
    data._is_empty = False
    data._is_coco = True
    data.resize_to = 416
    data.chip_size = 416

    return data
Example #2
def test_from_df():
    this_tests(ItemList.from_df)
    df = pd.DataFrame(["123.png"], columns=["name"])
    try:
        ImageList.from_df(path="dummy_path", df=df)
    except Exception as ex:
        assert not isinstance(ex, TypeError)
Example #3
def test_set_random_seed(tiny_ic_data_path):
    # check two data batches are the same after seeding
    set_random_seed(1)
    first_data = (ImageList.from_folder(tiny_ic_data_path).split_by_rand_pct().
                  label_from_folder().transform().databunch(bs=5).normalize())
    first_batch = first_data.one_batch()

    set_random_seed(1)
    second_data = (ImageList.from_folder(tiny_ic_data_path).split_by_rand_pct().
                   label_from_folder().transform().databunch(bs=5).normalize())
    second_batch = second_data.one_batch()
    assert first_batch[1].tolist() == second_batch[1].tolist()
Example #4
def test_model_to_learner(tmp):
    model = models.resnet18

    # Test if the function loads an ImageNet model (ResNet) into a Learner
    learn = model_to_learner(model(pretrained=True))
    assert len(learn.data.classes) == 1000  # Check ImageNet classes
    assert isinstance(learn.model, models.ResNet)

    # Test if the model can predict a very simple image
    IM_URL = "https://cvbp.blob.core.windows.net/public/images/cvbp_cup.jpg"
    imagefile = os.path.join(tmp, "cvbp_cup.jpg")
    urllib.request.urlretrieve(IM_URL, imagefile)

    category, ind, predict_output = learn.predict(
        open_image(imagefile, convert_mode="RGB"))
    assert learn.data.classes[ind] == str(category) == "coffee_mug"

    # Test if .predict() yields the same output as .get_preds()
    one_data = (
        ImageList.from_folder(tmp).split_none().label_const(
        )  # cannot use label_empty because of fastai bug: # https://github.com/fastai/fastai/issues/1908
        .transform(
            tfms=None,
            size=IMAGENET_IM_SIZE).databunch(bs=1).normalize(imagenet_stats))
    learn.data.train_dl = one_data.train_dl
    get_preds_output = learn.get_preds(ds_type=DatasetType.Train)

    assert np.all(
        np.isclose(
            np.array(get_preds_output[0].tolist()
                     [0]),  # Note, get_preds() produces a batch (list) output
            np.array(predict_output.tolist()),
            rtol=1e-05,
            atol=1e-08,
        ))
Example #5
 def get_data(train_sampler=None):
     data = (ImageList.from_folder(chip_dir).split_by_folder(
         train='train', valid='val').label_from_folder().transform(
             tfms, size=size).databunch(bs=self.train_opts.batch_sz,
                                        num_workers=num_workers,
                                        train_sampler=train_sampler))
     return data
Example #6
def get_data_from_folder(path: Union[Path, str],
                         bs: int,
                         img_size: int,
                         tfms: Transform = None,
                         extensions: List[str] = [".jpg"]) -> ImageDataBunch:
    """Takes Imagenet style folder structure of test/train/valid and returns DataBunch with different
    batch and image sizes to train with PyTorch.

    Args:
        path : path to folder with data in train/valid/test folder structure
        bs : batch size
        img_size : resize to img_size for training
        tfms : transformations to do
        extensions : extensions to grab from the folder path

    Returns:
        data : Train/Test data organized in Fastai DataBunch

    """
    if tfms is None:
        tfms = get_transforms()

    data = (ImageList.from_folder(
        path,
        extensions=extensions).split_by_folder().label_from_folder().transform(
            tfms,
            size=img_size).databunch(bs=bs,
                                     num_workers=0).normalize(imagenet_stats))

    return data
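
A minimal usage sketch for the helper above; the folder path, batch size, and image size are placeholders, and the call assumes the fastai v1 imports used in the snippet are available:

# Hypothetical call: "data/plants" is a placeholder Imagenet-style folder
data = get_data_from_folder(path="data/plants", bs=32, img_size=224)
print(data.classes)        # class names inferred from the folder names
xb, yb = data.one_batch()  # one normalized training batch
print(xb.shape)            # e.g. torch.Size([32, 3, 224, 224])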
Example #7
def preprocess(dicom_paths):
    dicom_paths = [f'{path}.png' for path in dicom_paths]
    df = pd.DataFrame(dicom_paths, columns=['name'])
    train_data_stats = torch.load('normal_stats')
    images = ImageList.from_df(df, '.')

    return images
Example #8
    def prediction(self, directorio, num_batch=8):

        data = ImageList.from_folder(
            directorio)  # build the ImageList from the folder
        learn.data.add_test(data)  # add data to the test set of learn

        learn.to_fp32()  # make sure the model and data are in FP32

        if self.arquitecture == 'resnet18':
            sf = SaveFeatures(learn.model[1][4])
        elif self.arquitecture == 'effB4':
            sf = SaveFeatures(learn.model._avg_pooling)

        # get the probabilities of images
        preds, _ = learn.get_preds(ds_type=DatasetType.Test, n_batch=num_batch)

        # Get the predictions (integer indexes)
        y_pred = preds.argmax(
            dim=1).tolist()  # using the arguments of the max probabilities

        self.predictions_dict = {
            n.name: learn.data.classes[y]
            for n, y in zip(data.items, y_pred)
        }
        self.features = sf.features.squeeze()
        self.labels = list(set(self.predictions_dict.values()))
Example #9
def main(ensemble, tta, output):
    # Read in test data images from the 'data/test' folder
    print("Loading test data.")
    test_imgs = ImageList.from_folder(path=os.path.join(DATA_DIR,
                                                        TEST_FOLDER), )

    # Get predictions
    if ensemble:
        # Load ensemble of learners
        learners = []
        learner_names = ['dpn92', 'inceptionv4', 'se_resnext101']
        for name in learner_names:
            print(f"Loading {name}")
            learn = load_learner(SAVED_DIR, f'{name}.pkl', test=test_imgs)
            learners.append(learn)

        # Init ensemble
        print("Initializing ensemble.")
        ensemble = Ensemble(learners)

        # Get predictions
        print("Performing inference...")
        preds = ensemble.predict(tta)
        print("Predictions done.")

        # Get classes list
        classes = learners[0].data.classes
        # Get image names list
        img_names = [i.name for i in learners[0].data.test_ds.items]

    else:
        learner_name = 'se_resnext101'

        # Initialize Learner
        print(f"Loading {learner_name}")
        learn = load_learner(SAVED_DIR, f'{learner_name}.pkl', test=test_imgs)

        # Get predictions
        print("Performing inference...")
        if tta:
            preds, _ = learn.TTA(ds_type=DatasetType.Test)
        else:
            preds, _ = learn.get_preds(ds_type=DatasetType.Test)
        print("Predictions done.")

        # Get classes list
        classes = learn.data.classes
        # Get image names list
        img_names = [i.name for i in learn.data.test_ds.items]

    # Initialize DataFrame with the predictions
    df = pd.DataFrame(np.array(preds), columns=classes)
    # Insert image names to DataFrame
    df.insert(0, 'img_name', img_names)

    # Save predictions as csv file
    df.to_csv(output, index=False)
    print(f"Predictions saved to {output}")
Example #10
def get_learner(model_path, model_file, test_path, test_file):
    """
    Loads the model learner from the given model and test paths and files.

    :param model_path: Path to dir where .pkl file is located.
    :param model_file: If multiple .pkl files are located in the same path, provide the exact model file name.
    :param test_path: Path to dir where test data is located
    :param test_file: Preprocessed test_labels.csv file, as produced by preprocess.py. It simplifies building the test ImageList.
    :return: The model learner.
    """
    learn = load_learner(model_path, file=model_file, test=ImageList.from_csv(test_path, test_file, folder='test'))
    return learn
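
An illustrative call with placeholder paths and file names (all four argument values here are assumptions), followed by inference on the attached test set; this assumes `load_learner` and `DatasetType` from fastai v1 are imported as in the surrounding examples:

# Placeholder paths/files for illustration only
learn = get_learner(model_path="models",
                    model_file="export.pkl",
                    test_path="data",
                    test_file="test_labels.csv")
preds, _ = learn.get_preds(ds_type=DatasetType.Test)  # probabilities for the test ImageList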
Example #11
def get_data(data_path: PathOrStr,
             bs: int = 16,
             img_size: int = 160,
             pct_partial: float = 1.0,
             num_workers: int = 0,
             seed: int = 42) -> ImageDataBunch:
    """
    Create data object from Imagenet-style directory structure.

    This is a wrapper around fastai's Data Block API. The purpose is to automate and package together datasets and
    dataloaders, transforms, splitting the data, etc.

    :param data_path: path to data in Imagenet-style folder structure.
    :param bs: batch size
    :param img_size: target image size
    :param pct_partial: proportion of all data to use
    :param num_workers: number of workers used to parallelize data transformations when feeding into the model
    :param seed:
    :return: data object containing data set and data loader (in PyTorch sense)

    .. note:: more on Data Block API here: https://docs.fast.ai/data_block.html
    .. note:: Imagenet-style directory structure: https://docs.fast.ai/vision.data.html#ImageDataBunch.from_folder
    .. note:: `num_workers` set to anything other than 0 crashes on my laptop; ideally, it should equal the number of CPU cores
    .. note:: all of the data will be used as training set, even images in `valid` folder
    """

    label_lists: LabelLists = (
        ImageList.from_folder(data_path)  # -> ImageList
        .use_partial_data(pct_partial, seed=seed)  # -> ImageList
        .split_none()  # -> ItemLists: train and valid ItemList
        .label_from_folder()  # -> LabelLists: train and valid LabelList
        .transform(size=img_size))

    # handle the case when number of images is too small - fastai gives warning and throws error when showing batch
    n_images = len(label_lists.train)
    if n_images < bs:
        print(
            f"Too few images. Decreasing batch size from {bs} to {n_images}.")
        bs = n_images

    data: ImageDataBunch = (
        label_lists.databunch(bs=bs,
                              num_workers=num_workers)  # -> ImageDataBunch
        .normalize(imagenet_stats))  # -> ImageDataBunch

    # we want the order of images to not be shuffled to be able to find the right images easily
    data.train_dl = data.train_dl.new(shuffle=False)
    data.img_size = img_size  # data object needs to know its image size
    return data
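
A brief usage sketch; the data path and parameter values are placeholders chosen only to show the call shape:

# Hypothetical Imagenet-style folder, using half of the available images
data = get_data(data_path="data/images", bs=16, img_size=160, pct_partial=0.5)
print(len(data.train_ds))  # number of images in the (unshuffled) training set
print(data.img_size)       # image size attached to the data object above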
Example #12
def upload_file():
    
    if request.method == 'POST':
        image = request.files['file']
        filename = secure_filename(image.filename)
        
        #saving file in upload path
        image.save(Path(app.config["IMAGE_UPLOADS"]+"/"+ filename))

        my_dict = {}
        #loading images from upload path      
        img_list_loader = ImageList.from_folder(upload_path)
        
        #Checking if valid images are uploaded
        if len(img_list_loader.items)>0:
            #loading model
            load_model = load_learner(model, 
                                  test=img_list_loader)
            #running inference
            preds, y = load_model.get_preds(ds_type=DatasetType.Test)
            index = 0

            # Processing results for UI
            for pred, img_src in zip(preds, img_list_loader.items):

                top3_return_msg, top_pred = print_top_3_pred(pred)

                if np.round(pred[top_pred].numpy() * 100, 2) < threshold:
                    custom_msg = "NA"
                    Prediction_percent = "NA"
                else:
                    custom_msg = str(get_label(int(top_pred)))
                    Prediction_percent = str("{:.2f}%".format(np.round(pred[top_pred].numpy() * 100, 2)))

                temp_val = []
                temp_val.append(img_src)
                temp_val.append(custom_msg)
                temp_val.append(Prediction_percent)
                temp_val.append(top3_return_msg)

                my_dict[index] = temp_val
                index += 1

            return render_template('result.html', mydict=my_dict)

            
        elif len(img_list_loader.items) == 0:
            return "ERROR: Invalid image. Go back to upload a new image"
Example #13
    def getdata(self, bs=32, num_workers=16, noise=True, blur=True, basic=True):
        """Returns the dataloader to be used during training.

        The returned data is normalized and the images are resized to 224x224px.

        Parameters
        ----------
        bs : int, optional
            Batch size, by default 32
        num_workers : int, optional
            Num of process used for fetching data, by default 16
        noise : bool, optional
            Whether to add noisy patches as augmentation, by default True
        blur : bool, optional
            Whether to add blur augmentation, by default True
        basic : bool, optional
            Whether to do basic augmentation like rotation, flipping, etc.,
            by default True

        Returns
        -------
        dataloader
            Dataloader with random sampling enabled.
        """
        print("Going through the data..")

        filenames = ["test", "val", "train"]
        filenames = [self.root / (x + ".txt") for x in filenames]
        with open(self.root / "list.txt", "w") as fout:
            fin = fileinput.input(filenames)
            for line in fin:
                fout.write(line)
            fin.close()
        self.data = (
            (
                ImageList.from_csv(
                    path=self.root, folder="images", csv_name="list.txt", delimiter=" "
                )
            )
            .split_by_idx(list(range(22169)))
            .label_from_df()
            .transform(self.transforms(noise, blur, basic), size=224)
            .databunch(bs=bs, num_workers=num_workers)
        ).normalize()
        return self.data
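
A usage sketch, assuming `loader` is an instance of the surrounding class (so that `self.root` and `self.transforms` are defined); the argument values are illustrative:

# `loader` is a hypothetical instance of the class this method belongs to
data = loader.getdata(bs=64, num_workers=8, noise=False)  # skip the noisy-patch augmentation
data.show_batch(rows=3)  # quick visual sanity check of the normalized 224x224 batches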
Example #14
    def __init__(self, learn_name, tta, exp_name):
        """Logs test info to 'saved/test_info.csv' after initialization.

        Args:
            learn_name (str): Name of the saved Learner file,
                loads from f'saved/{learn_name}.pkl'
            tta (boolean): Whether to perform test time augmentation.
            exp_name (str): Experiment name for logging.
        """
        self.exp_name = exp_name

        # Initialize test ImageList
        test_imgs = ImageList.from_csv(path=DATA_DIR,
                                       folder=TEST_FOLDER,
                                       csv_name=TEST_DF_NAME,
                                       cols=IMG_COL)

        # Initialize Learner from test data
        self.learn = load_learner(
            path=SAVED_DIR,
            file=f'{learn_name}.pkl',
            test=test_imgs,
        )

        # Get classes list
        self.classes = self.learn.data.classes

        # Initialize ground truth labels
        self._init_labels()

        # Get probability scores from model
        if tta:
            self.y_prob, _ = self.learn.TTA(ds_type=DatasetType.Test)
        else:
            self.y_prob, _ = self.learn.get_preds(ds_type=DatasetType.Test)

        # Extract predicted labels from probability scores
        self.y_pred = np.argmax(self.y_prob, axis=1)

        # Compute metrics
        self._init_metrics()

        # Log test info
        self._log_info()
Example #15
    def __init__(self):
        # Read in the training DataFrame
        df = pd.read_csv(os.path.join(DATA_DIR, TRAIN_DF_NAME))
        # Get stratified split indices
        train_idx, val_idx = get_indices_split(df, CLASS_COL, 0.2)

        # Initialize the augmentation/transformation function.
        self._init_tfms()

        # Initialize the ImageList
        # (source image data and labels before any transformations)
        self.src = (
            ImageList.from_csv(path=DATA_DIR,
                               csv_name=TRAIN_DF_NAME,
                               folder=TRAIN_FOLDER,
                               cols=IMG_COL)
            # Stratified split
            .split_by_idxs(train_idx, val_idx)
            # Get labels
            .label_from_df(CLASS_COL))
Example #16
    def _get_data_bunch(path: Union[Path, str], transform: bool, im_size: int,
                        bs: int) -> ImageDataBunch:
        """
        Create an ImageDataBunch and return it. A TODO for a future version is to
        allow users to pass in their own image bunch or their own Transform
        objects (instead of using fastai's <get_transforms>)

        Args:
            path (Union[Path, str]): path to data to create databunch with
            transform (bool): a flag to set fastai default transformations (get_transforms())
            im_size (int): image size of databunch
            bs (int): batch size of databunch
        Returns:
            ImageDataBunch
        """
        path = path if type(path) is Path else Path(path)
        tfms = get_transforms() if transform else None
        return (ImageList.from_folder(path).split_by_rand_pct(
            valid_pct=0.33).label_from_folder().transform(
                tfms=tfms,
                size=im_size).databunch(bs=bs).normalize(imagenet_stats))
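
For illustration, a call sketch with placeholder arguments (shown as a bare call even though the helper is defined as a method on its class):

# Placeholder dataset folder; transform=True applies fastai's default get_transforms()
databunch = _get_data_bunch(path="data/fridge_objects", transform=True, im_size=300, bs=16)
print(databunch.classes)  # labels inferred from the folder structure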
Example #17
from fastai.callbacks import SaveModelCallback
#from fastai.vision import *
from fastai.train import ClassificationInterpretation, DatasetType, load_learner
from fastai.vision import get_transforms, ImageList, cnn_learner, accuracy, jitter, open_image, learner
from torchvision import models as tv_models
from matplotlib import pyplot as plt
from pathlib import Path

base_path = Path('data', 'dataset-15')
dataset_path = base_path
img_size = 224
bs = 128
arch = tv_models.resnext50_32x4d
tfms = get_transforms(do_flip=True, flip_vert=True, max_warp=0.0, max_zoom=1.0)
data = (ImageList.from_folder(dataset_path).split_by_folder(
    train='images',
    valid='testset-15-cropped').label_from_folder().transform(tfms).databunch(
        bs=bs).normalize())
data.valid_dl = data.valid_dl.new(shuffle=True)

# plot one image with transformations
# example_img = open_image('/home/hoth/Desktop/lego-brick-recognition/data/datasets/train-15/images/3008/3008_0.jpg')
# example_img.apply_tfms(tfms[0], size=224).show(figsize=(10, 10))
# plt.show()

Path.mkdir(base_path / 'classification', exist_ok=True)
# view data
data.show_batch(rows=10, ds_type=DatasetType.Train)
plt.savefig(base_path / 'classification' / 'batch_example_train.svg')
data.show_batch(rows=10, ds_type=DatasetType.Valid)
plt.savefig(base_path / 'classification' / 'batch_example_valid.svg')
Example #18
# local modules

print(f"Fast.ai version = {fastai.__version__}")
which_processor()

EPOCHS = 10
LEARNING_RATE = 1e-4
IM_SIZE = 300

BATCH_SIZE = 16
ARCHITECTURE = models.resnet18
path = Path('/app/classifier_data/')

data = (ImageList.from_folder(path).split_by_rand_pct(
    valid_pct=0.2,
    seed=10).label_from_folder().transform(size=IM_SIZE).databunch(
        bs=BATCH_SIZE, num_workers=db_num_workers()).normalize(imagenet_stats))

print(f'number of classes: {data.c}')
print(data.classes)

learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy],
    callback_fns=[partial(TrainMetricsRecorder, show_graph=True)])
learn.unfreeze()
learn.fit(EPOCHS, LEARNING_RATE)
learn.export(file=Path("/app/classifier_model.pkl"))
_, validation_accuracy = learn.validate(learn.data.valid_dl,
                                        metrics=[accuracy])
Example #19

optar = partial(Ranger)


# In[25]:


CV=1
seed = CV
bs = 20
tfms = get_transforms(flip_vert=True, do_flip=True, max_zoom=1.05, max_lighting=0.2,
                      max_warp=0.05, max_rotate=5.)
data = (ImageList.from_df(df=image_df,path=DATA_BASE_PATH / 'train_images',cols='ImageId')
        .split_from_df()
        .label_from_df(cols='Detected',label_cls=FloatList)
        .transform(tfms)
        .databunch(bs=bs,num_workers=4)
        .normalize(IMAGE_STATS_GLOBAL2)
       )


from fastai.vision.models import resnet50
from models.efficientnet import EfficientNet
#making model
arch = 'efficientnet-b0'
model_name = f'{arch}-v1'
# Parameters for the entire model (stem, all blocks, and head)

md_ef = EfficientNet.from_pretrained(arch, num_classes=1, dropout_rate=0.5)
# md_ef = resnet50(pretrained=False, num_classes=1)
Example #20
 def get_train_imagelist(self, validate_ratio=0.2):
     from fastai.vision import ImageList
     return ImageList([
         info['path'] for info in self.get_train_image_info().values()
         if info['valid']
     ]).split_by_rand_pct(validate_ratio).label_from_folder()
Example #21
    def train(self, tmp_dir):
        """Train a model.

        This downloads any previous output saved to the train_uri,
        starts training (or resumes from a checkpoint), periodically
        syncs the contents of train_dir to train_uri, and syncs once more
        after training finishes.

        Args:
            tmp_dir: (str) path to temp directory
        """
        self.log_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        num_workers = 0 if self.train_opts.debug else 4
        tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

        data = (ImageList.from_folder(chip_dir).split_by_folder(train='train',
                                                                valid='val'))
        train_count = None
        if self.train_opts.train_count is not None:
            train_count = min(len(data.train), self.train_opts.train_count)
        elif self.train_opts.train_prop != 1.0:
            train_count = int(
                round(self.train_opts.train_prop * len(data.train)))
        train_items = data.train.items
        if train_count is not None:
            train_inds = np.random.permutation(np.arange(len(
                data.train)))[0:train_count]
            train_items = train_items[train_inds]
        items = np.concatenate([train_items, data.valid.items])

        data = ImageList(items, chip_dir) \
            .split_by_folder(train='train', valid='val') \
            .label_from_folder(classes=classes) \
            .transform(tfms, size=size) \
            .databunch(bs=self.train_opts.batch_size, num_workers=num_workers)
        log.info(str(data))

        if self.train_opts.debug:
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        # Setup learner.
        ignore_idx = -1
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted',
                  clas_idx=1,
                  beta=1,
                  ignore_idx=ignore_idx)
        ]
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = cnn_learner(data,
                            model_arch,
                            metrics=metrics,
                            wd=self.train_opts.weight_decay,
                            path=train_dir)
        learn.unfreeze()

        if self.train_opts.mixed_prec and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to
            # adjust this for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            log.info('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            learn.model = torch.load(pretrained_path,
                                     map_location=learn.data.device)['model']

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        if self.train_opts.log_tensorboard:
            callbacks.append(TensorboardLogger(learn, 'run'))

        if self.train_opts.run_tensorboard:
            log.info('Starting tensorboard process')
            log_dir = join(train_dir, 'logs', 'run')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(log_dir)])
            terminate_at_exit(tensorboard_process)

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            if lr is None:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                log.info('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        if self.train_opts.run_tensorboard:
            tensorboard_process.terminate()

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
Example #22
## helpful way to initially get folders
# import split_folders
# split_folders.ratio('<path>', output='<path>/split', seed=1337, ratio=(.8, .2)) # uses default values
# sys.exit()

path = Path("data/CNN/-released/split")

################################################################################
# fastai uses databunches
################################################################################
data = (
    ImageList.from_folder(path / "train").split_by_rand_pct(
        0.1, seed=33).label_from_folder()
    # .add_test_folder('..'/path/'test')
    .transform(
        get_transforms(do_flip=True, flip_vert=True),
        size=150,
        resize_method=ResizeMethod.SQUISH,
        padding_mode="zeros",
    ).databunch(bs=64).normalize(imagenet_stats))

# ## turn this on for regular testing
# option_name = 'CNN__original'
# data_test = (ImageList.from_folder(path)
#                 .split_by_folder(train='train', valid='test')
#                 .label_from_folder()
#                 .transform(get_transforms(do_flip=True,flip_vert=True),size=150,resize_method=ResizeMethod.SQUISH,padding_mode='zeros')
#                 .databunch(bs=64)
#                 .normalize(imagenet_stats))

## turn this on for test_suite
Example #23
from fastprogress.fastprogress import force_console_behavior
import fastprogress
fastprogress.fastprogress.NO_BAR = True
master_bar, progress_bar = force_console_behavior()
fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar, progress_bar



def get_file(aString):
    return str(aString.split('/')[-1])

image_folder = 'images/'

path = untar_data(URLs.DOGS)
learn = load_learner(path, test=ImageList.from_folder(image_folder), bs = 1)
preds, y = learn.get_preds(ds_type=DatasetType.Test)
predList = list(preds.numpy()[:,0])

f_names = listdir(image_folder)

pred_df = pd.DataFrame(list(zip(f_names,predList)), columns = ['f_name','prob_dog'])

registry = 'registry/downloaded_files.csv'
regDF = pd.read_csv(registry)


regDF['f_name'] = regDF.file.apply(get_file)

out_df = pd.merge(regDF,pred_df, on = ['f_name'])
Example #24
from model_utils import get_data, create_gen_learner

# Loading Paths for Model Load
path = Path('')  # Path to data folder to load your model
path_lr = path / ''  # Path to model weights

# Loading Paths to Inference
path_t = Path('')  # Path to undamaged files
dmgpath = Path('')  # Path to damage templates
inf_path = Path('')  # Directory for files to be inferenced

# Creating gen and Loading saved Weights
src = ImageImageList.from_folder(path_lr).split_by_rand_pct(0.1, seed=42)
data_gen = get_data(1, 500, src, path_lr)
learn_gen = create_gen_learner(data_gen).load('')  # LOAD MODEL HERE
test_list = ImageList.from_folder(inf_path)

# Starting Streamlit App
st.markdown('# **ML for Photo Repair**')
st.markdown('### Choose an Image and Damage Template:')
st.markdown('Click **Generate** to Create a damaged photo')
names = []
for filename in (os.listdir(path_t)):
    if '.png' in filename:
        names.append(filename)
    elif '.jpg' in filename:
        names.append(filename)
dmgnames = []
for filename in (os.listdir(dmgpath)):
    if '.png' in filename:
        dmgnames.append(filename)
Example #25
    def from_model(cls, emd_path, data=None):
        """
        Creates a YOLOv3 Object Detector from an Esri Model Definition (EMD) file.

        =====================   ===========================================
        **Argument**            **Description**
        ---------------------   -------------------------------------------
        emd_path                Required string. Path to Esri Model Definition
                                file.
        ---------------------   -------------------------------------------
        data                    Required fastai Databunch or None. Returned data
                                object from `prepare_data` function or None for
                                inferencing.
        =====================   ===========================================
        
        :returns: `YOLOv3` Object
        """
        if not HAS_FASTAI:
            _raise_fastai_import_error(import_exception=import_exception)

        emd_path = Path(emd_path)
        emd = json.load(open(emd_path))
        model_file = Path(emd['ModelFile'])
        chip_size = emd["ImageWidth"]

        if not model_file.is_absolute():
            model_file = emd_path.parent / model_file

        class_mapping = {i['Value']: i['Name'] for i in emd['Classes']}

        resize_to = emd.get('resize_to')
        if isinstance(resize_to, list):
            resize_to = (resize_to[0], resize_to[1])

        data_passed = True
        # Create an image databunch for when loading the model using emd (without training data)
        if data is None:
            data_passed = False
            train_tfms = []
            val_tfms = []
            ds_tfms = (train_tfms, val_tfms)

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", UserWarning)

                sd = ImageList([],
                               path=emd_path.parent.parent).split_by_idx([])
                data = sd.label_const(
                    0,
                    label_cls=ObjectDetectionCategoryList,
                    classes=list(class_mapping.values())).transform(
                        ds_tfms).databunch().normalize(imagenet_stats)

            data.chip_size = chip_size
            data.class_mapping = class_mapping
            data.classes = ['background'] + list(class_mapping.values())
            data = get_multispectral_data_params_from_emd(data, emd)
            # Add 1 for background class
            data.c += 1
            data._is_empty = True
            data.emd_path = emd_path
            data.emd = emd

        data.resize_to = resize_to
        ret = cls(data, **emd['ModelParameters'], pretrained_path=model_file)

        if not data_passed:
            ret.learn.data.single_ds.classes = ret._data.classes
            ret.learn.data.single_ds.y.classes = ret._data.classes

        return ret
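
A usage sketch for loading the detector for inference only; the EMD path is a placeholder and the call assumes the arcgis.learn environment used above:

# Hypothetical path to an Esri Model Definition file
model = YOLOv3.from_model(emd_path="models/yolov3/yolov3.emd", data=None)
# model now wraps the empty databunch built above, with class_mapping,
# chip_size and resize_to restored from the EMD file.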
Example #26
filenames = train['fname'].values
filenames = filenames.reshape(-1, 1)

oof_preds = np.zeros((len(train), 80))
test_preds = np.zeros((len(test), 80))

tfms = get_transforms(do_flip=False, max_rotate=0, max_lighting=0.1, max_zoom=0, max_warp=0.)
df = pd.read_csv(CSV_TRN_MERGED)
cols = list(df.columns[1:])
i = 0

val_index = range(len(train))

# Our classifier stuff
src = (ImageList.from_csv(WORK/'image', Path('../../')/DATA/'train_merged.csv', folder='trn_merged2', suffix='.jpg')
    .split_by_idx(val_index)
    .label_from_df(cols=list(df.columns[1:])))
    #.label_from_df(label_delim=','))

data = (src.transform(tfms, size=128).databunch(bs=64).normalize())

f_score = partial(fbeta, thresh=0.2)

learn = cnn_learner(data, models.xresnet50, pretrained=False, metrics=[f_score]).mixup(stack_y=False)
learn.fit_one_cycle(125, 1e-2)

all_preds = list(custom_tta(learn))

stacked = torch.stack(all_preds)

new_preds = []
Example #27
    def create_covidx_databunch(self):
        bs = self.bs

        data_path = DATA_DIR / "COVIDx"
        assert data_path.exists()
        train_df_path = data_path / f"train_split_{self.version}.txt"
        # train set
        LOGGER.info(f'Reading train_df from {train_df_path}')
        self.train_df = (pd.read_csv(train_df_path,
                                     header=None,
                                     delimiter=" ",
                                     index_col=0,
                                     names=["name", "label",
                                            "dataset"]).reset_index(drop=True))
        self.train_df["name"] = ["train/" + f for f in self.train_df["name"]]
        self.train_df["is_valid"] = False

        # validation set
        test_df_path = data_path / f"test_split_{self.version}.txt"
        LOGGER.info(f'Reading test_df from {test_df_path}')
        self.test_df = (pd.read_csv(data_path / test_df_path,
                                    header=None,
                                    delimiter=" ",
                                    index_col=0,
                                    names=["name", "label",
                                           "dataset"]).reset_index(drop=True))
        self.test_df["name"] = ["test/" + f for f in self.test_df["name"]]
        self.test_df["is_valid"] = True

        # merge
        data_df = pd.concat([self.train_df,
                             self.test_df]).reset_index(drop=True)
        data_df = data_df.drop("dataset", axis=1)

        # import covidnet test set which is included in testset
        covidnet_test_df = (pd.read_csv(data_path / "test_COVIDx4.txt",
                                        header=None,
                                        delimiter=" ",
                                        index_col=0,
                                        names=["name", "label"
                                               ]).reset_index(drop=True))
        covidnet_test_df["name"] = [
            "test/" + f for f in covidnet_test_df["name"]
        ]
        # sanity check
        a = set(covidnet_test_df.name)
        b = set(self.train_df.name)
        c = set(self.test_df.name)
        assert a.intersection(b) == set()
        assert a.intersection(c) == a

        # create fastai databunch
        tfms = get_dataaug_transformations()

        np.random.seed(42)
        src = (ImageList.from_df(
            data_df,
            data_path).split_from_df().label_from_df().transform(tfms,
                                                                 size=224))
        test = (ImageList.from_df(
            covidnet_test_df,
            data_path).split_none().label_from_df().transform(None, size=224))

        data = (src.databunch(bs=bs).normalize(imagenet_stats))
        data.add_test(test.train.x)

        # check that proportion classes are same in train and valid
        train_counts = np.unique(data.train_ds.y.items, return_counts=True)
        LOGGER.info(
            f'prop in train set: {train_counts[1]/ train_counts[1].sum()}')
        valid_counts = np.unique(data.valid_ds.y.items, return_counts=True)
        LOGGER.info(
            f'prop in valid set: {valid_counts[1]/ valid_counts[1].sum()}')
        test_counts = np.unique(test.y.items, return_counts=True)
        LOGGER.info(
            f'prop in test set: {test_counts[1]/ test_counts[1].sum()}')

        return data, test
Example #28
def test_wrong_order():
    this_tests('na')
    path = untar_data(URLs.MNIST_TINY)
    with pytest.raises(Exception, match="Your data isn't split*"):
        ImageList.from_folder(path).label_from_folder().split_by_folder()
Example #29
def test_wrong_order():
    this_tests('na')
    path = untar_data(URLs.MNIST_TINY)
    with pytest.raises(Exception, match="Your data isn't split*"):
        ImageList.from_folder(path).label_from_folder().split_by_folder()
Example #30
df = pd.read_csv(LABELS)
nunique = list(df.nunique())[1:-1]
print(nunique)
df.head()

range(fold * len(df) // nfolds, (fold + 1) * len(df) // nfolds)

# +
stats = ([0.0692], [0.2051])
data = (ImageList.from_df(
    df,
    path='.',
    folder=TRAIN,
    suffix='.png',
    cols='image_id',
    convert_mode='L').split_by_idx(
        range(fold * len(df) // nfolds,
              (fold + 1) * len(df) // nfolds)).label_from_df(cols=[
                  'grapheme_root', 'vowel_diacritic', 'consonant_diacritic'
              ]).transform(
                  transform.get_transforms(do_flip=False, max_warp=0.1),
                  size=sz,
                  padding_mode='zeros').databunch(bs=bs)).normalize(stats)

data.show_batch()


# +
class Head(nn.Module):
    def __init__(self, nc, n, ps=0.5):
        super().__init__()
        layers = [AdaptiveConcatPool2d(), Mish(), Flatten()] + \
Example #31
from fastai.core import Path
from fastai.vision import load_learner, defaults, ImageList,DatasetType
import torch as torch
import os
import yaml

pic_name = 'GK_RDR_PG3_2'

mp = Path('/Users/nicholasbangs/Notebooks/personal/greek_reader_master')
im_path = mp/'lgi_data'/'gk_letter_imgs'/'ω'

defaults.device = torch.device('cpu')
model_path = mp/'models'
model_name = 'rn_34.pkl'
model = load_learner(model_path, model_name, test=ImageList.from_folder(im_path))
preds,y = model.get_preds(ds_type=DatasetType.Test)
classes = model.data.classes
zipped = list([zip(classes, p) for p in preds])
sorted_preds = [sorted(z, key=lambda x: x[1], reverse=True)[0] for z in zipped]
print(sorted_preds)