Example #1
def _demodata_toy_sesssion(workdir, name='demo_session', lr=1e-4):
    """
    workdir = ub.ensure_app_cache_dir('netharn/tests/sessions')
    workdir
    """
    # This will train a toy model with toy data using netharn
    import netharn as nh
    import ubelt as ub
    hyper = nh.HyperParams(
        **{
            'workdir': ub.ensure_app_cache_dir('netharn/tests/sessions'),
            'name': name,
            'xpu': nh.XPU.coerce('cpu'),
            'datasets': {
                'train': nh.data.ToyData2d(size=3, rng=0),
                'vali': nh.data.ToyData2d(size=3, rng=0)
            },
            'loaders': {
                'batch_size': 64
            },
            'model': (nh.models.ToyNet2d, {}),
            'optimizer': (nh.optimizers.SGD, {
                'lr': lr
            }),
            'criterion': (nh.criterions.FocalLoss, {}),
            'initializer': (nh.initializers.KaimingNormal, {}),
            'monitor': (nh.Monitor, {
                'max_epoch': 1
            }),
        })
    harn = nh.FitHarn(hyper)
    harn.preferences['use_tensorboard'] = False
    harn.preferences['timeout'] = 1
    harn.run()  # TODO: make this run faster if we don't need to rerun
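A minimal usage sketch for the helper above (assuming ubelt is importable as ub); it prepares the cache directory named in the docstring and trains the toy session into it:

import ubelt as ub
workdir = ub.ensure_app_cache_dir('netharn/tests/sessions')
_demodata_toy_sesssion(workdir, name='demo_session', lr=1e-4)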
Example #2
def _demodata_trained_dpath():
    # This will train a toy model with toy data using netharn
    import netharn as nh
    import ubelt as ub
    import glob
    from os.path import join
    hyper = nh.HyperParams(
        **{
            'workdir': ub.ensure_app_cache_dir('netharn/tests/deploy'),
            'nice': 'deploy_demo_static',
            'xpu': nh.XPU.cast('cpu'),
            'datasets': {
                'train': nh.data.ToyData2d(size=3, rng=0)
            },
            'loaders': {
                'batch_size': 64
            },
            'model': (nh.models.ToyNet2d, {}),
            'optimizer': (nh.optimizers.SGD, {
                'lr': 0.0001
            }),
            'criterion': (nh.criterions.FocalLoss, {}),
            'initializer': (nh.initializers.KaimingNormal, {}),
            'monitor': (nh.Monitor, {
                'max_epoch': 1
            }),
        })
    harn = nh.FitHarn(hyper)
    harn.run()  # TODO: make this run faster if we don't need to rerun
    if len(list(glob.glob(join(harn.train_dpath, '*.py')))) > 1:
        # If multiple models are deployed some hash changed. Need to reset
        harn.initialize(reset='delete')
        harn.run()  # don't relearn if we already finished this one
    return harn.train_dpath
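A minimal usage sketch for the helper above (assumption: the finished toy run leaves its exported artifacts inside the returned training directory):

import glob
from os.path import join
train_dpath = _demodata_trained_dpath()
print('train_dpath = {!r}'.format(train_dpath))
print('contents = {!r}'.format(sorted(glob.glob(join(train_dpath, '*')))))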
Example #3
def _demodata_toy_harn():
    # This will train a toy model with toy data using netharn
    import netharn as nh
    import ubelt as ub
    hyper = nh.HyperParams(
        **{
            'workdir': ub.ensure_app_cache_dir('torch_liberator/tests/deploy'),
            'name': 'demo_liberator_static',
            'xpu': nh.XPU.coerce('cpu'),
            'datasets': {
                'train': nh.data.ToyData2d(size=3, rng=0)
            },
            'loaders': {
                'batch_size': 64
            },
            'model': (nh.models.ToyNet2d, {}),
            'optimizer': (nh.optimizers.SGD, {
                'lr': 0.0001
            }),
            'criterion': (nh.criterions.FocalLoss, {}),
            'initializer': (nh.initializers.KaimingNormal, {}),
            'monitor': (nh.Monitor, {
                'max_epoch': 1
            }),
        })
    harn = nh.FitHarn(hyper)
    harn.preferences['use_tensorboard'] = False
    harn.preferences['log_gradients'] = False
    harn.preferences['timeout'] = 1
    return harn
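A minimal usage sketch: the helper above only constructs the harness, so the caller initializes and runs it explicitly:

harn = _demodata_toy_harn()
harn.initialize()
harn.run()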
Example #4
def setup_harn(cmdline=False, **kw):
    """
    Ignore:
        kw = {}
        cmdline = False
        harn = setup_harn()
    """
    config = StyleTransferConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)

    dataset_info = nh.api.DatasetInfo.coerce(config)

    # input_stats = dataset_info['input_stats']
    model = (TransformerNetwork, {})

    hyper = nh.HyperParams(name=config['name'],
                           workdir=config['workdir'],
                           xpu=nh.XPU.coerce(config['xpu']),
                           datasets=dataset_info['torch_datasets'],
                           loaders=dataset_info['torch_loaders'],
                           model=model,
                           criterion=None,
                           initializer=None,
                           optimizer=nh.Optimizer.coerce(config),
                           dynamics=nh.Dynamics.coerce(config),
                           scheduler=nh.Scheduler.coerce(config),
                           monitor=(nh.Monitor, {
                               'minimize': ['loss'],
                               'patience': config['patience'],
                               'max_epoch': config['max_epoch'],
                               'smoothing': 0.0,
                           }),
                           other={
                               'name': config['name'],
                               'batch_size': config['batch_size'],
                               'balance': config['balance'],
                           },
                           extra={
                               'argv': sys.argv,
                               'config': ub.repr2(config.asdict()),
                           })
    harn = StyleTransferHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
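A minimal usage sketch (assuming the StyleTransferConfig defaults are usable as-is; 'xpu' and 'batch_size' are config keys referenced above):

harn = setup_harn(cmdline=False, xpu='cpu', batch_size=2)
harn.initialize()
harn.run()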
Example #5
def setup_harn(cmdline=True, **kw):
    """
    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=4)
    """
    import sys
    import ndsampler

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    assert config['datasets'] == 'special:camvid'

    coco_datasets = setup_coco_datasets()

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augment=((tag == 'train') and config['augment']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True, pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset, mode=mode)
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    initializer_ = nh.Initializer.coerce(config)

    if config['arch'] == 'unet':
        # Note: UNet can get through 256x256 images at a rate of ~17Hz with
        # batch_size=8. This is pretty slow and can likely be improved by fixing
        # some of the weird padding / mirror stuff I have to do in unet to get
        # output_dims = input_dims.
        from netharn.models.unet import UNet
        model_ = (UNet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'segnet':
        from netharn.models.segnet import Segnet
        model_ = (Segnet, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'psp':
        from netharn.models.psp import PSPNet_Resnet50_8s
        model_ = (PSPNet_Resnet50_8s, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })
    elif config['arch'] == 'deeplab':
        from netharn.models.deeplab import DeepLab_ASPP
        model_ = (DeepLab_ASPP, {
            'classes': torch_datasets['train'].classes,
            'in_channels': 3,
        })

    else:
        raise KeyError(config['arch'])

    if config['init'] == 'cls':
        initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': config['focus'],
            'weight': class_weights,
            # 'reduction': 'none',
        }),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
Example #6
def check_inconsistency():
    import netharn as nh
    import numpy as np
    import torch
    import ubelt as ub
    from netharn.models.yolo2 import light_yolo
    from netharn.models.yolo2 import light_region_loss

    yolo_voc = ub.import_module_from_path(ub.truepath('~/code/netharn/examples/yolo_voc.py'))
    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = 8
    bstep = 8
    workers = 0
    decay = 0.0005
    lr = 0.001
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    # We will divide the learning rate by the simulated batch size
    datasets = {
        # 'train': yolo_voc.YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': yolo_voc.YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size, num_workers=workers,
                              shuffle=(key == 'train'), pin_memory=True,
                              resize_rate=10 * bstep, drop_last=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    assert simulated_bsize == 64, 'must be 64'

    lr_step_points = {
        0:   0,  # Hack to see performance before any learning
        1:   0,
        2:   lr * 1.0 / simulated_bsize,
        3:   lr * 1.0 / simulated_bsize,
    }
    max_epoch = 3

    # Anchors
    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': ub.truepath('~/work/devcheck_yolo'),
        'datasets': datasets,
        'xpu': xpu,

        # a single dict is applied to all dataset loaders
        'loaders': loaders,

        'model': (light_yolo.Yolo, {
            # 'num_classes': datasets['train'].num_classes,
            'num_classes': 20,
            'anchors': anchors,
            # 'conf_thresh': 0.001,
            'conf_thresh': 0.1,  # make training a bit faster
            # nms_thresh=0.5 to reproduce original yolo
            # nms_thresh=0.4 to reproduce lightnet
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        }),

        'criterion': (light_region_loss.RegionLoss, {
            # 'num_classes': datasets['train'].num_classes,
            'num_classes': 20,
            'anchors': anchors,
            'object_scale': 5.0,
            'noobject_scale': 1.0,
            'class_scale': 1.0,
            'coord_scale': 1.0,
            'thresh': 0.6,  # iou_thresh
        }),

        'initializer': (nh.initializers.Pretrained, {
            # 'fpath': light_yolo.initial_imagenet_weights(),
            'fpath': light_yolo.demo_voc_weights(),
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr_step_points[0],
            'momentum': 0.9,
            'dampening': 0,
            # multiplying by batch size was one of those unpublished details
            'weight_decay': decay * simulated_bsize,
        }),

        'scheduler': (nh.schedulers.core.YOLOScheduler, {
            'points': lr_step_points,
            'interpolate': True,
            'burn_in': 1,
            # 'dset_size': len(datasets['train']),  # when drop_last=False
            'dset_size': len(datasets['test']),  # when drop_last=False
            'batch_size': batch_size,
        }),

        'monitor': (nh.Monitor, {
            'minimize': ['loss'],
            'maximize': ['mAP'],
            'patience': max_epoch,
            'max_epoch': max_epoch,
        }),
        # 'augment': datasets['train'].augmenter,
        'dynamics': {'batch_step': bstep},
        'other': {
            'nice': nice,
            'ovthresh': ovthresh,
        },
    })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = yolo_voc.YoloHarn(hyper=hyper)
    harn.config['use_tqdm'] = False
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    harn.initialize()
    harn.run()
Example #7
def setup_harness(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/siam_ibeis.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    nice = kwargs.get('nice', 'untitled')
    bsize = int(kwargs.get('bsize', 6))
    bstep = int(kwargs.get('bstep', 4))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'cpu')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'PZ_MTEST')

    datasets = randomized_ibeis_dset(dbname, dim=dim)
    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    loaders = {
        key:  torch.utils.data.DataLoader(
            dset, batch_size=bsize, num_workers=workers,
            shuffle=(key == 'train'), pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(**{
        'nice': nice,
        'workdir': workdir,
        'datasets': datasets,
        'loaders': loaders,

        'xpu': xpu,

        'model': (SiameseLP, {
            'p': 2,
            'input_shape': (1, 3, dim, dim),
        }),

        'criterion': (nh.criterions.ContrastiveLoss, {
            'margin': 4,
            'weight': None,
        }),

        'optimizer': (torch.optim.SGD, {
            'lr': lr / 10,
            'weight_decay': decay,
            'momentum': 0.9,
            'nesterov': True,
        }),

        'initializer': (nh.initializers.NoOp, {}),

        'scheduler': (nh.schedulers.ListedLR, {
            'points': {
                0:  lr / 10,
                1:  lr,
                59: lr * 1.1,
                60: lr / 10,
                90: lr / 100,
            },
            'interpolate': True
        }),

        'monitor': (nh.Monitor, {
            'minimize': ['loss', 'pos_dist'],
            'maximize': ['accuracy', 'neg_dist'],
            'patience': 160,
            'max_epoch': 160,
        }),

        'augment': datasets['train'].augmenter,

        'dynamics': {
            # Controls how many batches to process before taking a step in the
            # gradient direction. Effectively simulates a batch_size that is
            # `bstep` times bigger.
            'batch_step': bstep,
        },

        'other': {
            'n_classes': 2,
        },
    })
    harn = SiamHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
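A minimal usage sketch: every setting read via kwargs.get above can be overridden at the call site, e.g. for a small CPU-only smoke test (values are illustrative):

harn = setup_harness(dbname='PZ_MTEST', xpu='cpu', bsize=2, workers=0)
harn.initialize()
harn.run()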
Example #8
def train():
    np.random.seed(1031726816 % 4294967295)
    torch.manual_seed(137852547 % 4294967295)
    random.seed(2497950049 % 4294967295)

    # batch_size = int(ub.argval('--batch_size', default=128))
    batch_size = int(ub.argval('--batch_size', default=64))
    workers = int(ub.argval('--workers', default=6))
    model_key = ub.argval('--model', default="CropNetFCAE")
    xpu = nh.XPU.cast("gpu")

    lr = 0.001

    transform_train = transforms.Compose([
        transforms.ToTensor(),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    workdir = ub.ensure_app_cache_dir('netharn')

    datasets = {
        'train':
        torchvision.datasets.CIFAR10(root=workdir,
                                     train=True,
                                     download=True,
                                     transform=transform_train),
        'test':
        torchvision.datasets.CIFAR10(root=workdir,
                                     train=False,
                                     download=True,
                                     transform=transform_test),
    }

    # For some reason the torchvision objects don't have the label names
    CIFAR10_CLASSNAMES = [
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    ]
    datasets['train'].class_names = CIFAR10_CLASSNAMES
    datasets['test'].class_names = CIFAR10_CLASSNAMES

    n_classes = 10  # hacked in
    loaders = {
        key: torch.utils.data.DataLoader(dset,
                                         shuffle=key == 'train',
                                         num_workers=workers,
                                         batch_size=batch_size,
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    initializer_ = (nh.initializers.KaimingNormal, {
        'param': 0,
        'mode': 'fan_in'
    })
    # initializer_ = (initializers.LSUV, {})

    available_models = {
        "CropNetFCAE": (CropNetFCAE, {
            "chip_size": 19,
            "bneck_size": 3,
        }),
    }
    model_ = available_models[model_key]

    hyper = nh.HyperParams(
        datasets=datasets,
        nice='cifar10_' + model_key,
        loaders=loaders,
        workdir=workdir,
        xpu=xpu,
        model=model_,
        optimizer=(torch.optim.SGD, {
            'lr': lr,
            'weight_decay': 5e-4,
            'momentum': 0.9,
            'nesterov': True,
        }),
        scheduler=(nh.schedulers.ListedLR, {
            'points': {
                0: lr,
                150: lr * 0.1,
                250: lr * 0.01,
            },
            'interpolate': False
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': 350,
            'max_epoch': 350,
        }),
        initializer=initializer_,
        criterion=(torch.nn.CrossEntropyLoss, {}),
        # Specify anything else that is special about your hyperparams here
        # Especially if you make a custom_batch_runner
        # TODO: type of augmentation as a parameter dependency
        # augment=str(datasets['train'].augmenter),
        # other=ub.dict_union({
        #     # 'colorspace': datasets['train'].output_colorspace,
        # }, datasets['train'].center_inputs.__dict__),
    )
    harn = CIFAR_FitHarn(hyper=hyper)
    harn.initialize()
    harn.run()
Example #9
def setup_harness(bsize=16, workers=0, **kw):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/yolo_voc.py setup_harness

    Example:
        >>> harn = setup_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.cast('argv')

    def _argval(arg, default):
        return ub.argval(arg, kw.get(arg.lstrip('-'), default))

    nice = _argval('--nice', default='Yolo2Baseline')
    batch_size = int(_argval('--batch_size', default=bsize))
    bstep = int(_argval('--bstep', 1))
    workers = int(_argval('--workers', default=workers))
    decay = float(_argval('--decay', default=0.0005))
    lr = float(_argval('--lr', default=0.001))
    workdir = _argval('--workdir', default=ub.truepath('~/work/viame/yolo'))
    ovthresh = 0.5

    coco_dsets = load_coco_datasets()

    datasets = {
        'train': YoloCocoDataset(coco_dsets['train'], train=True),
        'vali': YoloCocoDataset(coco_dsets['vali']),
    }

    anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                          (9.42, 5.11), (16.62, 10.52)],
                         dtype=float)  # np.float alias was removed in newer numpy

    datasets['train'].check_images_exist()
    datasets['vali'].check_images_exist()

    if workers > 0:
        cv2.setNumThreads(0)

    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=False)
        for key, dset in datasets.items()
    }

    # simulated_bsize = bstep * batch_size
    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': workdir,
            'datasets': datasets,
            'xpu': xpu,

            # a single dict is applied to all dataset loaders
            'loaders': loaders,
            'model': (light_yolo.Yolo, {
                'num_classes': datasets['train'].num_classes,
                'anchors': anchors,
                'conf_thresh': 0.001,
                'nms_thresh': 0.5,
            }),
            'criterion': (
                light_region_loss.RegionLoss,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'object_scale': 5.0,
                    'noobject_scale': 1.0,
                    'class_scale': 1.0,
                    'coord_scale': 1.0,
                    'thresh': 0.6,  # iou_thresh
                }),
            'initializer': (nh.initializers.Pretrained, {
                'fpath': light_yolo.initial_imagenet_weights(),
            }),
            'optimizer': (torch.optim.SGD, {
                'lr': lr / 10,
                'momentum': 0.9,
                'weight_decay': decay,
            }),
            'scheduler': (nh.schedulers.ListedLR, {
                'points': {
                    0: lr / 10,
                    1: lr,
                    59: lr * 1.1,
                    60: lr / 10,
                    90: lr / 100,
                },
                'interpolate': True
            }),
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': 160,
                'max_epoch': 160,
            }),
            'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    harn = YoloHarn(hyper=hyper)
    harn.config['use_tqdm'] = False
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
Example #10
def setup_harn(cmdline=True, **kw):
    """
    CommandLine:
        xdoctest -m netharn.examples.segmentation setup_harn

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> kw = {'workers': 0, 'xpu': 'cpu', 'batch_size': 2}
        >>> cmdline = False
        >>> # Just sets up the harness, does not do any heavy lifting
        >>> harn = setup_harn(cmdline=cmdline, **kw)
        >>> #
        >>> harn.initialize()
        >>> #
        >>> batch = harn._demo_batch(tag='train')
        >>> epoch_metrics = harn._demo_epoch(tag='vali', max_iter=2)
    """
    import sys
    import ndsampler
    import kwarray
    # kwarray.seed_global(2108744082)

    config = SegmentationConfig(default=kw)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    coco_datasets = nh.api.Datasets.coerce(config)
    print('coco_datasets = {}'.format(ub.repr2(coco_datasets)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset,
                                   workdir=workdir,
                                   backend=config['backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        try:
            sampler.frames.prepare(workers=config['workers'])
        except AttributeError:
            pass

    torch_datasets = {
        tag: SegmentationDataset(
            sampler,
            config['input_dims'],
            input_overlap=((tag == 'train') and config['input_overlap']),
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   drop_last=True,
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    if config['class_weights']:
        mode = config['class_weights']
        dset = torch_datasets['train']
        class_weights = _precompute_class_weights(dset,
                                                  mode=mode,
                                                  workers=config['workers'])
        class_weights = torch.FloatTensor(class_weights)
        class_weights[dset.classes.index('background')] = 0
    else:
        class_weights = None

    if config['normalize_inputs']:
        stats_dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(stats_dset)),
                                     rng=0)[0:min(1000, len(stats_dset))]
        stats_subset = torch.utils.data.Subset(stats_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=stats_dset.input_id + 'v3')
        input_stats = cacher.tryload()
        if input_stats is None:
            loader = torch.utils.data.DataLoader(
                stats_subset,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            input_stats = {
                'std': running.simple(axis=None)['std'].round(3),
                'mean': running.simple(axis=None)['mean'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = {}

    print('input_stats = {!r}'.format(input_stats))

    # TODO: infer number of channels
    model_ = (SegmentationModel, {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': torch_datasets['train'].classes.__json__(),
        'in_channels': 3,
    })

    initializer_ = nh.Initializer.coerce(config)
    # if config['init'] == 'cls':
    #     initializer_ = model_[0]._initializer_cls()

    # Create hyperparameters
    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),
        datasets=torch_datasets,
        loaders=torch_loaders,
        model=model_,
        initializer=initializer_,
        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),
        criterion=(
            nh.criterions.FocalLoss,
            {
                'focus': config['focus'],
                'weight': class_weights,
                # 'reduction': 'none',
            }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),
        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        })

    # Create harness
    harn = SegmentationHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 2,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
Example #11
def train():
    """
    Replicates parameters from https://github.com/kuangliu/pytorch-cifar

    The following is a table of kuangliu's reported accuracy and our measured
    accuracy for each model.

    The first column is kuangliu's reported accuracy, the second column is from
    rerunning kuangliu's code, and the final column is from my own training
    harness (which handles logging and whatnot), netharn.

          model |  kuangliu  | rerun-kuangliu  |  netharn |
    -------------------------------------------------------
    ResNet50    |    93.62%  |         95.370% |  95.72%  |  <- how did that happen?
    DenseNet121 |    95.04%  |         95.420% |  94.47%  |
    DPN92       |    95.16%  |         95.410% |  94.92%  |

    """
    import random
    import torchvision
    from torchvision import transforms

    np.random.seed(1031726816 % 4294967295)
    torch.manual_seed(137852547 % 4294967295)
    random.seed(2497950049 % 4294967295)

    # batch_size = int(ub.argval('--batch_size', default=128))
    batch_size = int(ub.argval('--batch_size', default=64))
    workers = int(ub.argval('--workers', default=2))
    model_key = ub.argval('--model', default='densenet121')
    xpu = nh.XPU.cast('argv')

    lr = 0.1

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    workdir = ub.ensure_app_cache_dir('netharn')

    datasets = {
        'train':
        torchvision.datasets.CIFAR10(root=workdir,
                                     train=True,
                                     download=True,
                                     transform=transform_train),
        'test':
        torchvision.datasets.CIFAR10(root=workdir,
                                     train=False,
                                     download=True,
                                     transform=transform_test),
    }

    # For some reason the torchvision objects don't have the label names
    CIFAR10_CLASSNAMES = [
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    ]
    datasets['train'].class_names = CIFAR10_CLASSNAMES
    datasets['test'].class_names = CIFAR10_CLASSNAMES

    n_classes = 10  # hacked in
    loaders = {
        key: torch.utils.data.DataLoader(dset,
                                         shuffle=key == 'train',
                                         num_workers=workers,
                                         batch_size=batch_size,
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    initializer_ = (nh.initializers.KaimingNormal, {
        'param': 0,
        'mode': 'fan_in'
    })
    # initializer_ = (initializers.LSUV, {})

    available_models = {
        'densenet121': (nh.models.densenet.DenseNet, {
            'nblocks': [6, 12, 24, 16],
            'growth_rate': 12,
            'reduction': 0.5,
            'num_classes': n_classes,
        }),
        'resnet50': (nh.models.resnet.ResNet, {
            'num_blocks': [3, 4, 6, 3],
            'num_classes': n_classes,
            'block': 'Bottleneck',
        }),
        'dpn26': (nh.models.dual_path_net.DPN,
                  dict(
                      cfg={
                          'in_planes': (96, 192, 384, 768),
                          'out_planes': (256, 512, 1024, 2048),
                          'num_blocks': (2, 2, 2, 2),
                          'dense_depth': (16, 32, 24, 128),
                          'num_classes': n_classes,
                      })),
        'dpn92': (nh.models.dual_path_net.DPN,
                  dict(
                      cfg={
                          'in_planes': (96, 192, 384, 768),
                          'out_planes': (256, 512, 1024, 2048),
                          'num_blocks': (3, 4, 20, 3),
                          'dense_depth': (16, 32, 24, 128),
                          'num_classes': n_classes,
                      })),
    }

    model_ = available_models[model_key]

    hyper = nh.HyperParams(
        datasets=datasets,
        nice='cifar10_' + model_key,
        loaders=loaders,
        workdir=workdir,
        xpu=xpu,
        model=model_,
        optimizer=(torch.optim.SGD, {
            'lr': lr,
            'weight_decay': 5e-4,
            'momentum': 0.9,
            'nesterov': True,
        }),
        scheduler=(nh.schedulers.ListedLR, {
            'points': {
                0: lr,
                150: lr * 0.1,
                250: lr * 0.01,
            },
            'interpolate': False
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': 350,
            'max_epoch': 350,
        }),
        initializer=initializer_,
        criterion=(torch.nn.CrossEntropyLoss, {}),
        # Specify anything else that is special about your hyperparams here
        # Especially if you make a custom_batch_runner
        # TODO: type of augmentation as a parameter dependency
        # augment=str(datasets['train'].augmenter),
        # other=ub.dict_union({
        #     # 'colorspace': datasets['train'].output_colorspace,
        # }, datasets['train'].center_inputs.__dict__),
    )
    harn = CIFAR_FitHarn(hyper=hyper)
    harn.initialize()
    harn.run()
Example #12
def setup_harness(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        print('Creating torch CocoDataset')
        train_dset = ndsampler.CocoDataset(
            data=
            '/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        train_dset.hashid = 'ggr2-coco-train2018'
        vali_dset = ndsampler.CocoDataset(
            data=
            '/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        vali_dset.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        train_sampler = ndsampler.CocoSampler(train_dset, workdir=workdir)
        vali_sampler = ndsampler.CocoSampler(vali_dset, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train':
            MatchingCocoDataset(train_sampler,
                                train_dset,
                                workdir,
                                dim=dim,
                                augment=True),
            'vali':
            MatchingCocoDataset(vali_sampler, vali_dset, workdir, dim=dim),
        }
    else:
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        key: torch.utils.data.DataLoader(dset,
                                         batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(key == 'train'),
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': workdir,
            'datasets': datasets,
            'loaders': loaders,
            'xpu': xpu,
            'model': (MatchingNetworkLP, {
                'p': 2,
                'input_shape': (1, 3, dim, dim),
            }),
            'criterion': (nh.criterions.ContrastiveLoss, {
                'margin': 4,
                'weight': None,
            }),
            'optimizer': (torch.optim.SGD, {
                'lr': lr,
                'weight_decay': decay,
                'momentum': 0.9,
                'nesterov': True,
            }),
            'initializer': (nh.initializers.NoOp, {}),
            'scheduler': (nh.schedulers.Exponential, {
                'gamma': 0.99,
                'stepsize': 2,
            }),
            # 'scheduler': (nh.schedulers.ListedLR, {
            #     'points': {
            #         1:   lr * 1.0,
            #         19:  lr * 1.1,
            #         20:  lr * 0.1,
            #     },
            #     'interpolate': True
            # }),
            'monitor': (nh.Monitor, {
                'minimize': ['loss', 'pos_dist', 'brier'],
                'maximize': ['accuracy', 'neg_dist', 'mcc'],
                'patience': 40,
                'max_epoch': 40,
            }),

            # 'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                'n_classes': 2,
            },
        })
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
Example #13
def train():
    import random
    import torchvision
    from torchvision import transforms

    xpu = nh.XPU.coerce('argv')
    config = {
        'lr': float(ub.argval('--lr', default=0.1)),
        'batch_size': int(ub.argval('--batch_size', default=64)),
        'workers': int(ub.argval('--workers', default=2)),
        'arch': ub.argval('--arch', default='resnet50'),
        'dataset': ub.argval('--dataset', default='coco'),
        'workdir': ub.argval('--workdir', default=ub.get_app_cache_dir('netharn')),
        'seed': int(ub.argval('--seed', default=137852547)),
        'deterministic': False,
    }

    # The work directory is where all intermediate results are dumped.
    ub.ensuredir(config['workdir'])

    # Take care of random seeding and ensuring appropriate determinisim
    torch.manual_seed((config['seed'] + 0) % int(2 ** 32 - 1))
    random.seed((config['seed'] + 2360097502) % int(2 ** 32 - 1))
    np.random.seed((config['seed'] + 893874269) % int(2 ** 32 - 1))
    if torch.backends.cudnn.enabled:
        # TODO: ensure the CPU mode is also deterministic
        torch.backends.cudnn.deterministic = config['deterministic']

    # Define augmentation strategy
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    if config['dataset'] == 'coco':
        DATASET = torchvision.datasets.CocoDetection
        # TODO: download
        dset = DATASET(root=config['workdir'], download=True)
        meta_fpath = os.path.join(dset.root, dset.base_folder, 'meta')
        meta_dict = pickle.load(open(meta_fpath, 'rb'))
        categories = meta_dict['fine_label_names']
        # categories = [
        #     'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee',
        #     'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus',
        #     'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle',
        #     'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch',
        #     'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant',
        #     'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house',
        #     'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion',
        #     'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
        #     'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter',
        #     'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate',
        #     'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road',
        #     'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk',
        #     'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar',
        #     'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone',
        #     'television', 'tiger', 'tractor', 'train', 'trout', 'tulip',
        #     'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman',
        #     'worm']
    else:
        raise KeyError(config['dataset'])

    datasets = {
        'train': DATASET(root=config['workdir'], train=True,
                         transform=transform_train),
        'test': DATASET(root=config['workdir'], train=False,
                        transform=transform_test),
    }
    # For some reason the torchvision objects do not make the category names
    # easily available. We set them here for ease of use.
    datasets['train'].categories = categories
    datasets['test'].categories = categories

    loaders = {
        key: torch.utils.data.DataLoader(dset, shuffle=key == 'train',
                                         num_workers=config['workers'],
                                         batch_size=config['batch_size'],
                                         pin_memory=True)
        for key, dset in datasets.items()
    }

    if config['workers'] > 0:
        # Solves pytorch deadlock issue #1355.
        import cv2
        cv2.setNumThreads(0)

    # Choose which network architecture to train
    available_architectures = {
        'densenet121': (nh.models.densenet.DenseNet, {
            'nblocks': [6, 12, 24, 16],
            'growth_rate': 12,
            'reduction': 0.5,
            'num_classes': len(categories),
        }),

        'resnet50': (nh.models.resnet.ResNet, {
            'num_blocks': [3, 4, 6, 3],
            'num_classes': len(categories),
            'block': 'Bottleneck',
        }),

        'dpn26': (nh.models.dual_path_net.DPN, dict(cfg={
            'in_planes': (96, 192, 384, 768),
            'out_planes': (256, 512, 1024, 2048),
            'num_blocks': (2, 2, 2, 2),
            'dense_depth': (16, 32, 24, 128),
            'num_classes': len(categories),
        })),

        'dpn92': (nh.models.dual_path_net.DPN, dict(cfg={
            'in_planes': (96, 192, 384, 768),
            'out_planes': (256, 512, 1024, 2048),
            'num_blocks': (3, 4, 20, 3),
            'dense_depth': (16, 32, 24, 128),
            'num_classes': len(categories),
        })),
    }
    model_ = available_architectures[config['arch']]

    # Note there are lots of different initializers including a special
    # pretrained initializer.
    initializer_ = (nh.initializers.KaimingNormal, {'param': 0, 'mode': 'fan_in'})

    # Notice that arguments to hyperparameters are typically specified as a
    # tuple of (type, Dict), where the dictionary are the keyword arguments
    # that can be used to instantiate an instance of that class. While
    # this may be slightly awkward, it enables netharn to track hyperparameters
    # more effectively. Note that it is possible to simply pass an already
    # constructed instance of a class, but this causes information loss.
    hyper = nh.HyperParams(
        # Datasets must be preconstructed
        datasets=datasets,
        nice='cifar10_' + config['arch'],
        # Loader preconstructed
        loaders=loaders,
        workdir=config['workdir'],
        xpu=xpu,
        # The 6 major hyper components are best specified as a Tuple[type, dict]
        model=model_,
        optimizer=(torch.optim.SGD, {
            'lr': config['lr'],
            'weight_decay': 5e-4,
            'momentum': 0.9,
            'nesterov': True,
        }),
        scheduler=(nh.schedulers.ListedLR, {
            'points': {
                0: config['lr'],
                150: config['lr'] * 0.1,
                250: config['lr'] * 0.01,
            },
            'interpolate': False
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': 350,
            'max_epoch': 350,
        }),
        initializer=initializer_,
        criterion=(torch.nn.CrossEntropyLoss, {}),
        # The rests of the keyword arguments are simply dictionaries used to
        # track other information.
        # Specify what augmentations you are performing for experiment tracking
        augment=datasets['train'].augmenter,
        other={
            # Specify anything else that is special about your hyperparams here
            # Especially if you make a custom_batch_runner
        },
    )

    # Creating an instance of a Fitharn object is typically fast.
    harn = Coco_FitHarn(hyper=hyper)

    # Initializing a FitHarn object can take a little time, but not too much.
    # This is where instances of the model, optimizer, scheduler, monitor, and
    # initializer are created. This is also where we check if there is a
    # pre-existing checkpoint that we can restart from.
    harn.initialize()

    # This starts the main loop, which will run until the monitor's termination
    # criterion is satisfied. If the initialize step loaded a checkpoint that
    # already met the termination criterion, then this will simply return.
    deploy_fpath = harn.run()

    # The returned deploy_fpath is the path to an exported netharn model.
    # This model is the one with the best weights according to the monitor.
    print('deploy_fpath = {!r}'.format(deploy_fpath))
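To make the (type, dict) convention described above concrete, here is a hedged sketch: netharn later instantiates each component roughly as cls(**kwargs), which is equivalent to constructing it by hand (toy parameters shown):

import torch
optim_cls, optim_kw = (torch.optim.SGD, {'lr': 0.1, 'momentum': 0.9})
params = [torch.nn.Parameter(torch.zeros(3))]   # stand-in for model.parameters()
optimizer = optim_cls(params, **optim_kw)       # roughly how the harness builds it from the spec
print(optimizer)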
Example #14
def setup_harness(bsize=16, workers=0):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/yolo_voc.py setup_harness

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', 4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    simulated_bsize = bstep * batch_size
    hyper = nh.HyperParams(
        **{
            'nice': nice,
            'workdir': ub.truepath('~/work/voc_yolo2'),
            'datasets': datasets,

            # 'xpu': 'distributed(todo: fancy network stuff)',
            # 'xpu': 'cpu',
            # 'xpu': 'gpu:0,1,2,3',
            'xpu': xpu,

            # a single dict is applied to all dataset loaders
            'loaders': loaders,
            'model': (
                light_yolo.Yolo,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': datasets['train'].anchors,
                    'conf_thresh': 0.001,
                    # 'nms_thresh': 0.5,  # reproduce original yolo
                    'nms_thresh': 0.4,  # reproduce lightnet
                }),
            'criterion': (
                light_region_loss.RegionLoss,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': datasets['train'].anchors,
                    'object_scale': 5.0,
                    'noobject_scale': 1.0,
                    'class_scale': 1.0,
                    'coord_scale': 1.0,
                    'thresh': 0.6,  # iou_thresh
                }),
            'initializer': (
                nh.initializers.Pretrained,
                {
                    # 'fpath': light_yolo.demo_voc_weights(),
                    'fpath': light_yolo.initial_imagenet_weights(),
                }),
            'optimizer': (
                torch.optim.SGD,
                {
                    'lr': lr / 10,
                    'momentum': 0.9,
                    'dampening': 0,
                    # multiplying by batch size was one of those unpublished details
                    'weight_decay': decay * simulated_bsize,
                }),

            # Pascal 2007 + 2012 trainval has 16551 images
            # Pascal 2007 test has 4952 images
            # In the original YOLO, one batch is 64 images,
            # so one epoch is 16551 / 64 = 259 iterations.
            #
            # From the original YOLO VOC v2 config
            # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg
            #     learning_rate=0.001
            #     burn_in=1000
            #     max_batches = 80200
            #     policy=steps
            #     steps=40000,60000
            #     scales=.1,.1
            #
            # However, the LIGHTNET values are
            #   LR_STEPS = [250, 25000, 35000]
            #
            # Based in this, the iter to batch conversion is
            #
            # ((np.array([250, 25000, 35000, 1000, 40000, 60000, 80200]) / 256) + 1).astype(np.int)
            # array([  1,  98, 137,   4, 157, 235, 314])
            'scheduler': (
                nh.schedulers.ListedLR,
                {
                    'points': {
                        # dividing by batch size was one of those unpublished details
                        # 0:  lr * 0.1 / simulated_bsize,  # burnin
                        # 4:  lr * 1.0 / simulated_bsize,
                        # 157: lr * 0.1 / simulated_bsize,
                        # 235: lr * 0.001 / simulated_bsize,
                        0: lr * 0.1 / simulated_bsize,
                        1: lr * 1.0 / simulated_bsize,
                        60: lr * 0.1 / simulated_bsize,
                        90: lr * 0.001 / simulated_bsize,
                    },
                    'interpolate': False
                }),
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': 314,
                'max_epoch': 314,
            }),
            'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    harn = YoloHarn(hyper=hyper)
    harn.config['use_tqdm'] = False
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    return harn
Example #15
def setup_harn(cmdline=True, **kw):
    """
    This creates the "The Classification Harness" (i.e. core ClfHarn object).
    This is where we programmatically connect our program arguments with the
    netharn HyperParameter standards. We are using :module:`scriptconfig` to
    capture these, but you could use click / argparse / etc.

    This function has the responsibility of creating our torch datasets,
    lazy computing input statistics, specifying our model architecture,
    schedule, initialization, optimizer, dynamics, XPU etc. These can usually
    be coerced using netharn API helpers and a "standardized" config dict. See
    the function code for details.

    Args:
        cmdline (bool, default=True):
            if True, behavior will be modified based on ``sys.argv``.
            Note this will activate the scriptconfig ``--help``, ``--dump`` and
            ``--config`` interactions.

    Kwargs:
        **kw: overrides to the default config for :class:`ClfConfig`.
            Note, command line flags have precedence if cmdline=True.

    Returns:
        ClfHarn: a fully-defined, but uninitialized custom :class:`FitHarn`
            object.

    Example:
        >>> # xdoctest: +SKIP
        >>> kw = {'datasets': 'special:shapes256'}
        >>> cmdline = False
        >>> harn = setup_harn(cmdline, **kw)
        >>> harn.initialize()
    """
    import ndsampler
    config = ClfConfig(default=kw)
    config.load(cmdline=cmdline)
    print('config = {}'.format(ub.repr2(config.asdict())))

    nh.configure_hacks(config)
    coco_datasets = nh.api.Datasets.coerce(config)

    print('coco_datasets = {}'.format(ub.repr2(coco_datasets, nl=1)))
    for tag, dset in coco_datasets.items():
        dset._build_hashid(hash_pixels=False)

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        tag: ndsampler.CocoSampler(dset,
                                   workdir=workdir,
                                   backend=config['sampler_backend'])
        for tag, dset in coco_datasets.items()
    }

    for tag, sampler in ub.ProgIter(list(samplers.items()),
                                    desc='prepare frames'):
        sampler.frames.prepare(workers=config['workers'])

    torch_datasets = {
        'train':
        ClfDataset(
            samplers['train'],
            input_dims=config['input_dims'],
            augmenter=config['augmenter'],
        ),
        'vali':
        ClfDataset(samplers['vali'],
                   input_dims=config['input_dims'],
                   augmenter=False),
    }

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)),
                                     rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)

        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v3')
        input_stats = cacher.tryload()

        channels = ChannelSpec.coerce(config['channels'])

        if input_stats is None:
            # Use parallel workers to load data faster
            from netharn.data.data_containers import container_collate
            from functools import partial
            collate_fn = partial(container_collate, num_devices=1)

            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=collate_fn,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])

            # Track moving average of each fused channel stream
            channel_stats = {
                key: nh.util.RunningStats()
                for key in channels.keys()
            }
            assert len(channel_stats) == 1, (
                'only support one fused stream for now')
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                for key, val in batch['inputs'].items():
                    try:
                        for part in val.numpy():
                            channel_stats[key].update(part)
                    except ValueError:  # final batch broadcast error
                        pass

            perchan_input_stats = {}
            for key, running in channel_stats.items():
                # per-channel mean/std aggregated over the spatial axes
                perchan_stats = running.simple(axis=(1, 2))
                perchan_input_stats[key] = {
                    'mean': perchan_stats['mean'].round(3),
                    'std': perchan_stats['std'].round(3),
                }

            input_stats = ub.peek(perchan_input_stats.values())
            cacher.save(input_stats)
    else:
        input_stats = {}

    torch_loaders = {
        tag: dset.make_loader(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_workers=config['workers'],
            shuffle=(tag == 'train'),
            balance=(config['balance'] if tag == 'train' else None),
            pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    initializer_ = None
    classes = torch_datasets['train'].classes

    modelkw = {
        'arch': config['arch'],
        'input_stats': input_stats,
        'classes': classes.__json__(),
        'channels': channels,
    }
    model = ClfModel(**modelkw)
    model._initkw = modelkw
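    # (Hedged note: stashing the instantiation kwargs on ``_initkw`` is what
    # allows netharn's deployment/export machinery to re-instantiate the model
    # later; treat this explanation as an assumption, not documented API.)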

    if initializer_ is None:
        initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(name=config['name'],
                           workdir=config['workdir'],
                           xpu=nh.XPU.coerce(config['xpu']),
                           datasets=torch_datasets,
                           loaders=torch_loaders,
                           model=model,
                           criterion=None,
                           optimizer=nh.Optimizer.coerce(config),
                           dynamics=nh.Dynamics.coerce(config),
                           scheduler=nh.Scheduler.coerce(config),
                           initializer=initializer_,
                           monitor=(nh.Monitor, {
                               'minimize': ['loss'],
                               'patience': config['patience'],
                               'max_epoch': config['max_epoch'],
                               'smoothing': 0.0,
                           }),
                           other={
                               'name': config['name'],
                               'batch_size': config['batch_size'],
                               'balance': config['balance'],
                           },
                           extra={
                               'argv': sys.argv,
                               'config': ub.repr2(config.asdict()),
                           })
    harn = ClfHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 3,
        'keep_freq': 10,
        'tensorboard_groups': ['loss'],
        'eager_dump_tensorboard': True,
    })
    harn.intervals.update({})
    harn.script_config = config
    return harn
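

# A minimal usage sketch (an assumption, not part of the original example):
# like the other netharn harnesses in this document, the returned object is
# initialized and then run.
if __name__ == '__main__':
    harn = setup_harn(cmdline=True)
    harn.initialize()
    harn.run()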
Example #16
def setup_harn(cmdline=True, **kwargs):
    """
    cmdline, kwargs = False, {}
    """
    import sys
    import ndsampler

    config = ImageClfConfig(default=kwargs)
    config.load(cmdline=cmdline)
    nh.configure_hacks(config)  # fix opencv bugs

    cacher = ub.Cacher('tiny-imagenet', cfgstr='v4', verbose=3)
    data = cacher.tryload()
    if data is None:
        data = grab_tiny_imagenet_as_coco()
        cacher.save(data)
    coco_datasets = data  # setup_coco_datasets()
    dset = coco_datasets['train']
    print('train dset = {!r}'.format(dset))

    workdir = ub.ensuredir(ub.expandpath(config['workdir']))
    samplers = {
        # tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='cog')
        tag: ndsampler.CocoSampler(dset, workdir=workdir, backend='npy')
        for tag, dset in coco_datasets.items()
    }
    torch_datasets = {
        tag: ImagClfDataset(
            sampler, config['input_dims'],
            augmenter=((tag == 'train') and config['augmenter']),
        )
        for tag, sampler in samplers.items()
    }
    torch_loaders = {
        tag: torch_data.DataLoader(dset,
                                   batch_size=config['batch_size'],
                                   num_workers=config['workers'],
                                   shuffle=(tag == 'train'),
                                   pin_memory=True)
        for tag, dset in torch_datasets.items()
    }

    import torchvision
    # TODO: netharn should allow for this
    model_ = torchvision.models.resnet50(pretrained=False)

    # model_ = (, {
    #     'classes': torch_datasets['train'].classes,
    #     'in_channels': 3,
    # })
    initializer_ = nh.Initializer.coerce(config)

    hyper = nh.HyperParams(
        nice=config['nice'],
        workdir=config['workdir'],
        xpu=nh.XPU.coerce(config['xpu']),

        datasets=torch_datasets,
        loaders=torch_loaders,

        model=model_,
        initializer=initializer_,

        scheduler=nh.Scheduler.coerce(config),
        optimizer=nh.Optimizer.coerce(config),
        dynamics=nh.Dynamics.coerce(config),

        criterion=(nh.criterions.FocalLoss, {
            'focus': 0.0,
        }),

        monitor=(nh.Monitor, {
            'minimize': ['loss'],
            'patience': config['patience'],
            'max_epoch': config['max_epoch'],
            'smoothing': .6,
        }),

        other={
            'batch_size': config['batch_size'],
        },
        extra={
            'argv': sys.argv,
            'config': ub.repr2(config.asdict()),
        }
    )

    # Create harness
    harn = ImageClfHarn(hyper=hyper)
    harn.classes = torch_datasets['train'].classes
    harn.preferences.update({
        'num_keep': 5,
        'keyboard_debug': True,
        # 'export_modules': ['netharn'],
    })
    harn.intervals.update({
        'vali': 1,
        'test': 10,
    })
    harn.script_config = config
    return harn
Example #17
def setup_yolo_harness(bsize=16, workers=0):
    """
    CommandLine:
        python ~/code/netharn/examples/yolo_voc.py setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', 4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size
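    # (Hedged notes, not from the original: with the defaults above,
    # batch_size=16 and bstep=4, this simulates an effective batch size of
    # 16 * 4 = 64, matching the original darknet configuration. The
    # ovthresh=0.5 above is the standard VOC IoU threshold for counting a
    # detection as a true positive in the mAP computation.)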

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True,
                              resize_rate=10 * bstep,
                              drop_last=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        import cv2
        cv2.setNumThreads(0)

    # assert simulated_bsize == 64, 'must be 64'

    # Pascal 2007 + 2012 trainval has 16551 images
    # Pascal 2007 test has 4952 images
    # In the original YOLO, one batch is 64 images, therefore:
    #
    # ONE EPOCH is 16551 / 64 = 258.609375 ≈ 259 iterations.
    #
    # From the original YOLO VOC v2 config
    # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg
    #     learning_rate=0.001
    #     burn_in=1000
    #     max_batches = 80200
    #     policy=steps
    #     steps=40000,60000
    #     scales=.1,.1
    #
    # However, the LIGHTNET values are
    #   LR_STEPS = [250, 25000, 35000]
    #
    # The DARNKET STEPS ARE:
    #   DN_STEPS = 1000, 40000, 60000, 80200
    #
    # Based on this, the batch-iteration to epoch conversion
    # (at 16551 / 64 ≈ 258.6 iterations per epoch) is:
    #
    # Key lightnet batch numbers
    # >>> np.array([250, 25000, 30000, 35000, 45000]) / (16551 / 64)
    # array([  0.96671,  96.6709 , 116.00508, 135.33928, 174.00761])
    # -> rounded epochs: [1, 97, 116, 135, 174]
    #
    # Key darknet batch numbers
    # >>> np.array([1000, 40000, 60000, 80200]) / (16551 / 64)
    # array([  3.86684, 154.67343, 232.01015, 310.12023])
    # -> rounded epochs: [4, 155, 232, 310]
    if not ub.argflag('--eav'):
        lr_step_points = {
            # 0:   lr * 0.1 / simulated_bsize,  # burnin
            # 4:   lr * 1.0 / simulated_bsize,
            0: lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (
            nh.schedulers.core.YOLOScheduler,
            {
                'points': lr_step_points,
                # 'interpolate': False,
                'interpolate': True,
                # number of epochs to burn in for (approx 1000 batches)
                'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584,
                'dset_size': len(datasets['train']),  # when drop_last=False
                # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,  # make a multiple of batch_size because drop_last=True
                'batch_size': batch_size,
            })
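        # (Hedged note, not from the original: the burn_in constants above are
        # roughly the darknet burn-in of 1000 batches (non-eav) and the
        # lightnet burn-in of 250 batches (eav) converted to epochs at
        # ~258.6 iterations per epoch.)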
    else:
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0: lr * 0.1 / simulated_bsize,
            1: lr * 1.0 / simulated_bsize,
            96: lr * 1.0 / simulated_bsize,
            97: lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })

    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    # Anchors
    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])
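    # (Hedged note: these are the standard YOLOv2 VOC anchor box sizes from
    # the darknet config, expressed in units of the 32-pixel output grid
    # cells.)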

    from netharn.models.yolo2 import region_loss2
    # from netharn.models.yolo2 import light_region_loss

    hyper = nh.HyperParams(
        **{
            'nice':
            nice,
            'workdir':
            ub.truepath('~/work/voc_yolo2'),
            'datasets':
            datasets,

            # 'xpu': 'distributed(todo: fancy network stuff)',
            # 'xpu': 'cpu',
            # 'xpu': 'gpu:0,1,2,3',
            'xpu':
            xpu,

            # a single dict is applied to all dataset loaders
            'loaders':
            loaders,
            'model': (
                light_yolo.Yolo,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'conf_thresh': 0.001,
                    # 'conf_thresh': 0.1,  # make training a bit faster
                    'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
                }),
            'criterion': (
                region_loss2.RegionLoss,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'reduction': 32,
                    'seen': 0,
                    'coord_scale': 1.0,
                    'noobject_scale': 1.0,
                    'object_scale': 5.0,
                    'class_scale': 1.0,
                    'thresh': 0.6,  # iou_thresh
                    # 'seen_thresh': 12800,
                }),

            # 'criterion': (light_region_loss.RegionLoss, {
            #     'num_classes': datasets['train'].num_classes,
            #     'anchors': anchors,
            #     'object_scale': 5.0,
            #     'noobject_scale': 1.0,

            #     # eav version originally had a random *2 in cls loss,
            #     # we removed that, but we can replicate it here.
            #     'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
            #     'coord_scale': 1.0,

            #     'thresh': 0.6,  # iou_thresh
            #     'seen_thresh': 12800,
            #     # 'small_boxes': not ub.argflag('--eav'),
            #     'small_boxes': True,
            #     'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
            # }),
            'initializer': (nh.initializers.Pretrained, {
                'fpath': weights,
            }),
            'optimizer': (
                torch.optim.SGD,
                {
                    'lr': lr_step_points[0],
                    'momentum': 0.9,
                    'dampening': 0,
                    # multiplying by batch size was one of those unpublished details
                    'weight_decay': decay * simulated_bsize,
                }),
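            # (Hedged arithmetic note, not from the original: with the
            # defaults decay=0.0005 and simulated_bsize=64 this gives
            # weight_decay=0.032; because the learning rate is divided by 64
            # above, the per-update product lr * weight_decay matches the
            # original darknet setting.)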
            'scheduler':
            scheduler_,
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': max_epoch,
                'max_epoch': max_epoch,
            }),
            'augment':
            datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    harn.config['large_loss'] = 1000  # tell netharn when to check for divergence
    return harn
Example #18
def setup_yolo_harness(bsize=16, workers=0):
    """
    CommandLine:
        python -m netharn.examples.yolo_voc setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.coerce('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', 4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    nh.configure_hacks(workers=workers)

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        # 'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True,
                              resize_rate=10 * bstep,
                              drop_last=True)
        for key, dset in datasets.items()
    }

    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    if not ub.argflag('--eav'):
        lr_step_points = {
            # 0:   lr * 0.1 / simulated_bsize,  # burnin
            # 4:   lr * 1.0 / simulated_bsize,
            0: lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (
            nh.schedulers.core.YOLOScheduler,
            {
                'points': lr_step_points,
                # 'interpolate': False,
                'interpolate': True,
                # number of epochs to burn in for (approx 1000 batches)
                'burn_in': 0.96899225 if ub.argflag('--eav') else 3.86683584,
                'dset_size': len(datasets['train']),  # when drop_last=False
                # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,  # make a multiple of batch_size because drop_last=True
                'batch_size': batch_size,
            })
        from netharn.models.yolo2 import light_region_loss
        criterion_ = (
            light_region_loss.RegionLoss,
            {
                'num_classes': datasets['train'].num_classes,
                'anchors': anchors,
                'object_scale': 5.0,
                'noobject_scale': 1.0,

                # eav version originally had a random *2 in cls loss,
                # we removed that, but we can replicate it here.
                'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
                'coord_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                'seen_thresh': 12800,
                # 'small_boxes': not ub.argflag('--eav'),
                'small_boxes': True,
                'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
            })
    else:
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0: lr * 0.1 / simulated_bsize,
            1: lr * 1.0 / simulated_bsize,
            96: lr * 1.0 / simulated_bsize,
            97: lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })
        from netharn.models.yolo2 import region_loss2
        criterion_ = (
            region_loss2.RegionLoss,
            {
                'num_classes': datasets['train'].num_classes,
                'anchors': anchors,
                'reduction': 32,
                'seen': 0,
                'coord_scale': 1.0,
                'noobject_scale': 1.0,
                'object_scale': 5.0,
                'class_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                # 'seen_thresh': 12800,
            })

    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    hyper = nh.HyperParams(
        **{
            'nice':
            nice,
            'workdir':
            ub.expandpath('~/work/voc_yolo2'),
            'datasets':
            datasets,
            'loaders':
            loaders,
            'xpu':
            xpu,
            'model': (
                light_yolo.Yolo,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'conf_thresh': 0.001,
                    # 'conf_thresh': 0.1,  # make training a bit faster
                    'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
                }),
            'criterion':
            criterion_,
            'initializer': (nh.initializers.Pretrained, {
                'fpath': weights,
            }),
            'optimizer': (
                torch.optim.SGD,
                {
                    'lr': lr_step_points[0],
                    'momentum': 0.9,
                    'dampening': 0,
                    # multiplying by batch size was one of those unpublished details
                    'weight_decay': decay * simulated_bsize,
                }),
            'scheduler':
            scheduler_,
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': max_epoch,
                'max_epoch': max_epoch,
            }),

            # 'augment': datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    harn.preferences['large_loss'] = 1000  # tell netharn when to check for divergence
    return harn
Example #19
def setup_harness(bsize=16, workers=0):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/yolo_voc.py setup_harness

    Example:
        >>> harn = setup_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.cast('argv')

    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    bstep = int(ub.argval('--bstep', 1))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(split='trainval'),
        'test': YoloVOCDataset(split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True)
        for key, dset in datasets.items()
    }

    # simulated_bsize = bstep * batch_size
    hyper = nh.HyperParams(
        **{
            'nice':
            nice,
            'workdir':
            ub.truepath('~/work/voc_yolo2'),
            'datasets':
            datasets,

            # 'xpu': 'distributed(todo: fancy network stuff)',
            # 'xpu': 'cpu',
            # 'xpu': 'gpu:0,1,2,3',
            'xpu':
            xpu,

            # a single dict is applied to all dataset loaders
            'loaders':
            loaders,
            'model': (light_yolo.Yolo, {
                'num_classes': datasets['train'].num_classes,
                'anchors': datasets['train'].anchors,
                'conf_thresh': 0.001,
                'nms_thresh': 0.5,
            }),
            'criterion': (
                light_region_loss.RegionLoss,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': datasets['train'].anchors,
                    'object_scale': 5.0,
                    'noobject_scale': 1.0,
                    'class_scale': 1.0,
                    'coord_scale': 1.0,
                    'thresh': 0.6,  # iou_thresh
                }),
            'initializer': (
                nh.initializers.Pretrained,
                {
                    # 'fpath': light_yolo.demo_weights(),
                    'fpath': light_yolo.initial_imagenet_weights(),
                }),
            'optimizer': (torch.optim.SGD, {
                'lr': lr / 10,
                'momentum': 0.9,
                'weight_decay': decay,
            }),
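            # (Hedged note, not from the original: the optimizer starts at
            # lr / 10, which matches the scheduler's epoch-0 point below; the
            # scheduler then drives the learning rate from there.)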
            'scheduler': (
                nh.schedulers.ListedLR,
                {
                    'points': {
                        # dividing by batch size was one of those unpublished details
                        # 0: lr / simulated_bsize,
                        # 5:  .01 / simulated_bsize,
                        # 60: .011 / simulated_bsize,
                        # 90: .001 / simulated_bsize,
                        0: lr / 10,
                        1: lr,
                        59: lr * 1.1,
                        60: lr / 10,
                        90: lr / 100,
                    },
                    'interpolate': True
                }),
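            # (Hedged note, treat as an assumption: with 'interpolate': True
            # the learning rate is presumably interpolated between the listed
            # epochs, so epochs 0..1 act as a warmup ramp from lr / 10 to lr.)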
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': 160,
                'max_epoch': 160,
            }),
            'augment':
            datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    harn = YoloHarn(hyper=hyper)
    harn.config['use_tqdm'] = False
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
Example #20
def setup_harn(cmdline=True, **kw):
    """
    Ignore:
        >>> from object_detection import *  # NOQA
        >>> cmdline = False
        >>> kw = {
        >>>     'train_dataset': '~/data/VOC/voc-trainval.mscoco.json',
        >>>     'vali_dataset': '~/data/VOC/voc-test-2007.mscoco.json',
        >>> }
        >>> harn = setup_harn(**kw)
    """
    import ndsampler
    from ndsampler import coerce_data
    # Seed other global rngs just in case something uses them under the hood
    kwarray.seed_global(1129989262, offset=1797315558)

    config = DetectFitConfig(default=kw, cmdline=cmdline)

    nh.configure_hacks(config)  # fix opencv bugs
    ub.ensuredir(config['workdir'])

    # Load ndsampler.CocoDataset objects from info in the config
    subsets = coerce_data.coerce_datasets(config)

    samplers = {}
    for tag, subset in subsets.items():
        print('subset = {!r}'.format(subset))
        sampler = ndsampler.CocoSampler(subset, workdir=config['workdir'])
        samplers[tag] = sampler

    torch_datasets = {
        tag: DetectDataset(
            sampler,
            input_dims=config['input_dims'],
            augment=config['augment'] if (tag == 'train') else False,
        )
        for tag, sampler in samplers.items()
    }

    print('make loaders')
    loaders_ = {
        tag:
        torch.utils.data.DataLoader(dset,
                                    batch_size=config['batch_size'],
                                    num_workers=config['workers'],
                                    shuffle=(tag == 'train'),
                                    collate_fn=nh.data.collate.padded_collate,
                                    pin_memory=True)
        for tag, dset in torch_datasets.items()
    }
    # for x in ub.ProgIter(loaders_['train']):
    #     pass

    if config['normalize_inputs']:
        # Get stats on the dataset (todo: turn off augmentation for this)
        _dset = torch_datasets['train']
        stats_idxs = kwarray.shuffle(np.arange(len(_dset)),
                                     rng=0)[0:min(1000, len(_dset))]
        stats_subset = torch.utils.data.Subset(_dset, stats_idxs)
        cacher = ub.Cacher('dset_mean', cfgstr=_dset.input_id + 'v2')
        input_stats = cacher.tryload()
        if input_stats is None:
            # Use parallel workers to load data faster
            loader = torch.utils.data.DataLoader(
                stats_subset,
                collate_fn=nh.data.collate.padded_collate,
                num_workers=config['workers'],
                shuffle=True,
                batch_size=config['batch_size'])
            # Track moving average
            running = nh.util.RunningStats()
            for batch in ub.ProgIter(loader, desc='estimate mean/std'):
                try:
                    running.update(batch['im'].numpy())
                except ValueError:  # final batch broadcast error
                    pass
            _stats = running.simple(axis=None)
            input_stats = {
                'mean': _stats['mean'].round(3),
                'std': _stats['std'].round(3),
            }
            cacher.save(input_stats)
    else:
        input_stats = None
    print('input_stats = {!r}'.format(input_stats))

    initializer_ = nh.Initializer.coerce(config, leftover='kaiming_normal')
    print('initializer_ = {!r}'.format(initializer_))

    arch = config['arch']
    if arch == 'yolo2':

        if False:
            dset = samplers['train'].dset
            print('dset = {!r}'.format(dset))
            # anchors = yolo2.find_anchors(dset)

        anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                            (5.05587, 8.09892), (9.47112, 4.84053),
                            (11.2364, 10.0071)])

        classes = samplers['train'].classes
        model_ = (yolo2.Yolo2, {
            'classes': classes,
            'anchors': anchors,
            'conf_thresh': 0.001,
            'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
        })
        model = model_[0](**model_[1])
        model._initkw = model_[1]

        criterion_ = (
            yolo2.YoloLoss,
            {
                'coder': model.coder,
                'seen': 0,
                'coord_scale': 1.0,
                'noobject_scale': 1.0,
                'object_scale': 5.0,
                'class_scale': 1.0,
                'thresh': 0.6,  # iou_thresh
                # 'seen_thresh': 12800,
            })
    else:
        raise KeyError(arch)

    scheduler_ = nh.Scheduler.coerce(config)
    print('scheduler_ = {!r}'.format(scheduler_))

    optimizer_ = nh.Optimizer.coerce(config)
    print('optimizer_ = {!r}'.format(optimizer_))

    dynamics_ = nh.Dynamics.coerce(config)
    print('dynamics_ = {!r}'.format(dynamics_))

    xpu = nh.XPU.coerce(config['xpu'])
    print('xpu = {!r}'.format(xpu))

    import sys

    hyper = nh.HyperParams(
        **{
            'nice':
            config['nice'],
            'workdir':
            config['workdir'],
            'datasets':
            torch_datasets,
            'loaders':
            loaders_,
            'xpu':
            xpu,
            'model':
            model,
            'criterion':
            criterion_,
            'initializer':
            initializer_,
            'optimizer':
            optimizer_,
            'dynamics':
            dynamics_,

            # 'optimizer': (torch.optim.SGD, {
            #     'lr': lr_step_points[0],
            #     'momentum': 0.9,
            #     'dampening': 0,
            #     # multiplying by batch size was one of those unpublished details
            #     'weight_decay': decay * simulated_bsize,
            # }),
            'scheduler':
            scheduler_,
            'monitor': (
                nh.Monitor,
                {
                    'minimize': ['loss'],
                    # 'maximize': ['mAP'],
                    'patience': config['patience'],
                    'max_epoch': config['max_epoch'],
                    'smoothing': .6,
                }),
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': config['batch_size'],
                'nice': config['nice'],
                'ovthresh': config['ovthresh'],  # used in mAP computation
            },
            'extra': {
                'config': ub.repr2(config.asdict()),
                'argv': sys.argv,
            }
        })
    print('hyper = {!r}'.format(hyper))
    print('make harn')
    harn = DetectHarn(hyper=hyper)
    harn.preferences.update({
        'num_keep': 2,
        'keep_freq': 30,
        'export_modules': ['netharn'],  # TODO
        'prog_backend': 'progiter',  # alternative: 'tqdm'
        'keyboard_debug': True,
    })
    harn.intervals.update({
        'log_iter_train': 50,
    })
    harn.fit_config = config
    print('harn = {!r}'.format(harn))
    print('samplers = {!r}'.format(samplers))
    return harn
Example #21
def setup_harn(**kwargs):
    """
    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harn

    Args:
        dbname (str): Name of IBEIS database to use
        nice (str): Custom tag for this run
        workdir (PathLike): path to dump all the intermediate results
        dim (int): Width and height of the network input
        batch_size (int): Base batch size. Number of examples in GPU at any time.
        bstep (int): Multiplied by batch_size to simulate larger batches.
        lr (float): Base learning rate
        decay (float): Weight decay (L2 regularization)
        workers (int): Number of parallel data loader workers
        xpu (str): Device to train on. Can be either `'cpu'`, `'gpu'`, a number
            indicating a GPU (e.g. `0`), or a list of numbers (e.g. `[0,1,2]`)
            indicating multiple GPUs
        triple (bool): if True uses triplet loss, otherwise contrastive loss
        norm_desc (bool): if True normalizes the descriptors
        pretrained (PathLike): path to a compatible pretrained model
        margin (float): margin for loss criterion
        soft (bool): use soft margin

    Example:
        >>> harn = setup_harn(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    config = parse_config(**kwargs)

    nh.configure_hacks(config)
    datasets, workdir = setup_datasets(config)

    loaders = {
        tag: dset.make_loader(
            shuffle=(tag == 'train'),
            batch_size=config['batch_size'],
            num_batches=(config['num_batches']
                         if tag == 'train' else config['num_batches'] // 10),
            k=config['k'],
            p=config['p'],
            num_workers=config['workers'],
        )
        for tag, dset in datasets.items()
    }
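    # (Hedged note, treat as an assumption: `k` and `p` are passed through to
    # the loader's batch sampler; in metric-learning setups like this they
    # usually mean p identities with k examples each per batch, i.e. the P×K
    # sampling scheme commonly used with triplet loss.)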

    if config['scheduler'] == 'steplr':
        from torch.optim import lr_scheduler
        scheduler_ = (lr_scheduler.StepLR,
                      dict(step_size=8, gamma=0.1, last_epoch=-1))
    else:
        scheduler_ = nh.Scheduler.coerce(config, scheduler='onecycle70')

    hyper = nh.HyperParams(
        **{
            'nice':
            config['nice'],
            'workdir':
            config['workdir'],
            'datasets':
            datasets,
            'loaders':
            loaders,
            'xpu':
            nh.XPU.coerce(config['xpu']),
            'model': (
                nh.models.DescriptorNetwork,
                {
                    'input_shape': (1, 3, config['dim'], config['dim']),
                    'norm_desc': config['norm_desc'],
                    # 'hidden_channels': [512, 256]
                    'hidden_channels': [256],
                    'desc_size': 128,
                }),
            'initializer':
            nh.Initializer.coerce(config),
            'optimizer':
            nh.Optimizer.coerce(config),
            'scheduler':
            scheduler_,
            'criterion': (nh.criterions.TripletLoss, {
                'margin': config['margin'],
                'soft': config['soft'],
            }),
            'monitor':
            nh.Monitor.coerce(
                config,
                minimize=['loss', 'pos_dist', 'brier'],
                maximize=['accuracy', 'neg_dist', 'mcc'],
                patience=100,
                max_epoch=100,
            ),
            'dynamics':
            nh.Dynamics.coerce(config),
            'other': {
                'n_classes': 2,
            },
        })
    harn = MatchingHarness(hyper=hyper)
    harn.preferences['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn