Exemplo n.º 1
0
 def history(self):
     """
     If available, return the training history metadata of the model.

     Looks for a ``train_info.json`` file next to (or one directory above)
     ``self.fpath`` and returns its parsed contents. Returns the sentinel
     string ``'__UNKNOWN__'`` when no metadata can be found, or
     ``self.info`` directly when it was already populated.
     """
     import netharn as nh
     if self.info is not None:
         return self.info
     # BUGFIX: removed a dead `if False:` branch that duplicated this logic
     # TODO: check for train_info.json in a few different places
     snap_fpath = ub.truepath(self.fpath)
     candidate_paths = [
         join(dirname(snap_fpath), 'train_info.json'),
         join(dirname(dirname(snap_fpath)), 'train_info.json'),
     ]
     info = None
     for info_fpath in candidate_paths:
         info_fpath = normpath(info_fpath)
         try:
             # Info might be inside of a zipfile
             info = nh.util.read_json(nh.util.zopen(info_fpath))
             break
         except Exception:
             # Best-effort: fall through to the next candidate location
             pass
     if info is None:
         info = '__UNKNOWN__'
     return info
Exemplo n.º 2
0
def script_workdir():
    """
    Resolve (and create) the working directory for this script.

    The default is a phase2 work directory (with a ``_debug`` suffix when
    the ``DEBUG`` flag is set); the ``--workdir`` CLI argument overrides it.
    """
    suffix = '_debug' if DEBUG else ''
    default_workdir = ub.ensuredir(ub.truepath('~/data/work_phase2' + suffix))
    return ub.argval('--workdir', default=default_workdir)
Exemplo n.º 3
0
def load_coco_datasets():
    """
    Load, merge, and split the NOAA training mscoco annotation files.

    Reads every coarse bbox-only annotation file matching the glob, merges
    them into a single CocoDataset, and splits that into train/vali sets.
    The result is cached on disk, keyed by a hash of the input file paths.

    Returns:
        dict: mapping with 'train' and 'vali' CocoDataset values
    """
    import wrangle
    # annot_globstr = ub.truepath('~/data/viame-challenge-2018/phase0-annotations/*.json')
    # annot_globstr = ub.truepath('~/data/viame-challenge-2018/phase0-annotations/mbari_seq0.mscoco.json')
    # img_root = ub.truepath('~/data/viame-challenge-2018/phase0-imagery')

    # Contest training data on hermes
    annot_globstr = ub.truepath(
        '~/data/noaa/training_data/annotations/*/*-coarse-bbox-only*.json')
    img_root = ub.truepath('~/data/noaa/training_data/imagery/')

    fpaths = sorted(glob.glob(annot_globstr))
    # Remove keypoints annotation data (hack)
    fpaths = [p for p in fpaths if not ('nwfsc' in p or 'afsc' in p)]

    # Cache the merged/split result; invalidated whenever the fpath list changes
    cacher = ub.Cacher('coco_dsets',
                       cfgstr=ub.hash_data(fpaths),
                       appname='viame')
    coco_dsets = cacher.tryload()
    if coco_dsets is None:
        print('Reading raw mscoco files')
        import os
        dsets = []
        for fpath in sorted(fpaths):
            print('reading fpath = {!r}'.format(fpath))
            dset = coco_api.CocoDataset(fpath, tag='', img_root=img_root)
            try:
                assert not dset.missing_images()
            except AssertionError:
                # Some datasets keep images under a subdir named after the
                # file prefix; retry with that subdir as the image root.
                print('fixing image file names')
                hack = os.path.basename(fpath).split('-')[0].split('.')[0]
                dset = coco_api.CocoDataset(fpath,
                                            tag=hack,
                                            img_root=join(img_root, hack))
                assert not dset.missing_images(), ub.repr2(
                    dset.missing_images()) + 'MISSING'
            print(ub.repr2(dset.basic_stats()))
            dsets.append(dset)

        print('Merging')
        merged = coco_api.CocoDataset.union(*dsets)
        merged.img_root = img_root

        # HACK: wont need to do this for the released challenge data
        # probably wont hurt though
        # if not REAL_RUN:
        #     merged._remove_keypoint_annotations()
        #     merged._run_fixes()

        train_dset, vali_dset = wrangle.make_train_vali(merged)

        coco_dsets = {
            'train': train_dset,
            'vali': vali_dset,
        }

        cacher.save(coco_dsets)

    return coco_dsets
Exemplo n.º 4
0
def compare_loss():
    """
    Dev script: compare this harness's YOLO loss against lightnet's.

    Loads the same pretrained weights into both our model and a lightnet
    model, runs both on the same input, and prints the losses side by side
    for manual comparison. Also visualizes both target box sets.

    NOTE(review): this permanently chdirs into the lightnet example
    directory and requires the lightnet repo checked out at ~/code.
    """
    harn = setup_harness(bsize=2)
    harn.hyper.xpu = nh.XPU(0)
    harn.initialize()

    weights_fpath = ub.truepath(
        '~/code/lightnet/examples/yolo-voc/backup/weights_30000.pt')
    state_dict = harn.xpu.load(weights_fpath)['weights']
    harn.model.module.load_state_dict(state_dict)

    ln_test = ub.import_module_from_path(
        ub.truepath('~/code/lightnet/examples/yolo-voc/test.py'))
    TESTFILE = ub.truepath('~/code/lightnet/examples/yolo-voc/data/test.pkl')

    import lightnet as ln
    net = ln.models.Yolo(ln_test.CLASSES, weights_fpath, ln_test.CONF_THRESH,
                         ln_test.NMS_THRESH)
    net = harn.xpu.move(net)

    import os
    # lightnet's dataset uses paths relative to its example directory
    os.chdir(ub.truepath('~/code/lightnet/examples/yolo-voc/'))
    ln_dset = ln_test.CustomDataset(TESTFILE, net)

    ln_img, ln_label = ln_dset[0]
    my_img, my_label = harn.datasets['test'][0]
    my_targets = my_label[0][None, :]
    ln_targets = [ln_label]

    # Test model forward is the same for my image
    ln_outputs = net._forward(harn.xpu.move(my_img[None, :]))
    my_outputs = harn.model(harn.xpu.move(my_img[None, :]))

    # Set `seen` very high — presumably to put both losses past any
    # burn-in/warmup behavior so they are comparable. TODO confirm.
    seen = net.loss.seen = 99999999
    ln_loss = net.loss(ln_outputs, my_targets)
    my_loss = harn.criterion(ln_outputs, my_targets, seen=seen)
    print('my_loss = {!r}'.format(my_loss))
    print('ln_loss = {!r}'.format(ln_loss))

    ln_brambox_loss = net.loss(ln_outputs, ln_targets)
    print('ln_brambox_loss = {!r}'.format(ln_brambox_loss))

    inp_size = tuple(my_img.shape[-2:][::-1])

    # Convert brambox annotations into tensor targets for visualization
    ln_tf_target = []
    for anno in ln_targets[0]:
        anno.class_label = anno.class_id
        tf = ln.data.preprocess.BramboxToTensor._tf_anno(anno, inp_size, None)
        ln_tf_target.append(tf)

    ln_boxes = nh.util.Boxes(np.array(ln_tf_target)[:, 1:],
                             'cxywh').scale(inp_size)
    my_boxes = nh.util.Boxes(np.array(my_targets[0])[:, 1:],
                             'cxywh').scale(inp_size)

    nh.util.imshow(ln_img.numpy(), colorspace='rgb', fnum=1)
    nh.util.draw_boxes(ln_boxes, color='blue')
    nh.util.draw_boxes(my_boxes, color='red')
Exemplo n.º 5
0
def main():
    """
    CLI entry point for the UrbanMapper3D train/test pipeline.

    The first positional argument selects the mode ('train' or 'test');
    the challenge's positional path arguments and the --*_path keyword
    arguments both override the default data locations.
    """
    if ub.argflag(('--help', '-h')):
        print(
            ub.codeblock('''
            Usage:
                python -m clab.live.final train --train_data_path=<path/to/UrbanMapper3D/training>
                python -m clab.live.final test --train_data_path=<path/to/UrbanMapper3D/testing> --test_data_path=<path/to/UrbanMapper3D/testing> --output_file=<outfile>

            Optional Args / Flags:
                --debug
                --serial
                --nopin
                --noprog
                --workdir=<path>
                --num_workers=<int>
                --batch_size=<int>
            '''))
        sys.exit(1)

    # BUGFIX: sys.argv[1] raised an IndexError when no positional command
    # was given; exit with a hint instead.
    if len(sys.argv) < 2:
        print('ERROR: expected a positional command (train or test). '
              'See --help for usage.')
        sys.exit(1)

    train_data_path = ub.truepath(
        '~/remote/aretha/data/UrbanMapper3D/training')
    test_data_path = ub.truepath('~/remote/aretha/data/UrbanMapper3D/testing')
    output_file = 'prediction'

    # Conform to positional argument specs from challenge doc
    if sys.argv[1] in ['train', 'test']:
        if len(sys.argv) > 2 and exists(sys.argv[2]):
            train_data_path = sys.argv[2]
    if sys.argv[1] in ['test']:
        if len(sys.argv) > 4 and exists(sys.argv[3]):
            test_data_path = sys.argv[3]
            output_file = sys.argv[4]

    # Keyword arguments take precedence over the positional defaults
    train_data_path = ub.argval('--train_data_path', default=train_data_path)
    test_data_path = ub.argval('--test_data_path', default=test_data_path)
    output_file = ub.argval('--output_file', default=output_file)

    workdir = script_workdir()

    if sys.argv[1] in ['train', 'test']:
        print('* train_data_path = {!r}'.format(train_data_path))
    if sys.argv[1] in ['test']:
        print('* test_data_path = {!r}'.format(test_data_path))
        print('* output_file = {!r}'.format(output_file))
    print(' * workdir = {!r}'.format(workdir))

    if sys.argv[1] == 'train':
        train(train_data_path)

    if sys.argv[1] == 'test':
        test(train_data_path, test_data_path, output_file)
Exemplo n.º 6
0
def test_rel_file_link():
    """
    Test that a symlink created from a *relative* file path resolves back
    to the real file.

    On win32 the link may silently become a hardlink, in which case only
    hardlink equivalence is verified. On failure, diagnostic state is
    printed before re-raising.
    """
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_rel_file_link')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)

    real_fpath = join(ub.ensuredir((dpath, 'dir1')), 'real')
    link_fpath = join(ub.ensuredir((dpath, 'dir2')), 'link')
    ub.touch(real_fpath)

    orig = os.getcwd()
    try:
        # Work from dpath so the link target is expressed relatively
        os.chdir(dpath)
        real_path = relpath(real_fpath, dpath)
        link_path = relpath(link_fpath, dpath)
        link = ub.symlink(real_path, link_path)
        import sys
        if sys.platform.startswith('win32') and isfile(link):
            # Note: if windows hard links the file there is no way we can
            # tell that it was a symlink. Just verify it exists.
            from ubelt import _win32_links
            assert _win32_links._win32_is_hardlinked(real_fpath, link_fpath)
        else:
            # Resolve the (relative) link target against the link's dir
            pointed = ub.util_links._readlink(link)
            resolved = ub.truepath(join(dirname(link), pointed), real=True)
            assert ub.truepath(real_fpath, real=True) == resolved
    except Exception:
        # Dump as much state as is available to aid debugging
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        print('TEST FAILED: test_rel_link')
        print('real_fpath = {!r}'.format(real_fpath))
        print('link_fpath = {!r}'.format(link_fpath))
        print('real_path = {!r}'.format(real_path))
        print('link_path = {!r}'.format(link_path))
        try:
            if 'link' in vars():
                print('link = {!r}'.format(link))
            if 'pointed' in vars():
                print('pointed = {!r}'.format(pointed))
            if 'resolved' in vars():
                print('resolved = {!r}'.format(resolved))
        except Exception:
            print('...rest of the names are not available')
        raise
    finally:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        os.chdir(orig)
Exemplo n.º 7
0
def parse_fish_data():
    """
    Dev stub: iterate over the phase0 annotation json files.

    Currently only checks that the annotation directory exists; the loop
    body is a placeholder that stops after the first file.
    """
    annot_dir = ub.truepath('~/data/viame-challenge-2018/phase0-annotations')
    assert exists(annot_dir)
    json_fpaths = glob.glob(join(annot_dir, '*.json'))
    for fpath in json_fpaths:
        # ub.cmd('sed -i "s/roi_category/category_id/g" {}'.format(fpath))
        # self = coco.COCO(fpath)
        break
Exemplo n.º 8
0
def regenerate_phase1_flavors():
    """
    Regenerate the derived dataset flavors from each phase1 original file.

    Assumes original data is in a good format.
    """
    cfg = viame_wrangler.config.WrangleConfig({
        'annots':
        ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/original_*.json')
    })

    fpaths = list(glob.glob(cfg.annots))
    print('Reading raw mscoco files')
    for fpath in fpaths:
        print('reading fpath = {!r}'.format(fpath))
        base = os.path.basename(fpath)
        dset_name = base.replace('original_', '').split('.')[0]
        dpath = os.path.dirname(fpath)
        orig_dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)

        # Sanity checks: the original data must already be clean
        assert not orig_dset.missing_images()
        assert not orig_dset._find_bad_annotations()
        flag_values = [g['has_annots'] for g in orig_dset.imgs.values()]
        assert all(flag in [True, False, None] for flag in flag_values)
        print(ub.dict_hist(flag_values))

        make_dataset_flavors(orig_dset, dpath, dset_name)
Exemplo n.º 9
0
def download_phase1_annots():
    """
    Download and unpack the phase1 annotation tarball from girder.

    References:
        http://www.viametoolkit.org/cvpr-2018-workshop-data-challenge/challenge-data-description/
        https://challenge.kitware.com/api/v1/item/5ac385f056357d4ff856e183/download
        https://challenge.kitware.com/girder#item/5ac385f056357d4ff856e183

    CommandLine:
        python ~/code/baseline-viame-2018/viame_wrangler/config.py download_phase0_annots --datadir=~/data
    """
    cfg = Config({'datadir': '~/data/viame-challenge-2018'})
    dpath = ub.truepath(cfg.datadir)
    fname = 'phase1-annotations.tar.gz'
    # Girder item id (renamed from `hash`, which shadowed the builtin)
    item_hash = '5ac385f056357d4ff856e183'
    url = 'https://challenge.kitware.com/api/v1'

    # FIXME: broken

    dest = _grabdata_girder(dpath, fname, item_hash, url, force=False)

    # Only extract when the unpacked directory is not already present
    unpacked = join(dpath, fname.split('.')[0])
    if not os.path.exists(unpacked):
        info = ub.cmd('tar -xvzf "{}" -C "{}"'.format(dest, dpath),
                      verbose=2,
                      verbout=1)
        assert info['ret'] == 0
Exemplo n.º 10
0
def get_bibtex_dict():
    """
    Load and parse the current bibtex library into a dict of entries.

    Returns:
        dict: mapping from bibtex key to entry data

    Raises:
        Exception: if no candidate bibtex file exists on disk
    """
    import ubelt as ub
    # HACK: custom current bibtex file
    possible_bib_fpaths = [
        ub.truepath('./My_Library_clean.bib'),
        #ub.truepath('~/latex/crall-thesis-2017/My_Library_clean.bib'),
    ]

    # Use the first candidate path that actually exists
    bib_fpath = next((cand for cand in possible_bib_fpaths if exists(cand)),
                     None)
    if bib_fpath is None:
        raise Exception('cant find bibtex file')

    # import bibtexparser
    from bibtexparser import bparser
    parser = bparser.BibTexParser()
    parser.ignore_nonstandard_types = True
    bibtex_db = parser.parse(ub.readfrom(bib_fpath))
    return bibtex_db.get_entry_dict()
Exemplo n.º 11
0
    def ensure_voc_data(VOCDataset,
                        dpath=None,
                        force=False,
                        years=[2007, 2012]):
        """
        Download the Pascal VOC data if it does not already exist.

        Args:
            dpath (str, optional): download directory; defaults to ~/data/VOC
            force (bool): re-extract tarballs even if marker files exist
            years (list): which VOC releases to ensure (2007 and/or 2012)

        Returns:
            str: path to the VOCdevkit directory

        CommandLine:
            python -m netharn.data.voc VOCDataset.ensure_voc_data

        Example:
            >>> # SCRIPT
            >>> # xdoc: +REQUIRES(--voc)
            >>> from netharn.data.voc import *  # NOQA
            >>> VOCDataset.ensure_voc_data()
        """
        if dpath is None:
            dpath = ub.truepath('~/data/VOC')
        devkit_dpath = join(dpath, 'VOCdevkit')
        # if force or not exists(devkit_dpath):
        ub.ensuredir(dpath)

        # Devkit code (always required)
        fpath1 = ub.grabdata(
            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar',
            dpath=dpath)
        if force or not exists(join(dpath, 'VOCdevkit', 'VOCcode')):
            ub.cmd('tar xvf "{}" -C "{}"'.format(fpath1, dpath), verbout=1)

        if 2007 in years:
            # VOC 2007 train+validation data
            fpath2 = ub.grabdata(
                'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
                dpath=dpath)
            # A known file inside the archive is used as an extraction marker
            if force or not exists(
                    join(dpath, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main',
                         'bird_trainval.txt')):
                ub.cmd('tar xvf "{}" -C "{}"'.format(fpath2, dpath), verbout=1)

            # VOC 2007 test data
            fpath3 = ub.grabdata(
                'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
                dpath=dpath)
            if force or not exists(
                    join(dpath, 'VOCdevkit', 'VOC2007', 'ImageSets', 'Main',
                         'bird_test.txt')):
                ub.cmd('tar xvf "{}" -C "{}"'.format(fpath3, dpath), verbout=1)

        if 2012 in years:
            # VOC 2012 train+validation data
            fpath4 = ub.grabdata(
                'https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar',
                dpath=dpath)
            if force or not exists(
                    join(dpath, 'VOCdevkit', 'VOC2012', 'ImageSets', 'Main',
                         'bird_trainval.txt')):
                ub.cmd('tar xvf "{}" -C "{}"'.format(fpath4, dpath), verbout=1)
        return devkit_dpath
Exemplo n.º 12
0
def _nh_data_nh_map(harn, num=10):
    """
    Evaluate detection metrics for the netharn model on the test loader.

    Runs up to ``num`` batches through the model, accumulates losses and
    detection confusions, then computes multiclass AP plus a per-class AP
    table (labeled with the lightnet VOC class names).

    Args:
        harn: netharn harness with model, criterion, and loaders prepared
        num (int, optional): max number of batches to evaluate (None = all)

    Returns:
        tuple: (ap, aps) multiclass AP and per-class AP table
    """
    with torch.no_grad():
        postprocess = harn.model.module.postprocess
        # postprocess.conf_thresh = 0.001
        # postprocess.nms_thresh = 0.5
        batch_confusions = []
        moving_ave = nh.util.util_averages.CumMovingAve()
        loader = harn.loaders['test']
        prog = ub.ProgIter(iter(loader), desc='')
        for bx, batch in enumerate(prog):
            inputs, labels = harn.prepare_batch(batch)
            inp_size = np.array(inputs.shape[-2:][::-1])
            outputs = harn.model(inputs)

            # seen is set very high, presumably to force post-warmup loss
            # behavior — TODO confirm against the criterion implementation
            loss = harn.criterion(outputs, labels['targets'],
                                  gt_weights=labels['gt_weights'],
                                  seen=1000000000)
            moving_ave.update(ub.odict([
                ('loss', float(loss.sum())),
                ('coord', harn.criterion.loss_coord),
                ('conf', harn.criterion.loss_conf),
                ('cls', harn.criterion.loss_cls),
            ]))

            average_losses = moving_ave.average()
            desc = ub.repr2(average_losses, nl=0, precision=2, si=True)
            prog.set_description(desc, refresh=False)

            postout = postprocess(outputs)
            for y in harn._measure_confusion(postout, labels, inp_size):
                batch_confusions.append(y)

            # batch_output.append((outputs.cpu().data.numpy().copy(), inp_size))
            # batch_labels.append([x.cpu().data.numpy().copy() for x in labels])
            if num is not None and bx >= num:
                break

        average_losses = moving_ave.average()
        print('average_losses {}'.format(ub.repr2(average_losses)))

    if False:
        from netharn.util import mplutil
        mplutil.qtensure()  # xdoc: +SKIP
        harn.visualize_prediction(batch, outputs, postout, thresh=.1)

    y = pd.concat([pd.DataFrame(c) for c in batch_confusions])
    precision, recall, ap = nh.metrics.detections._multiclass_ap(y)

    # Use the lightnet example's class list for human-readable AP labels
    ln_test = ub.import_module_from_path(ub.truepath('~/code/lightnet/examples/yolo-voc/test.py'))
    num_classes = len(ln_test.LABELS)
    cls_labels = list(range(num_classes))

    aps = nh.metrics.ave_precisions(y, cls_labels, use_07_metric=True)
    aps = aps.rename(dict(zip(cls_labels, ln_test.LABELS)), axis=0)
    # return ap
    return ap, aps
Exemplo n.º 13
0
def test_rel_dir_link():
    """
    Test that a symlink created from a *relative* directory path resolves
    back to the real directory.

    On failure, diagnostic state is printed before re-raising.
    """
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_rel_dir_link')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)

    real_dpath = join(ub.ensuredir((dpath, 'dir1')), 'real')
    link_dpath = join(ub.ensuredir((dpath, 'dir2')), 'link')
    ub.ensuredir(real_dpath)

    orig = os.getcwd()
    try:
        # Work from dpath so the link target is expressed relatively
        os.chdir(dpath)
        real_path = relpath(real_dpath, dpath)
        link_path = relpath(link_dpath, dpath)
        link = ub.symlink(real_path, link_path)
        # Note: on windows this is hacked.
        pointed = ub.util_links._readlink(link)
        resolved = ub.truepath(join(dirname(link), pointed), real=True)
        assert ub.truepath(real_dpath, real=True) == resolved
    except Exception:
        # Dump as much state as is available to aid debugging
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        print('TEST FAILED: test_rel_link')
        print('real_dpath = {!r}'.format(real_dpath))
        print('link_dpath = {!r}'.format(link_dpath))
        print('real_path = {!r}'.format(real_path))
        print('link_path = {!r}'.format(link_path))
        try:
            if 'link' in vars():
                print('link = {!r}'.format(link))
            if 'pointed' in vars():
                print('pointed = {!r}'.format(pointed))
            if 'resolved' in vars():
                print('resolved = {!r}'.format(resolved))
        except Exception:
            print('...rest of the names are not available')
        raise
    finally:
        util_links._dirstats(dpath)
        util_links._dirstats(join(dpath, 'dir1'))
        util_links._dirstats(join(dpath, 'dir2'))
        os.chdir(orig)
Exemplo n.º 14
0
 def history(self):
     """
     If available, return the training history metadata of the model.

     Looks for ``train_info.json`` two directories above ``self.fpath``
     and returns its parsed contents, or ``'__UNKNOWN__'`` when missing.
     """
     # TODO: check for train_info.json in a few different places
     train_dpath = dirname(dirname(ub.truepath(self.fpath)))
     train_info_fpath = join(train_dpath, 'train_info.json')
     if not exists(train_info_fpath):
         return '__UNKNOWN__'
     return util.read_json(train_info_fpath)
Exemplo n.º 15
0
 def __init__(cfg, kw=None, argv=None):
     """
     Initialize the wrangle config with phase1 default paths.

     Args:
         kw (dict, optional): overrides for the default config values
         argv (list, optional): command line args to parse overrides from
     """
     # cfg.phase = ub.argval('--phase', default='1', argv=argv)
     # cfg.data_dir = ub.truepath(ub.argval('--data', default='~/data', argv=argv))
     super().__init__(
         {
             'workdir': '~/work/viame-challenge-2018',
             'img_root': '~/data/viame-challenge-2018/phase1-imagery',
             'annots':
             '~/data/viame-challenge-2018/phase1-annotations/*.json',
         }, kw, argv)
     # Expand ~ (and symlinks) in every configured path value
     for key in cfg._keys:
         cfg[key] = ub.truepath(cfg[key])
Exemplo n.º 16
0
def get_task(taskname, boundary=True, arch=None):
    """
    Construct and prepare a task object by name.

    Args:
        taskname (str): currently only 'urban_mapper_3d' is supported
        boundary (bool): whether to use the boundary-aware workdir
        arch (str, optional): architecture name; dense_unet models get an
            alternate workdir (the arch param is a hack)

    Returns:
        UrbanMapper3D: the prepared task

    Raises:
        ValueError: if taskname is unknown
        Exception: if no data root directory exists
    """
    if taskname == 'urban_mapper_3d':
        from clab.tasks.urban_mapper_3d import UrbanMapper3D
        if boundary:
            workdir = '~/data/work/urban_mapper2'
            # BUGFIX: arch defaults to None, which has no .startswith
            if arch is not None and arch.startswith('dense_unet'):
                workdir = '~/data/work/urban_mapper4'
        else:
            workdir = '~/data/work/urban_mapper'

        # Prefer local data; fall back to the remote mount
        root = ub.truepath('~/data/UrbanMapper3D')
        if not exists(root):
            root = ub.truepath('~/remote/aretha/data/UrbanMapper3D')
        if not exists(root):
            raise Exception('root {} does not exist'.format(root))

        task = UrbanMapper3D(root=root, workdir=workdir, boundary=boundary)
        print(task.classnames)
        task.prepare_fullres_inputs()
        task.preprocess()
    else:
        # BUGFIX: `assert False` is stripped under -O; raise explicitly
        raise ValueError('unknown taskname = {!r}'.format(taskname))
    return task
Exemplo n.º 17
0
def generate_phase1_data_tables():
    """
    Print summary statistics for each phase1 coarse bbox-keypoint dataset.

    For every matching annotation file, validates the dataset and collects
    annotation/image counts, per-category annotation counts, roi-shape
    histograms, and has_annots flag histograms, then prints the combined
    table.
    """
    cfg = viame_wrangler.config.WrangleConfig({
        'annots':
        ub.truepath(
            '~/data/viame-challenge-2018/phase1-annotations/*/*coarse*bbox-keypoint*.json'
        )
    })

    all_stats = {}

    annots = cfg.annots
    fpaths = list(glob.glob(annots))
    print('fpaths = {}'.format(ub.repr2(fpaths)))
    for fpath in fpaths:
        dset_name = os.path.basename(fpath).split('-')[0]
        dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)

        # Basic integrity checks before computing stats
        assert not dset.missing_images()
        assert not dset._find_bad_annotations()
        assert all([
            img['has_annots'] in [True, False, None]
            for img in dset.imgs.values()
        ])

        print(ub.dict_hist([g['has_annots'] for g in dset.imgs.values()]))

        stats = {}
        stats.update(ub.dict_subset(dset.basic_stats(), ['n_anns', 'n_imgs']))

        # Aggregate roi shape counts and per-category annotation counts,
        # keeping only categories that actually have annotations
        roi_shapes_hist = dict()
        populated_cats = dict()
        for name, item in dset.category_annotation_type_frequency().items():
            if item:
                populated_cats[name] = sum(item.values())
                for k, v in item.items():
                    roi_shapes_hist[k] = roi_shapes_hist.get(k, 0) + v

        stats['n_cats'] = populated_cats
        stats['n_roi_shapes'] = roi_shapes_hist
        stats['n_imgs_with_annots'] = ub.map_keys(
            {
                None: 'unsure',
                True: 'has_objects',
                False: 'no_objects'
            }, ub.dict_hist([g['has_annots'] for g in dset.imgs.values()]))
        all_stats[dset_name] = stats

    print(ub.repr2(all_stats, nl=3, sk=1))
Exemplo n.º 18
0
def copy_latest_snapshots():
    """
    Copy the most recent snapshot in each training directory to a sibling
    file whose name embeds the training directory's own name.
    """
    train_base = ub.truepath('~/remote/aretha/data/work/urban_mapper/arch/unet/train')

    import glob
    import shutil

    def _refresh_latest(train_dpath):
        # Duplicate the newest snapshot under a directory-tagged name
        load_path = most_recent_snapshot(train_dpath)
        suffix = load_path.split('/')[-1]
        new_path = join(train_dpath, basename(train_dpath) + suffix)
        print('new_path = {!r}'.format(new_path))
        shutil.copy2(load_path, new_path)

    for train_dpath in glob.glob(train_base + '/input_*/solver_*'):
        if not os.path.isdir(train_dpath):
            continue
        print('train_dpath = {!r}'.format(train_dpath))
        _refresh_latest(train_dpath)
Exemplo n.º 19
0
def run_checks():
    """
    Sanity-check every phase1 annotation file.

    Verifies no missing images, no bad annotations, valid ``has_annots``
    flags, and the expected category count for non-original flavors.
    """
    def _verify(dset, dset_name):
        # Integrity assertions for a single dataset
        assert not dset.missing_images()
        assert not dset._find_bad_annotations()
        assert all(img['has_annots'] in [True, False, None]
                   for img in dset.imgs.values())
        if 'original' not in dset_name:
            # Derived flavors use either the fine (106) or coarse (21) cats
            assert len(dset.cats) in [106, 21]

    cfg = viame_wrangler.config.WrangleConfig({
        'annots': ub.truepath('~/data/viame-challenge-2018/phase1-annotations/*/*.json')
    })
    fpaths = list(glob.glob(cfg.annots))
    print('fpaths = {}'.format(ub.repr2(fpaths)))

    for fpath in fpaths:
        dset_name = os.path.basename(fpath).split('-')[0].split('.')[0]
        dset = CocoDataset(fpath, img_root=cfg.img_root, tag=dset_name)
        _verify(dset, dset_name)
Exemplo n.º 20
0
def cmake_clean(dpath='.'):
    """
    Move everything in a cmake build directory into a timestamped backup
    subdirectory, keeping ``CMakeCache.txt`` and any previous backups in
    place.

    Args:
        dpath (str): the cmake build directory (must contain CMakeCache.txt)
    """
    dpath = ub.truepath(dpath)
    cmake_cache_fpath = join(dpath, 'CMakeCache.txt')
    assert exists(cmake_cache_fpath)
    candidates = set(glob.glob(join(dpath, '*'))) - {cmake_cache_fpath}

    # Never re-backup an existing backup directory
    fpath_set = {fpath for fpath in candidates
                 if not basename(fpath).startswith('_cmake_build_backup_')}

    backup_dpath = ub.ensuredir(
        join(dpath, '_cmake_build_backup_' + ub.timestamp()))
    for fpath in ub.ProgIter(fpath_set, 'moving files'):
        shutil.move(fpath, backup_dpath)
Exemplo n.º 21
0
 def current_gvim_edit(op='e', fpath=''):
     r"""
     Open a file in the currently running gvim instance.

     Args:
         op (str): vim command used to open the file (e.g. 'e', 'sp', 'vs')
         fpath (str): path of the file to open

     CommandLine:
         python -m vimtk.xctrl XCtrl.current_gvim_edit sp ~/.bashrc
     """
     fpath = ub.compressuser(ub.truepath(fpath))
     # print('fpath = %r' % (fpath,))
     cplat.copy_text_to_clipboard(fpath)
     # Focus gvim and type the open command; KP_Enter executes it.
     # NOTE(review): assumes ';' is mapped to ':' in the user's vim
     # config — confirm.
     doscript = [
         ('focus', 'gvim'),
         ('key', 'Escape'),
         ('type2', ';' + op + ' ' + fpath),
         # ('type2', ';' + op + ' '),
         # ('key', 'ctrl+v'),
         ('key', 'KP_Enter'),
     ]
     XCtrl.do(*doscript, verbose=0, sleeptime=.001)
Exemplo n.º 22
0
def download_phase0_annots():
    """
    Download and unpack the phase0 annotation tarball from girder.

    CommandLine:
        python ~/code/baseline-viame-2018/viame_wrangler/config.py download_phase0_annots
    """
    cfg = Config({'datadir': ub.truepath('~/data/viame-challenge-2018')})
    dpath = cfg.datadir

    fname = 'phase0-annotations.tar.gz'
    # Girder item id (renamed from `hash`, which shadowed the builtin)
    item_hash = '5a9d839456357d0cb633d0e3'
    url = 'https://challenge.kitware.com/api/v1'

    dest = _grabdata_girder(dpath, fname, item_hash, url)

    # Only extract when the unpacked directory is not already present
    unpacked = join(dpath, fname.split('.')[0])
    if not os.path.exists(unpacked):
        info = ub.cmd('tar -xvzf "{}" -C "{}"'.format(dest, dpath),
                      verbose=2,
                      verbout=1)
        assert info['ret'] == 0
Exemplo n.º 23
0
def main():
    """
    CLI entry point for cleaning up snapshots produced by netharn.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog='manage_snapshots',
        description=ub.codeblock(
            '''
            Cleanup snapshots produced by netharn
            ''')
    )
    parser.add_argument(*('-w', '--workdir'), type=str,
                        help='specify the workdir for your project', default=None)
    parser.add_argument(*('-f', '--force'), help='dry run',
                        action='store_false', dest='dry')
    # parser.add_argument(*('-n', '--dry'), help='dry run', action='store_true')
    parser.add_argument(*('--recent',), help='num recent to keep', type=int, default=100)
    parser.add_argument(*('--factor',), help='keep one every <factor> epochs', type=int, default=1)

    args, unknown = parser.parse_known_args()
    ns = args.__dict__.copy()
    print('ns = {!r}'.format(ns))

    # BUGFIX: ub.truepath(None) raises a TypeError; only expand the path
    # when the user actually supplied --workdir.
    if ns['workdir'] is not None:
        ns['workdir'] = ub.truepath(ns['workdir'])

    _devcheck_manage_snapshots(**ns)
Exemplo n.º 24
0
def ignore():
    """
    Dev sanity check: compare our YOLO target-building against the
    original yolo2-pytorch implementation on random demo data.

    Builds targets with both implementations for 1000 random batches and
    raises an Exception on the first mismatch in masks, ious, classes, or
    boxes.
    """
    # inp_size = (96, 96)
    inp_size = (416, 416)
    # Output grid is the input size downsampled by the network stride (32)
    W = H = inp_size[0] // 32
    n_classes = 3
    import ubelt as ub

    for i in ub.ProgIter(range(1000)):
        data1, anchors = demo_npdata(5,
                                     W,
                                     H,
                                     inp_size=inp_size,
                                     C=n_classes,
                                     n=1000)
        _tup1 = build_target_item(data1, inp_size, n_classes, anchors, epoch=1)
        (_boxes1, _ious1, _classes1, _box_mask1, _iou_mask1,
         _class_mask1) = _tup1

        # Configure the reference implementation to match our settings
        orig_darknet = ub.import_module_from_path(
            ub.truepath('~/code/yolo2-pytorch/darknet.py'))
        orig_darknet.cfg.anchors = anchors
        orig_darknet.cfg.multi_scale_inp_size[0][:] = inp_size
        orig_darknet.cfg.multi_scale_out_size[0][:] = [W, H]
        orig_darknet.cfg.num_classes = n_classes

        # The reference implementation expects a different argument order
        pbox, piou, gbox, gcls, gw = data1
        data2 = (pbox, gbox, gcls, gw, piou)
        _tup2 = orig_darknet._process_batch(data2, size_index=0)
        (_boxes2, _ious2, _classes2, _box_mask2, _iou_mask2,
         _class_mask2) = _tup2

        if np.any(_box_mask1 != _box_mask2):
            raise Exception

        flags = ~np.isclose(_iou_mask1, _iou_mask2)
        if np.any(flags):
            print(np.where(flags))
            print(_iou_mask1[flags])
            print(_iou_mask2[flags])
            raise Exception

        flags = ~np.isclose(_class_mask1, _class_mask2)
        if np.any(flags):
            raise Exception

        flags = ~np.isclose(_classes2, _classes1)
        if np.any(flags):
            raise Exception

        flags = ~np.isclose(_ious1, _ious2)
        if np.any(flags):
            raise Exception
            _ious1[flags]
            _ious2[flags]

        ba = _boxes1.reshape(-1, 4)
        bb = _boxes2.reshape(-1, 4)
        flags = ~np.isclose(ba, bb)
        if np.any(flags):
            print(ba[(~np.isclose(ba, bb)).sum(axis=-1) > 0])
            print(bb[(~np.isclose(ba, bb)).sum(axis=-1) > 0])
            raise Exception
Exemplo n.º 25
0
def cifar_training_datasets(output_colorspace='RGB',
                            norm_mode='independent',
                            cifar_num=10):
    """
    Build train/vali/test CIFAR datasets with shared input normalization.

    Args:
        output_colorspace (str): colorspace for the wrapped datasets
        norm_mode (str): normalization mode passed to _make_normalizer
        cifar_num (int): which CIFAR variant to load (10 or 100)

    Returns:
        dict: 'train', 'vali', and 'test' CIFAR_Wrapper datasets; only the
            train dataset has augmentation enabled, and all three share the
            normalizer computed from the train split.

    Example:
        >>> datasets = cifar_training_datasets()
    """
    inputs, task = cifar_inputs(train=True, cifar_num=cifar_num)

    # split training into train / validation
    # 45K / 5K validation split was used in densenet and resnet papers.
    # https://arxiv.org/pdf/1512.03385.pdf page 7
    # https://arxiv.org/pdf/1608.06993.pdf page 5

    vali_frac = .1  # 10%  is 5K images
    n_vali = int(len(inputs) * vali_frac)
    # n_vali = 10000  # 10K validation as in http://torch.ch/blog/2015/07/30/cifar.html

    # the gt indexes seem to already be scrambled, I think other papers sample
    # validation from the end, so lets do that
    # The NIN paper https://arxiv.org/pdf/1312.4400.pdf in section 4 mentions
    # that it uses the last 10K images for validation
    input_idxs = np.arange(len(inputs))
    # or just uncomment this line for reproducable random sampling
    # input_idxs = util.random_indices(len(inputs), seed=1184576173)

    train_idxs = sorted(input_idxs[:-n_vali])
    vali_idxs = sorted(input_idxs[-n_vali:])

    train_inputs = inputs.take(train_idxs, tag='train')
    vali_inputs = inputs.take(vali_idxs, tag='vali')
    test_inputs, _ = cifar_inputs(train=False, cifar_num=cifar_num)
    # The dataset name and indices should fully specifiy dependencies
    train_inputs._set_id_from_dependency(
        ['cifar{}-train'.format(cifar_num), train_idxs])
    vali_inputs._set_id_from_dependency(
        ['cifar{}-train'.format(cifar_num), vali_idxs])
    test_inputs._set_id_from_dependency(['cifar{}-test'.format(cifar_num)])

    workdir = ub.ensuredir(ub.truepath('~/data/work/cifar'))

    train_dset = CIFAR_Wrapper(train_inputs,
                               task,
                               workdir,
                               output_colorspace=output_colorspace)
    vali_dset = CIFAR_Wrapper(vali_inputs,
                              task,
                              workdir,
                              output_colorspace=output_colorspace)
    test_dset = CIFAR_Wrapper(test_inputs,
                              task,
                              workdir,
                              output_colorspace=output_colorspace)
    print('built datasets')

    datasets = {
        'train': train_dset,
        'vali': vali_dset,
        'test': test_dset,
    }

    # Normalization statistics come from the train split only, then are
    # shared by all splits to avoid leaking vali/test statistics
    print('computing normalizers')
    datasets['train'].center_inputs = datasets['train']._make_normalizer(
        norm_mode)
    for key in datasets.keys():
        datasets[key].center_inputs = datasets['train'].center_inputs
    print('computed normalizers')

    datasets['train'].augment = True
    return datasets
Exemplo n.º 26
0
def setup_harness(**kwargs):
    """
    Construct a configured :class:`MatchingHarness` from keyword options.

    CommandLine:
        python ~/code/netharn/netharn/examples/ggr_matching.py setup_harness

    Example:
        >>> harn = setup_harness(dbname='PZ_MTEST')
        >>> harn.initialize()
    """
    # --- unpack configuration knobs (all optional) ---
    nice = kwargs.get('nice', 'untitled')
    batch_size = int(kwargs.get('batch_size', 6))
    bstep = int(kwargs.get('bstep', 1))
    workers = int(kwargs.get('workers', 0))
    decay = float(kwargs.get('decay', 0.0005))
    lr = float(kwargs.get('lr', 0.001))
    dim = int(kwargs.get('dim', 416))
    xpu = kwargs.get('xpu', 'argv')
    workdir = kwargs.get('workdir', None)
    dbname = kwargs.get('dbname', 'ggr2')

    # Default the work directory to a per-database location
    if workdir is None:
        workdir = ub.truepath(os.path.join('~/work/siam-ibeis2', dbname))
    ub.ensuredir(workdir)

    if dbname == 'ggr2':
        print('Creating torch CocoDataset')
        coco_train = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_train2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/train2018',
        )
        coco_train.hashid = 'ggr2-coco-train2018'
        coco_vali = ndsampler.CocoDataset(
            data='/media/joncrall/raid/data/ggr2-coco/annotations/instances_val2018.json',
            img_root='/media/joncrall/raid/data/ggr2-coco/images/val2018',
        )
        coco_vali.hashid = 'ggr2-coco-val2018'

        print('Creating samplers')
        sampler_train = ndsampler.CocoSampler(coco_train, workdir=workdir)
        sampler_vali = ndsampler.CocoSampler(coco_vali, workdir=workdir)

        print('Creating torch Datasets')
        datasets = {
            'train': MatchingCocoDataset(sampler_train, coco_train, workdir,
                                         dim=dim, augment=True),
            'vali': MatchingCocoDataset(sampler_vali, coco_vali, workdir,
                                        dim=dim),
        }
    else:
        from ibeis_utils import randomized_ibeis_dset
        datasets = randomized_ibeis_dset(dbname, dim=dim)

    for tag, dset in datasets.items():
        print('* len({}) = {}'.format(tag, len(dset)))

    if workers > 0:
        # opencv spawns its own threads; disable them so they do not fight
        # with the dataloader worker processes
        import cv2
        cv2.setNumThreads(0)

    loaders = {
        tag: torch.utils.data.DataLoader(dset,
                                         batch_size=batch_size,
                                         num_workers=workers,
                                         shuffle=(tag == 'train'),
                                         pin_memory=True)
        for tag, dset in datasets.items()
    }

    xpu = nh.XPU.cast(xpu)

    hyper = nh.HyperParams(
        nice=nice,
        workdir=workdir,
        datasets=datasets,
        loaders=loaders,
        xpu=xpu,
        model=(MatchingNetworkLP, {
            'p': 2,
            'input_shape': (1, 3, dim, dim),
        }),
        criterion=(nh.criterions.ContrastiveLoss, {
            'margin': 4,
            'weight': None,
        }),
        optimizer=(torch.optim.SGD, {
            'lr': lr,
            'weight_decay': decay,
            'momentum': 0.9,
            'nesterov': True,
        }),
        initializer=(nh.initializers.NoOp, {}),
        scheduler=(nh.schedulers.Exponential, {
            'gamma': 0.99,
            'stepsize': 2,
        }),
        monitor=(nh.Monitor, {
            'minimize': ['loss', 'pos_dist', 'brier'],
            'maximize': ['accuracy', 'neg_dist', 'mcc'],
            'patience': 40,
            'max_epoch': 40,
        }),
        dynamics={
            # Accumulate gradients over `bstep` batches before stepping,
            # effectively simulating a batch size of bstep * batch_size.
            'batch_step': bstep,
        },
        other={
            'n_classes': 2,
        },
    )
    harn = MatchingHarness(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = 1
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None

    return harn
Exemplo n.º 27
0
def setup_data():
    """
    Create final MSCOCO training files for the 4 challenge types:
        * fine-grained + bbox-only
        * fine-grained + bbox-keypoints
        * coarse-grained + bbox-only
        * coarse-grained + bbox-keypoints

    CommandLine:
        python ~/code/baseline-viame-2018/wrangle.py setup_data --data=$HOME/data --work=$HOME/work --phase=0
    """
    cfg = viame_wrangler.config.WrangleConfig({
        'annots': ub.truepath('~/data/viame-challenge-2018/phase1-annotations/*/original_*.json')
    })

    img_root = cfg.img_root
    fpaths = list(glob.glob(cfg.annots))

    print('Reading raw mscoco files')
    dsets = []
    for fpath in fpaths:
        print('reading fpath = {!r}'.format(fpath))
        dsets.append(CocoDataset(fpath))

    print('Merging')
    merged = CocoDataset.union(*dsets)
    merged.img_root = img_root
    # Set has_annots=True on all images with at least one annotation
    merged._mark_annotated_images()

    def ensure_heirarchy(dset, heirarchy):
        # Add each category; if one already exists, just fix up its parent.
        for cat in heirarchy:
            try:
                dset.add_category(**cat)
            except ValueError:
                realcat = dset.name_to_cat[cat['name']]
                realcat['supercategory'] = cat['supercategory']

    prefix = 'phase{}'.format(cfg.phase)

    def verbose_dump(dset, fpath):
        # Print summary statistics before writing the dataset to disk.
        print('Dumping {}'.format(fpath))
        if False:
            print(ub.repr2(dset.category_annotation_type_frequency(), nl=1, sk=1))
        print(ub.dict_hist([img['has_annots'] for img in dset.imgs.values()]))
        print(ub.repr2(dset.basic_stats()))
        dset.dump(fpath)

    # Build both granularities from the same merged dataset.  For each one,
    # dump the bbox+keypoint variant first, then strip the keypoint
    # annotations and dump the bbox-only variant.
    challenges = [
        ('fine', viame_wrangler.mappings.FineGrainedChallenge),
        ('coarse', viame_wrangler.mappings.CoarseChallenge),
    ]
    results = {}
    for tag, challenge in challenges:
        full = merged.copy()
        full.rename_categories(challenge.raw_to_cat)
        ensure_heirarchy(full, challenge.heirarchy)
        verbose_dump(full, join(
            cfg.challenge_work_dir,
            prefix + '-{}-bbox-keypoint.mscoco.json'.format(tag)))

        # remove keypoint annotations
        # Should we remove the images that have keypoints in them?
        bbox_only = full.copy()
        bbox_only._remove_keypoint_annotations()
        verbose_dump(bbox_only, join(
            cfg.challenge_work_dir,
            prefix + '-{}-bbox-only.mscoco.json'.format(tag)))
        results[tag] = (full, bbox_only)

    fine, fine_bbox = results['fine']
    coarse, coarse_bbox = results['coarse']
    return fine, coarse, fine_bbox, coarse_bbox
Exemplo n.º 28
0
def run_demo():
    """
    Run a scripted demo of the graph-id review loop on synthetic data.

    Builds a small synthetic inference graph, wipes its labels, and then
    replays a sequence of simulated oracle reviews. When ``--viz`` is given,
    a figure is written to ``~/Desktop/demo`` after each step.

    CommandLine:
        python -m graphid.demo.demo_script run_demo --viz
        python -m graphid.demo.demo_script run_demo

    Example:
        >>> run_demo()
    """
    from graphid import demo
    import matplotlib as mpl
    # rc overrides applied to every figure this demo draws
    TMP_RC = {
        'axes.titlesize': 12,
        'axes.labelsize': int(ub.argval('--labelsize', default=8)),
        'font.family': 'sans-serif',
        'font.serif': 'CMU Serif',
        'font.sans-serif': 'CMU Sans Serif',
        'font.monospace': 'CMU Typewriter Text',
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        # 'legend.alpha': .8,
        'legend.fontsize': 12,
        'legend.facecolor': 'w',
    }
    mpl.rcParams.update(TMP_RC)
    # ---- Synthetic data params
    params = {
        'redun.pos': 2,
        'redun.neg': 2,
    }
    # oracle_accuracy = .98
    # oracle_accuracy = .90
    # oracle_accuracy = (.8, 1.0)
    # (min, max) accuracy range of the simulated review oracle
    oracle_accuracy = (.85, 1.0)
    # oracle_accuracy = 1.0

    # --- draw params

    VISUALIZE = ub.argflag('--viz')
    # QUIT_OR_EMEBED = 'embed'
    QUIT_OR_EMEBED = 'quit'

    def asint(p):
        # int() that tolerates None (unset CLI values pass through)
        return p if p is None else int(p)

    # Optional CLI controls over which reviews are executed / visualized
    TARGET_REVIEW = asint(ub.argval('--target', default=None))
    START = asint(ub.argval('--start', default=None))
    END = asint(ub.argval('--end', default=None))

    # ------------------

    # rng = np.random.RandomState(42)
    # infr = demo.demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0)
    infr = demo.demodata_infr(pcc_sizes=[5, 2, 4])
    infr.verbose = 100
    infr.ensure_cliques()
    infr.ensure_full()
    # Dummy scoring

    infr.init_simulation(oracle_accuracy=oracle_accuracy, name='run_demo')
    # infr_gt = infr.copy()
    # Output directory for the saved figures
    dpath = ub.ensuredir(ub.truepath('~/Desktop/demo'))
    if 0:
        ub.delete(dpath)
    ub.ensuredir(dpath)

    fig_counter = it.count(0)

    def show_graph(infr, title, final=False, selected_edges=None):
        """Render the current graph state to a numbered PNG in ``dpath``."""
        from matplotlib import pyplot as plt
        if not VISUALIZE:
            return
        # TODO: rich colored text?
        latest = '\n'.join(infr.latest_logs())
        showkw = dict(
            # fontsize=infr.graph.graph['fontsize'],
            # fontname=infr.graph.graph['fontname'],
            show_unreviewed_edges=True,
            show_inferred_same=False,
            show_inferred_diff=False,
            outof=(len(infr.aids)),
            # show_inferred_same=True,
            # show_inferred_diff=True,
            selected_edges=selected_edges,
            show_labels=True,
            simple_labels=True,
            # show_recent_review=not final,
            show_recent_review=False,
            # splines=infr.graph.graph['splines'],
            reposition=False,
            # with_colorbar=True
        )
        # temporarily silence logging while drawing
        verbose = infr.verbose
        infr.verbose = 0
        # NOTE(review): this copy is immediately discarded by the next line,
        # so the draw actually happens on the original `infr`
        infr_ = infr.copy()
        infr_ = infr
        infr_.verbose = verbose
        infr_.show(pickable=True, verbose=0, **showkw)
        infr.verbose = verbose
        # print('status ' + ub.repr2(infr_.status()))
        # infr.show(**showkw)
        ax = plt.gca()
        ax.set_title(title, fontsize=20)
        fig = plt.gcf()
        # fontsize = 22
        fontsize = 12
        if True:
            # postprocess xlabel
            lines = []
            for line in latest.split('\n'):
                if False and line.startswith('ORACLE ERROR'):
                    lines += ['ORACLE ERROR']
                else:
                    lines += [line]
            latest = '\n'.join(lines)
            # shrink the xlabel font as the log text grows
            if len(lines) > 10:
                fontsize = 16
            if len(lines) > 12:
                fontsize = 14
            if len(lines) > 14:
                fontsize = 12
            if len(lines) > 18:
                fontsize = 10

            if len(lines) > 23:
                fontsize = 8

        if True:
            # render the latest log text below the axes as the xlabel
            util.mplutil.adjust_subplots(top=.95,
                                         left=0,
                                         right=1,
                                         bottom=.45,
                                         fig=fig)
            ax.set_xlabel('\n' + latest)
            xlabel = ax.get_xaxis().get_label()
            xlabel.set_horizontalalignment('left')
            # xlabel.set_x(.025)
            # xlabel.set_x(-.6)
            xlabel.set_x(-2.0)
            # xlabel.set_fontname('CMU Typewriter Text')
            xlabel.set_fontname('Inconsolata')
            xlabel.set_fontsize(fontsize)
        ax.set_aspect('equal')

        # ax.xaxis.label.set_color('red')
        fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter)))
        fig.savefig(
            fpath,
            dpi=300,
            # transparent=True,
            edgecolor='none')

        # pt.save_figure(dpath=dpath, dpi=300)
        infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs(groupby='name_label')
        infr.set_node_attrs('pin', 'true')
        node_dict = infr.graph.nodes
        print(ub.repr2(node_dict[1]))

    if VISUALIZE:
        infr.latest_logs()
        # Pin Nodes into the target groundtruth position
        show_graph(infr, 'target-gt')

    # Wipe the groundtruth labels/edges so the review loop rebuilds them
    print(ub.repr2(infr.status()))
    infr.clear_feedback()
    infr.clear_name_labels()
    infr.clear_edges()
    print(ub.repr2(infr.status()))
    infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs()

    infr.prioritize('prob_match')
    if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0:
        show_graph(infr, 'initial state')

    def on_new_candidate_edges(infr, edges):
        # hack updateing visual attrs as a callback
        if VISUALIZE:
            infr.update_visual_attrs()

    infr.on_new_candidate_edges = on_new_candidate_edges

    infr.params.update(**params)
    infr.refresh_candidate_edges()

    VIZ_ALL = (VISUALIZE and TARGET_REVIEW is None and START is None)
    print('VIZ_ALL = %r' % (VIZ_ALL, ))

    if VIZ_ALL or TARGET_REVIEW == 0:
        show_graph(infr, 'find-candidates')

    # _iter2 = enumerate(infr.generate_reviews(**params))
    # _iter2 = list(_iter2)
    # assert len(_iter2) > 0

    # prog = ub.ProgIter(_iter2, label='run_demo', bs=False, adjust=False,
    #                    enabled=False)
    # Main review loop: pop prioritized edges and feed oracle decisions back
    count = 1
    first = 1
    for edge, priority in infr._generate_reviews(data=True):
        msg = 'review #%d, priority=%.3f' % (count, priority)
        print('\n----------')
        infr.print('pop edge {} with priority={:.3f}'.format(edge, priority))
        # print('remaining_reviews = %r' % (infr.remaining_reviews()),)
        # Make the next review

        if START is not None:
            VIZ_ALL = count >= START

        if END is not None and count >= END:
            break

        infr.print(msg)
        if ub.allsame(infr.pos_graph.node_labels(*edge)) and first:
            # Have oracle make a mistake early
            feedback = infr.request_oracle_review(edge, accuracy=0)
            first -= 1
        else:
            feedback = infr.request_oracle_review(edge)

        AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1

        SHOW_CANDIATE_POP = True
        if SHOW_CANDIATE_POP and (VIZ_ALL or AT_TARGET):
            infr.print(
                ub.repr2(infr.task_probs['match_state'][edge],
                         precision=4,
                         si=True))
            infr.print('len(queue) = %r' % (len(infr.queue)))
            # Show edge selection
            infr.print('Oracle will predict: ' + feedback['evidence_decision'])
            show_graph(infr, 'pre' + msg, selected_edges=[edge])

        if count == TARGET_REVIEW:
            infr.EMBEDME = QUIT_OR_EMEBED == 'embed'
        infr.add_feedback(edge, **feedback)
        infr.print('len(queue) = %r' % (len(infr.queue)))
        # infr.apply_nondynamic_update()
        # Show the result
        if VIZ_ALL or AT_TARGET:
            show_graph(infr, msg)
            # import sys
            # sys.exit(1)
        if count == TARGET_REVIEW:
            break
        count += 1

    infr.print('status = ' + ub.repr2(infr.status(extended=False)))
    show_graph(infr, 'post-review (#reviews={})'.format(count), final=True)

    if VISUALIZE:
        if not getattr(infr, 'EMBEDME', False):
            # import plottool as pt
            # util.mplutil.all_figures_tile()
            util.mplutil.show_if_requested()
Exemplo n.º 29
0
def predict():
    """
    Predict detections on the NOAA test imagery and dump the results to a
    COCO-format json file (``./viame_pred_dump.mscoco.json``).

    Currently hacked in due to limited harness support.

    srun -c 4 -p priority --gres=gpu:1 \
            python ~/code/baseline-viame-2018/yolo_viame.py predict \
            --gpu=0
    """

    # HACK: Load the training dataset to extract the categories
    # INSTEAD: Should read the categories from a deployed model
    coco_dsets = load_coco_datasets()
    categories = coco_dsets['train'].dataset['categories']

    # Create a dataset to iterate through the images to predict on
    test_gpaths = glob.glob(ub.truepath('~/data/noaa/test_data/*/*.png'))
    predict_coco_dataset = {
        'licenses': [],
        'info': [],
        'categories': categories,
        'images': [{
            'id': idx,
            'file_name': fpath,
        } for idx, fpath in enumerate(test_gpaths)],
        'annotations': [],
    }
    predict_coco_dset = coco_api.CocoDataset(predict_coco_dataset,
                                             tag='predict')
    predict_dset = YoloCocoDataset(predict_coco_dset, train=False)

    # HACK: Define the path to the model weights
    # INSTEAD: best weights should be packaged in a model deployment
    load_path = ub.truepath(
        '~/work/viame/yolo/fit/nice/baseline1/best_snapshot.pt')
    # load_path = ub.truepath(
    #     '~/work/viame/yolo/fit/nice/baseline1/torch_snapshots/_epoch_00000080.pt')

    # HACK: Define the model topology (because we know what we trained with)
    # INSTEAD: model deployment should store and abstract away the topology
    model = light_yolo.Yolo(
        **{
            'num_classes': predict_dset.num_classes,
            # NOTE: ``np.float`` was removed in numpy 1.24; use the builtin
            'anchors': np.asarray(
                [(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                 (9.42, 5.11), (16.62, 10.52)], dtype=float),
            'conf_thresh': 0.001,
            'nms_thresh': 0.5,
        })

    # Boilerplate code that could be abstracted away in a prediction harness
    xpu = nh.XPU.cast('gpu')
    print('xpu = {!r}'.format(xpu))
    model = xpu.mount(model)
    snapshot_state = xpu.load(load_path)
    model.load_state_dict(snapshot_state['model_state_dict'])

    batch_size = 16
    workers = 4
    predict_loader = predict_dset.make_loader(batch_size=batch_size,
                                              num_workers=workers,
                                              shuffle=False,
                                              pin_memory=False)

    letterbox = predict_dset.letterbox

    # HACK: Main prediction loop
    # INSTEAD: Use a prediction harness to abstract these in a similar way to
    # the fit harness.
    predictions = []

    with nh.util.grad_context(False):
        _iter = ub.ProgIter(predict_loader, desc='predicting')
        for bx, raw_batch in enumerate(_iter):
            batch_inputs, batch_labels = raw_batch

            inputs = xpu.variable(batch_inputs)
            labels = {k: xpu.variable(d) for k, d in batch_labels.items()}

            outputs = model(inputs)

            # Transform yolo outputs into the coco format
            postout = model.module.postprocess(outputs)

            indices = labels['indices']
            orig_sizes = labels['orig_sizes']
            inp_size = np.array(inputs.shape[-2:][::-1])
            bsize = len(inputs)
            for ix in range(bsize):
                postitem = postout[ix].data.cpu().numpy()

                orig_size = orig_sizes[ix].data.cpu().numpy()
                gx = int(indices[ix].data.cpu().numpy())
                gid = predict_dset.dset.dataset['images'][gx]['id']

                # Unpack postprocessed predictions: each row of sboxes is
                # (cx, cy, w, h, score, class_index) in normalized coords
                sboxes = postitem.reshape(-1, 6)
                pred_cxywh = sboxes[:, 0:4]
                pred_scores = sboxes[:, 4]
                # NOTE: ``np.int`` was removed in numpy 1.24; use the builtin
                pred_cxs = sboxes[:, 5].astype(int)

                sortx = pred_scores.argsort()
                pred_scores = pred_scores.take(sortx)
                pred_cxs = pred_cxs.take(sortx)
                pred_cxywh = pred_cxywh.take(sortx, axis=0)

                # Undo the letterbox transform to get boxes in the original
                # image coordinates, clipped to the image bounds
                norm_boxes = nh.util.Boxes(pred_cxywh, 'cxywh')
                boxes = norm_boxes.scale(inp_size)
                pred_box = letterbox._boxes_letterbox_invert(
                    boxes, orig_size, inp_size)
                pred_box = pred_box.clip(0, 0, orig_size[0], orig_size[1])

                pred_xywh = pred_box.toformat('xywh').data

                for xywh, cx, score in zip(pred_xywh, pred_cxs, pred_scores):
                    if score > 0.1:
                        cid = predict_dset.dset.dataset['categories'][cx]['id']
                        # FIX: cast numpy scalars to builtin floats so the
                        # annotation dicts are json-serializable on dump
                        pred = {
                            'id': len(predictions) + 1,
                            'image_id': gid,
                            'category_id': cid,
                            'bbox': [float(v) for v in xywh],
                            'score': float(score),
                        }
                        predictions.append(pred)
            # if bx > 1:
            #     break

    predict_coco_dset.dataset['annotations'] = predictions
    predict_coco_dset._build_index()

    # FIX: the dump file must be opened for writing; the previous read-only
    # open would fail if the file was missing and raise on write otherwise
    with open('./viame_pred_dump.mscoco.json', 'w') as file:
        predict_coco_dset.dump(file)

    if False:
        # Interactive debugging/visualization of the predictions
        import utool as ut
        from matplotlib import pyplot as plt
        gids = set([a['image_id'] for a in predict_coco_dset.anns.values()])
        for gid in ut.InteractiveIter(list(gids)):

            try:
                fig = plt.figure(1)
                fig.clf()
                predict_coco_dset.show_annotation(gid=gid)
                fig.canvas.draw()
            except Exception:
                print('cannot draw')

        z = inputs[0].cpu().numpy().transpose(1, 2, 0)
        nh.util.imshow(z, fnum=2, colorspace='rgb')
Exemplo n.º 30
0
def setup_yolo_harness(bsize=16, workers=0):
    """
    Build a :class:`YoloHarn` configured for VOC 2007+2012 YOLOv2 training.

    Args:
        bsize (int): default batch size (overridable via ``--batch_size``)
        workers (int): default dataloader workers (overridable via
            ``--workers``)

    Returns:
        YoloHarn: an uninitialized fit harness

    CommandLine:
        python ~/code/netharn/examples/yolo_voc.py setup_yolo_harness

    Example:
        >>> # DISABLE_DOCTSET
        >>> harn = setup_yolo_harness()
        >>> harn.initialize()
    """

    xpu = nh.XPU.cast('argv')

    # Hyperparameter knobs, overridable from the command line
    nice = ub.argval('--nice', default='Yolo2Baseline')
    batch_size = int(ub.argval('--batch_size', default=bsize))
    # CONSISTENCY FIX: pass the default via the `default` keyword like every
    # sibling argval call here (it was previously passed positionally)
    bstep = int(ub.argval('--bstep', default=4))
    workers = int(ub.argval('--workers', default=workers))
    decay = float(ub.argval('--decay', default=0.0005))
    lr = float(ub.argval('--lr', default=0.001))
    ovthresh = 0.5
    simulated_bsize = bstep * batch_size

    # We will divide the learning rate by the simulated batch size
    datasets = {
        'train': YoloVOCDataset(years=[2007, 2012], split='trainval'),
        'test': YoloVOCDataset(years=[2007], split='test'),
    }
    loaders = {
        key: dset.make_loader(batch_size=batch_size,
                              num_workers=workers,
                              shuffle=(key == 'train'),
                              pin_memory=True,
                              resize_rate=10 * bstep,
                              drop_last=True)
        for key, dset in datasets.items()
    }

    if workers > 0:
        # prevent opencv threads from fighting with dataloader workers
        import cv2
        cv2.setNumThreads(0)

    # assert simulated_bsize == 64, 'must be 64'

    # Pascal 2007 + 2012 trainval has 16551 images
    # Pascal 2007 test has 4952 images
    # In the original YOLO, one batch is 64 images, therefore:
    #
    # ONE EPOCH is 16551 / 64 = 258.609375 = 259 iterations.
    #
    # From the original YOLO VOC v2 config
    # https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg
    #     learning_rate=0.001
    #     burn_in=1000
    #     max_batches = 80200
    #     policy=steps
    #     steps=40000,60000
    #     scales=.1,.1
    #
    # However, the LIGHTNET values are
    #   LR_STEPS = [250, 25000, 35000]
    #
    # The DARNKET STEPS ARE:
    #   DN_STEPS = 1000, 40000, 60000, 80200
    #
    # Based in this, the iter to batch conversion is
    #
    # Key lightnet batch numbers
    # >>> np.array([250, 25000, 30000, 35000, 45000]) / (16512 / 64)
    # array([0.9689,  96.899, 116.2790, 135.658, 174.4186])
    # -> Round
    # array([  1.,  97., 135.])
    # >>> np.array([1000, 40000, 60000, 80200]) / 258
    # array([  3.86683584, 154.67343363, 232.01015044, 310.12023443])
    # -> Round
    # array(4, 157, 232, 310])
    # array([  3.87596899, 155.03875969, 232.55813953, 310.85271318])
    if not ub.argflag('--eav'):
        # darknet-style schedule (epoch -> learning rate)
        lr_step_points = {
            # 0:   lr * 0.1 / simulated_bsize,  # burnin
            # 4:   lr * 1.0 / simulated_bsize,
            0: lr * 1.0 / simulated_bsize,
            154: lr * 1.0 / simulated_bsize,
            155: lr * 0.1 / simulated_bsize,
            232: lr * 0.1 / simulated_bsize,
            233: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 311
        scheduler_ = (
            nh.schedulers.core.YOLOScheduler,
            {
                'points': lr_step_points,
                # 'interpolate': False,
                'interpolate': True,
                'burn_in': 0.96899225 if ub.argflag('--eav') else
                3.86683584,  # number of epochs to burn_in for. approx 1000 batches?
                'dset_size': len(datasets['train']),  # when drop_last=False
                # 'dset_size': (len(datasets['train']) // simulated_bsize) * simulated_bsize,  # make a multiple of batch_size because drop_last=True
                'batch_size': batch_size,
            })
    else:
        # lightnet-style schedule
        lr_step_points = {
            # dividing by batch size was one of those unpublished details
            0: lr * 0.1 / simulated_bsize,
            1: lr * 1.0 / simulated_bsize,
            96: lr * 1.0 / simulated_bsize,
            97: lr * 0.1 / simulated_bsize,
            135: lr * 0.1 / simulated_bsize,
            136: lr * 0.01 / simulated_bsize,
        }
        max_epoch = 176
        scheduler_ = (nh.schedulers.ListedLR, {
            'points': lr_step_points,
            'interpolate': False,
        })

    # Initial weights: imagenet-pretrained by default, full lightnet VOC
    # weights, or an explicit checkpoint path
    weights = ub.argval('--weights', default=None)
    if weights is None or weights == 'imagenet':
        weights = light_yolo.initial_imagenet_weights()
    elif weights == 'lightnet':
        weights = light_yolo.demo_voc_weights()
    else:
        print('weights = {!r}'.format(weights))

    # Anchors (YOLOv2 VOC priors, in grid-cell units)
    anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
                        (5.05587, 8.09892), (9.47112, 4.84053),
                        (11.2364, 10.0071)])

    from netharn.models.yolo2 import region_loss2
    # from netharn.models.yolo2 import light_region_loss

    hyper = nh.HyperParams(
        **{
            'nice':
            nice,
            'workdir':
            ub.truepath('~/work/voc_yolo2'),
            'datasets':
            datasets,

            # 'xpu': 'distributed(todo: fancy network stuff)',
            # 'xpu': 'cpu',
            # 'xpu': 'gpu:0,1,2,3',
            'xpu':
            xpu,

            # a single dict is applied to all datset loaders
            'loaders':
            loaders,
            'model': (
                light_yolo.Yolo,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'conf_thresh': 0.001,
                    # 'conf_thresh': 0.1,  # make training a bit faster
                    'nms_thresh': 0.5 if not ub.argflag('--eav') else 0.4
                }),
            'criterion': (
                region_loss2.RegionLoss,
                {
                    'num_classes': datasets['train'].num_classes,
                    'anchors': anchors,
                    'reduction': 32,
                    'seen': 0,
                    'coord_scale': 1.0,
                    'noobject_scale': 1.0,
                    'object_scale': 5.0,
                    'class_scale': 1.0,
                    'thresh': 0.6,  # iou_thresh
                    # 'seen_thresh': 12800,
                }),

            # 'criterion': (light_region_loss.RegionLoss, {
            #     'num_classes': datasets['train'].num_classes,
            #     'anchors': anchors,
            #     'object_scale': 5.0,
            #     'noobject_scale': 1.0,

            #     # eav version originally had a random *2 in cls loss,
            #     # we removed, that but we can replicate it here.
            #     'class_scale': 1.0 if not ub.argflag('--eav') else 2.0,
            #     'coord_scale': 1.0,

            #     'thresh': 0.6,  # iou_thresh
            #     'seen_thresh': 12800,
            #     # 'small_boxes': not ub.argflag('--eav'),
            #     'small_boxes': True,
            #     'mse_factor': 0.5 if not ub.argflag('--eav') else 1.0,
            # }),
            'initializer': (nh.initializers.Pretrained, {
                'fpath': weights,
            }),
            'optimizer': (
                torch.optim.SGD,
                {
                    'lr': lr_step_points[0],
                    'momentum': 0.9,
                    'dampening': 0,
                    # multiplying by batch size was one of those unpublished details
                    'weight_decay': decay * simulated_bsize,
                }),
            'scheduler':
            scheduler_,
            'monitor': (nh.Monitor, {
                'minimize': ['loss'],
                'maximize': ['mAP'],
                'patience': max_epoch,
                'max_epoch': max_epoch,
            }),
            'augment':
            datasets['train'].augmenter,
            'dynamics': {
                # Controls how many batches to process before taking a step in the
                # gradient direction. Effectively simulates a batch_size that is
                # `bstep` times bigger.
                'batch_step': bstep,
            },
            'other': {
                # Other params are not used internally, so you are free to set any
                # extra params specific to your algorithm, and still have them
                # logged in the hyperparam structure. For YOLO this is `ovthresh`.
                'batch_size': batch_size,
                'nice': nice,
                'ovthresh': ovthresh,  # used in mAP computation
                'input_range': 'norm01',
            },
        })
    print('max_epoch = {!r}'.format(max_epoch))
    harn = YoloHarn(hyper=hyper)
    harn.config['prog_backend'] = 'progiter'
    harn.intervals['log_iter_train'] = None
    harn.intervals['log_iter_test'] = None
    harn.intervals['log_iter_vali'] = None
    harn.config[
        'large_loss'] = 1000  # tell netharn when to check for divergence
    return harn