def task_fit(taskname):
    """
    Train a semantic-segmentation model for a named task.

    Loads the task datasets, fits a normalizer on the training split and
    shares it across splits, builds hyperparameters and the model selected
    via ``--arch``, then runs a ``FitHarness`` training loop.

    Args:
        taskname (str): name of the task to train, e.g. 'camvid' or
            'urban_mapper_3d'

    Returns:
        fit_harness.FitHarness: the harness after training completes

    CommandLine:
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet
        python -m clab.live.sseg_train task_fit --task=camvid --arch=unet
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet --dry
        python -m clab.live.sseg_train task_fit --task=camvid --arch=unet --colorspace=RGB
        python -m clab.live.sseg_train task_fit --task=camvid --arch=unet --colorspace=LAB
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet --colorspace=RGB
        python -m clab.live.sseg_train task_fit --task=camvid --arch=segnet --colorspace=LAB

        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=segnet
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --noaux
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --dry
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --colorspace=RGB --all
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --colorspace=RGB
        python -m clab.live.sseg_train task_fit --task=urban_mapper_3d --arch=unet --dry

    Script:
        >>> from clab.fit_harness import *
        >>> taskname = ub.argval('--task', default='camvid')
        >>> harn = task_fit(taskname)
    """
    colorspace = ub.argval('--colorspace', default='RGB').upper()

    datasets = load_task_dataset(taskname, colorspace=colorspace)
    datasets['train'].augment = True

    # Make sure we use consistent normalization across splits: the
    # normalizer is fit on the training data and shared with test/vali.
    # TODO: give normalization a part of the hashid
    # TODO: save normalization type with the model
    center_inputs = datasets['train']._make_normalizer()
    datasets['test'].center_inputs = center_inputs
    datasets['vali'].center_inputs = center_inputs

    # turn off aux layers
    if ub.argflag('--noaux'):
        for v in datasets.values():
            v.aux_keys = []

    arch = ub.argval('--arch', default='unet')

    # NOTE: previously there was a dead `if arch == 'segnet'` branch that
    # re-assigned the same value; all archs use the same batch size here.
    batch_size = 6

    n_classes = datasets['train'].n_classes
    n_channels = datasets['train'].n_channels
    class_weights = datasets['train'].class_weights()
    ignore_label = datasets['train'].ignore_label

    print('n_classes = {!r}'.format(n_classes))
    print('n_channels = {!r}'.format(n_channels))
    print('batch_size = {!r}'.format(batch_size))

    hyper = hyperparams.HyperParams(
        criterion=(criterions.CrossEntropyLoss2D, {
            'ignore_label': ignore_label,
            'weight': class_weights,
        }),
        optimizer=(torch.optim.SGD, {
            'weight_decay': .0005,
            'momentum': 0.9,
            'nesterov': True,
        }),
        scheduler=('Exponential', {
            'gamma': 0.99,
            'base_lr': 0.001,
            'stepsize': 2,
        }),
        other={
            'n_classes': n_classes,
            'n_channels': n_channels,
            'augment': datasets['train'].augment,
            'colorspace': datasets['train'].colorspace,
        })

    # segnet is the only arch here with a pretrained initialization
    if arch == 'segnet':
        pretrained = 'vgg'
    else:
        pretrained = None

    train_dpath, test_dpath = directory_structure(
        datasets['train'].task.workdir, arch, datasets,
        pretrained=pretrained,
        train_hyper_id=hyper.hyper_id(),
        suffix='_' + hyper.other_id())

    def custom_metrics(harn, output, label):
        # Per-batch segmentation metrics for the task's label set
        ignore_label = datasets['train'].ignore_label
        labels = datasets['train'].task.labels

        metrics_dict = metrics._sseg_metrics(output, label, labels=labels,
                                             ignore_label=ignore_label)
        return metrics_dict

    print('arch = {!r}'.format(arch))
    dry = ub.argflag('--dry')

    # Build the model for the requested architecture (None in dry mode)
    if dry:
        model = None
    elif arch == 'segnet':
        model = models.SegNet(in_channels=n_channels, n_classes=n_classes)
        model.init_he_normal()
        model.init_vgg16_params()
    elif arch == 'linknet':
        model = models.LinkNet(in_channels=n_channels, n_classes=n_classes)
    elif arch == 'unet':
        model = models.UNet(in_channels=n_channels, n_classes=n_classes)
        model.init_he_normal()
    elif arch == 'dummy':
        model = models.SSegDummy(in_channels=n_channels, n_classes=n_classes)
    else:
        raise ValueError('unknown arch')

    xpu = xpu_device.XPU.from_argv()
    harn = fit_harness.FitHarness(
        model=model, hyper=hyper, datasets=datasets, xpu=xpu,
        train_dpath=train_dpath, dry=dry,
        batch_size=batch_size,
    )
    harn.add_batch_metric_hook(custom_metrics)

    harn.run()
    return harn
def setup_harness(workers=None):
    """
    Build a ``FitHarness`` configured to train darknet YOLO on VOC.

    Constructs the VOC datasets (split choice via ``--data``), data loaders,
    YOLO hyperparameters, and the batch-runner / metric / epoch hooks used to
    compute mAP on the fly.

    Args:
        workers (int | None): number of dataloader workers. If None, it is
            taken from ``--workers`` (default: half the logical CPUs).

    Returns:
        fit_harness.FitHarness

    CommandLine:
        python ~/code/clab/examples/yolo_voc.py setup_harness
        python ~/code/clab/examples/yolo_voc.py setup_harness --profile

    Example:
        >>> harn = setup_harness(workers=0)
        >>> harn.initialize()
        >>> harn.dry = True
        >>> # xdoc: +SKIP
        >>> harn.run()
    """
    workdir = ub.truepath('~/work/VOC2007')
    devkit_dpath = ub.truepath('~/data/VOC/VOCdevkit')
    YoloVOCDataset.ensure_voc_data()

    if ub.argflag('--2007'):
        dsetkw = {'years': [2007]}
    elif ub.argflag('--2012'):
        dsetkw = {'years': [2007, 2012]}
    else:
        dsetkw = {'years': [2007]}

    data_choice = ub.argval('--data', 'normal')
    if data_choice == 'combined':
        datasets = {
            'test': YoloVOCDataset(devkit_dpath, split='test', **dsetkw),
            'train': YoloVOCDataset(devkit_dpath, split='trainval', **dsetkw),
        }
    elif data_choice == 'notest':
        datasets = {
            'train': YoloVOCDataset(devkit_dpath, split='train', **dsetkw),
            'vali': YoloVOCDataset(devkit_dpath, split='val', **dsetkw),
        }
    elif data_choice == 'normal':
        datasets = {
            'train': YoloVOCDataset(devkit_dpath, split='train', **dsetkw),
            'vali': YoloVOCDataset(devkit_dpath, split='val', **dsetkw),
            'test': YoloVOCDataset(devkit_dpath, split='test', **dsetkw),
        }
    else:
        raise KeyError(data_choice)

    nice = ub.argval('--nice', default=None)

    pretrained_fpath = darknet.initial_weights()

    # NOTE: XPU implicitly supports DataParallel just pass --gpu=0,1,2,3
    xpu = xpu_device.XPU.cast('argv')
    print('xpu = {!r}'.format(xpu))

    ensure_ulimit()

    postproc_params = dict(
        conf_thresh=0.001,
        nms_thresh=0.5,
        ovthresh=0.5,
    )

    max_epoch = 160

    lr_step_points = {
        0: 0.001,
        60: 0.0001,
        90: 0.00001,
    }
    if ub.argflag('--warmup'):
        lr_step_points = {
            # warmup learning rate
            0: 0.0001,
            1: 0.0001,
            2: 0.0002,
            3: 0.0003,
            4: 0.0004,
            5: 0.0005,
            6: 0.0006,
            7: 0.0007,
            8: 0.0008,
            9: 0.0009,
            10: 0.0010,
            # cooldown learning rate
            60: 0.0001,
            90: 0.00001,
        }

    batch_size = int(ub.argval('--batch_size', default=16))
    n_cpus = psutil.cpu_count(logical=True)
    # BUGFIX: respect an explicitly passed ``workers`` argument. Previously
    # the CLI value unconditionally clobbered the parameter, so
    # ``setup_harness(workers=0)`` (as in the doctest) had no effect.
    # This matches the guard used by the yolo_voc2 variant.
    if workers is None:
        workers = int(ub.argval('--workers', default=int(n_cpus / 2)))

    print('Making loaders')
    # (simplified: the old ``workers if workers is not None else workers``
    # expression was a tautological no-op)
    loaders = make_loaders(datasets, batch_size=batch_size, workers=workers)

    """
    Reference:
        Original YOLO9000 hyperparameters are defined here:
        https://github.com/pjreddie/darknet/blob/master/cfg/yolo-voc.2.0.cfg

        https://github.com/longcw/yolo2-pytorch/issues/1#issuecomment-286410772

    Notes:
        jitter is a translation / crop parameter
        https://groups.google.com/forum/#!topic/darknet/A-JJeXprvJU

        thresh in 2.0.cfg is iou_thresh here
    """

    print('Making hyperparams')
    hyper = hyperparams.HyperParams(
        model=(darknet.Darknet19, {
            'num_classes': datasets['train'].num_classes,
            'anchors': datasets['train'].anchors
        }),
        criterion=(darknet_loss.DarknetLoss, {
            'anchors': datasets['train'].anchors,
            'object_scale': 5.0,
            'noobject_scale': 1.0,
            'class_scale': 1.0,
            'coord_scale': 1.0,
            'iou_thresh': 0.6,
            'reproduce_longcw': ub.argflag('--longcw'),
            'denom': ub.argval('--denom', default='num_boxes'),
        }),
        optimizer=(torch.optim.SGD, dict(
            lr=lr_step_points[0],
            momentum=0.9,
            weight_decay=0.0005
        )),
        initializer=(nninit.Pretrained, {
            'fpath': pretrained_fpath,
        }),
        scheduler=(ListedLR, dict(
            step_points=lr_step_points
        )),
        other=ub.dict_union({
            'nice': str(nice),
            'batch_size': loaders['train'].batch_sampler.batch_size,
        }, postproc_params),
        centering=None,
        augment=datasets['train'].augmenter,
    )

    harn = fit_harness.FitHarness(
        hyper=hyper, xpu=xpu, loaders=loaders, max_iter=max_epoch,
        workdir=workdir,
    )
    harn.postproc_params = postproc_params
    harn.nice = nice
    harn.monitor = monitor.Monitor(min_keys=['loss'],
                                   patience=max_epoch)

    @harn.set_batch_runner
    def batch_runner(harn, inputs, labels):
        """
        Custom function to compute the output of a batch and its loss.

        Example:
            >>> import sys
            >>> sys.path.append('/home/joncrall/code/clab/examples')
            >>> from yolo_voc import *
            >>> harn = setup_harness(workers=0)
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'train')
            >>> inputs, labels = batch
            >>> criterion = harn.criterion
            >>> weights_fpath = darknet.demo_weights()
            >>> state_dict = torch.load(weights_fpath)['model_state_dict']
            >>> harn.model.module.load_state_dict(state_dict)
            >>> outputs, loss = harn._custom_run_batch(harn, inputs, labels)
        """
        # hack for data parallel: run everything through the wrapped model
        outputs = harn.model(*inputs)

        # darknet criterion needs to know the input image shape
        inp_size = tuple(inputs[0].shape[-2:])

        aoff_pred, iou_pred, prob_pred = outputs
        gt_boxes, gt_classes, orig_size, indices, gt_weights = labels

        loss = harn.criterion(aoff_pred, iou_pred, prob_pred, gt_boxes,
                              gt_classes, gt_weights=gt_weights,
                              inp_size=inp_size, epoch=harn.epoch)
        return outputs, loss

    @harn.add_batch_metric_hook
    def custom_metrics(harn, output, labels):
        # Break the darknet loss into its components for logging
        metrics_dict = ub.odict()
        criterion = harn.criterion
        metrics_dict['L_bbox'] = float(criterion.bbox_loss.data.cpu().numpy())
        metrics_dict['L_iou'] = float(criterion.iou_loss.data.cpu().numpy())
        metrics_dict['L_cls'] = float(criterion.cls_loss.data.cpu().numpy())
        return metrics_dict

    # Set as a harness attribute instead of using a closure
    harn.batch_confusions = []

    @harn.add_iter_callback
    def on_batch(harn, tag, loader, bx, inputs, labels, outputs, loss):
        """
        Custom hook to run on each batch (used to compute mAP on the fly)

        Example:
            >>> harn = setup_harness(workers=0)
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'train')
            >>> inputs, labels = batch
            >>> criterion = harn.criterion
            >>> loader = harn.loaders['train']
            >>> weights_fpath = darknet.demo_weights()
            >>> state_dict = torch.load(weights_fpath)['model_state_dict']
            >>> harn.model.module.load_state_dict(state_dict)
            >>> outputs, loss = harn._custom_run_batch(harn, inputs, labels)
            >>> tag = 'train'
            >>> on_batch(harn, tag, loader, bx, inputs, labels, outputs, loss)
        """
        # Accumulate relevant outputs to measure
        gt_boxes, gt_classes, orig_size, indices, gt_weights = labels
        im_sizes = orig_size
        inp_size = inputs[0].shape[-2:][::-1]

        conf_thresh = harn.postproc_params['conf_thresh']
        nms_thresh = harn.postproc_params['nms_thresh']
        ovthresh = harn.postproc_params['ovthresh']

        postout = harn.model.module.postprocess(outputs, inp_size, im_sizes,
                                                conf_thresh, nms_thresh)

        # Compute: y_pred, y_true, and y_score for this batch
        batch_pred_boxes, batch_pred_scores, batch_pred_cls_inds = postout
        batch_true_boxes, batch_true_cls_inds = labels[0:2]
        batch_orig_sz, batch_img_inds = labels[2:4]

        y_batch = []
        # NOTE: loop variable renamed from ``bx`` to ``ix`` so it no longer
        # shadows the callback's batch-index parameter.
        for ix, index in enumerate(batch_img_inds.data.cpu().numpy().ravel()):
            pred_boxes = batch_pred_boxes[ix]
            pred_scores = batch_pred_scores[ix]
            pred_cxs = batch_pred_cls_inds[ix]

            # Group groundtruth boxes by class
            true_boxes_ = batch_true_boxes[ix].data.cpu().numpy()
            true_cxs = batch_true_cls_inds[ix].data.cpu().numpy()
            true_weights = gt_weights[ix].data.cpu().numpy()

            # Unnormalize the true bboxes back to orig coords
            orig_size = batch_orig_sz[ix]
            sx, sy = np.array(orig_size) / np.array(inp_size)
            if len(true_boxes_):
                true_boxes = np.hstack([true_boxes_, true_weights[:, None]])
                true_boxes[:, 0:4:2] *= sx
                true_boxes[:, 1:4:2] *= sy
            else:
                # BUGFIX: previously ``true_boxes`` was left unbound (or
                # stale from the previous iteration) when an image had no
                # ground-truth boxes.
                true_boxes = np.empty((0, 5))

            y = voc.EvaluateVOC.image_confusions(true_boxes, true_cxs,
                                                 pred_boxes, pred_scores,
                                                 pred_cxs, ovthresh=ovthresh)
            y['gx'] = index
            y_batch.append(y)

        harn.batch_confusions.extend(y_batch)

    @harn.add_epoch_callback
    def on_epoch(harn, tag, loader):
        # Reduce the accumulated per-image confusions into an epoch mAP
        y = pd.concat(harn.batch_confusions)
        num_classes = len(loader.dataset.label_names)

        mean_ap, ap_list = voc.EvaluateVOC.compute_map(y, num_classes)

        harn.log_value(tag + ' epoch mAP', mean_ap, harn.epoch)
        harn.batch_confusions.clear()

    return harn
def setup_harness(workers=None):
    """
    Build a ``FitHarness`` configured to train lightnet YOLO on VOC.

    Like the yolo_voc variant, but uses ``light_yolo.Yolo`` / ``RegionLoss``
    with hard-coded anchors, and supports a ``--small`` input-size mode.

    Args:
        workers (int | None): number of dataloader workers. If None, it is
            taken from ``--workers`` (default: half the logical CPUs).

    Returns:
        fit_harness.FitHarness

    CommandLine:
        python ~/code/clab/examples/yolo_voc2.py setup_harness
        python ~/code/clab/examples/yolo_voc2.py setup_harness --profile
        python ~/code/clab/examples/yolo_voc2.py setup_harness --flamegraph

    Example:
        >>> harn = setup_harness(workers=0)
        >>> harn.initialize()
        >>> harn.dry = True
        >>> harn.run()
    """
    workdir = ub.truepath('~/work/VOC2007')
    devkit_dpath = ub.truepath('~/data/VOC/VOCdevkit')
    YoloVOCDataset.ensure_voc_data()

    if ub.argflag('--2007'):
        dsetkw = {'years': [2007]}
    elif ub.argflag('--2012'):
        dsetkw = {'years': [2007, 2012]}
    else:
        dsetkw = {'years': [2007]}

    data_choice = ub.argval('--data', 'normal')

    if ub.argflag('--small'):
        dsetkw['base_wh'] = np.array([7, 7]) * 32
        dsetkw['scales'] = [-1, 1]

    if data_choice == 'combined':
        datasets = {
            'test': YoloVOCDataset(devkit_dpath, split='test', **dsetkw),
            'train': YoloVOCDataset(devkit_dpath, split='trainval', **dsetkw),
        }
    elif data_choice == 'notest':
        datasets = {
            'train': YoloVOCDataset(devkit_dpath, split='train', **dsetkw),
            'vali': YoloVOCDataset(devkit_dpath, split='val', **dsetkw),
        }
    elif data_choice == 'normal':
        datasets = {
            'train': YoloVOCDataset(devkit_dpath, split='train', **dsetkw),
            'vali': YoloVOCDataset(devkit_dpath, split='val', **dsetkw),
            'test': YoloVOCDataset(devkit_dpath, split='test', **dsetkw),
        }
    else:
        raise KeyError(data_choice)

    nice = ub.argval('--nice', default=None)

    pretrained_fpath = ensure_lightnet_initial_weights()

    # NOTE: XPU implicitly supports DataParallel just pass --gpu=0,1,2,3
    xpu = xpu_device.XPU.cast('argv')
    print('xpu = {!r}'.format(xpu))

    ensure_ulimit()

    postproc_params = dict(
        conf_thresh=0.001,
        # nms_thresh=0.5,
        nms_thresh=0.4,
        ovthresh=0.5,
    )

    max_epoch = 160

    lr_step_points = {
        0: 0.001,
        60: 0.0001,
        90: 0.00001,
    }
    # NOTE(review): the ``--warmup`` guard below is commented out, so the
    # warmup/cooldown schedule unconditionally replaces the dict above.
    # Preserved as-is since it looks like a deliberate experiment toggle.
    # if ub.argflag('--warmup'):
    lr_step_points = {
        # warmup learning rate
        0: 0.0001,
        1: 0.0001,
        2: 0.0002,
        3: 0.0003,
        4: 0.0004,
        5: 0.0005,
        6: 0.0006,
        7: 0.0007,
        8: 0.0008,
        9: 0.0009,
        10: 0.0010,
        # cooldown learning rate
        60: 0.0001,
        90: 0.00001,
    }

    batch_size = int(ub.argval('--batch_size', default=16))
    n_cpus = psutil.cpu_count(logical=True)
    if workers is None:
        workers = int(ub.argval('--workers', default=int(n_cpus / 2)))

    print('Making loaders')
    # (simplified: the old ``workers if workers is not None else workers``
    # expression was a tautological no-op)
    loaders = make_loaders(datasets, batch_size=batch_size, workers=workers)

    # anchors = {'num': 5, 'values': list(ub.flatten(datasets['train'].anchors))}
    anchors = dict(num=5, values=[1.3221, 1.73145, 3.19275, 4.00944, 5.05587,
                                  8.09892, 9.47112, 4.84053, 11.2364,
                                  10.0071])

    print('Making hyperparams')
    hyper = hyperparams.HyperParams(
        model=(light_yolo.Yolo, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
            'conf_thresh': postproc_params['conf_thresh'],
            'nms_thresh': postproc_params['nms_thresh'],
        }),
        criterion=(RegionLoss, {
            'num_classes': datasets['train'].num_classes,
            'anchors': anchors,
        }),
        optimizer=(torch.optim.SGD, dict(
            lr=lr_step_points[0],
            momentum=0.9,
            weight_decay=0.0005
        )),
        initializer=(nninit.Pretrained, {
            'fpath': pretrained_fpath,
        }),
        scheduler=(ListedLR, dict(
            step_points=lr_step_points
        )),
        other=ub.dict_union({
            'nice': str(nice),
            'batch_size': loaders['train'].batch_sampler.batch_size,
        }, postproc_params),
        centering=None,
        augment=datasets['train'].augmenter,
    )

    harn = fit_harness.FitHarness(
        hyper=hyper, xpu=xpu, loaders=loaders, max_iter=max_epoch,
        workdir=workdir,
    )
    harn.postproc_params = postproc_params
    harn.nice = nice
    harn.monitor = monitor.Monitor(min_keys=['loss'],
                                   patience=max_epoch)

    @harn.set_batch_runner
    @profiler.profile
    def batch_runner(harn, inputs, labels):
        """
        Custom function to compute the output of a batch and its loss.

        Example:
            >>> import sys
            >>> sys.path.append('/home/joncrall/code/clab/examples')
            >>> from yolo_voc2 import *
            >>> harn = setup_harness(workers=0)
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'vali')
            >>> inputs, labels = batch
            >>> criterion = harn.criterion
            >>> weights_fpath = light_yolo.demo_weights()
            >>> state_dict = torch.load(weights_fpath)['weights']
            >>> harn.model.module.load_state_dict(state_dict)
            >>> outputs, loss = harn._custom_run_batch(harn, inputs, labels)
        """
        if harn.dry:
            # Dry mode: fabricate an output of the right shape
            shape = harn.model.module.output_shape_for(inputs[0].shape)
            outputs = torch.rand(*shape)
        else:
            outputs = harn.model.forward(*inputs)

        target = labels[0]

        # ``seen`` tracks how many images the criterion has observed so far
        bsize = inputs[0].shape[0]
        n_items = len(harn.loaders['train'])
        bx = harn.bxs.get('train', 0)
        seen = harn.epoch * n_items + (bx * bsize)

        loss = harn.criterion(outputs, target, seen=seen)
        return outputs, loss

    @harn.add_batch_metric_hook
    @profiler.profile
    def custom_metrics(harn, output, labels):
        # Break the region loss into its components for logging
        metrics_dict = ub.odict()
        criterion = harn.criterion
        metrics_dict['L_bbox'] = float(criterion.loss_coord.data.cpu().numpy())
        metrics_dict['L_iou'] = float(criterion.loss_conf.data.cpu().numpy())
        metrics_dict['L_cls'] = float(criterion.loss_cls.data.cpu().numpy())
        return metrics_dict

    # Set as a harness attribute instead of using a closure
    harn.batch_confusions = []

    @harn.add_iter_callback
    @profiler.profile
    def on_batch(harn, tag, loader, bx, inputs, labels, outputs, loss):
        """
        Custom hook to run on each batch (used to compute mAP on the fly)

        Example:
            >>> harn = setup_harness(workers=0)
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'vali')
            >>> inputs, labels = batch
            >>> criterion = harn.criterion
            >>> loader = harn.loaders['train']
            >>> weights_fpath = light_yolo.demo_weights()
            >>> state_dict = torch.load(weights_fpath)['weights']
            >>> harn.model.module.load_state_dict(state_dict)
            >>> outputs, loss = harn._custom_run_batch(harn, inputs, labels)
            >>> tag = 'train'
            >>> on_batch(harn, tag, loader, bx, inputs, labels, outputs, loss)

        Ignore:
            >>> target, gt_weights, batch_orig_sz, batch_index = labels
            >>> bx = 0
            >>> postout = harn.model.module.postprocess(outputs.clone())
            >>> item = postout[bx].cpu().numpy()
            >>> item = item[item[:, 4] > .6]
            >>> cxywh = util.Boxes(item[..., 0:4], 'cxywh')
            >>> orig_size = batch_orig_sz[bx].numpy().ravel()
            >>> tlbr = cxywh.scale(orig_size).asformat('tlbr').data
            >>> truth_bx = target[bx]
            >>> truth_bx = truth_bx[truth_bx[:, 0] != -1]
            >>> truth_tlbr = util.Boxes(truth_bx[..., 1:5].numpy(), 'cxywh').scale(orig_size).asformat('tlbr').data
            >>> chw = inputs[0][bx].numpy().transpose(1, 2, 0)
            >>> rgb255 = cv2.resize(chw * 255, tuple(orig_size))
            >>> mplutil.figure(fnum=1, doclf=True)
            >>> mplutil.imshow(rgb255, colorspace='rgb')
            >>> mplutil.draw_boxes(tlbr, 'tlbr')
            >>> mplutil.draw_boxes(truth_tlbr, 'tlbr', color='orange')
            >>> mplutil.show_if_requested()
        """
        # Accumulate relevant outputs to measure
        target, gt_weights, batch_orig_sz, batch_index = labels

        conf_thresh = harn.postproc_params['conf_thresh']
        nms_thresh = harn.postproc_params['nms_thresh']
        ovthresh = harn.postproc_params['ovthresh']

        if outputs is None:
            # nothing to measure (e.g. dry run produced no output)
            return

        get_bboxes = harn.model.module.postprocess
        get_bboxes.conf_thresh = conf_thresh
        get_bboxes.nms_thresh = nms_thresh

        postout = harn.model.module.postprocess(outputs)

        # Convert postprocessed detections to per-image tlbr / score / class
        batch_pred_boxes = []
        batch_pred_scores = []
        batch_pred_cls_inds = []
        for bx, item_ in enumerate(postout):
            item = item_.cpu().numpy()
            if len(item):
                cxywh = util.Boxes(item[..., 0:4], 'cxywh')
                orig_size = batch_orig_sz[bx].cpu().numpy().ravel()
                tlbr = cxywh.scale(orig_size).asformat('tlbr').data
                batch_pred_boxes.append(tlbr)
                batch_pred_scores.append(item[..., 4])
                batch_pred_cls_inds.append(item[..., 5])
            else:
                batch_pred_boxes.append(np.empty((0, 4)))
                batch_pred_scores.append(np.empty(0))
                batch_pred_cls_inds.append(np.empty(0))

        batch_true_cls_inds = target[..., 0]
        batch_true_boxes = target[..., 1:5]
        batch_img_inds = batch_index

        y_batch = []
        for bx, index in enumerate(batch_img_inds.data.cpu().numpy().ravel()):
            pred_boxes = batch_pred_boxes[bx]
            pred_scores = batch_pred_scores[bx]
            pred_cxs = batch_pred_cls_inds[bx]

            # Group groundtruth boxes by class
            true_boxes_ = batch_true_boxes[bx].data.cpu().numpy()
            true_cxs = batch_true_cls_inds[bx].data.cpu().numpy()
            true_weights = gt_weights[bx].data.cpu().numpy()

            # Unnormalize the true bboxes back to orig coords
            orig_size = batch_orig_sz[bx]
            if len(true_boxes_):
                true_boxes = util.Boxes(true_boxes_, 'cxywh').scale(
                    orig_size).asformat('tlbr').data
                true_boxes = np.hstack([true_boxes, true_weights[:, None]])
            else:
                true_boxes = true_boxes_.reshape(-1, 4)

            y = voc.EvaluateVOC.image_confusions(true_boxes, true_cxs,
                                                 pred_boxes, pred_scores,
                                                 pred_cxs, ovthresh=ovthresh)
            y['gx'] = index
            y_batch.append(y)

        harn.batch_confusions.extend(y_batch)

    @harn.add_epoch_callback
    @profiler.profile
    def on_epoch(harn, tag, loader):
        # Reduce the accumulated per-image confusions into epoch mAP values
        y = pd.concat(harn.batch_confusions)
        num_classes = len(loader.dataset.label_names)

        mean_ap, ap_list = voc.EvaluateVOC.compute_map(y, num_classes)

        harn.log_value(tag + ' epoch mAP', mean_ap, harn.epoch)
        max_ap = np.nanmax(ap_list)
        harn.log_value(tag + ' epoch max-AP', max_ap, harn.epoch)
        harn.batch_confusions.clear()

    return harn
def siam_vsone_train():
    r"""
    Train a Siamese network with a contrastive loss on an IBEIS database.

    The database is selected with ``--db``; a pretrained torchvision
    resnet50 is used as the shared branch of the Siamese model.

    CommandLine:
        python -m clab.live.siam_train siam_vsone_train --dry
        python -m clab.live.siam_train siam_vsone_train
        python -m clab.live.siam_train siam_vsone_train --db PZ_Master1
        python -m clab.live.siam_train siam_vsone_train --db PZ_MTEST --dry
        python -m clab.live.siam_train siam_vsone_train --db PZ_MTEST
        python -m clab.live.siam_train siam_vsone_train --db RotanTurtles
        python -m clab.live.siam_train siam_vsone_train --db humpbacks_fb

    Script:
        >>> # DISABLE_DOCTEST
        >>> from clab.live.siam_train import *  # NOQA
        >>> siam_vsone_train()
    """
    dbname = ub.argval('--db', default='PZ_MTEST')
    # train_dataset, vali_dataset, test_dataset = ibeis_datasets('GZ_Master')
    # input chips are square with side length ``dim``
    dim = 512
    datasets = randomized_ibeis_dset(dbname, dim=dim)
    workdir = ub.ensuredir(
        os.path.expanduser('~/data/work/siam-ibeis2/' + dbname))
    print('workdir = {!r}'.format(workdir))
    # train_dataset, vali_dataset, test_dataset = att_faces_datasets()
    # workdir = os.path.expanduser('~/data/work/siam-att')
    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    batch_size = 6
    print('batch_size = {!r}'.format(batch_size))
    # class_weights = train_dataset.class_weights()
    # print('class_weights = {!r}'.format(class_weights))

    # Shared branch: an ImageNet-pretrained torchvision model by name
    pretrained = 'resnet50'
    # pretrained = 'resnet50'
    branch = getattr(torchvision.models, pretrained)(pretrained=True)
    model = models.SiameseLP(p=2, branch=branch, input_shape=(1, 3, dim, dim))
    print(model)

    hyper = hyperparams.HyperParams(
        criterion=(criterions.ContrastiveLoss, {
            'margin': 4,
            'weight': None,
        }),
        optimizer=(torch.optim.SGD, {
            'weight_decay': .0005,
            'momentum': 0.9,
            'nesterov': True,
        }),
        scheduler=('Exponential', {
            'gamma': 0.99,
            'base_lr': 0.001,
            'stepsize': 2,
        }),
        other={
            'n_classes': 2,
            'augment': datasets['train'].augment,
        })

    def custom_metrics(harn, output, label):
        # Per-batch Siamese metrics (uses the criterion's margin)
        from clab import metrics
        metrics_dict = metrics._siamese_metrics(output, label,
                                                margin=harn.criterion.margin)
        return metrics_dict

    dry = ub.argflag('--dry')

    # local import to avoid an import cycle with sseg_train at module load
    from clab.live.sseg_train import directory_structure
    train_dpath, test_dpath = directory_structure(
        workdir, model.__class__.__name__, datasets,
        pretrained=pretrained,
        train_hyper_id=hyper.hyper_id(),
        suffix='_' + hyper.other_id())

    xpu = xpu_device.XPU.from_argv()
    harn = fit_harness.FitHarness(
        model=model, hyper=hyper, datasets=datasets, xpu=xpu,
        batch_size=batch_size,
        train_dpath=train_dpath, dry=dry,
    )
    harn.add_batch_metric_hook(custom_metrics)
    harn.run()
def urban_fit():
    """
    Train a model on the urban_mapper_3d semantic segmentation task.

    Selects the architecture via ``--arch`` (segnet, linknet, unet, unet2,
    dense_unet, dense_unet2, dummy), builds normalized datasets, and runs a
    ``FitHarness`` training loop. Returns the harness.

    CommandLine:
        python -m clab.live.urban_train urban_fit --profile

        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=segnet
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=segnet --dry
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet --noaux
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --dry
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet --colorspace=RGB --combine
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet --dry
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet2 --colorspace=RGB --combine
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet2 --colorspace=RGB --use_aux_diff
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=segnet --colorspace=RGB --use_aux_diff
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=dense_unet --colorspace=RGB --use_aux_diff

        # Train a variant of the dense net with more parameters
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=dense_unet --colorspace=RGB --use_aux_diff --combine \
            --pretrained '/home/local/KHQ/jon.crall/data/work/urban_mapper4/arch/dense_unet/train/input_25800-phpjjsqu/solver_25800-phpjjsqu_dense_unet_mmavmuou_zeosddyf_a=1,c=RGB,n_ch=6,n_cl=4/torch_snapshots/_epoch_00000030.pt' --gpu=1

        # Fine tune the model using all the available data
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet2 --colorspace=RGB --use_aux_diff --combine \
            --pretrained '/home/local/KHQ/jon.crall/data/work/urban_mapper2/arch/unet2/train/input_25800-hemanvft/solver_25800-hemanvft_unet2_mmavmuou_stuyuerd_a=1,c=RGB,n_ch=6,n_cl=4/torch_snapshots/_epoch_00000041.pt' --gpu=3 --finetune

        # Keep a bit of the data for validation but use more
        python -m clab.live.urban_train urban_fit --task=urban_mapper_3d --arch=unet2 --colorspace=RGB --use_aux_diff --halfcombo \
            --pretrained '/home/local/KHQ/jon.crall/data/work/urban_mapper2/arch/unet2/train/input_25800-hemanvft/solver_25800-hemanvft_unet2_mmavmuou_stuyuerd_a=1,c=RGB,n_ch=6,n_cl=4/torch_snapshots/_epoch_00000041.pt' --gpu=3

    Script:
        >>> from clab.fit_harness import *
        >>> harn = urban_fit()
    """
    arch = ub.argval('--arch', default='unet')

    colorspace = ub.argval('--colorspace', default='RGB').upper()

    boundary = True

    datasets = load_task_dataset('urban_mapper_3d', colorspace=colorspace,
                                 arch=arch, boundary=boundary)

    # Make sure we use consistent normalization
    # TODO: give normalization a part of the hashid
    # TODO: save normalization type with the model
    # datasets['train'].center_inputs = datasets['train']._make_normalizer()

    # if ub.argflag('--combine'):
    #     # custom centering from the initialization point I'm going to use
    #     datasets['train'].center_inputs = datasets['train']._custom_urban_mapper_normalizer(
    #         0.3750553785198646, 1.026544662398811, 2.5136079110849674)
    # else:
    #     datasets['train'].center_inputs = datasets['train']._make_normalizer(mode=2)

    datasets['train'].center_inputs = datasets['train']._make_normalizer(
        mode=3)

    # datasets['train'].center_inputs = _custom_urban_mapper_normalizer(0, 1, 2.5)

    # Share the train-split normalizer with the other splits
    datasets['test'].center_inputs = datasets['train'].center_inputs
    datasets['vali'].center_inputs = datasets['train'].center_inputs

    # Ensure normalization is the same for each dataset
    # datasets['train'].augment = True
    datasets['train'].augment = False

    # turn off aux layers
    if ub.argflag('--noaux'):
        for v in datasets.values():
            v.aux_keys = []

    # Batch size is tuned per-arch to fit in GPU memory
    batch_size = 14
    if arch == 'segnet':
        batch_size = 6
    elif arch == 'dense_unet':
        batch_size = 6
        # dense_unet batch memsizes
        # idle = 11 MiB
        # 0 = 438 MiB
        # 3 ~= 5000 MiB
        # 5 = 8280 MiB
        # 6 = 9758 MiB
        # each image adds (1478 - 1568.4) MiB

    n_classes = datasets['train'].n_classes
    n_channels = datasets['train'].n_channels
    class_weights = datasets['train'].class_weights()
    ignore_label = datasets['train'].ignore_label

    print('n_classes = {!r}'.format(n_classes))
    print('n_channels = {!r}'.format(n_channels))
    print('batch_size = {!r}'.format(batch_size))

    # Default criterion / initializer; may be overridden per-arch below
    criterion = (criterions.CrossEntropyLoss2D, {
        'ignore_label': ignore_label,
        'weight': torch.FloatTensor(class_weights),
    })

    initializer = nninit.HeNormal

    print('arch = {!r}'.format(arch))
    dry = ub.argflag('--dry')

    if arch == 'segnet':
        model = (models.SegNet, dict(in_channels=n_channels,
                                     n_classes=n_classes))
        initializer = SegNetVGG
    elif arch == 'linknet':
        model = (models.LinkNet, dict(in_channels=n_channels,
                                      n_classes=n_classes))
    elif arch == 'unet':
        model = (models.UNet, dict(in_channels=n_channels,
                                   n_classes=n_classes,
                                   nonlinearity='leaky_relu'))
    elif arch == 'unet2':
        from clab.live import unet2
        model = (unet2.UNet2, dict(n_alt_classes=3, in_channels=n_channels,
                                   n_classes=n_classes,
                                   nonlinearity='leaky_relu'))
        # unet2 produces two output channels; use the dual-channel criterion
        criterion = (DualChanCE, {
            'ignore_label': ignore_label,
            'weight': torch.FloatTensor(class_weights),
        })
    elif arch == 'dense_unet':
        from clab.live import unet3
        model = (unet3.DenseUNet, dict(n_alt_classes=3,
                                       in_channels=n_channels,
                                       n_classes=n_classes))
    elif arch == 'dense_unet2':
        from clab.live import unet3
        model = (unet3.DenseUNet2, dict(n_alt_classes=3,
                                        in_channels=n_channels,
                                        n_classes=n_classes))
    elif arch == 'dummy':
        model = (models.SSegDummy, dict(in_channels=n_channels,
                                        n_classes=n_classes))
    else:
        raise ValueError('unknown arch')

    xpu = xpu_device.XPU.from_argv()

    hyper = hyperparams.HyperParams(
        model=model,
        optimizer=(
            torch.optim.SGD, {
                # 'weight_decay': .0006,
                'weight_decay': .0005,
                'momentum': 0.99 if arch == 'dense_unet' else .9,
                'nesterov': True,
                'lr': 0.001,
            }),
        criterion=criterion,
        # scheduler=('Exponential', {
        #     'gamma': 0.99,
        #     # 'base_lr': 0.0015,
        #     'base_lr': 0.001,
        #     'stepsize': 2,
        # }),
        scheduler=('ReduceLROnPlateau', {}),
        initializer=initializer,
        other={
            'n_classes': n_classes,
            'n_channels': n_channels,
            'augment': datasets['train'].augment,
            'colorspace': datasets['train'].colorspace,
        })

    harn = fit_harness.FitHarness(
        hyper=hyper, datasets=datasets, xpu=xpu,
        dry=dry,
        batch_size=batch_size,
    )

    if arch == 'segnet':
        @harn.set_batch_runner
        def batch_runner(harn, inputs, labels):
            # segnet's loss is computed against the second label channel
            outputs = harn.model(*inputs)
            loss = harn.criterion(outputs, labels[1])
            return outputs, loss

        # @harn.add_batch_metric_hook
        # def custom_metrics(harn, output, label):
        #     ignore_label = datasets['train'].ignore_label
        #     labels = datasets['train'].task.labels
        #     metrics_dict = metrics._sseg_metrics(output, label[1],
        #                                          labels=labels,
        #                                          ignore_label=ignore_label)
        #     return metrics_dict
    else:
        @harn.set_batch_runner
        def batch_runner(harn, inputs, labels):
            # other archs receive the full label structure
            outputs = harn.model(*inputs)
            loss = harn.criterion(outputs, labels)
            return outputs, loss

        # if datasets['train'].use_aux_diff:
        #     @harn.add_batch_metric_hook
        #     def custom_metrics(harn, output, label):
        #         ignore_label = datasets['train'].ignore_label
        #         labels = datasets['train'].task.labels
        #         metrics_dict = metrics._sseg_metrics(output[1], label[1],
        #                                              labels=labels,
        #                                              ignore_label=ignore_label)
        #         return metrics_dict
        # else:
        #     @harn.add_batch_metric_hook
        #     def custom_metrics(harn, output, label):
        #         ignore_label = datasets['train'].ignore_label
        #         labels = datasets['train'].task.labels
        #         metrics_dict = metrics._sseg_metrics(output, label, labels=labels,
        #                                              ignore_label=ignore_label)
        #         return metrics_dict

    workdir = datasets['train'].task.workdir
    train_dpath = harn.setup_dpath(
        workdir,
        # short=['model', 'criterion'],
        hashed=True)
    print('train_dpath = {!r}'.format(train_dpath))

    harn.run()
    return harn
def train():
    """
    Train a DenseNet classifier on CIFAR.

    The input colorspace / normalization mode is chosen with ``--lab``,
    ``--rgb``, or ``--rgb-dep``. Seeds numpy / torch / random for
    reproducibility, then runs a ``FitHarness`` loop with accuracy metrics.

    Example:
        >>> train()
    """
    import random
    # Fixed seeds for reproducibility (mod 2**32-1 keeps them in range)
    np.random.seed(1031726816 % 4294967295)
    torch.manual_seed(137852547 % 4294967295)
    random.seed(2497950049 % 4294967295)

    xpu = xpu_device.XPU.from_argv()
    print('Chosen xpu = {!r}'.format(xpu))

    # CIFAR-10 (number of classes in the base dataset)
    cifar_num = 10

    if ub.argflag('--lab'):
        datasets = cifar_training_datasets(
            output_colorspace='LAB', norm_mode='independent',
            cifar_num=cifar_num)
    elif ub.argflag('--rgb'):
        datasets = cifar_training_datasets(
            output_colorspace='RGB', norm_mode='independent',
            cifar_num=cifar_num)
    elif ub.argflag('--rgb-dep'):
        datasets = cifar_training_datasets(
            output_colorspace='RGB', norm_mode='dependant',
            cifar_num=cifar_num)
    else:
        raise AssertionError('specify --rgb / --lab')

    import clab.models.densenet

    # batch_size = (128 // 3) * 3
    batch_size = 64

    # initializer_ = (nninit.KaimingNormal, {
    #     'nonlinearity': 'relu',
    # })
    initializer_ = (nninit.LSUV, {})

    hyper = hyperparams.HyperParams(
        model=(
            clab.models.densenet.DenseNet, {
                'cifar': True,
                'block_config': (32, 32, 32),  # 100 layer depth
                'num_classes': datasets['train'].n_classes,
                'drop_rate': float(ub.argval('--drop_rate', default=.2)),
                'groups': 1,
            }),
        optimizer=(
            torch.optim.SGD, {
                # 'weight_decay': .0005,
                'weight_decay': float(ub.argval('--weight_decay',
                                                default=.0005)),
                'momentum': 0.9,
                'nesterov': True,
                'lr': 0.1,
            }),
        scheduler=(torch.optim.lr_scheduler.ReduceLROnPlateau, {
            'factor': .5,
        }),
        initializer=initializer_,
        criterion=(torch.nn.CrossEntropyLoss, {}),
        # Specify anything else that is special about your hyperparams here
        # Especially if you make a custom_batch_runner
        augment=str(datasets['train'].augmenter),
        other=ub.dict_union(
            {
                # TODO: type of augmentation as a parameter dependency
                # 'augmenter': str(datasets['train'].augmenter),
                # 'augment': datasets['train'].augment,
                'batch_size': batch_size,
                'colorspace': datasets['train'].output_colorspace,
                'n_classes': datasets['train'].n_classes,
                # 'center_inputs': datasets['train'].center_inputs,
            }, datasets['train'].center_inputs.__dict__),
    )

    # if ub.argflag('--rgb-indie'):
    #     hyper.other['norm'] = 'dependant'
    hyper.input_ids['train'] = datasets['train'].input_id

    xpu = xpu_device.XPU.cast('auto')
    print('xpu = {}'.format(xpu))

    data_kw = {'batch_size': batch_size}
    if xpu.is_gpu():
        data_kw.update({'num_workers': 8, 'pin_memory': True})

    tags = ['train', 'vali', 'test']

    # Build a loader per split; non-train splits use a smaller batch size
    loaders = ub.odict()
    for tag in tags:
        dset = datasets[tag]
        shuffle = tag == 'train'
        data_kw_ = data_kw.copy()
        if tag != 'train':
            data_kw_['batch_size'] = max(batch_size // 4, 1)
        loader = torch.utils.data.DataLoader(dset, shuffle=shuffle,
                                             **data_kw_)
        loaders[tag] = loader

    harn = fit_harness.FitHarness(
        hyper=hyper, datasets=datasets, xpu=xpu,
        loaders=loaders,
    )
    # harn.monitor = early_stop.EarlyStop(patience=40)
    harn.monitor = monitor.Monitor(min_keys=['loss'],
                                   max_keys=['global_acc', 'class_acc'],
                                   patience=40)

    @harn.set_batch_runner
    def batch_runner(harn, inputs, labels):
        """
        Custom function to compute the output of a batch and its loss.
        """
        output = harn.model(*inputs)
        label = labels[0]
        loss = harn.criterion(output, label)
        outputs = [output]
        return outputs, loss

    task = harn.datasets['train'].task
    all_labels = task.labels
    # ignore_label = datasets['train'].ignore_label
    # from clab import metrics
    from clab.metrics import (confusion_matrix,
                              pixel_accuracy_from_confusion,
                              perclass_accuracy_from_confusion)

    @harn.add_batch_metric_hook
    def custom_metrics(harn, outputs, labels):
        # Global and mean per-class accuracy via a confusion matrix
        label = labels[0]
        output = outputs[0]

        y_pred = output.data.max(dim=1)[1].cpu().numpy()
        y_true = label.data.cpu().numpy()

        cfsn = confusion_matrix(y_pred, y_true, labels=all_labels)

        global_acc = pixel_accuracy_from_confusion(cfsn)  # same as acc
        perclass_acc = perclass_accuracy_from_confusion(cfsn)
        # class_accuracy = perclass_acc.fillna(0).mean()
        class_accuracy = np.nan_to_num(perclass_acc).mean()

        metrics_dict = ub.odict()
        metrics_dict['global_acc'] = global_acc
        metrics_dict['class_acc'] = class_accuracy
        return metrics_dict

    workdir = ub.ensuredir('train_cifar_work')
    harn.setup_dpath(workdir)

    harn.run()