def preprocess_augment(task, modes=None):
    assert task.enable_augment
    if modes is None:
        modes = task.base_modes
    for scene in ub.ProgIter(task.scene_ids, label='preproc augment scene',
                             verbose=3):
        for mode in ub.ProgIter(modes, label=' * mode', verbose=0):
            task.make_augment_scene(mode, scene, rng='determ')

def _cached_class_frequency(dset):
    import ubelt as ub
    import copy
    # Copy the dataset so we can muck with it
    dset_copy = copy.copy(dset)

    dset_copy._build_sliders(input_overlap=0)
    dset_copy.augmenter = None

    cfgstr = '_'.join([dset_copy.sampler.dset.hashid, 'v1'])
    cacher = ub.Cacher('class_freq', cfgstr=cfgstr)
    total_freq = cacher.tryload()
    if total_freq is None:
        total_freq = np.zeros(len(dset_copy.classes), dtype=np.int64)
        if True:
            loader = torch_data.DataLoader(dset_copy, batch_size=16,
                                           num_workers=7, shuffle=False,
                                           pin_memory=True)
            prog = ub.ProgIter(loader, desc='computing (par) class freq')
            for batch in prog:
                class_idxs = batch['class_idxs'].data.numpy()
                item_freq = np.histogram(
                    class_idxs, bins=len(dset_copy.classes))[0]
                total_freq += item_freq
        else:
            prog = ub.ProgIter(range(len(dset_copy)),
                               desc='computing (ser) class freq')
            for index in prog:
                item = dset_copy[index]
                class_idxs = item['class_idxs'].data.numpy()
                item_freq = np.histogram(
                    class_idxs, bins=len(dset_copy.classes))[0]
                total_freq += item_freq
        cacher.save(total_freq)
    return total_freq

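# Hedged sketch (not part of the original code): one common use of the cached
# class frequencies above is to build inverse-frequency class weights for a
# loss function. The helper name and the smoothing constant ``eps`` are
# assumptions for illustration only.
def _class_weights_from_frequency(total_freq, eps=1.0):
    import numpy as np
    # Invert the per-class pixel counts and normalize so weights average to 1
    weights = 1.0 / (total_freq.astype(np.float64) + eps)
    weights = weights * (len(weights) / weights.sum())
    return weights
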
def _devcheck_manage_monitor(workdir, dry=True):
    all_sessions = collect_sessions(workdir)

    # Get all the images in the monitor directories
    # (this is a convention and not something netharn does by default)
    all_files = []
    # factor = 100
    max_keep = 300

    def _choose_action(file_infos):
        import kwarray
        file_infos = kwarray.shuffle(file_infos, rng=0)
        n_keep = max_keep
        # n_keep = (len(file_infos) // factor) + 1
        # n_keep = min(max_keep, n_keep)
        for info in file_infos[:n_keep]:
            info['action'] = 'keep'
        for info in file_infos[n_keep:]:
            info['action'] = 'delete'

    for session in ub.ProgIter(all_sessions, desc='checking monitor files'):
        dpaths = [
            join(session.dpath, 'monitor', 'train', 'batch'),
            join(session.dpath, 'monitor', 'vali', 'batch'),
            join(session.dpath, 'monitor', 'train'),
            join(session.dpath, 'monitor', 'vali'),
        ]
        exts = ['*.jpg', '*.png']
        for dpath in dpaths:
            for ext in exts:
                fpaths = list(glob.glob(join(dpath, ext)))
                file_infos = [{'size': os.stat(p).st_size, 'fpath': p}
                              for p in fpaths]
                _choose_action(file_infos)
                all_files.extend(file_infos)

    grouped_actions = ub.group_items(all_files, lambda x: x['action'])
    for key, group in grouped_actions.items():
        size = byte_str(sum([s['size'] for s in group]))
        print('{:>4} images: {:>4}, size={}'.format(
            key.capitalize(), len(group), size))

    if dry:
        print('Dry run')
    else:
        delete = grouped_actions.get('delete', [])
        delete_fpaths = [item['fpath'] for item in delete]
        for p in ub.ProgIter(delete_fpaths, desc='deleting'):
            ub.delete(p)

def parallel_refine(cls, pfiles, step_idx, mode='serial', max_workers=6,
                    verbose=0):
    """
    Refines the hashids of multiple files

    Ignore:
        >>> # Create a directory filled with random files
        >>> #fpaths = _demodata_files(
        >>> #    num_files=1, size_pool=[30], pool_size=2)
        >>> fpaths = _demodata_files()
        >>> pfiles = [ProgressiveFile(f) for f in fpaths]
        >>> with ub.Timer('step'):
        >>>     step_idx = 2
        >>>     ProgressiveFile.parallel_refine(pfiles, step_idx)
    """
    from kwcoco.util.util_futures import JobPool  # NOQA
    # jobs = JobPool(mode='thread', max_workers=2)
    jobs = JobPool(mode=mode, max_workers=max_workers)
    for pfile in ub.ProgIter(pfiles, desc='submit hash jobs',
                             verbose=verbose):
        # only submit the job if we need to
        parts = pfile._parts
        if pfile.can_refine and (step_idx == 'next' or
                                 len(parts) <= step_idx):
            hasher = pfile._hasher
            fpath = pfile.fpath
            pos = pfile._pos
            size = pfile._size
            curr_blocks = pfile._curr_blocks
            job = jobs.submit(progressive_refine_worker, hasher, fpath,
                              parts, pos, curr_blocks, step_idx, size)
            job.pfile = pfile

    for job in ub.ProgIter(jobs.as_completed(), total=len(jobs),
                           desc='collect hash jobs', verbose=verbose):
        pfile = job.pfile
        result = job.result()
        if result is not None:
            hasher, next_parts, pos, curr_blocks, size = result
            pfile._hasher = hasher
            pfile._parts.extend(next_parts)
            pfile._pos = pos
            pfile._size = size
            pfile._curr_blocks = curr_blocks

def test_tqdm_compatibility():
    prog = ProgIter(range(20), total=20, miniters=17, show_times=False)
    assert prog.pos == 0
    assert prog.freq == 17
    for _ in prog:
        pass

    import ubelt as ub
    with ub.CaptureStdout() as cap:
        ProgIter.write('foo')
    assert cap.text.strip() == 'foo'

    with ub.CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=False)
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with ub.CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=True)
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with ub.CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description_str('new desc')
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with ub.CaptureStdout() as cap:
        prog = ub.ProgIter(show_times=False)
        prog.set_postfix({'foo': 'bar'}, baz='biz', x=object(), y=2)
        prog.begin()
    assert prog.length is None
    assert 'foo=bar' in cap.text.strip()
    assert 'baz=biz' in cap.text.strip()
    assert 'y=2' in cap.text.strip()
    assert 'x=<object' in cap.text.strip()

    with ub.CaptureStdout() as cap:
        prog = ub.ProgIter(show_times=False)
        prog.set_postfix_str('bar baz', refresh=False)
    assert 'bar baz' not in cap.text.strip()

def _demodata_files(dpath=None, num_files=10, pool_size=3, size_pool=None):
    def _random_data(rng, num):
        return ''.join([rng.choice(string.hexdigits) for _ in range(num)])

    def _write_random_file(dpath, part_pool, size_pool, rng):
        namesize = 16
        # Choose 1, 4, or 16 parts of data
        num_parts = rng.choice(size_pool)
        chunks = [rng.choice(part_pool) for _ in range(num_parts)]
        contents = ''.join(chunks)
        fname_noext = _random_data(rng, namesize)
        ext = ub.hash_data(contents)[0:4]
        fname = '{}.{}'.format(fname_noext, ext)
        fpath = join(dpath, fname)
        with open(fpath, 'w') as file:
            file.write(contents)
        return fpath

    if size_pool is None:
        size_pool = [1, 4, 16]

    if dpath is None:
        # Only fall back to the app cache when no directory is given
        dpath = ub.ensure_app_cache_dir('pfile/random')

    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = 65536
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]

    # Write ``num_files`` random files that have a reasonable collision
    # probability
    fpaths = [
        _write_random_file(dpath, part_pool, size_pool, rng)
        for _ in ub.ProgIter(range(num_files), desc='write files')
    ]
    for fpath in fpaths:
        assert exists(fpath)
    return fpaths

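# Hedged usage sketch (added for illustration; not from the original module):
# generate a handful of demo files and print a short hash prefix for each.
# The helper name is hypothetical; ub.hash_file is a real ubelt function.
def _demo_demodata_files():
    import ubelt as ub
    fpaths = _demodata_files(num_files=5)
    for fpath in fpaths:
        # The 8-character prefix is arbitrary, just enough to eyeball collisions
        print(ub.hash_file(fpath)[0:8], fpath)
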
def rebase_groundtruth(task, fullres, force=False):
    """
    Inplace / lazy modification of groundtruth labels (hacky).
    """
    # Remap the original three labels to [0, 1, 2]
    orig_labels = [2, 6, 65]
    mapping = np.full(max(orig_labels) + 1, fill_value=-1)
    mapping[orig_labels] = np.arange(len(orig_labels))

    datadir = ub.ensuredir((task.workdir, 'data'))
    dpath = ub.ensuredir((datadir, 'gt', 'full'))

    new_gt_paths = []
    for ix in ub.ProgIter(range(len(fullres.paths['gt'])), label='rebase'):
        path = fullres.paths['gt'][ix]
        name = fullres.dump_im_names[ix]
        out_dpath = join(dpath, name)
        # Hacky cache
        if force or not exists(out_dpath):
            in_data = imutil.imread(path)
            out_data = mapping[in_data]
            imutil.imwrite(out_dpath, out_data)
        new_gt_paths.append(out_dpath)

    fullres.paths['gt'] = new_gt_paths
    return fullres

def find_connecting_edges(infr):
    """
    Searches for a small set of edges, which if reviewed as positive would
    ensure that each PCC is k-connected. Note that in some cases this is
    not possible.
    """
    label = 'name_label'
    node_to_label = infr.get_node_attrs(label)
    label_to_nodes = ub.group_items(node_to_label.keys(),
                                    node_to_label.values())

    # k = infr.params['redun.pos']
    k = 1
    new_edges = []
    prog = ub.ProgIter(list(label_to_nodes.keys()),
                       desc='finding connecting edges',
                       enabled=infr.verbose > 0)
    for nid in prog:
        nodes = set(label_to_nodes[nid])
        G = infr.pos_graph.subgraph(nodes, dynamic=False)
        impossible = nxu.edges_inside(infr.neg_graph, nodes)
        impossible |= nxu.edges_inside(infr.incomp_graph, nodes)

        candidates = set(nx.complement(G).edges())
        candidates.difference_update(impossible)

        aug_edges = nxu.k_edge_augmentation(G, k=k, avail=candidates)
        new_edges += aug_edges
    prog.ensure_newline()
    return new_edges

def stage_files(self):
    import xdev
    self.staging_infos = []
    for info in ub.ProgIter(self.template_infos, desc='staging'):
        tags = info.get('tags', None)
        if tags:
            tags = set(tags.split(','))
            if not set(self.config['tags']).issuperset(tags):
                continue

        stage_fpath = self.staging_dpath / info['fname']
        if info.get('dynamic', ''):
            text = getattr(self, info.get('dynamic', ''))()
            stage_fpath.write_text(text)
        else:
            raw_fpath = self.template_dpath / info['fname']
            stage_fpath.parent.ensuredir()
            shutil.copy2(raw_fpath, stage_fpath)
            if info['template']:
                xdev.sedfile(stage_fpath, 'PYPKG', self.repo_name, verbose=0)

        info['stage_fpath'] = stage_fpath
        info['repo_fpath'] = self.repo_dpath / info['fname']
        self.staging_infos.append(info)

    if 1:
        import pandas as pd
        df = pd.DataFrame(self.staging_infos)
        print(df)

def run_benchmark():
    import ubelt as ub
    data_dim = 128
    num_dpts = 1000000
    num_qpts = 25000
    num_neighbs = 5
    random_seed = 42
    rng = np.random.RandomState(0)

    dataset = rand_vecs(num_dpts, data_dim, rng)
    testset = rand_vecs(num_qpts, data_dim, rng)

    # Build deterministic flann object
    flann = pyflann.FLANN()

    print('building dataset for %d vecs' % (len(dataset)))
    with ub.Timer(label='building kdtrees', verbose=True) as t:
        params = flann.build_index(
            dataset,
            algorithm='kdtree',
            trees=6,
            random_seed=random_seed,
            cores=6,
        )
    print(params)

    qvec_chunks = list(ub.chunks(testset, 1000))
    times = []
    for qvecs in ub.ProgIter(qvec_chunks, label='find nn'):
        with ub.Timer(verbose=0) as t:
            # Query the current chunk of query vectors
            _ = flann.nn_index(qvecs, num_neighbs)  # NOQA
        times.append(t.ellapsed)
    print(np.mean(times))

def parse_mscoco():
    # Test that our implementation can handle the real mscoco data
    root = ub.expandpath('~/data/standard_datasets/mscoco/')
    fpath = join(root, 'annotations/instances_val2014.json')
    img_root = normpath(ub.ensuredir((root, 'images', 'val2014')))
    # fpath = join(root, 'annotations/stuff_val2017.json')
    # img_root = normpath(ub.ensuredir((root, 'images', 'val2017')))

    import ujson
    dataset = ujson.load(open(fpath, 'rb'))

    import ndsampler
    dset = ndsampler.CocoDataset(dataset)
    dset.img_root = img_root

    gid_iter = iter(dset.imgs.keys())
    gid = ub.peek(gid_iter)

    for gid in ub.ProgIter(gid_iter):
        img = dset.imgs[gid]
        ub.grabdata(img['coco_url'], dpath=img_root, verbose=0)
        anns = [dset.anns[aid] for aid in dset.gid_to_aids[gid]]
        dset.show_image(gid=gid)

    ann = anns[0]
    segmentation = ann['segmentation']

    from PIL import Image
    gpath = join(dset.img_root, img['file_name'])
    with Image.open(gpath) as pil_img:
        np_img = np.array(pil_img)

def dump_selection(dset, gid_list):
    from matplotlib import pyplot as plt
    for gid in ub.ProgIter(gid_list, verbose=3):
        fig = plt.figure(6)
        fig.clf()
        dset.show_annotation(gid=gid)
        name = os.path.basename(
            os.path.dirname(dset.imgs[gid]['file_name']))
        ax = plt.gca()
        plt.gca().set_title(name)
        ax.set_xticks([])
        ax.set_yticks([])
        plt.gca().grid('off')
        fig.canvas.draw()

        dpi = 96
        fig.set_dpi(dpi)
        fig.set_size_inches(1920 / dpi, 1080 / dpi)
        img = nh.util.mplutil.render_figure_to_image(fig, dpi=dpi)
        # print('img = {!r}'.format(img.shape))

        if dset.tag:
            out_fname = dset.tag + '_' + '_'.join(
                dset.imgs[gid]['file_name'].split('/')[-2:])
        else:
            out_fname = '_'.join(
                dset.imgs[gid]['file_name'].split('/')[-2:])
        fpath = join(output_dpath, out_fname)
        print('fpath = {!r}'.format(fpath))
        nh.util.imwrite(fpath, img)

def _configure(self):
    logger.debug(' ----- ' + self.__class__.__name__ + ' configure')
    config = tmp_smart_cast_config(self)
    logger.info('triangulator config = {}'.format(ub.repr2(config, nl=2)))

    self.measurement_file = config.pop('measurement_file')
    self.calibration_file = config.pop('calibration_file')
    self.triangulator = ctalgo.StereoLengthMeasurments(**config)

    # Load camera calibration data here.
    if not os.path.exists(self.calibration_file):
        raise KeyError('must specify a valid camera calibration path')
    self.cal = ctalgo.StereoCalibration.from_file(self.calibration_file)
    logger.info('self.cal = {!r}'.format(self.cal))

    self.headers = ['current_frame', 'fishlen', 'range', 'error', 'dz',
                    'box_pts1', 'box_pts2']

    if self.measurement_file:
        self.output_file = open(self.measurement_file, 'w')
        self.output_file.write(','.join(self.headers) + '\n')
        self.output_file.close()
        self.output_file = open(self.measurement_file, 'a')

    self._base_configure()

    self.prog = ub.ProgIter(verbose=3)
    self.prog.begin()
    self.frame_id = 0

def predict(harn, have_true=True):
    # Import the right version of caffe
    print(ub.color_text('[segnet] begin prediction', 'blue'))

    harn.prepare_test_model(force=False)
    harn.test.make_dumpsafe_names()

    net = harn.make_net()

    assert harn.test_batch_size == 1
    # have_true = bool(harn.test.gt_paths)

    if not have_true:
        def load_batch_data(bx):
            """
            bx = 0
            """
            offset = bx * harn.test_batch_size
            blob_data = net.blobs['data'].data
            for jx in range(harn.test_batch_size):
                # push data into the network
                ix = offset + jx
                im_hwc = util.imread(harn.test.im_paths[ix])
                im_hwc = im_hwc[:, :, ::-1]
                im_chw = np.transpose(im_hwc, (2, 0, 1)).astype(np.float32)
                blob_data[jx, :, :, :] = im_chw

    n_iter = int(harn.test.n_input / harn.test_batch_size)
    for bx in ub.ProgIter(range(n_iter), label='forward batch', freq=1):
        if not have_true:
            load_batch_data(bx)
        net.forward()
        blobs = net.blobs
        harn.dump_predictions(blobs, bx, have_true=have_true)

def find_pos_redun_candidate_edges(infr, k=None, verbose=False):
    """
    Searches for augmenting edges that would make PCCs k-positive redundant

    CommandLine:
        python -m graphid.core.mixin_dynamic _RedundancyAugmentation.find_pos_redun_candidate_edges

    Doctest:
        >>> from graphid import demo
        >>> infr = demo.demodata_infr(ccs=[(1, 2, 3, 4, 5), (7, 8, 9, 10)], pos_redun=1)
        >>> infr.add_feedback((2, 5), POSTV)
        >>> infr.add_feedback((1, 5), INCMP)
        >>> infr.params['redun.pos'] = 2
        >>> candidate_edges = list(infr.find_pos_redun_candidate_edges())
        >>> result = ('candidate_edges = ' + ub.repr2(candidate_edges, nl=0))
        >>> print(result)
        candidate_edges = [(1, 4), (3, 5), (7, 10)]
    """
    # Add random edges between existing non-redundant PCCs
    if k is None:
        k = infr.params['redun.pos']
    # infr.find_non_pos_redundant_pccs(k=k, relax=True)
    pcc_gen = list(infr.positive_components())
    prog = ub.ProgIter(pcc_gen, enabled=verbose, freq=1, adjust=False)
    for pcc in prog:
        if not infr.is_pos_redundant(pcc, k=k, relax=True,
                                     assume_connected=True):
            for edge in infr.find_pos_augment_edges(pcc, k=k):
                yield nxu.e_(*edge)

def _configure(self):
    logger.debug(' ----- ' + self.__class__.__name__ + ' configure')
    config = tmp_smart_cast_config(self)
    logger.info('triangulator config = {}'.format(ub.repr2(config, nl=2)))

    output_fpath = config.pop('output_fpath')
    cal_fpath = config.pop('cal_fpath')
    self.triangulator = ctalgo.FishStereoMeasurments(**config)

    # Camera loading process is not working correctly.
    # Load camera calibration data here for now.
    #
    if not os.path.exists(cal_fpath):
        raise KeyError('must specify a valid camera calibration path')
    self.cal = ctalgo.StereoCalibration.from_file(cal_fpath)
    logger.info('self.cal = {!r}'.format(self.cal))

    self.headers = ['current_frame', 'fishlen', 'range', 'error', 'dz',
                    'box_pts1', 'box_pts2']

    self.output_file = open(output_fpath, 'w')
    self.output_file.write(','.join(self.headers) + '\n')
    self.output_file.close()
    self.output_file = open(output_fpath, 'a')

    self._base_configure()

    self.prog = ub.ProgIter(verbose=3)
    self.prog.begin()

def batch_convert_to_cog(src_fpaths, dst_fpaths, mode='process',
                         max_workers=0, cog_config=None):
    """
    Converts many input images to COGs and verifies that the outputs are
    correct

    Args:
        src_fpaths (List[str]): source image filepaths
        dst_fpaths (List[str]): corresponding destination image filepaths
        mode (str, default='process'): either process, thread, or serial
        max_workers (int, default=0): number of processes / threads to use
        cog_config (dict): config options for COG files (e.g. compress,
            blocksize, overviews, etc).
    """
    if cog_config is None:
        cog_config = {
            'compress': 'LZW',
            'blocksize': 256,
        }
    from ndsampler.utils import util_futures
    jobs = util_futures.JobPool(mode, max_workers=max_workers)
    for src_fpath, dst_fpath in zip(src_fpaths, dst_fpaths):
        jobs.submit(_convert_to_cog_worker, src_fpath, dst_fpath,
                    cog_config=cog_config)
    for job in ub.ProgIter(jobs.as_completed(), total=len(jobs),
                           desc='converting to cog'):
        job.result()

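# Hedged usage sketch (added; the directory paths below are placeholders, not
# from the original code): convert a folder of GeoTIFFs to COGs with a thread
# pool using the function above.
def _demo_batch_convert_to_cog():
    import glob
    from os.path import join, basename
    src_fpaths = sorted(glob.glob('/path/to/src_tifs/*.tif'))
    dst_fpaths = [join('/path/to/cogs', basename(p)) for p in src_fpaths]
    batch_convert_to_cog(src_fpaths, dst_fpaths, mode='thread', max_workers=4,
                         cog_config={'compress': 'DEFLATE', 'blocksize': 256})
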
def make_pycocotools_compliant(fpath):
    import kwcoco
    import kwimage
    import ubelt as ub

    print('Reading fpath = {!r}'.format(fpath))
    dset = kwcoco.CocoDataset(fpath)

    dset._ensure_imgsize(workers=8)

    for ann in ub.ProgIter(dset.dataset['annotations'], desc='update anns'):
        if 'iscrowd' not in ann:
            ann['iscrowd'] = False

        if 'ignore' not in ann:
            ann['ignore'] = ann.get('weight', 1.0) < .5

        if 'area' not in ann:
            # Use segmentation if available
            if 'segmentation' in ann:
                poly = kwimage.Polygon.from_coco(ann['segmentation'][0])
                ann['area'] = float(poly.to_shapely().area)
            else:
                x, y, w, h = ann['bbox']
                ann['area'] = w * h

    dset.dump(dset.fpath, newlines=True)

def _query_sentinel2_with_csv(collection_file, cc_limit, date_start, date_end,
                              tile, latest=False):
    cc_values = []
    all_urls = []
    all_acqdates = []
    with open(collection_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in ubelt.ProgIter(reader, desc='searching S2'):
            year_acq = int(row['SENSING_TIME'][0:4])
            month_acq = int(row['SENSING_TIME'][5:7])
            day_acq = int(row['SENSING_TIME'][8:10])
            acqdate = datetime.datetime(year_acq, month_acq, day_acq)
            if row['MGRS_TILE'] == tile and float(row['CLOUD_COVER']) <= cc_limit \
                    and date_start < acqdate < date_end:
                all_urls.append(row['BASE_URL'])
                cc_values.append(float(row['CLOUD_COVER']))
                all_acqdates.append(acqdate)

    if latest and all_urls:
        return [sort_url_list(cc_values, all_acqdates, all_urls).pop()]
    return sort_url_list(cc_values, all_acqdates, all_urls)

def main(cls, cmdline=True, **kw):
    """
    Example:
        >>> from kwcoco.cli.coco_validate import *  # NOQA
        >>> kw = {'src': 'special:shapes8'}
        >>> cmdline = False
        >>> cls = CocoValidateCLI
        >>> cls.main(cmdline, **kw)
    """
    import kwcoco
    config = cls.CLIConfig(kw, cmdline=cmdline)
    print('config = {}'.format(ub.repr2(dict(config), nl=1)))

    if config['src'] is None:
        raise Exception('must specify source: {}'.format(config['src']))

    if isinstance(config['src'], str):
        fpaths = [config['src']]
    else:
        fpaths = config['src']

    if config['dst']:
        if len(fpaths) != 1:
            raise Exception('can only specify 1 dataset in fix mode')

    fix_strat = set()
    if config['fix'] is not None:
        fix_strat = {c.lower() for c in config['fix'].split('+')}

    for fpath in ub.ProgIter(fpaths, desc='reading datasets', verbose=1):
        print('reading fpath = {!r}'.format(fpath))
        dset = kwcoco.CocoDataset.coerce(fpath)

        config_ = ub.dict_diff(config, {'src', 'dst', 'fix'})
        result = dset.validate(**config_)

        if 'missing' in result:
            if 'remove' in fix_strat:
                missing = result['missing']
                bad_gids = [t[2] for t in missing]
                status = dset.remove_images(bad_gids, verbose=1)
                print('status = {}'.format(ub.repr2(status, nl=1)))

        if 'corrupted' in result:
            if 'remove' in fix_strat:
                corrupted = result['corrupted']
                bad_gids = [t[2] for t in corrupted]
                status = dset.remove_images(bad_gids, verbose=1)
                print('status = {}'.format(ub.repr2(status, nl=1)))

        if config['dst']:
            if len(fpaths) != 1:
                raise Exception('can only specify 1 dataset in fix mode')
            dset.dump(config['dst'], newlines=True)

        errors = result['errors']
        if errors:
            print('result = {}'.format(ub.repr2(result, nl=-1)))
            raise Exception('\n'.join(errors))

def run_epoch(harn, loader, tag, learn=False):
    # Use exponentially weighted or windowed moving averages across epochs
    run_metrics = harn._run_metrics[tag]
    # Use simple moving average within an epoch
    batch_metrics = metrics.CumMovingAve()

    # train batch
    if not harn.dry:
        # Flag if model is training (influences batch-norm / dropout)
        if harn.model.training != learn or learn:
            harn.model.train(learn)

    display_interval = harn.intervals['display_' + tag]

    prog = ub.ProgIter(label=tag, length=len(loader), verbose=1,
                       clearline=True)
    with prog:
        for bx, input_batch in enumerate(loader):
            iter_idx = (harn.epoch * len(loader) + bx)
            input_batch = harn.xpu.to_xpu_var(*input_batch)

            # Core learning / backprop
            *inputs, label = input_batch
            output, loss = harn.run_batch(inputs, label, learn=learn)

            # Measure train accuracy and other informative metrics
            cur_metrics = harn._call_metric_hooks(output, label, loss)

            if 1:
                harn._tensorboard_extra(inputs, output, label, tag,
                                        iter_idx, loader)

            # Accumulate measures
            batch_metrics.update(cur_metrics)
            run_metrics.update(cur_metrics)

            # display_train training info
            if (bx + 1) % display_interval == 0:
                ave_metrics = run_metrics.average()
                msg = harn.batch_msg({'loss': ave_metrics['loss']},
                                     loader.batch_size)
                prog.set_extra(msg)

                for key, value in ave_metrics.items():
                    # harn.log_value(tag + ' ' + key, value, iter_idx)
                    # TODO: use this one:
                    harn.log_value(tag + ' iter ' + key, value, iter_idx)

                prog.step(harn.intervals['display_' + tag])

    # Record a true average for the entire batch
    final_metrics = batch_metrics.average()
    for key, value in final_metrics.items():
        harn.log_value(tag + ' epoch ' + key, value, harn.epoch)

def missing_images(dset):
    import os
    bad_paths = []
    for index in ub.ProgIter(range(len(dset.dataset['images']))):
        img = dset.dataset['images'][index]
        gpath = join(dset.img_root, img['file_name'])
        if not os.path.exists(gpath):
            bad_paths.append((index, gpath))
    return bad_paths

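# Hedged usage sketch (added for illustration): report images referenced by a
# COCO-style dataset that are missing on disk. The CocoDataset constructor is
# an assumption about the surrounding project (kwcoco / ndsampler style).
def _demo_missing_images(fpath):
    import kwcoco
    dset = kwcoco.CocoDataset(fpath)
    for index, gpath in missing_images(dset):
        print('missing image at index {}: {}'.format(index, gpath))
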
def _nh_data_nh_map(harn, num=10):
    with torch.no_grad():
        postprocess = harn.model.module.postprocess
        # postprocess.conf_thresh = 0.001
        # postprocess.nms_thresh = 0.5
        batch_confusions = []
        moving_ave = nh.util.util_averages.CumMovingAve()
        loader = harn.loaders['test']
        prog = ub.ProgIter(iter(loader), desc='')
        for bx, batch in enumerate(prog):
            inputs, labels = harn.prepare_batch(batch)
            inp_size = np.array(inputs.shape[-2:][::-1])
            outputs = harn.model(inputs)

            loss = harn.criterion(outputs, labels['targets'],
                                  gt_weights=labels['gt_weights'],
                                  seen=1000000000)
            moving_ave.update(ub.odict([
                ('loss', float(loss.sum())),
                ('coord', harn.criterion.loss_coord),
                ('conf', harn.criterion.loss_conf),
                ('cls', harn.criterion.loss_cls),
            ]))

            average_losses = moving_ave.average()
            desc = ub.repr2(average_losses, nl=0, precision=2, si=True)
            prog.set_description(desc, refresh=False)

            postout = postprocess(outputs)
            for y in harn._measure_confusion(postout, labels, inp_size):
                batch_confusions.append(y)

            # batch_output.append((outputs.cpu().data.numpy().copy(), inp_size))
            # batch_labels.append([x.cpu().data.numpy().copy() for x in labels])

            if num is not None and bx >= num:
                break

        average_losses = moving_ave.average()
        print('average_losses {}'.format(ub.repr2(average_losses)))

        if False:
            from netharn.util import mplutil
            mplutil.qtensure()  # xdoc: +SKIP
            harn.visualize_prediction(batch, outputs, postout, thresh=.1)

        y = pd.concat([pd.DataFrame(c) for c in batch_confusions])
        precision, recall, ap = nh.metrics.detections._multiclass_ap(y)

        ln_test = ub.import_module_from_path(
            ub.truepath('~/code/lightnet/examples/yolo-voc/test.py'))
        num_classes = len(ln_test.LABELS)
        cls_labels = list(range(num_classes))

        aps = nh.metrics.ave_precisions(y, cls_labels, use_07_metric=True)
        aps = aps.rename(dict(zip(cls_labels, ln_test.LABELS)), axis=0)
        # return ap
        return ap, aps

def draw_data_overlay(task, sl=None):
    """
    >>> from clab.tasks import *
    >>> import clab
    >>> task = DivaV1(clean=2)
    >>> arch = 'segnet_proper'
    >>> # Use external dataset to increase the amount of training data
    >>> tutorial_dir = './SegNet-Tutorial'
    >>> task.extend_data_from(clab.tasks.CamVid(tutorial_dir))
    >>> task.draw_data_overlay()
    """
    keys = task._preprocessing_keys()
    scenes = task.scene_ids[:]
    keys = keys + ['extern']
    for key in ub.ProgIter(keys, label='overlay', verbose=3):
        scene_overlay_dir = task.datasubdir('overlay', key)

        if key == 'extern':
            # HACK
            im_paths = task.extern_train_im_paths
            gt_paths = task.extern_train_gt_paths
        else:
            im_paths, gt_paths = task._scene_data_subset(scenes, [key])
            gt_paths = fnameutil.align_paths(im_paths, gt_paths)

        overlay_fnames = list(fnameutil.dumpsafe(im_paths))

        if sl is not None:
            im_paths = im_paths[sl]
            gt_paths = gt_paths[sl]
            overlay_fnames = overlay_fnames[sl]

        prog = ub.ProgIter(zip(im_paths, gt_paths, overlay_fnames),
                           length=len(im_paths),
                           label='overlay key={}'.format(key))
        for impath, gtpath, safename in prog:
            # Make a nice visualization
            fpath = join(scene_overlay_dir, safename)
            gt_img = cv2.imread(gtpath, cv2.IMREAD_UNCHANGED)
            im_img = cv2.imread(impath, cv2.IMREAD_UNCHANGED)
            gt_color = task.colorize(gt_img)
            gt_overlay = imutil.overlay_colorized(gt_color, im_img)
            cv2.imwrite(fpath, gt_overlay)

def _ensure_imgsize(self):
    from PIL import Image
    for img in ub.ProgIter(list(self.imgs.values())):
        gpath = join(self.img_root, img['file_name'])
        if 'width' not in img:
            pil_img = Image.open(gpath)
            w, h = pil_img.size
            pil_img.close()
            img['width'] = w
            img['height'] = h

def _coco_to_dets(coco_dset, desc=''):
    for gid in ub.ProgIter(gids, desc=desc, verbose=verbose):
        img = coco_dset.imgs[gid]
        gid = img['id']
        imgname = img['file_name']
        aids = coco_dset.gid_to_aids[gid]
        annots = [coco_dset.anns[aid] for aid in aids]
        dets = kwimage.Detections.from_coco_annots(annots, dset=coco_dset,
                                                   classes=classes)
        yield dets, imgname, gid

def _nh_loop(harn):
    # Reset
    harn.current_tag = tag = 'test'
    dmet = harn.dmets[tag]
    dmet.pred.remove_all_annotations()
    dmet.true.remove_all_annotations()
    dmet.true._build_index()
    dmet.pred._build_index()

    moving_ave = nh.util.util_averages.CumMovingAve()
    loader = harn.loaders[tag]
    loader.num_workers = 4
    prog = ub.ProgIter(iter(loader), desc='')
    with torch.no_grad():
        for bx, batch in enumerate(prog):
            inputs, labels = harn.prepare_batch(batch)
            outputs = harn.model(inputs)
            loss = harn.criterion(outputs, labels['targets'],
                                  gt_weights=labels['gt_weights'],
                                  seen=1000000000)
            moving_ave.update(ub.odict([
                ('loss', float(loss.sum())),
                ('coord', harn.criterion.loss_coord),
                ('conf', harn.criterion.loss_conf),
                ('cls', harn.criterion.loss_cls),
            ]))

            average_losses = moving_ave.average()
            desc = ub.repr2(average_losses, nl=0, precision=2, si=True)
            prog.set_description(desc, refresh=False)

            postout = harn.model.module.postprocess(outputs, nms_mode=2)

            inputs, labels = batch
            inp_size = np.array(inputs.shape[-2:][::-1])

            pred_anns = list(harn._postout_to_pred_ann(
                inp_size, labels, postout,
                _aidbase=len(dmet.pred.dataset['annotations']) + 1
            ))
            dmet.pred.add_annotations(pred_anns)

            true_anns = list(harn._labels_to_true_ann(
                inp_size, labels,
                _aidbase=len(dmet.true.dataset['annotations']) + 1
            ))
            dmet.true.add_annotations(true_anns)

    average_losses = moving_ave.average()
    print('average_losses {}'.format(ub.repr2(average_losses)))

    print('netharn voc_mAP = {}'.format(dmet.score_voc()['mAP']))
    print('netharn nh_mAP = {}'.format(dmet.score_netharn()['mAP']))

    # Reset
    dmet.pred.remove_all_annotations()
    dmet.true.remove_all_annotations()

def count_ubelt_usage():
    import ubelt as ub
    import glob
    from os.path import join
    names = [
        'xdoctest', 'netharn', 'xdev', 'xinspect', 'ndsampler', 'kwil',
        'kwarray', 'kwimage', 'kwplot', 'scriptconfig',
    ]

    all_fpaths = []
    for name in names:
        repo_fpath = ub.expandpath(join('~/code', name))
        fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
        for fpath in fpaths:
            all_fpaths.append((name, fpath))

    import re
    pat = re.compile(r'\bub\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        text = open(fpath, 'r').read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in ub.__all__:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in ub.__all__:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(
            sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])
    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    print(ub.repr2(pkg_to_hist, nl=2))
    print(ub.repr2(usage, nl=1))

def as_completed(self, timeout=None, desc=None, progkw=None):
    """
    Generates completed jobs in an arbitrary order

    Args:
        timeout (float | None):
            Specify the maximum number of seconds to wait for a job.

        desc (str | None):
            if specified, reports progress with a
            :class:`ubelt.progiter.ProgIter` object.

        progkw (dict | None):
            extra keyword arguments to :class:`ubelt.progiter.ProgIter`.

    Yields:
        concurrent.futures.Future:
            The completed future object containing the results of a job.

    CommandLine:
        xdoctest -m ubelt.util_futures JobPool.as_completed

    Example:
        >>> import ubelt as ub
        >>> pool = ub.JobPool('thread', max_workers=8)
        >>> text = ub.paragraph(
        ...     '''
        ...     UDP is a cool protocol, check out the wiki:
        ...
        ...     UDP-based Data Transfer Protocol (UDT), is a high-performance
        ...     data transfer protocol designed for transferring large
        ...     volumetric datasets over high-speed wide area networks. Such
        ...     settings are typically disadvantageous for the more common TCP
        ...     protocol.
        ...     ''')
        >>> for word in text.split(' '):
        ...     pool.submit(print, word)
        >>> for _ in pool.as_completed():
        ...     pass
        >>> pool.shutdown()
    """
    import ubelt as ub
    job_iter = as_completed(self.jobs)
    if desc is not None:
        if progkw is None:
            progkw = {}
        job_iter = ub.ProgIter(
            job_iter, desc=desc, total=len(self.jobs), **progkw)
    for job in job_iter:
        yield job

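# Hedged usage sketch (added; the doctest above does not exercise the ``desc``
# argument): passing ``desc`` wraps the completed jobs in a ProgIter progress
# bar, and ``progkw`` forwards extra options to it. The helper name is
# hypothetical.
def _demo_as_completed_progress():
    import ubelt as ub
    pool = ub.JobPool('thread', max_workers=4)
    for i in range(10):
        pool.submit(sum, range(i))
    results = [job.result()
               for job in pool.as_completed(desc='collect jobs',
                                            progkw={'verbose': 1})]
    pool.shutdown()
    return results
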
def make_augment_scene(task, mode, scene, rng=None):
    """
    Augments data in a scene of a specific "mode"

    mode = 'part-scale1'
    scene = '0000'
    rng = 'determ'
    gtdir = task.datasubdir('gtpart', scene)
    imdir = task.datasubdir('impart', scene)
    """
    assert task.enable_augment

    if rng == 'determ':
        # Make a deterministic seed based on the scene and mode
        seed = int(hashutil.hash_data([scene, mode], alphabet='hex'), 16)
        seed = seed % (2 ** 32 - 1)
        rng = np.random.RandomState(seed)

    auger = augment.SSegAugmentor(rng=rng, ignore_label=task.ignore_label)
    auger.params = task.aug_params

    # rng = np.random.RandomState(0)
    imdir = task.datasubdir('im' + mode, scene)
    gtdir = task.datasubdir('gt' + mode, scene)

    im_fpaths = sorted(glob.glob(join(imdir, '*.png')))
    gt_fpaths = sorted(glob.glob(join(gtdir, '*.png')))

    # Define the output path for the augmentation of this mode
    key = mode + '-aug'
    scene_imout_dpath = task.datasubdir('im' + key, scene)
    scene_gtout_dpath = task.datasubdir('gt' + key, scene)

    # Start fresh. Remove existing files
    ub.delete(scene_gtout_dpath, verbose=False)
    ub.delete(scene_imout_dpath, verbose=False)
    ub.ensuredir(scene_gtout_dpath)
    ub.ensuredir(scene_imout_dpath)

    for impath, gtpath in ub.ProgIter(
            list(zip(im_fpaths, gt_fpaths)),
            label=' * augment mode={}'.format(mode)):
        fname_we = splitext(basename(impath))[0]
        im = cv2.imread(impath, flags=cv2.IMREAD_UNCHANGED)
        gt = cv2.imread(gtpath, flags=cv2.IMREAD_UNCHANGED)

        aug_gen = auger.augment(im, gt)
        for augx, aug_data in enumerate(aug_gen):
            (im_aug, gt_aug) = aug_data[0:2]

            fname = '{}_aug{:0=4d}.png'.format(fname_we, augx)
            cv2.imwrite(join(scene_imout_dpath, fname), im_aug)
            cv2.imwrite(join(scene_gtout_dpath, fname), gt_aug)

    return scene_imout_dpath, scene_gtout_dpath