def xval_splits(task):
    train = inputs.Inputs.from_paths(task.train_im_paths(),
                                     task.train_gt_paths(),
                                     tag='train')
    test = inputs.Inputs.from_paths(task.test_im_paths(),
                                    task.test_gt_paths(),
                                    tag='test')
    assert not bool(ub.find_duplicates(test.im_paths))
    assert not bool(ub.find_duplicates(train.im_paths))
    xval_split = (train, test)
    yield xval_split
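# --- Hedged aside (not part of the original snippet) ---
# The assert pattern above works because ub.find_duplicates maps each
# duplicated item to the indices where it occurs and returns an empty dict
# for duplicate-free input, so ``not bool(...)`` asserts path uniqueness.
# The paths below are hypothetical, for illustration only.
import ubelt as ub
assert ub.find_duplicates(['a.png', 'b.png']) == {}
assert ub.find_duplicates(['a.png', 'b.png', 'a.png']) == {'a.png': [0, 2]}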
def xval_splits(task, test_keys=None):
    import parse
    import logging
    parse.log.setLevel(logging.INFO)

    train_scene = 'JAX'
    test_scene = 'TAM'

    def primary_key_info(paths):
        infos = [parse.parse('{site_id}_Tile_{N}{junk}', p).named
                 for p in map(basename, paths)]
        df = pd.DataFrame(infos)
        return df

    train_inputs = []
    test_inputs = []

    # THESE PATHS MUST BE GENERATED IN THE SAME ORDER EACH TIME
    for k, v in sorted(task.input_modes.items()):
        df = primary_key_info(v.im_paths)
        parts = dict(list(df.groupby(['site_id'])))

        train_idx = parts[train_scene].index
        train_inputs.append(v.take(train_idx))

        if k in ['part-scale1']:
            test_idx = parts[test_scene].index
            test_inputs.append(v.take(test_idx))

    for k, v in sorted(task.augment_modes.items()):
        df = primary_key_info(v.im_paths)
        parts = dict(list(df.groupby(['site_id'])))
        train_idx = parts[train_scene].index
        train_inputs.append(v.take(train_idx))

    for v in train_inputs:
        assert not ub.find_duplicates(v.im_paths)
    for v in test_inputs:
        assert not ub.find_duplicates(v.im_paths)

    train = inputs.Inputs.union_all(*train_inputs)
    test = inputs.Inputs.union_all(*test_inputs)
    train.tag = 'train'
    test.tag = 'test'
    xval_split = (train, test)

    assert not bool(ub.find_duplicates(test.im_paths))
    assert not bool(ub.find_duplicates(train.im_paths))
    yield xval_split
def access_cache():
    """
    0.24
    """
    def _walk(parent):
        for node in parent.nodes:
            item = (node.item, id(parent))
            yield item
        for child in parent.children:
            for item in _walk(child):
                yield item

    print('Access regions in {}'.format(sys.executable))
    self = ndsampler.CocoSampler.demo(verbose=0).regions

    # Set workdir to a special location
    self.workdir = ub.ensure_app_cache_dir('ndsampler', 'tests', '23_regions')
    print('self.workdir = {!r}'.format(self.workdir))
    print('self.hashid = {!r}'.format(self.hashid))

    self.verbose = 100
    isect_index = self.isect_index

    for gid, qtree in isect_index.qtrees.items():
        # items = sorted([item for item in _walk(qtree)])
        # print('items = {!r}'.format(items))
        # if ub.find_duplicates(items):
        #     raise Exception('DUPLICATE ITEM AIDS')

        box = [0, 0, qtree.width, qtree.height]
        isect_aids = qtree.intersect(box)
        print('isect_aids = {!r}'.format(isect_aids))
        if ub.find_duplicates(isect_aids):
            raise Exception('DUPLICATE AIDS')

        for aid, box in qtree.aid_to_tlbr.items():
            isect_aids = qtree.intersect(box)
            if ub.find_duplicates(isect_aids):
                raise Exception('DUPLICATE AIDS')
            # print('isect_aids = {!r}'.format(isect_aids))

        print('----')
        print('gid = {!r}'.format(gid))
        print('qtree = {!r}'.format(qtree))
        for node in qtree.nodes:
            print('node.item, node.rect = {!r}, {!r}'.format(
                node.item, node.rect))
def xval_splits(task, xval_method='predef', test_keys=None):
    """
    Generate the list of inputs in each test/train split.

    Currently does the simple thing, which is to train on all training data
    and test on all testing data. Logic exists for leave-one-out, but it is
    disabled.

    Yields:
        tuple(inputs.Inputs, inputs.Inputs): train / test inputs

    >>> (train_ims, train_gts), train = next(task.xval_splits())
    """
    # Parse the prepared data and prepare to split it into test / train
    task.create_groundtruth(force=False)
    scene_im_paths, scene_gt_paths = task._load_all_scene_paths()

    # Per-scene xval generator
    def leave_k_out_xval(k=2):
        for test_scenes in ub.chunks(task.scene_ids, chunksize=k):
            # Simple leave one out
            train_scenes = list(task.scene_ids)
            for test_scene in test_scenes:
                train_scenes.remove(test_scene)
            print('test_scenes = {!r}'.format(test_scenes))
            print('train_scenes = {!r}'.format(train_scenes))
            yield train_scenes, test_scenes

    def predef_single_xval():
        train_scenes, test_scenes = task.load_predefined_train_test()
        yield train_scenes, test_scenes

    if xval_method == 'predef':
        xval_iter = predef_single_xval()
    elif xval_method == 'k=2':
        xval_iter = leave_k_out_xval(k=2)
    else:
        raise KeyError(xval_method)

    train_keys = task._preprocessing_keys()
    if test_keys is None:
        test_keys = ['lowres']

    # Given a per-scene split, map it to a split on a per-image basis
    flatten = it.chain.from_iterable
    for train_scenes, test_scenes in xval_iter:
        train_im_paths = list(
            flatten([
                scene_im_paths[s][k]
                for (s, k) in it.product(train_scenes, train_keys)
            ])) + task.extern_train_im_paths
        train_gt_paths = list(
            flatten([
                scene_gt_paths[s][k]
                for (s, k) in it.product(train_scenes, train_keys)
            ])) + task.extern_train_gt_paths

        test_im_paths = list(
            flatten([
                scene_im_paths[s][k]
                for (s, k) in it.product(test_scenes, test_keys)
            ]))
        test_gt_paths = list(
            flatten([
                scene_gt_paths[s][k]
                for (s, k) in it.product(test_scenes, test_keys)
            ]))

        train = inputs.Inputs.from_paths(train_im_paths, train_gt_paths,
                                         tag='train')
        test = inputs.Inputs.from_paths(test_im_paths, test_gt_paths,
                                        tag='test')

        assert not bool(ub.find_duplicates(test.im_paths))
        assert not bool(ub.find_duplicates(train.im_paths))

        xval_split = (train, test)
        yield xval_split
def argparse(self, parser=None, special_options=False):
    """
    construct or update an argparse.ArgumentParser CLI parser

    Args:
        parser (None | argparse.ArgumentParser): if specified this
            parser is updated with options from this config.

        special_options (bool, default=False):
            adds special scriptconfig options, namely: --config, --dumps,
            and --dump.

    Returns:
        argparse.ArgumentParser : a new or updated argument parser

    CommandLine:
        xdoctest -m scriptconfig.config Config.argparse:0
        xdoctest -m scriptconfig.config Config.argparse:1

    TODO:
        A good CLI spec for lists might be

        # In the case where ``key`` ends with an ``=``, assume the list is
        # given as a comma separated string with optional square brackets
        # at each end.

        --key=[f]

        # In the case where ``key`` does not end with equals and we know
        # the value is supposed to be a list, then we consume arguments
        # until we hit the next one that starts with '--' (which means
        # that list items cannot start with -- but they can contain
        # commas)

    FIXME:

        * In the case where we have an nargs='+' action, and we specify
          the option with an `=`, and then we give positional args after
          it, there is no way to modify the behavior of the action to just
          look at the data in the string without modifying the
          ArgumentParser itself. The action object has no control over it.
          For example `--foo=bar baz biz` will parse as `[baz, biz]`, which
          is really not what we want. We may be able to overload
          ArgumentParser to fix this.

    Example:
        >>> # You can now make instances of this class
        >>> import scriptconfig
        >>> self = scriptconfig.Config.demo()
        >>> parser = self.argparse()
        >>> parser.print_help()
        >>> # xdoctest: +REQUIRES(PY3)
        >>> # Python2 argparse does a hard sys.exit instead of raise
        >>> ns, extra = parser.parse_known_args()

    Example:
        >>> # You can now make instances of this class
        >>> import scriptconfig as scfg
        >>> class MyConfig(scfg.Config):
        >>>     description = 'my CLI description'
        >>>     default = {
        >>>         'path1': scfg.Value(None, position=1, alias='src'),
        >>>         'path2': scfg.Value(None, position=2, alias='dst'),
        >>>         'dry': scfg.Value(False, isflag=True),
        >>>         'approx': scfg.Value(False, isflag=False, alias=['a1', 'a2']),
        >>>     }
        >>> self = MyConfig()
        >>> special_options = True
        >>> parser = None
        >>> parser = self.argparse(special_options=special_options)
        >>> parser.print_help()
        >>> self._read_argv(argv=['objection', '42', '--path1=overruled!'])
        >>> print('self = {!r}'.format(self))

    Ignore:
        >>> self._read_argv(argv=['hi','--path1=foobar'])
        >>> self._read_argv(argv=['hi', 'hello', '--path1=foobar'])
        >>> self._read_argv(argv=['hi', 'hello', '--path1=foobar', '--help'])
        >>> self._read_argv(argv=['--path1=foobar', '--path1=baz'])
        >>> print('self = {!r}'.format(self))
    """
    import argparse

    if parser is None:
        parserkw = self._parserkw()
        parser = argparse.ArgumentParser(**parserkw)

    # Use custom action used to mark which values were explicitly set on
    # the commandline
    parser._explicitly_given = set()

    parent = self

    class ParseAction(argparse.Action):
        def __init__(self, *args, **kwargs):
            super(ParseAction, self).__init__(*args, **kwargs)
            # With scriptconfig nothing should be required by default;
            # all positional arguments should have keyword arg variants.
            # Setting required=False here will prevent positional args from
            # erroring if they are not specified. I don't think there are
            # other side effects, but we should make sure that is actually
            # the case.
            self.required = False

            if self.type is None:
                # Is this the right place to put this?
                def _mytype(value):
                    key = self.dest
                    template = parent.default[key]
                    if not isinstance(template, Value):
                        # smartcast non-valued params from commandline
                        value = smartcast.smartcast(value)
                    else:
                        value = template.cast(value)
                    return value
                self.type = _mytype

            # print('self.type = {!r}'.format(self.type))

        def __call__(action, parser, namespace, values, option_string=None):
            # print('CALL action = {!r}'.format(action))
            # print('option_string = {!r}'.format(option_string))
            # print('values = {!r}'.format(values))

            if isinstance(values, list) and len(values):
                # We got a list of lists, which we hack into a flat list
                if isinstance(values[0], list):
                    import itertools as it
                    values = list(it.chain(*values))

            setattr(namespace, action.dest, values)
            parser._explicitly_given.add(action.dest)

    # IRC: this ensures each key has a real Value class
    _metadata = {
        key: self._data[key]
        for key, value in self._default.items()
        if isinstance(self._data[key], Value)
    }  # :type: Dict[str, Value]

    _positions = {k: v.position for k, v in _metadata.items()
                  if v.position is not None}
    if _positions:
        if ub.find_duplicates(_positions.values()):
            raise Exception('two values have the same position')
        _keyorder = ub.oset(ub.argsort(_positions))
        _keyorder |= (ub.oset(self._default) - _keyorder)
    else:
        _keyorder = list(self._default.keys())

    def _add_arg(parser, name, key, argkw, positional, isflag, isalias):
        _argkw = argkw.copy()

        if isalias:
            _argkw['help'] = 'alias of {}'.format(key)
            _argkw.pop('default', None)
            # flags cannot have flag aliases
            isflag = False
        elif positional:
            parser.add_argument(name, **_argkw)

        if isflag:
            # Can we support both flag and setitem methods of cli parsing?
            if not isinstance(_argkw.get('default', None), bool):
                raise ValueError('can only use isflag with bools')
            _argkw.pop('type', None)
            _argkw.pop('choices', None)
            _argkw.pop('action', None)
            _argkw.pop('nargs', None)
            _argkw['dest'] = key

            _argkw_true = _argkw.copy()
            _argkw_true['action'] = 'store_true'

            _argkw_false = _argkw.copy()
            _argkw_false['action'] = 'store_false'
            _argkw_false.pop('help', None)

            parser.add_argument('--' + name, **_argkw_true)
            parser.add_argument('--no-' + name, **_argkw_false)
        else:
            parser.add_argument('--' + name, **_argkw)

    mode = 1

    alias_registry = []
    for key, value in self._data.items():
        # key: str
        # value: Any | Value

        argkw = {}
        argkw['help'] = ''
        positional = None
        isflag = False
        if key in _metadata:
            # Use the metadata in the Value class to enhance argparse
            _value = _metadata[key]
            argkw.update(_value.parsekw)
            value = _value.value
            isflag = _value.isflag
            positional = _value.position
        else:
            _value = value if isinstance(value, Value) else None

        if not argkw['help']:
            argkw['help'] = '<undocumented>'

        argkw['default'] = value
        argkw['action'] = ParseAction

        name = key
        _add_arg(parser, name, key, argkw, positional, isflag, isalias=False)

        if _value is not None:
            if _value.alias:
                alts = _value.alias
                alts = alts if ub.iterable(alts) else [alts]
                for alias in alts:
                    tup = (alias, key, argkw)
                    alias_registry.append(tup)
                    if mode == 0:
                        name = alias
                        _add_arg(parser, name, key, argkw, positional,
                                 isflag, isalias=True)

    if mode == 1:
        for tup in alias_registry:
            (alias, key, argkw) = tup
            name = alias
            dest = key
            _add_arg(parser, name, dest, argkw, positional, isflag,
                     isalias=True)

    if special_options:
        parser.add_argument('--config', default=None, help=ub.codeblock(
            '''
            special scriptconfig option that accepts the path to an on-disk
            configuration file, and loads that into this {!r} object.
            ''').format(self.__class__.__name__))

        parser.add_argument('--dump', default=None, help=ub.codeblock(
            '''
            If specified, dump this config to disk.
            ''').format(self.__class__.__name__))

        parser.add_argument('--dumps', action='store_true', help=ub.codeblock(
            '''
            If specified, dump this config to stdout
            ''').format(self.__class__.__name__))

    return parser
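# --- Hedged aside (not part of the original snippet) ---
# The duplicate-position guard above is just ub.find_duplicates applied to
# the declared ``position`` values. A standalone sketch with hypothetical
# keys, showing how two Values claiming the same position trip the check:
import ubelt as ub
_positions = {'path1': 1, 'path2': 1}  # hypothetical conflicting positions
if ub.find_duplicates(_positions.values()):
    raise Exception('two values have the same position')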
def main(bib_fpath=None):
    r"""
    Intro point to the fixbib script

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """

    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')

    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtex is now clean. Print it to stdout
    # print(clean_text)

    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths, return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' % (len(key_list),))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data
    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' % (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' % (
            ub.repr2(missing_keys),))

    # Search for possible typos:
    candidate_typos = {}
    sedlines = []
    for key in missing_keys:
        candidates = ut.closet_words(key, known_keys, num=3, subset=True)
        if len(candidates) > 1:
            top = candidates[0]
            if ut.edit_distance(key, top) == 1:
                # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                import os
                replpaths = ' '.join(
                    [relpath(p, os.getcwd()) for p in inverse[key]])
                sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                    key, top, replpaths))
        candidate_typos[key] = candidates
        print('Cannot find key = %r' % (key,))
        print('Did you mean? %r' % (candidates,))

    print('Quick fixes')
    print('\n'.join(sedlines))

    # group by file
    just = max([0] + list(map(len, missing_keys)))
    missing_fpaths = [inverse[key] for key in missing_keys]
    for fpath in sorted(set(ub.flatten(missing_fpaths))):
        # ut.fix_embed_globals()
        subkeys = [k for k in missing_keys if fpath in inverse[k]]
        print('')
        ut.cprint('--- Missing Keys ---', 'blue')
        ut.cprint('fpath = %r' % (fpath,), 'blue')
        ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'), 'blue')
        for key in subkeys:
            print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                   ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precedence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing unwanted %d entries' % (len(unwanted_keys)))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    if 0:
        d1 = bibtex_dict.copy()
        full = True

        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])

            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))

            if x1 != x2:
                print('x2 = %r' % (x2,))
                print('x1 = %r' % (x1,))
                print(ub.repr2(self.entry))

            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old,))
            d1[key] = self.entry

    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e,))
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys),))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = ' '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abbreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
def 数组_查找重复项(items, 至少出现=2):
    # Identifiers kept as in the original; roughly: "数组_查找重复项" is
    # "find_duplicates_in_array" and "至少出现" is "min_occurrences"
    # (forwarded to ubelt as the ``k`` threshold).
    data = ub.find_duplicates(items, k=至少出现)
    return data
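# --- Hedged aside (not part of the original snippet) ---
# For reference, the ``k`` argument forwarded above is ubelt's occurrence
# threshold: only items appearing at least ``k`` times are reported, as a
# dict mapping each such item to the indices where it occurs. Values below
# are illustrative.
import ubelt as ub
items = [0, 0, 1, 2, 2, 2]
print(ub.find_duplicates(items))       # -> {0: [0, 1], 2: [3, 4, 5]}
print(ub.find_duplicates(items, k=3))  # -> {2: [3, 4, 5]}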
def _devcheck_manage_snapshots(workdir, recent=5, factor=10, dry=True):
    """
    Sometimes netharn produces too many snapshots. The Monitor class attempts
    to prevent this, but it's not perfect. So, sometimes you need to manually
    clean up. This code snippet serves as a template for doing so.

    I recommend using IPython to do this, following this code as a guide.
    Unfortunately, I don't have a safe automated way of doing this yet.

    The basic code simply lists all snapshots that you have. It's then your
    job to find a heuristic to remove the ones you don't need.

    Note:
        # Idea for a more automatic method

        In the future, we should use monitor to inspect the critical points
        of all metric curves and include any epoch that is at those critical
        points. A critical point is defined as one where there is a
        significant change in trajectory. Basically, we try to fit a
        low-degree polynomial or piecewise linear function to the metric
        curves, and we take the places where there is a significant change
        from a global perspective.

    # Specify your workdir
    workdir = ub.expandpath('~/work/voc_yolo2')
    """
    USE_RANGE_HEURISTIC = True

    run_dpath = join(workdir, 'fit', 'runs')
    snapshot_dpaths = list(glob.glob(
        join(run_dpath, '**/torch_snapshots'), recursive=True))
    print('checking {} snapshot paths'.format(len(snapshot_dpaths)))

    all_keep = []
    all_remove = []

    for snapshot_dpath in snapshot_dpaths:
        snapshots = sorted(glob.glob(join(snapshot_dpath, '_epoch_*.pt')))
        epoch_to_snap = {
            int(parse.parse('{}_epoch_{num:d}.pt', path).named['num']): path
            for path in snapshots
        }
        existing_epochs = sorted(epoch_to_snap.keys())
        # print('existing_epochs = {}'.format(ub.repr2(existing_epochs)))
        toremove = []
        tokeep = []

        if USE_RANGE_HEURISTIC:
            # My criterion is that I'm only going to keep the two latest and
            # I'll also keep an epoch in the range [0,50], [50,100], and
            # [100,150], and so on.
            existing_epochs = sorted(existing_epochs)
            dups = ub.find_duplicates(
                np.array(sorted(existing_epochs)) // factor, k=0)
            keep_idxs = [max(idxs) for _, idxs in dups.items()]
            keep = set(ub.take(existing_epochs, keep_idxs))

            keep.update(existing_epochs[-recent:])

            if existing_epochs and existing_epochs[0] != 0:
                keep.update(existing_epochs[0:1])

            kill = []
            for epoch, path in epoch_to_snap.items():
                if epoch in keep:
                    tokeep.append(path)
                else:
                    kill.append(epoch)
                    toremove.append(path)
            # print('toremove = {!r}'.format(toremove))
            print('keep = {!r}'.format(sorted(keep)))
            print('kill = {!r}'.format(sorted(kill)))

            print('Keep {}/{} from {}'.format(len(keep), len(existing_epochs),
                                              snapshot_dpath))

        all_keep += [tokeep]
        all_remove += [toremove]

    # print('all_keep = {}'.format(ub.repr2(all_keep, nl=2)))
    # print('all_remove = {}'.format(ub.repr2(all_remove, nl=2)))
    """
    pip install send2trash
    import send2trash
    send2trash.send2trash(path)
    """
    total = 0
    for path in ub.flatten(all_remove):
        total += os.path.getsize(path)

    total_mb = total / 2 ** 20
    if dry:
        print('Cleanup would delete {} snapshots and free {!r} MB'.format(
            len(all_remove), total_mb))
        print('Use -f to confirm and force cleanup')
    else:
        print('About to free {!r} MB'.format(total_mb))
        for path in ub.flatten(all_remove):
            ub.delete(path, verbose=True)
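# --- Hedged aside (not part of the original snippet) ---
# The ``k=0`` call above is the interesting trick: with the threshold at
# zero, find_duplicates returns an index group for every bucket value
# (singletons included), so taking max(idxs) per group keeps the newest
# epoch in each ``factor``-sized range. Sketch with made-up epoch numbers:
import numpy as np
import ubelt as ub
existing_epochs = [0, 3, 17, 42, 51, 58, 103]  # hypothetical epochs
factor = 10
dups = ub.find_duplicates(np.array(existing_epochs) // factor, k=0)
keep_idxs = [max(idxs) for _, idxs in dups.items()]
print(sorted(ub.take(existing_epochs, keep_idxs)))
# -> [3, 17, 42, 58, 103], i.e. the latest epoch in each bucket of 10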
def compute_fmech_score(cm, qreq_=None, hack_single_ori=False):
    r"""
    nsum. This is the fmech scoring mechanism.

    Args:
        cm (ibeis.ChipMatch):

    Returns:
        tuple: (unique_nids, nsum_score_list)

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:0
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> cm = testdata_chipmatch()
        >>> nsum_score_list = compute_fmech_score(cm)
        >>> assert np.all(nsum_score_list == [ 4., 7., 5.])

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_dnids(qreq_)
        >>> cm._cast_scores()
        >>> #cm.qnid = 1  # Hack for testdb1 names
        >>> nsum_score_list = compute_fmech_score(cm, qreq_)
        >>> #assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment'
        >>> flags = (cm.unique_nids == cm.qnid)
        >>> max_true = nsum_score_list[flags].max()
        >>> max_false = nsum_score_list[~flags].max()
        >>> assert max_true > max_false, 'is this truly a hard case?'
        >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,)

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)

    Example3:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)
    """
    #assert qreq_ is not None
    if hack_single_ori is None:
        try:
            hack_single_ori = qreq_ is not None and (
                qreq_.qparams.query_rotation_heuristic or
                qreq_.qparams.rotation_invariance
            )
        except AttributeError:
            hack_single_ori = True

    # The core for each feature match
    #
    # The query feature index for each feature match
    fm_list = cm.fm_list
    fs_list = cm.get_fsv_prod_list()
    fx1_list = [fm.T[0] for fm in fm_list]
    if hack_single_ori:
        # Group keypoints with the same xy-coordinate.
        # Combine these features so each only receives one vote
        kpts1 = qreq_.ibs.get_annot_kpts(
            cm.qaid, config2_=qreq_.extern_query_config2)
        xys1_ = vt.get_xys(kpts1).T
        fx1_to_comboid = vt.compute_unique_arr_dataids(xys1_)
        fcombo_ids = [fx1_to_comboid.take(fx1) for fx1 in fx1_list]
    else:
        # use the feature index itself as a combo id
        # so each feature only receives one vote
        fcombo_ids = fx1_list

    if False:
        import ubelt as ub
        for ids in fcombo_ids:
            ub.find_duplicates(ids)

    # Group annotation matches by name
    # nsum_nid_list, name_groupxs = vt.group_indices(cm.dnid_list)
    # nsum_nid_list = cm.unique_nids
    name_groupxs = cm.name_groupxs

    nsum_score_list = []
    # For all indices matched to a particular name
    for name_idxs in name_groupxs:
        # Get feat indices and scores corresponding to the name's annots
        name_combo_ids = ut.take(fcombo_ids, name_idxs)
        name_fss = ut.take(fs_list, name_idxs)
        # Flatten over annots in the name
        fs = np.hstack(name_fss)
        if len(fs) == 0:
            nsum_score_list.append(0)
            continue
        combo_ids = np.hstack(name_combo_ids)
        # Features (with the same id) can't vote for this name twice
        group_idxs = vt.group_indices(combo_ids)[1]
        flagged_idxs = [idxs[fs.take(idxs).argmax()] for idxs in group_idxs]
        # Detail: sorting the idxs preserves summation order
        # this fixes the numerical issue where nsum and csum were off
        flagged_idxs = np.sort(flagged_idxs)
        name_score = fs.take(flagged_idxs).sum()
        nsum_score_list.append(name_score)
    nsum_score_list = np.array(nsum_score_list)

    return nsum_score_list