Example No. 1
    def load_predefined_train_test(task):
        """
        the v1-annotations dataset has a predefined train / test split
        The training set can be used however you want (cross-validation wise)
        The test may only be used for final evaluation.
        """
        # TODO: generalize path spec
        # HACK
        train_scenes = sorted(['0000', '0002', '0101', '0102', '0401', '0503'])
        test_scenes = sorted(['0001', '0100', '0400', '0500', '0501', '0502'])
        return train_scenes, test_scenes

        # FIXME: everything below is unreachable because of the early return
        # above; it is the non-hardcoded implementation of the split.
        import glob
        import ubelt as ub
        from os.path import expanduser, join
        dpath = expanduser(
            '~/code/baseline-algorithms/DIVA/splits/VIRAT/videos/new_v1_annotation'
        )

        def parse_scene(fname):
            """
            ~~All~~ (most of) the filenames are formatted as follows:
            VIRAT_S_XXYYZZ_KK_SSSSSS_TTTTTT.mp4
            """
            import parse
            # virat_format = 'VIRAT_S_{group:DD}{scene:DD}{seq:DD}_{segmentid}_{start}_{stop}.mp4'
            virat_format = 'VIRAT_S_{group:DD}{scene:DD}{therest}.mp4'
            extra_types = {'DD': parse.with_pattern(r'\d\d')(lambda x: x)}
            result = parse.parse(virat_format, fname, extra_types)
            if result:
                return result.named

        train_scenes = set()
        test_scenes = set()
        for fpath in glob.glob(join(dpath, 'Validation_*')):
            paths = [p for p in ub.readfrom(fpath).split('\n') if p]
            info = [parse_scene(p) for p in paths]
            info = [p for p in info if p]
            scenes = {p['group'] + p['scene'] for p in info}
            train_scenes.update(scenes)

        for fpath in glob.glob(join(dpath, 'test_*')):
            paths = [p for p in ub.readfrom(fpath).split('\n') if p]
            info = [parse_scene(p) for p in paths]
            info = [p for p in info if p]
            scenes = {p['group'] + p['scene'] for p in info}
            test_scenes.update(scenes)
        # Ensure determinism
        train_scenes = sorted(train_scenes)
        test_scenes = sorted(test_scenes)
        return train_scenes, test_scenes
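
For reference, a minimal sketch of what parse_scene extracts, assuming the parse package is installed; the filename below is illustrative, not taken from the split files:

    import parse
    extra_types = {'DD': parse.with_pattern(r'\d\d')(lambda x: x)}
    virat_format = 'VIRAT_S_{group:DD}{scene:DD}{therest}.mp4'
    result = parse.parse(virat_format,
                         'VIRAT_S_000201_02_000590_000623.mp4', extra_types)
    # The first two digit pairs identify the group and the scene:
    print(result.named['group'], result.named['scene'])  # -> 00 02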
Example No. 2
def get_bibtex_dict():
    import ubelt as ub
    from os.path import exists
    # HACK: custom current bibtex file
    possible_bib_fpaths = [
        ub.truepath('./My_Library_clean.bib'),
        #ub.truepath('~/latex/crall-thesis-2017/My_Library_clean.bib'),
    ]

    bib_fpath = None
    for bib_fpath_ in possible_bib_fpaths:
        if exists(bib_fpath_):
            bib_fpath = bib_fpath_
            break

    if bib_fpath is None:
        raise Exception('cannot find bibtex file')

    # import bibtexparser
    from bibtexparser import bparser
    parser = bparser.BibTexParser()
    parser.ignore_nonstandard_types = True
    bib_text = ub.readfrom(bib_fpath)
    bibtex_db = parser.parse(bib_text)
    bibtex_dict = bibtex_db.get_entry_dict()

    return bibtex_dict
Example No. 3
def _read_split_paths(devkit_dpath, split, year):
    """
    split = 'train'
    self = VOCDataset('test')
    year = 2007
    year = 2012
    """
    import glob
    import re
    import ubelt as ub
    from os.path import join
    split_idstrs = []
    data_dpath = join(devkit_dpath, 'VOC{}'.format(year))
    split_dpath = join(data_dpath, 'ImageSets', 'Main')
    pattern = join(split_dpath, '*_' + split + '.txt')
    for p in sorted(glob.glob(pattern)):
        rows = [
            list(re.split(' +', t)) for t in ub.readfrom(p).split('\n') if t
        ]
        # code = -1 if the image does not contain the object
        # code = 1 if the image contains at least one instance
        # code = 0 if the image contains only hard instances of the object
        idstrs = [idstr for idstr, code in rows if int(code) == 1]
        split_idstrs.extend(idstrs)
    split_idstrs = sorted(set(split_idstrs))

    image_dpath = join(data_dpath, 'JPEGImages')
    annot_dpath = join(data_dpath, 'Annotations')
    gpaths = [
        join(image_dpath, '{}.jpg'.format(idstr)) for idstr in split_idstrs
    ]
    apaths = [
        join(annot_dpath, '{}.xml'.format(idstr)) for idstr in split_idstrs
    ]
    return gpaths, apaths
Example No. 4
def _read_kresimir_results():
    # Load the downloaded matlab csv results
    import scipy.io
    import cv2
    import numpy as np
    import pandas as pd
    import ubelt as ub
    from os.path import expanduser
    mat = scipy.io.loadmat(expanduser(
        '~/data/camtrawl_stereo_sample_data/Haul_83/Haul_083_qcresult.mat'))
    header = ub.readfrom(expanduser(
        '~/data/camtrawl_stereo_sample_data/Haul_83/mat_file_header.csv'
    )).strip().split(',')
    data = mat['lengthsqc']

    mat_df = pd.DataFrame(data, columns=header)
    mat_df['current_frame'] = mat_df['current_frame'].astype(int)
    mat_df['Species'] = mat_df['Species'].astype(int)
    mat_df['QC'] = mat_df['QC'].astype(int)

    # Transform so each row corresponds to one set of (x, y) points per detection
    bbox_cols1 = ['LX1', 'LX2', 'LX3', 'LX4', 'LY1', 'LY2', 'LY3', 'LY4', 'Lar', 'LboxL', 'WboxL', 'aveL']
    bbox_pts1 = mat_df[bbox_cols1[0:8]]  # NOQA
    bbox_pts1_ = bbox_pts1.values
    bbox_pts1_ = bbox_pts1_.reshape(len(bbox_pts1_), 2, 4).transpose((0, 2, 1))

    bbox_cols2 = ['RX1', 'RX2', 'RX3', 'RX4', 'RY1', 'RY2', 'RY3', 'RY4', 'Rar', 'LboxR', 'WboxR', 'aveW']
    bbox_pts2 = mat_df[bbox_cols2[0:8]]  # NOQA
    bbox_pts2_ = bbox_pts2.values
    bbox_pts2_ = bbox_pts2_.reshape(len(bbox_pts2_), 2, 4).transpose((0, 2, 1))

    # Convert matlab bboxes into python-style bboxes
    # (`ctalgo` is assumed to be the project's camtrawl algorithms module;
    #  cv2.minAreaRect needs int32/float32 points, so cast explicitly)
    mat_df['obox1'] = [ctalgo.OrientedBBox(*cv2.minAreaRect(pts[:, None, :].astype(np.int32)))
                       for pts in bbox_pts1_]
    mat_df['obox2'] = [ctalgo.OrientedBBox(*cv2.minAreaRect(pts[:, None, :].astype(np.int32)))
                       for pts in bbox_pts2_]

    mat_df.drop(bbox_cols2, axis=1, inplace=True)
    mat_df.drop(bbox_cols1, axis=1, inplace=True)
    return mat_df
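
A minimal sketch of the reshape / transpose step above: it turns the eight X1..X4, Y1..Y4 columns into four (x, y) corner points per detection (values are illustrative):

    import numpy as np
    row = np.array([[1, 2, 3, 4, 10, 20, 30, 40]])  # X1..X4 then Y1..Y4
    pts = row.reshape(len(row), 2, 4).transpose((0, 2, 1))
    print(pts[0].tolist())  # -> [[1, 10], [2, 20], [3, 30], [4, 40]]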
Example No. 5
    def clean_lprof_file(self, input_fname, output_fname=None):
        """ Reads a .lprof file and cleans it """
        # NOTE: output_fname is accepted but currently unused
        import ubelt as ub
        # Read the raw .lprof text dump
        text = ub.readfrom(input_fname)
        # Sort and clean the text
        output_text = self.clean_line_profile_text(text)
        return output_text
Example No. 6
def test_modify_directory_symlinks():
    import ubelt as ub
    from os.path import exists, join
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_modify_symlinks')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)

    happy_dpath = join(dpath, 'happy_dpath')
    happy_dlink = join(dpath, 'happy_dlink')
    ub.ensuredir(happy_dpath, verbose=2)

    ub.symlink(happy_dpath, happy_dlink, verbose=2)

    # Test file inside directory symlink
    file_path1 = join(happy_dpath, 'file.txt')
    file_path2 = join(happy_dlink, 'file.txt')

    ub.touch(file_path1, verbose=2)
    assert exists(file_path1)
    assert exists(file_path2)

    ub.writeto(file_path1, 'foo')
    assert ub.readfrom(file_path1) == 'foo'
    assert ub.readfrom(file_path2) == 'foo'

    ub.writeto(file_path2, 'bar')
    assert ub.readfrom(file_path1) == 'bar'
    assert ub.readfrom(file_path2) == 'bar'

    ub.delete(file_path2, verbose=2)
    assert not exists(file_path1)
    assert not exists(file_path2)

    # Test directory inside directory symlink
    dir_path1 = join(happy_dpath, 'dir')
    dir_path2 = join(happy_dlink, 'dir')

    ub.ensuredir(dir_path1, verbose=2)
    assert exists(dir_path1)
    assert exists(dir_path2)

    subfile_path1 = join(dir_path1, 'subfile.txt')
    subfile_path2 = join(dir_path2, 'subfile.txt')

    ub.writeto(subfile_path2, 'foo')
    assert ub.readfrom(subfile_path1) == 'foo'
    assert ub.readfrom(subfile_path2) == 'foo'

    ub.writeto(subfile_path1, 'bar')
    assert ub.readfrom(subfile_path1) == 'bar'
    assert ub.readfrom(subfile_path2) == 'bar'

    ub.delete(dir_path1, verbose=2)
    assert not exists(dir_path1)
    assert not exists(dir_path2)
Example No. 7
def test_grabdata():
    # xdoctest: +REQUIRES(--network)
    import ubelt as ub
    # fname = 'foo.bar'
    # url = 'http://i.imgur.com/rqwaDag.png'
    # prefix1 = '944389a39dfb8fa9'
    fname = 'foo2.bar'
    url = _demo_url(128 * 11)
    prefix1 = 'b7fa848cd088ae842a89ef'
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    stamp_fpath = fpath + '.sha512.hash'
    assert ub.readfrom(stamp_fpath) == prefix1
    # Check that the download doesn't happen again
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    # todo: check file timestamps have not changed
    #
    # Check redo works with hash
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, redo=True)
    # todo: check file timestamps have changed
    #
    # Check that a redownload occurs when the stamp is changed
    with open(stamp_fpath, 'w') as file:
        file.write('corrupt-stamp')
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    assert ub.readfrom(stamp_fpath) == prefix1
    #
    # Check that a redownload occurs when the stamp is removed
    ub.delete(stamp_fpath)
    with open(fpath, 'w') as file:
        file.write('corrupt-data')
    assert not ub.hash_file(fpath, base='hex',
                            hasher='sha512').startswith(prefix1)
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    assert ub.hash_file(fpath, base='hex', hasher='sha512').startswith(prefix1)
    #
    # Check that requesting new data causes redownload
    #url2 = 'https://data.kitware.com/api/v1/item/5b4039308d777f2e6225994c/download'
    #prefix2 = 'c98a46cb31205cf'  # hack SSL
    # url2 = 'http://i.imgur.com/rqwaDag.png'
    # prefix2 = '944389a39dfb8fa9'
    url2, prefix2 = url, prefix1
    fpath = ub.grabdata(url2, fname=fname, hash_prefix=prefix2)
    assert ub.readfrom(stamp_fpath) == prefix2
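
A minimal usage sketch of the caching behavior this test exercises (the URL and prefix are taken from the commented-out lines above; network access is assumed):

    import ubelt as ub
    fpath = ub.grabdata('http://i.imgur.com/rqwaDag.png',
                        hash_prefix='944389a39dfb8fa9')
    # A second call finds the hash stamp next to the file and returns the
    # cached path without downloading again.
    fpath = ub.grabdata('http://i.imgur.com/rqwaDag.png',
                        hash_prefix='944389a39dfb8fa9')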
Example No. 8
def test_readwrite():
    import os
    import ubelt as ub
    from os.path import exists, join
    dpath = ub.ensure_app_cache_dir('ubelt')
    fpath = join(dpath, 'testwrite.txt')
    if exists(fpath):
        os.remove(fpath)
    to_write = 'utf-8 symbols Δ, Й, ק, م, ๗, あ, 叶, 葉, and 말.'
    ub.writeto(fpath, to_write, verbose=True)
    read_ = ub.readfrom(fpath, verbose=True)
    assert read_ == to_write
Example No. 9
def test_modify_file_symlinks():
    """
    CommandLine:
        python -m ubelt.tests.test_links test_modify_symlinks
    """
    # TODO: test that we handle broken links
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_modify_symlinks')
    happy_fpath = join(dpath, 'happy_fpath.txt')
    happy_flink = join(dpath, 'happy_flink.txt')
    ub.touch(happy_fpath, verbose=2)

    ub.symlink(happy_fpath, happy_flink, verbose=2)

    # Test file symlink
    ub.writeto(happy_fpath, 'foo')
    assert ub.readfrom(happy_fpath) == 'foo'
    assert ub.readfrom(happy_flink) == 'foo'

    ub.writeto(happy_flink, 'bar')
    assert ub.readfrom(happy_fpath) == 'bar'
    assert ub.readfrom(happy_flink) == 'bar'
Example No. 10
def research(r, start_line_str=None, rate='3', sentence_mode=True, open_file=False):
    import os
    import re
    import ubelt as ub
    from os.path import join, split
    fname = join(split(__file__)[0], 'to_speak.txt')
    if start_line_str == "prep":
        os.system(fname)
        return
    if open_file is True:
        os.system(fname)

    input_str = preprocess_research(ub.readfrom(fname))
    if sentence_mode:
        input_str = input_str.replace('\n', ' ').replace('. ', '.\n')
        input_str = re.sub('  *', ' ', input_str)

    line_count = 0
    page = 0
    page_re = re.compile(' *--- Page [0-9]* *--- *')
    # Default both so they are defined even if parsing fails below
    start_page = 0
    start_line = 0
    if start_line_str is None:
        try:
            start_line = int(input('Did you forget the start line?'))
        except Exception:
            pass
    elif start_line_str.find('page') != -1:
        start_page = int(start_line_str.replace('page', ''))
    else:
        start_line = int(start_line_str)

    print('Starting on line: %d' % (start_line))
    print('Starting on page: %d' % (start_page))
    for line in input_str.split('\n'):
        print('____')
        # Check for page marker
        if page_re.findall(line) != []:
            page = int(re.sub(' *--- Page ', '', line).replace('---', ''))
        # Print out what is being read
        line_count += 1
        print('%d, %d > %s' % (page, line_count, line))
        if start_line > line_count or start_page > page:
            continue
        # Preprocess the line
        line = process_research_line(line)
        if line == '':
            continue
        print('--')
        robos.speak(r, line, rate)
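
A minimal sketch of the page-marker parsing above (the line is illustrative):

    import re
    page_re = re.compile(' *--- Page [0-9]* *--- *')
    line = ' --- Page 12 ---'
    if page_re.findall(line) != []:
        page = int(re.sub(' *--- Page ', '', line).replace('---', ''))
        print(page)  # -> 12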
Example No. 11
def generate():
    import ubelt as ub
    content = ub.readfrom('base_diff.py') + '\n\n'
    xdoc_version = content + ub.codeblock('''
        if __name__ == '__main__':
            import xdoctest
            xdoctest.doctest_module(__file__)
        ''') + '\n'

    doc_version = content + ub.codeblock('''
        if __name__ == '__main__':
            import doctest
            doctest.testmod()
        ''') + '\n'

    ub.writeto('_doc_version.py', doc_version)
    ub.writeto('_xdoc_version.py', xdoc_version)
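
For reference, ub.codeblock dedents a triple-quoted string so it can be written flush with the surrounding code; a minimal sketch:

    import ubelt as ub
    text = ub.codeblock(
        '''
        if __name__ == '__main__':
            print('hi')
        ''')
    print(text.splitlines()[0])  # -> if __name__ == '__main__':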
Example No. 12
    def evaulate_all(harn):
        """
        Performs testing on trained snapshots.
        """
        import ubelt as ub
        from os.path import exists, join
        harn.prepare_test_model(force=False)
        test_weight_dpaths = harn.find_test_weights_dpaths()
        for test_weights_dpath in test_weight_dpaths:
            harn.test_weights_dpath = test_weights_dpath
            harn.test_dump_dpath = test_weights_dpath
            link_fpath = join(test_weights_dpath,
                              'test_weights.caffemodel.lnk')
            harn.test_weights_fpath = ub.readfrom(link_fpath)
            # if not exists(join(harn.test_weights_dpath, 'pred')):
            if not exists(join(harn.test_dump_dpath, 'results.json')):
                print(
                    'Need to evaluate: harn.test_weights_fpath = {!r}'.format(
                        harn.test_weights_fpath))
                harn.evaluate()
Example No. 13
def modify_conf():
    """
    pip install redbaron
    """
    import redbaron
    import ubelt as ub
    conf_path = 'docs/conf.py'

    source = ub.readfrom(conf_path)
    red = redbaron.RedBaron(source)

    # Insert custom extensions
    extra_extensions = ['"sphinxcontrib.napoleon"']

    ext_node = red.find('name', value='extensions').parent
    ext_node.value.value.extend(extra_extensions)

    # Overwrite theme to read-the-docs
    theme_node = red.find('name', value='html_theme').parent
    theme_node.value.value = '"sphinx_rtd_theme"'

    ub.writeto(conf_path, red.dumps())
Example No. 14
    def _read_snapshot_results(self):
        # Parse the output of the test dir
        import glob
        import json
        import pandas as pd
        import ubelt as ub
        from os.path import basename, dirname, join
        results_fpaths = glob.glob(
            join(self.test_dir, 'weights_*/results.json'))
        result_infos = []
        for fpath in results_fpaths:
            dname = basename(dirname(fpath))
            parts = dname.split('_')
            # Weird parsing because arch may contain an underscore.
            item = {}
            item['train_input_id'] = parts[1]
            item['init_id'], item['hyper_id'], item['n_iters'] = parts[-3:]
            item['arch_id'] = '_'.join(parts[2:-3])
            item['n_iters'] = int(item['n_iters'])
            item['fpath'] = fpath
            result_infos.append(item)
        snap_paths_df = pd.DataFrame(result_infos)

        # Group items by their overall train id
        train_id_keys = ('arch_id', 'hyper_id', 'init_id', 'train_input_id')
        groups = list(snap_paths_df.groupby(train_id_keys))
        assert len(groups) == 1, 'TODO, support train_id comparisons'
        for train_id, group in groups:

            # Sort by number of iterations
            group = group.sort_values('n_iters')

            # Read the results data into a dictionary
            group_datas = []
            for n_iters, fpath in zip(group.n_iters, group.fpath):
                item = json.loads(ub.readfrom(fpath))
                item['n_iters'] = n_iters
                item['fpath'] = fpath
                group_datas.append(item)

            yield train_id, group_datas
Example No. 15
            new_lines.append(line)
        elif state == mode:
            new_lines.append(line)
        # elif state == 'THEIRS':
        #     pass
        # elif state == 'ANCESTORS':
        #     pass
        # elif state == 'OURS':
        #     new_lines.append(line)

    return ''.join(new_lines[::-1])


if __name__ == '__main__':
    r"""
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:/home/joncrall/misc
        python ~/misc/fix_merge_3.py
    """
    import sys
    argv = sys.argv[1:]

    import ubelt as ub
    fpaths = ub.cmd('git diff --name-only --diff-filter=U')['out'].splitlines()
    print('fpaths = {!r}'.format(fpaths))
    for fpath in fpaths:
        text = ub.readfrom(fpath)
        text = resolve_diff3_conflict(text)
        # print(text)
        ub.writeto(fpath, text)
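
For reference, the diff3-style conflict blocks this script resolves look like the following (illustrative; this is the layout git produces with merge.conflictStyle=diff3):

    <<<<<<< HEAD
    our version of the line
    ||||||| merged common ancestors
    the ancestor version of the line
    =======
    their version of the line
    >>>>>>> their-branch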
Example No. 16
def _autojit_cython(pyx_fpath, verbose=1, recompile=False, annotate=False):
    """
    The idea is that, given a pyx file, we try to compile it. We write a stamp
    file so subsequent calls should be very fast as long as the source pyx has
    not changed.

    Parameters
    ----------
    pyx_fpath : str
        path to the pyx file

    verbose : int
        higher is more verbose.
    """
    import os
    import shutil
    from os.path import basename, dirname, join, splitext
    if verbose > 3:
        print('_autojit_cython')

    # TODO: move necessary ubelt utilities to nx.utils?
    # Separate this into its own util?
    if shutil.which("cythonize"):
        pyx_dpath = dirname(pyx_fpath)

        if verbose > 3:
            print('pyx_dpath = {!r}'.format(pyx_dpath))

        # Check if the compiled library exists
        pyx_base = splitext(basename(pyx_fpath))[0]

        SO_EXTS = _platform_pylib_exts()
        so_fname = None
        for fname in os.listdir(pyx_dpath):
            if fname.startswith(pyx_base) and fname.endswith(SO_EXTS):
                so_fname = fname
                break

        if verbose > 3:
            print('so_fname = {!r}'.format(so_fname))

        try:
            # Currently this functionality depends on ubelt.
            # We could replace ub.cmd with subprocess.check_call and ub.augpath
            # with os.path operations, but hash_file and CacheStamp are harder
            # to replace. We can use "liberator" to statically extract these
            # and add them to nx.utils though.
            import ubelt as ub
        except Exception:
            if verbose > 3:
                print('return false, no ubelt')
            return False
        else:
            if so_fname is None:
                # We can compute what the so_fname will be if it doesn't exist
                so_fname = pyx_base + SO_EXTS[0]

            so_fpath = join(pyx_dpath, so_fname)
            content = ub.readfrom(pyx_fpath)
            mtime = os.stat(pyx_fpath).st_mtime

            depends = [ub.hash_data(content, hasher="sha1"), mtime]
            stamp_fname = ub.augpath(so_fname, ext=".jit.stamp")
            stamp = ub.CacheStamp(
                stamp_fname,
                dpath=pyx_dpath,
                product=so_fpath,
                depends=depends,
                verbose=verbose,
            )
            if verbose > 3:
                print('stamp = {!r}'.format(stamp))
            if recompile or stamp.expired():
                # Heuristic to try and grab the numpy include dir or not
                cythonize_args = ['cythonize']
                cythonize_env = os.environ.copy()
                needs_numpy = 'numpy' in content
                if needs_numpy:
                    import numpy as np
                    import pathlib
                    numpy_include_dpath = pathlib.Path(np.get_include())
                    numpy_dpath = (numpy_include_dpath / '../..').resolve()
                    # cythonize_env['CPATH'] = numpy_include_dpath + ':' + cythonize_env.get('CPATH', '')
                    cythonize_env['CFLAGS'] = ' '.join([
                        '-I{}'.format(numpy_include_dpath),
                    ]) + cythonize_env.get('CFLAGS', '')

                    cythonize_env['LDFLAGS'] = ' '.join([
                        '-L{} -lnpyrandom'.format(numpy_dpath / 'random/lib'),
                        '-L{} -lnpymath'.format(numpy_dpath / 'core/lib'),
                    ]) + cythonize_env.get('LDFLAGS', '')
                if annotate:
                    cythonize_args.append('-a')
                cythonize_args.append('-i {}'.format(pyx_fpath))
                cythonize_cmd = ' '.join(cythonize_args)
                if needs_numpy:
                    print('CFLAGS="{}" '.format(cythonize_env['CFLAGS']) +
                          'LDFLAGS="{}" '.format(cythonize_env['LDFLAGS']) +
                          cythonize_cmd)
                ub.cmd(cythonize_cmd,
                       verbose=verbose,
                       check=True,
                       env=cythonize_env)
                stamp.renew()
            return True
    else:
        if verbose > 2:
            print('Cythonize not found!')
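
A minimal sketch of the CacheStamp pattern used above: the stamp records a hash of depends, expired() is True until renew() is called, and the expensive step only reruns when the dependencies change (names here are illustrative):

    import ubelt as ub
    dpath = ub.ensure_app_cache_dir('demo_stamp')
    stamp = ub.CacheStamp('demo.stamp', dpath=dpath, depends=['version-1'])
    if stamp.expired():
        # ... do the expensive work that produces the cached product ...
        stamp.renew()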
Example No. 17
def convert_file_docstrings(path_to_convert, dry=True):
    """
    path_to_convert = ub.expandpath('~/code/networkx/networkx/algorithms/isomorphism/_embeddinghelpers/balanced_sequence.py')
    """
    import ubelt as ub
    from xdoctest.core import package_calldefs
    pkg_calldefs = list(package_calldefs(path_to_convert))

    def recnone(val, default):
        return default if val is None else val

    for calldefs, modpath in pkg_calldefs:
        to_insert = []
        old_text = ub.readfrom(modpath)
        old_lines = old_text.split('\n')
        sortnames = ub.argsort(calldefs,
                               key=lambda node: recnone(node.doclineno, -1))
        for name in sortnames:
            node = calldefs[name]
            if node.docstr is not None:
                google_docstr = node.docstr
                numpy_docstr = google_to_numpy_docstr(google_docstr)
                body_lines = numpy_docstr.split('\n')
                start = node.doclineno
                stop = node.doclineno_end
                to_insert.append((start, stop, body_lines))

        to_insert = sorted(to_insert)[::-1]

        new_lines = old_lines.copy()
        for start, stop, body_lines in to_insert:
            old_middle = old_lines[start - 1:stop]
            print('old_middle = {}'.format(ub.repr2(old_middle, nl=1)))
            print('start = {!r}'.format(start))
            startline = new_lines[start - 1]
            print('startline = {!r}'.format(startline))
            ssline = startline.strip(' ')
            sq = ssline[0]
            tq = sq * 3
            n_indent = len(startline) - len(ssline)
            indent = ' ' * n_indent
            print('n_indent = {!r}'.format(n_indent))
            body_lines = [indent + line for line in body_lines]
            body_lines = [indent + tq] + body_lines + [indent + tq]
            prefix = new_lines[:start - 1]
            suffix = new_lines[stop:]
            mid = body_lines
            new_lines = prefix + mid + suffix

        new_text = '\n'.join(new_lines)
        # print(new_text)
        if dry:
            import xdev
            print(
                xdev.misc.difftext(old_text,
                                   new_text,
                                   context_lines=10,
                                   colored=True))
            print('^^^ modpath = {!r}'.format(modpath))
        else:
            ub.writeto(modpath, new_text, verbose=3)
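
A minimal sketch of the line splice used above: doclineno / doclineno_end are 1-indexed and inclusive, so the replaced block is lines[start - 1:stop]:

    old_lines = ['a', 'b', 'c', 'd']
    start, stop = 2, 3               # replace lines 2..3 ('b' and 'c')
    body_lines = ['X']
    new_lines = old_lines[:start - 1] + body_lines + old_lines[stop:]
    print(new_lines)  # -> ['a', 'X', 'd']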
Example No. 18
def grab_tiny_imagenet_as_coco():
    import ubelt as ub
    from os.path import exists, join

    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    dpath = ub.ensure_app_cache_dir('netharn', 'tiny-imagenet-200')
    dset_root = join(dpath, 'tiny-imagenet-200')

    zip_fpath = ub.grabdata(url, dpath=dpath)

    if not exists(dset_root):
        import zipfile
        with zipfile.ZipFile(zip_fpath, 'r') as zip_ref:
            zip_ref.extractall(dpath)

    tiny_imgnet_info = {
        'train': join(dset_root, 'train'),
        'test': join(dset_root, 'test'),
        'vali': join(dset_root, 'val'),

        'wnids': join(dset_root, 'wnids.txt'),
        'words': join(dset_root, 'words.txt'),
    }

    import glob
    train_annots = list(glob.glob(join(tiny_imgnet_info['train'], '*/*boxes.txt')))
    vali_annots = list(glob.glob(join(tiny_imgnet_info['vali'], 'val_annotations.txt')))

    import ndsampler

    img_root = {
        'train': join(tiny_imgnet_info['train']),
        'vali': join(tiny_imgnet_info['vali'], 'images'),
        'test': join(tiny_imgnet_info['test'], 'images'),
    }
    gpaths = {
        'train': list(glob.glob(join(tiny_imgnet_info['train'], '*/images/*.JPEG'))),
        'vali': list(glob.glob(join(tiny_imgnet_info['vali'], 'images/*.JPEG'))),
        'test': list(glob.glob(join(tiny_imgnet_info['test'], 'images/*.JPEG')))
    }
    annots_text = {
        'train': ''.join(ub.readfrom(fpath) for fpath in train_annots),
        'vali': ''.join(ub.readfrom(fpath) for fpath in vali_annots),
    }
    coco_datasets = {
        'train': ndsampler.CocoDataset(tag='tiny-imagenet-train'),
        'vali': ndsampler.CocoDataset(tag='tiny-imagenet-vali'),
    }

    for catname in (_ for _ in ub.readfrom(tiny_imgnet_info['wnids']).split('\n') if _):
        for dset in coco_datasets.values():
            dset.add_category(name=catname)

    for tag in ['train', 'vali']:
        gpaths_ = gpaths[tag]
        annots_ = annots_text[tag]
        dset = coco_datasets[tag]

        dset.img_root = img_root[tag]

        for gpath in gpaths_:
            dset.add_image(file_name=gpath)

        for line in (_ for _ in annots_.split('\n') if _):
            parts = line.split('\t')
            if tag == 'train':
                gname = parts[0]
                catname = gname.split('_')[0]
                x, y, w, h = list(map(float, parts[1:]))
                gpath = join(img_root[tag], catname, 'images', gname)
            else:
                gname, catname = parts[0:2]
                x, y, w, h = list(map(float, parts[2:]))
                gpath = join(img_root[tag], gname)

            bbox = (x, y, w + 1, h + 1)
            cat = dset.name_to_cat[catname]
            img = dset.index.file_name_to_img[gpath]

            dset.add_annotation(image_id=img['id'], bbox=bbox,
                                category_id=cat['id'])

        dset._ensure_imgsize()
        dset._build_hashid()
        print('dset.hashid = {!r}'.format(dset.hashid))

    return coco_datasets
Example No. 19
def convert_camvid_raw_to_coco(camvid_raw_info):
    """
    Converts the raw camvid format to an MSCOCO-based format (which lets us
    use kwcoco's COCO backend).

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> camvid_raw_info = grab_raw_camvid()
        >>> # test with a reduced set of data
        >>> del camvid_raw_info['img_paths'][2:]
        >>> del camvid_raw_info['mask_paths'][2:]
        >>> dset = convert_camvid_raw_to_coco(camvid_raw_info)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> dset.show_image(gid=1)
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> dset.show_image(gid=2)
    """
    import re
    import kwimage
    import kwcoco
    import ubelt as ub
    from os.path import join
    print('Converting CamVid to MS-COCO format')

    dset_root, img_paths, label_path, mask_paths = ub.take(
        camvid_raw_info,
        'dset_root, img_paths, label_path, mask_paths'.split(', '))

    img_infos = {
        'img_fname': img_paths,
        'mask_fname': mask_paths,
    }
    keys = list(img_infos.keys())
    next_vals = list(zip(*img_infos.values()))
    image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals]

    dataset = {
        'img_root': dset_root,
        'images': [],
        'categories': [],
        'annotations': [],
    }

    lines = ub.readfrom(label_path).split('\n')
    lines = [line for line in lines if line]
    for line in lines:
        color_text, name = re.split('\t+', line)
        r, g, b = map(int, color_text.split(' '))
        color = (r, g, b)

        # Parse the special camvid format
        cid = (r << 16) + (g << 8) + (b << 0)
        cat = {
            'id': cid,
            'name': name,
            'color': color,
        }
        dataset['categories'].append(cat)

    for gid, img_item in enumerate(image_items, start=1):
        img = {
            'id': gid,
            'file_name': img_item['img_fname'],
            # nonstandard image field
            'segmentation': img_item['mask_fname'],
        }
        dataset['images'].append(img)

    dset = kwcoco.CocoDataset(dataset)
    dset.rename_categories({'Void': 'background'})

    assert dset.name_to_cat['background']['id'] == 0
    dset.name_to_cat['background'].setdefault('alias', []).append('Void')

    if False:
        _define_camvid_class_hierarcy(dset)

    if 1:
        # TODO: Binarize CCs (and efficiently encode if possible)
        import numpy as np

        bad_info = []
        once = False

        # Add images
        dset.remove_annotations(list(dset.index.anns.keys()))
        for gid, img in ub.ProgIter(dset.imgs.items(),
                                    desc='parse label masks'):
            mask_fpath = join(dset_root, img['segmentation'])

            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)

            cids = set(np.unique(cid_mask)) - {0}

            for cid in cids:
                if cid not in dset.cats:
                    if gid == 618:
                        # Handle a known issue with image 618
                        c_mask = (cid == cid_mask).astype(np.uint8)
                        total_bad = c_mask.sum()
                        if total_bad < 32:
                            if not once:
                                print(
                                    'gid 618 has a few known bad pixels, ignoring them'
                                )
                                once = True
                            continue
                        else:
                            raise Exception('more bad pixels than expected')
                    else:
                        raise Exception(
                            'UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))

                    # bad_rgb = cid_to_rgb(cid)
                    # print('bad_rgb = {!r}'.format(bad_rgb))
                    # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))
                    # bad_info.append({
                    #     'gid': gid,
                    #     'cid': cid,
                    # })
                else:
                    ann = {
                        'category_id': cid,
                        'image_id': gid
                        # 'segmentation': mask.to_coco()
                    }
                    assert cid in dset.cats
                    c_mask = (cid == cid_mask).astype(np.uint8)
                    mask = kwimage.Mask(c_mask, 'c_mask')

                    box = kwimage.Boxes([mask.get_xywh()], 'xywh')
                    # box = mask.to_boxes()

                    ann['bbox'] = ub.peek(box.to_coco())
                    ann['segmentation'] = mask.to_coco()
                    dset.add_annotation(**ann)

        if 0:
            bad_cids = [i['cid'] for i in bad_info]
            print(sorted([c['color'] for c in dataset['categories']]))
            print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info])))

            gid = 618
            img = dset.imgs[gid]
            mask_fpath = join(dset_root, img['segmentation'])
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)
            cid_hist = ub.dict_hist(cid_mask.ravel())

            bad_cid_hist = {}
            for cid in bad_cids:
                bad_cid_hist[cid] = cid_hist.pop(cid)

            import kwplot
            kwplot.autompl()
            kwplot.imshow(rgb_mask)

    if 0:
        import kwplot
        plt = kwplot.autoplt()
        plt.clf()
        dset.show_image(1)

        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()

    dset._build_index()
    dset._build_hashid()
    return dset
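
The helpers rgb_to_cid and cid_to_rgb are referenced but not shown in this snippet; given the packing cid = (r << 16) + (g << 8) + b used when building the categories, they presumably look something like this (an assumption, not the project's verbatim code):

    def rgb_to_cid(r, g, b):
        # Pack an RGB triple into a single integer category id
        return (r << 16) + (g << 8) + (b << 0)

    def cid_to_rgb(cid):
        # Invert the packing back into an (r, g, b) triple
        return ((cid >> 16) & 0xFF, (cid >> 8) & 0xFF, cid & 0xFF)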
Example No. 20
def count_usage(cmdline=True):

    config = UsageConfig(cmdline=cmdline)

    import ubelt as ub
    import glob
    from os.path import join
    names = [
        'netharn',
        'ndsampler',
        'kwimage',
        'kwplot',
    ] + config['extra_modnames']

    all_fpaths = []
    for name in names:
        if name:
            repo_fpath = ub.expandpath(join('~/code', name))
            fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
            for fpath in fpaths:
                all_fpaths.append((name, fpath))

    print('names = {}'.format(ub.repr2(names)))

    import re

    modname = 'kwarray'
    module = ub.import_module_from_name(modname)

    package_name = module.__name__
    package_allvar = module.__all__

    pat = re.compile(r'\b' + package_name +
                     r'\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        # print('fpath = {!r}'.format(fpath))
        text = ub.readfrom(fpath, verbose=0)
        # text = open(fpath, 'r').read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in package_allvar:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in package_allvar:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(
            sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])

    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    if config['print_packages']:
        print(ub.repr2(pkg_to_hist, nl=2))

    if config['remove_zeros']:
        for k, v in list(usage.items()):
            if v == 0:
                usage.pop(k)

    # if config['hardcoded_ubelt_hack']:
    #     for k in list(usage):
    #         if k.startswith('util_'):
    #             usage.pop(k)
    #         if k.startswith('_util_'):
    #             usage.pop(k)
    #         # ub._util_deprecated
    #         from ubelt import _util_deprecated
    #         if k in dir(_util_deprecated):
    #             usage.pop(k)

    print(ub.repr2(usage, nl=1))
    return usage
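
A minimal sketch of what the usage regex matches (the code line is illustrative):

    import re
    pat = re.compile(r'\bkwarray\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')
    text = 'arr = kwarray.ArrayAPI.coerce(data)'
    print([m.groupdict()['attr'] for m in pat.finditer(text)])  # -> ['ArrayAPI']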
Example No. 21
def readlines(fpath):
    import ubelt as ub
    return ub.readfrom(fpath, aslines=True)
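
A minimal usage sketch; with aslines=True, ub.readfrom returns the file content as a list of lines (newline characters preserved):

    import ubelt as ub
    from os.path import join
    fpath = join(ub.ensure_app_cache_dir('demo'), 'demo.txt')
    ub.writeto(fpath, 'a\nb\n')
    print(readlines(fpath))  # -> ['a\n', 'b\n']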