Exemplo n.º 1
0
 def new_database(back):
     """Interactively create a new database and open it (File -> New Database).

     Prompts for a database name and for a parent directory (either the
     work directory or one chosen in a dialog), then checks that the parent
     directory exists and that the target directory does not. When a check
     fails the user may retry, which restarts the whole dialog; declining
     the retry aborts without creating or opening anything.

     Returns:
         None. On success the database is opened through
         ``back.open_database`` as a side effect.

     Raises:
         Exception: if ``back.user_option`` returns an unrecognised reply.
     """
     # File -> New Database
     new_db = back.user_input('Enter the new database name')
     msg_put = 'Where should I put %r?' % new_db
     opt_put = ['Choose Directory', 'My Work Dir']
     reply = back.user_option(msg_put, 'options', opt_put, True)
     if reply == opt_put[1]:
         put_dir = back.get_work_directory()
     elif reply == opt_put[0]:
         msg = 'Select where to put the new database'
         put_dir = guitools.select_directory(msg)
     elif reply is None:
         back.user_info('No Reply. Aborting new database')
         print('[*back] abort new database()')
         return None
     else:
         raise Exception('Unknown reply=%r' % reply)
     new_db_dir = join(put_dir, new_db)
     # Check the put directory exists and the new database does not exist
     msg_try = None
     if not exists(put_dir):
         msg_try = 'Directory %r does not exist.' % put_dir
     elif exists(new_db_dir):
         msg_try = 'New Database %r already exists.' % new_db_dir
     if msg_try is not None:
         opt_try = ['Try Again']
         title_try = 'New Database Failed'
         try_again = back.user_option(msg_try, title_try, opt_try, False)
         if try_again == 'Try Again':
             return back.new_database()
         # The location was rejected and the user declined to retry: stop
         # here instead of falling through and opening an invalid path.
         print('[*back] abort new database()')
         return None
     print('[*back] valid new_db_dir = %r' % new_db_dir)
     helpers.ensurepath(new_db_dir)
     back.open_database(new_db_dir)
Exemplo n.º 2
0
 def new_database(back):
     """Interactively create a new database and open it (File -> New Database).

     Asks for a database name and a parent directory (the work directory or
     one picked in a dialog). The parent must exist and the target must not;
     if either check fails the user is offered a retry, which restarts the
     dialog. Declining the retry aborts without touching the filesystem.

     Returns:
         None. On success the database is opened through
         ``back.open_database`` as a side effect.

     Raises:
         Exception: if ``back.user_option`` returns an unrecognised reply.
     """
     # File -> New Database
     new_db = back.user_input('Enter the new database name')
     msg_put = 'Where should I put %r?' % new_db
     opt_put = ['Choose Directory', 'My Work Dir']
     reply = back.user_option(msg_put, 'options', opt_put, True)
     if reply == opt_put[1]:
         put_dir = back.get_work_directory()
     elif reply == opt_put[0]:
         msg = 'Select where to put the new database'
         put_dir = guitools.select_directory(msg)
     elif reply is None:
         back.user_info('No Reply. Aborting new database')
         print('[*back] abort new database()')
         return None
     else:
         raise Exception('Unknown reply=%r' % reply)
     new_db_dir = join(put_dir, new_db)
     # Check the put directory exists and the new database does not exist
     msg_try = None
     if not exists(put_dir):
         msg_try = 'Directory %r does not exist.' % put_dir
     elif exists(new_db_dir):
         msg_try = 'New Database %r already exists.' % new_db_dir
     if msg_try is not None:
         opt_try = ['Try Again']
         title_try = 'New Database Failed'
         try_again = back.user_option(msg_try, title_try, opt_try, False)
         if try_again == 'Try Again':
             return back.new_database()
         # Validation failed and no retry was requested: abort rather than
         # creating/opening a location that was just reported as invalid.
         print('[*back] abort new database()')
         return None
     print('[*back] valid new_db_dir = %r' % new_db_dir)
     helpers.ensurepath(new_db_dir)
     back.open_database(new_db_dir)
Exemplo n.º 3
0
def convert_named_chips(db_dir, img_dpath=None):
    """Build chip, name and image tables for a directory of named chips.

    Every image file name is parsed with the format ``'{}_{:d}.jpg'``
    (``<name>_<num>.jpg``). Each image becomes exactly one chip whose ROI
    spans the whole image, and the three tables are written into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    Args:
        db_dir: database directory.
        img_dpath: directory holding the images; defaults to
            ``db_dir + '/images'``.

    Raises:
        TypeError: presumably when a file name does not match the format
            (the parse result is unpacked directly) -- confirm against the
            ``parse`` library.
    """
    print('\n --- Convert Named Chips ---')
    # --- Initialize ---
    gt_format = '{}_{:d}.jpg'
    print('gt_format (name, num) = %r' % gt_format)
    if img_dpath is None:
        img_dpath = db_dir + '/images'
    print('Converting db_dir=%r and img_dpath=%r' % (db_dir, img_dpath))
    # --- Build Image Table ---
    helpers.print_('Building image table: ')
    gx2_gname = helpers.list_images(img_dpath)
    # list(...) so the ids stay a real list on Python 3 as well
    gx2_gid = list(range(1, len(gx2_gname) + 1))
    print('There are %d images' % len(gx2_gname))
    # ---- Build Name Table ---
    helpers.print_('Building name table: ')
    # Parse every file name once; the result is reused for the chip table
    gx2_name = []
    for gname in gx2_gname:
        name, _num = parse.parse(gt_format, gname)
        gx2_name.append(name)
    name_set = set(gx2_name)
    # The first two slots are placeholders for the unknown name
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + list(range(2, len(name_set) + 2))
    print('There are %d names' % (len(nx2_name) - 2))
    # ---- Build Chip Table ---
    print('[converdb] Building chip table: ')
    # First-occurrence lookups: same result as list.index(), O(1) per chip
    name2_nx = {}
    for nx, name in enumerate(nx2_name):
        name2_nx.setdefault(name, nx)
    gname2_gx = {}
    for gx, gname in enumerate(gx2_gname):
        gname2_gx.setdefault(gname, gx)
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    for gname, name in zip(gx2_gname, gx2_name):
        img_fpath = join(img_dpath, gname)
        (w, h) = Image.open(img_fpath).size
        # One chip per image; chip ids are 1-based and sequential
        cx2_cid.append(len(cx2_cid) + 1)
        cx2_roi.append([1, 1, w, h])
        cx2_nx.append(name2_nx[name])
        cx2_gx.append(gname2_gx[gname])
        cx2_theta.append(0)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # Report the real number of chips (the last cid equals the count)
    print('There are %d chips' % len(cx2_cid))

    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
Exemplo n.º 4
0
def convert_named_chips(db_dir, img_dpath=None):
    """Create chip, name and image tables from images named ``<name>_<num>.jpg``.

    File names are parsed with ``'{}_{:d}.jpg'``. Each image yields one chip
    whose ROI covers the full image; the resulting tables are written to
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    Args:
        db_dir: database directory.
        img_dpath: image directory; defaults to ``db_dir + '/images'``.

    Raises:
        TypeError: presumably when a file name does not match the format
            (the parse result is unpacked directly) -- confirm against the
            ``parse`` library.
    """
    print('\n --- Convert Named Chips ---')
    # --- Initialize ---
    gt_format = '{}_{:d}.jpg'
    print('gt_format (name, num) = %r' % gt_format)
    if img_dpath is None:
        img_dpath = db_dir + '/images'
    print('Converting db_dir=%r and img_dpath=%r' % (db_dir, img_dpath))
    # --- Build Image Table ---
    helpers.print_('Building image table: ')
    gx2_gname = helpers.list_images(img_dpath)
    # list(...) so the ids stay a real list on Python 3 as well
    gx2_gid   = list(range(1, len(gx2_gname) + 1))
    print('There are %d images' % len(gx2_gname))
    # ---- Build Name Table ---
    helpers.print_('Building name table: ')
    # Parse every file name once; the result is reused for the chip table
    gx2_name = []
    for gname in gx2_gname:
        name, _num = parse.parse(gt_format, gname)
        gx2_name.append(name)
    name_set = set(gx2_name)
    # The first two slots are placeholders for the unknown name
    nx2_name  = ['____', '____'] + list(name_set)
    nx2_nid   = [1, 1] + list(range(2, len(name_set) + 2))
    print('There are %d names' % (len(nx2_name) - 2))
    # ---- Build Chip Table ---
    print('[converdb] Building chip table: ')
    # First-occurrence lookups: same result as list.index(), O(1) per chip
    name2_nx = {}
    for nx, name in enumerate(nx2_name):
        name2_nx.setdefault(name, nx)
    gname2_gx = {}
    for gx, gname in enumerate(gx2_gname):
        gname2_gx.setdefault(gname, gx)
    cx2_cid     = []
    cx2_theta   = []
    cx2_roi     = []
    cx2_nx      = []
    cx2_gx      = []
    for gname, name in zip(gx2_gname, gx2_name):
        img_fpath = join(img_dpath, gname)
        (w, h) = Image.open(img_fpath).size
        # One chip per image; chip ids are 1-based and sequential
        cx2_cid.append(len(cx2_cid) + 1)
        cx2_roi.append([1, 1, w, h])
        cx2_nx.append(name2_nx[name])
        cx2_gx.append(gname2_gx[gname])
        cx2_theta.append(0)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # Report the real number of chips (the last cid equals the count)
    print('There are %d chips' % len(cx2_cid))

    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
Exemplo n.º 5
0
def init_database_from_images(db_dir,
                              img_dpath=None,
                              gt_format=None,
                              allow_unknown_chips=False):
    """Initialise the database tables from a directory of images.

    Each image whose size can be read becomes one chip. When ``gt_format``
    is given, the chip name is parsed from the image file name; otherwise
    every chip is unknown (``'____'``).

    Args:
        db_dir: database directory; tables are written to
            ``join(db_dir, ld2.RDIR_INTERNAL2)``.
        img_dpath: image directory; defaults to ``db_dir + '/images'``.
        gt_format: ``parse``-style format yielding ``(name, num)`` from a
            file name, or None for no ground truth.
        allow_unknown_chips: when False, images whose name is ``'____'``
            are not added as chips.

    Returns:
        True once the tables have been written.
    """
    # --- Initialize ---
    if img_dpath is None:
        img_dpath = db_dir + '/images'
    print('Converting db_dir=%r and img_dpath=%r' % (db_dir, img_dpath))
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    name_set = groundtruth_from_imagenames(gx2_gname, gt_format)
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # ---- Build Chip Table ---
    helpers.print_('Building chip table: ')
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    # First-occurrence lookup: same result as gx2_gname.index(), but O(1)
    gname2_gx = {}
    for gx, gname in enumerate(gx2_gname):
        gname2_gx.setdefault(gname, gx)

    def add_to_hs_tables(gname, name, roi, theta=0):
        """Append one chip to the parallel chip lists and return its id."""
        cid = len(cx2_cid) + 1
        # Kept as list.index so an unknown name still raises ValueError
        nx = nx2_name.index(name)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gname2_gx[gname])
        cx2_theta.append(theta)
        return cid

    for gname in gx2_gname:
        if gt_format is None:
            name = '____'
        else:
            name, _num = parse.parse(gt_format, gname)
        if name == '____' and not allow_unknown_chips:
            continue
        img_fpath = join(img_dpath, gname)
        roi = roi_from_imgsize(img_fpath)
        # A None roi means the image size could not be determined; skip it
        if roi is not None:
            add_to_hs_tables(gname, name, roi)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # Report the real number of chips (the last cid equals the count)
    print('There are %d chips' % len(cx2_cid))

    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    return True
Exemplo n.º 6
0
def save_if_requested(hs, subdir):
    """Save the current figure when ``hs.args.save_figures`` is set.

    The figure is written below ``hs.dirs.result_dir``; when ``subdir`` is
    not None it is sanitised and used as a sub-directory, which is created
    if needed. The drawing state is reset after saving. Does nothing when
    figure saving is disabled.
    """
    if hs.args.save_figures:
        out_dpath = hs.dirs.result_dir
        if subdir is not None:
            clean_subdir = helpers.sanatize_fname2(subdir)
            out_dpath = join(out_dpath, clean_subdir)
            helpers.ensurepath(out_dpath)
        df2.save_figure(fpath=out_dpath, usetitle=True)
        df2.reset()
Exemplo n.º 7
0
 def add_images(hs, fpath_list, move_images=True):
     """Add image files to the image table, optionally copying them first.

     Args:
         fpath_list: paths of the images to add.
         move_images: when True, each image is copied into
             ``hs.dirs.img_dir`` unless a file with the same base name is
             already there; when False the original paths are indexed.

     Returns:
         The number of images that were not yet in ``hs.tables.gx2_gname``
         and have been appended to it.
     """
     nImages = len(fpath_list)
     print('[hs.add_imgs] adding %d images' % nImages)
     img_dir = hs.dirs.img_dir
     copy_list = []
     helpers.ensurepath(img_dir)
     if move_images:
         # Build lists of where the new images will be
         fpath_list2 = [
             join(img_dir,
                  split(fpath)[1]) for fpath in fpath_list
         ]
         copy_iter = izip(fpath_list, fpath_list2)
         copy_list = [(src, dst) for src, dst in copy_iter
                      if not exists(dst)]
         nExist = len(fpath_list2) - len(copy_list)
         print('[hs] copying %d images' % len(copy_list))
         print('[hs] %d images already exist' % nExist)
         # RCOS TODO: Copying like this should be a helper function.
         # It appears in multiple places
         # Also there should be the option of parallelization? IDK, these are
         # disk writes, but it still might help.
         mark_progress, end_progress = helpers.progress_func(
             len(copy_list), lbl='Copying Image')
         for count, (src, dst) in enumerate(copy_list):
             shutil.copy(src, dst)
             mark_progress(count)
         end_progress()
     else:
         print('[hs.add_imgs] using original image paths')
         fpath_list2 = fpath_list
     # Get location of the new images relative to the image dir
     gx2_gname = hs.tables.gx2_gname.tolist()
     gx2_aif = hs.tables.gx2_aif.tolist()
     relpath_list = [relpath(fpath, img_dir) for fpath in fpath_list2]
     current_gname_set = set(gx2_gname)
     # Check to make sure the gnames are not currently indexed
     new_gnames = [
         gname for gname in relpath_list if gname not in current_gname_set
     ]
     # Every new image gets a False entry in the parallel gx2_aif table
     new_aifs = [False] * len(new_gnames)
     nNewImages = len(new_gnames)
     nIndexed = nImages - nNewImages
     print('[hs.add_imgs] new_gnames:\n' + '\n'.join(new_gnames))
     print('[hs.add_imgs] %d images already indexed.' % nIndexed)
     # Report the number actually added, not the already-indexed count
     print('[hs.add_imgs] Added %d new images.' % nNewImages)
     # Append the new gnames to the hotspotter table
     hs.tables.gx2_gname = np.array(gx2_gname + new_gnames)
     hs.tables.gx2_aif = np.array(gx2_aif + new_aifs)
     hs.update_samples()
     return nNewImages
Exemplo n.º 8
0
def init_database_from_images(db_dir, img_dpath=None, gt_format=None,
                              allow_unknown_chips=False):
    """Initialise the database tables from a directory of images.

    Each image whose size can be read becomes one chip. When ``gt_format``
    is given, the chip name is parsed from the image file name; otherwise
    every chip is unknown (``'____'``).

    Args:
        db_dir: database directory; tables are written to
            ``join(db_dir, ld2.RDIR_INTERNAL2)``.
        img_dpath: image directory; defaults to ``db_dir + '/images'``.
        gt_format: ``parse``-style format yielding ``(name, num)`` from a
            file name, or None for no ground truth.
        allow_unknown_chips: when False, images whose name is ``'____'``
            are not added as chips.

    Returns:
        True once the tables have been written.
    """
    # --- Initialize ---
    if img_dpath is None:
        img_dpath = db_dir + '/images'
    print('Converting db_dir=%r and img_dpath=%r' % (db_dir, img_dpath))
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    name_set = groundtruth_from_imagenames(gx2_gname, gt_format)
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # ---- Build Chip Table ---
    helpers.print_('Building chip table: ')
    cx2_cid     = []
    cx2_theta   = []
    cx2_roi     = []
    cx2_nx      = []
    cx2_gx      = []
    # First-occurrence lookup: same result as gx2_gname.index(), but O(1)
    gname2_gx = {}
    for gx, gname in enumerate(gx2_gname):
        gname2_gx.setdefault(gname, gx)

    def add_to_hs_tables(gname, name, roi, theta=0):
        """Append one chip to the parallel chip lists and return its id."""
        cid = len(cx2_cid) + 1
        # Kept as list.index so an unknown name still raises ValueError
        nx = nx2_name.index(name)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gname2_gx[gname])
        cx2_theta.append(theta)
        return cid

    for gname in gx2_gname:
        if gt_format is None:
            name = '____'
        else:
            name, _num = parse.parse(gt_format, gname)
        if name == '____' and not allow_unknown_chips:
            continue
        img_fpath = join(img_dpath, gname)
        roi = roi_from_imgsize(img_fpath)
        # A None roi means the image size could not be determined; skip it
        if roi is not None:
            add_to_hs_tables(gname, name, roi)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # Report the real number of chips (the last cid equals the count)
    print('There are %d chips' % len(cx2_cid))

    # Write tables
    internal_dir      = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    return True
Exemplo n.º 9
0
def dump(hs, subdir=None, quality=False, overwrite=False):
    """Save the current figure into the result directory and reset drawing.

    ``quality`` selects the figure preset: exactly True gives the large
    preset, exactly False the small one; any other value leaves the
    current ``df2`` settings untouched. When ``subdir`` is given the figure
    goes into that sub-directory of ``hs.dirs.result_dir`` (created if
    needed). ``overwrite`` is forwarded to ``df2.save_figure``.
    """
    # Identity checks on purpose: only the bool singletons pick a preset
    if quality is True:
        preset = (12, 120, 'small')
    elif quality is False:
        preset = (8, 90, 'smaller')
    else:
        preset = None
    if preset is not None:
        golden_arg, dpi, font_name = preset
        df2.FIGSIZE = df2.golden_wh2(golden_arg)
        df2.DPI = dpi
        df2.FONTS.figtitle = getattr(df2.FONTS, font_name)
    out_dpath = hs.dirs.result_dir
    if subdir is not None:
        out_dpath = join(out_dpath, subdir)
        helpers.ensurepath(out_dpath)
    df2.save_figure(fpath=out_dpath, usetitle=True, overwrite=overwrite)
    df2.reset()
Exemplo n.º 10
0
def __dump_text_report(allres, report_type='rankres_str'):
    """Write one of the text reports stored on ``allres`` to CSV files.

    The report string is looked up as ``allres.__dict__[report_type]`` and
    written twice: to ``<result_dir>/<report_type><suffix>.csv`` and to a
    timestamped copy under ``<result_dir>/timestamped_results``.

    Args:
        allres: results object providing ``hs.dirs.result_dir``,
            ``title_suffix`` and the report attribute.
        report_type: name of the report attribute; None or omitted selects
            ``'rankres_str'``.

    Raises:
        KeyError: if ``allres`` has no instance attribute ``report_type``.
    """
    # The old "'report_type' in vars()" test was always true for a
    # parameter, so the intended default could never take effect.
    if report_type is None:
        report_type = 'rankres_str'
    print('[rr2] Dumping textfile: ' + report_type)
    report_str = allres.__dict__[report_type]
    # Get directories
    result_dir = allres.hs.dirs.result_dir
    timestamp_dir = join(result_dir, 'timestamped_results')
    helpers.ensurepath(timestamp_dir)
    helpers.ensurepath(result_dir)
    # Write to timestamp and result dir
    timestamp = helpers.get_timestamp()
    csv_timestamp_fname = report_type + allres.title_suffix + timestamp + '.csv'
    csv_timestamp_fpath = join(timestamp_dir, csv_timestamp_fname)
    csv_fname = report_type + allres.title_suffix + '.csv'
    csv_fpath = join(result_dir, csv_fname)
    helpers.write_to(csv_fpath, report_str)
    helpers.write_to(csv_timestamp_fpath, report_str)
Exemplo n.º 11
0
def __dump_text_report(allres, report_type='rankres_str'):
    """Write one of the text reports stored on ``allres`` to CSV files.

    The report string is looked up as ``allres.__dict__[report_type]`` and
    written twice: to ``<result_dir>/<report_type><suffix>.csv`` and to a
    timestamped copy under ``<result_dir>/timestamped_results``.

    Args:
        allres: results object providing ``hs.dirs.result_dir``,
            ``title_suffix`` and the report attribute.
        report_type: name of the report attribute; None or omitted selects
            ``'rankres_str'``.

    Raises:
        KeyError: if ``allres`` has no instance attribute ``report_type``.
    """
    # The old "'report_type' in vars()" test was always true for a
    # parameter, so the intended default could never take effect.
    if report_type is None:
        report_type = 'rankres_str'
    print('[rr2] Dumping textfile: ' + report_type)
    report_str = allres.__dict__[report_type]
    # Get directories
    result_dir    = allres.hs.dirs.result_dir
    timestamp_dir = join(result_dir, 'timestamped_results')
    helpers.ensurepath(timestamp_dir)
    helpers.ensurepath(result_dir)
    # Write to timestamp and result dir
    timestamp = helpers.get_timestamp()
    csv_timestamp_fname = report_type + allres.title_suffix + timestamp + '.csv'
    csv_timestamp_fpath = join(timestamp_dir, csv_timestamp_fname)
    csv_fname  = report_type + allres.title_suffix + '.csv'
    csv_fpath = join(result_dir, csv_fname)
    helpers.write_to(csv_fpath, report_str)
    helpers.write_to(csv_timestamp_fpath, report_str)
Exemplo n.º 12
0
 def add_images(hs, fpath_list, move_images=True):
     """Add image files to the image table, optionally copying them first.

     Args:
         fpath_list: paths of the images to add.
         move_images: when True, each image is copied into
             ``hs.dirs.img_dir`` unless a file with the same base name is
             already there; when False the original paths are indexed.

     Returns:
         The number of images that were not yet in ``hs.tables.gx2_gname``
         and have been appended to it.
     """
     nImages = len(fpath_list)
     print('[hs.add_imgs] adding %d images' % nImages)
     img_dir = hs.dirs.img_dir
     copy_list = []
     helpers.ensurepath(img_dir)
     if move_images:
         # Build lists of where the new images will be
         fpath_list2 = [join(img_dir, split(fpath)[1]) for fpath in fpath_list]
         copy_iter = izip(fpath_list, fpath_list2)
         copy_list = [(src, dst) for src, dst in copy_iter if not exists(dst)]
         nExist = len(fpath_list2) - len(copy_list)
         print('[hs] copying %d images' % len(copy_list))
         print('[hs] %d images already exist' % nExist)
         # RCOS TODO: Copying like this should be a helper function.
         # It appears in multiple places
         # Also there should be the option of parallelization? IDK, these are
         # disk writes, but it still might help.
         mark_progress, end_progress = helpers.progress_func(len(copy_list), lbl='Copying Image')
         for count, (src, dst) in enumerate(copy_list):
             shutil.copy(src, dst)
             mark_progress(count)
         end_progress()
     else:
         print('[hs.add_imgs] using original image paths')
         fpath_list2 = fpath_list
     # Get location of the new images relative to the image dir
     gx2_gname = hs.tables.gx2_gname.tolist()
     gx2_aif   = hs.tables.gx2_aif.tolist()
     relpath_list = [relpath(fpath, img_dir) for fpath in fpath_list2]
     current_gname_set = set(gx2_gname)
     # Check to make sure the gnames are not currently indexed
     new_gnames = [gname for gname in relpath_list if gname not in current_gname_set]
     # Every new image gets a False entry in the parallel gx2_aif table
     new_aifs   = [False] * len(new_gnames)
     nNewImages = len(new_gnames)
     nIndexed = nImages - nNewImages
     print('[hs.add_imgs] new_gnames:\n' + '\n'.join(new_gnames))
     print('[hs.add_imgs] %d images already indexed.' % nIndexed)
     # Report the number actually added, not the already-indexed count
     print('[hs.add_imgs] Added %d new images.' % nNewImages)
     # Append the new gnames to the hotspotter table
     hs.tables.gx2_gname = np.array(gx2_gname + new_gnames)
     hs.tables.gx2_aif   = np.array(gx2_aif   + new_aifs)
     hs.update_samples()
     return nNewImages
Exemplo n.º 13
0
    return roi


#%%
# =============================================================================
#     Initialization -  dataset
# =============================================================================
# Target database directory; `dpath` and `new_db` are defined outside this
# excerpt.
db_dir = join(dpath, new_db)

#------ function:[hsgui]-[guitools]- def select_directory
# The statements below appear to be an inlined copy of the GUI helper named
# above (per that marker comment); `caption` is not used in this excerpt.
caption = 'Select Directory'
print('Selected Directory: %r' % dpath)
# Remember the parent of the chosen directory and the database directory in
# the global cache.
io.global_cache_write('select_directory', split(dpath)[0])
print('[*back] valid new_db_dir = %r' % db_dir)
io.global_cache_write('db_dir', db_dir)
helpers.ensurepath(db_dir)

# When bit 0 of Flag_new_db is set, remove the database directory.
# NOTE(review): this runs right after ensurepath(db_dir), so the directory
# is created and then deleted before open_database -- confirm intended.
if Flag_new_db & 1:
    if exists(db_dir):
        shutil.rmtree(db_dir)

defaultdb = None
preload = False

# defaultdb is None here, so the second argument is always False.
args = parse_arguments(defaultdb, defaultdb == 'cache')
# --- Build HotSpotter API ---
hs = open_database(db_dir)

#%%
# =============================================================================
#     Initialization -  images
Exemplo n.º 14
0
def wildid_to_tables(db_dir, img_dpath, column_labels, column_list):
    """Convert a column-oriented WildID spreadsheet into HotSpotter tables.

    Every spreadsheet row names one animal and two images. Each readable
    image becomes a chip whose ROI comes from ``roi_from_imgsize``; images
    in ``img_dpath`` that the spreadsheet does not mention are appended as
    unknown (``'____'``) chips with ``'NA'`` properties. The chip, name and
    image tables are written to ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    Args:
        db_dir: database directory.
        img_dpath: directory holding the images.
        column_labels: column header strings.
        column_list: the columns, parallel to ``column_labels``; each is a
            sequence of row values and all must have the same length.

    Raises:
        AssertionError: if the columns differ in length, or an image or a
            chip id ends up in the chip table twice.
        Exception: if a required column label is missing.
    """
    row_lengths = [len(col) for col in column_list]
    num_rows = row_lengths[0]
    assert all([num_rows == rowlen for rowlen in row_lengths
                ]), 'number of rows in xlsx file must be consistent'
    #header = 'Converted from: '+repr(xlsx_fpath)
    #csv_string = ld2.make_csv_table(column_labels, column_list, header)
    # Get Image set
    print('[convert] Building image table')
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    # Get name set
    print('[convert] Building name table')

    def get_lbl_pos(column_labels, valid_labels):
        """Return the column index of the first label that is present."""
        for lbl in valid_labels:
            index = helpers.listfind(column_labels, lbl)
            if index is not None:
                return index
        raise Exception('There is no valid label')

    name_colx = get_lbl_pos(column_labels, ['ANIMAL_ID', 'AnimalID'])
    name_set = set(column_list[name_colx])
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # Get chip set
    print('[convert] build chip table')
    # ---------
    # This format has multiple images per row
    chips_per_name = 2  # this is apparently always 2

    def get_multiprop_colx_list(prefix):
        """Return the column indexes of ``prefix1`` .. ``prefixN``."""
        colx_list = []
        for num in xrange(chips_per_name):
            lbl = prefix + str(num + 1)
            colx = get_lbl_pos(column_labels, [lbl])
            colx_list.append(colx)
        return colx_list

    # ---------
    # Essential properties
    #prop2_colx_list = {}
    try:
        image_colx_list = get_multiprop_colx_list('IMAGE_')
    except Exception:
        # Fall back to the alternative header spelling
        image_colx_list = get_multiprop_colx_list('Image')
    # ---------
    # Nonessential multi-properties (columns that are simply absent are
    # skipped on purpose)
    try_multiprops = ['DATE_NO']
    multiprop2_colx = {}
    for key in try_multiprops:
        try:
            multiprop2_colx[key] = get_multiprop_colx_list(key)
        except Exception:
            pass
    # ---------
    # Nonessential single-properties
    try_props = ['SEX']
    prop2_colx = {}
    for key in try_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            prop2_colx[key] = other_colx
        except Exception:
            pass
    # ---------
    # Nonessential pairwise-properties
    try_match_props = ['matches', 'WildID_score']
    pairprop2_colx = {}
    for key in try_match_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            pairprop2_colx[key] = other_colx
        except Exception:
            pass
    # ---------
    # Build tables
    cx2_cid = []
    cx2_theta = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {}
    pairwise_dict = {}
    gnameroi_to_cid = {}
    for key in prop2_colx.keys():
        prop_dict[key] = []
    for key in multiprop2_colx.keys():
        prop_dict[key] = []
    # Only used by the debug print below when no chip is ever added
    cid = 1

    def wildid_add_to_hs_tables(gname, name, roi, theta=0, **kwargs):
        """Append one chip (and its properties) and return its chip id."""
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        for key, val in kwargs.iteritems():
            prop_dict[key].append(val)
        cx2_theta.append(theta)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    # ---------
    # Wildid parsing
    bad_rows = 0
    for rowx in xrange(num_rows):
        name = column_list[name_colx][rowx]
        tbl_kwargs2 = {
            key: column_list[val][rowx]
            for key, val in prop2_colx.iteritems()
        }
        pairwise_vals = [
            column_list[colx][rowx] for colx in pairprop2_colx.values()
        ]
        cid_tup = []
        for num in xrange(
                chips_per_name):  # TODO: This is always just pairwise
            img_colx = image_colx_list[num]
            gname = column_list[img_colx][rowx]
            # Per-image properties first; single-valued row properties
            # override an equally named per-image property.
            tbl_kwargs = {
                key: column_list[val[num]][rowx]
                for key, val in multiprop2_colx.iteritems()
            }
            tbl_kwargs.update(tbl_kwargs2)
            img_fpath = join(img_dpath, gname)
            roi = roi_from_imgsize(img_fpath, silent=True)
            if roi is None:
                bad_rows += 1
                if not exists(img_fpath):
                    print('nonexistant image: %r' % gname)
                else:
                    print('corrupted image: %r' % gname)
                continue
            gnameroi = (gname, tuple(roi))
            # Reuse the chip when the same image/roi was already added
            if gnameroi in gnameroi_to_cid:
                cid = gnameroi_to_cid[gnameroi]
            else:
                cid = wildid_add_to_hs_tables(gname, name, roi, **tbl_kwargs)
                gnameroi_to_cid[gnameroi] = cid
            cid_tup.append(cid)
        pairwise_dict[tuple(cid_tup)] = pairwise_vals

    print('bad_rows = %r ' % bad_rows)
    print('num_rows = %r ' % num_rows)
    print('chips_per_name = %r ' % chips_per_name)
    print('cid = %r ' % cid)

    print('num pairwise properties: %r' % len(pairwise_dict))
    print('implementation of pairwise properties does not yet exist')

    num_known_chips = len(cx2_cid)
    print('[convert] Added %r known chips.' % num_known_chips)
    # Add the rest of the nongroundtruthed chips
    print('[convert] Adding unknown images to table')

    # Check that images were unique
    unique_gx = np.unique(np.array(cx2_gx))
    print('len(cx2_gx)=%r' % len(cx2_gx))
    print('len(unique_gx)=%r' % len(unique_gx))
    assert len(cx2_gx) == len(unique_gx), \
        'There are images specified twice'

    # Check that cids were unique
    cx2_cid_arr = np.array(cx2_cid)
    valid_cids = cx2_cid_arr[np.where(cx2_cid_arr > 0)[0]]
    unique_cids = np.unique(valid_cids)
    print('len(cx2_cid)     = %r' % len(cx2_cid))
    print('len(valid_cids)  = %r' % len(valid_cids))
    print('len(unique_cids) = %r' % len(unique_cids))
    assert len(valid_cids) == len(unique_cids), \
        'There are chipids specified twice'

    known_gx_set = set(cx2_gx)
    # Unknown chips carry 'NA' for every property; same for all of them
    unknown_kwargs = dict.fromkeys(multiprop2_colx, 'NA')
    unknown_kwargs.update(dict.fromkeys(prop2_colx, 'NA'))
    for gx, gname in enumerate(gx2_gname):
        if gx in known_gx_set:
            continue
        name = '____'
        roi = roi_from_imgsize(join(img_dpath, gname), silent=False)
        if roi is not None:
            cid = wildid_add_to_hs_tables(gname, name, roi, **unknown_kwargs)
    num_unknown_chips = len(cx2_cid) - num_known_chips
    print('[convert] Added %r more unknown chips.' % num_unknown_chips)
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # Report the real chip count; `cid` is only the last id touched
    print('[convert] There are %d chips' % len(cx2_cid))
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    print('[convert] finished conversion')
Exemplo n.º 15
0
def convert_from_oxford_style(db_dir):
    """Convert an Oxford-style ground-truth dataset into HotSpotter tables.

    Reads the ground-truth files in ``<db_dir>/oxford_style_gt`` and the
    images below ``<db_dir>/images``, builds the chip, name and image
    tables, and writes them to ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    Query chips are added first, then one chip per image that has other
    ground truth, then one whole-image ``'____'`` chip for every remaining
    image that can be opened.

    Raises:
        AssertionError: if a query image has no non-query ground truth or
            appears as a query more than once.
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath      = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    # The optional corrupted_files.txt lists one image name per line; those
    # images are left out of the image list built below.
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set([])
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_list = f.read().splitlines()
        corrupted_gname_set = set(corrupted_gname_list)

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath  = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Paths are made relative to img_dpath, use forward slashes, and have
    # every './' occurrence removed.
    gname_list_ = [join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
                   for (root, dlist, flist) in os.walk(img_dpath)
                   for fname in iter(flist)]
    gname_list = [gname for gname in iter(gname_list_)
                  if not gname in corrupted_gname_set and helpers.matches_image(gname)]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips  = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # Overwrite the previous 'xxxx/yyyy' progress counter in place
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        # Flush on every tenth file (whenever gtx % 10 == 1)
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name,
                                                      quality, img_dpath,
                                                      corrupted_gname_set)
        # Query chips are kept in their own list; everything else is
        # grouped by image name.
        if quality == 'query':
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Entries are compared through their repr; the surviving entries follow
    # the iteration order of a set, not the order they were read in.
    gname2_chips     = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, val in gname2_chips_raw.iteritems():
        val_repr = map(repr, val)
        unique_reprs = set(val_repr)
        unique_indexes = [val_repr.index(urep) for urep in unique_reprs]
        for ux in unique_indexes:
            gname2_chips[gname].append(val[ux])
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    gname_with_groundtruth_list = gname2_chips.keys()
    gname_without_groundtruth_list = np.setdiff1d(gname_list, gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Index 0 and 1 both hold the unknown name '____' with name id 1
    nx2_name  = ['____', '____'] + list(name_set)
    nx2_nid   = [1, 1] + range(2, len(name_set) + 2)
    gx2_gid   = range(1, len(gx2_gname) + 1)

    cx2_cid     = []
    cx2_theta   = []
    cx2_quality = []
    cx2_roi     = []
    cx2_nx      = []
    cx2_gx      = []
    prop_dict   = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        # Append one chip to the parallel chip lists and return its 1-based
        # chip id.
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        # '____' is found at index 0; use the second placeholder slot instead
        if nx == 0:
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    # Query chips first, so they receive the lowest chip ids
    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    # NOTE(review): a query image asserted above to also have non-query
    # ground truth gets a second chip here -- confirm this is intended.
    for gname in gname2_chips.keys():
        if len(gname2_chips[gname]) == 1:
            (name, roi, quality) = gname2_chips[gname][0]
            add_to_hs_tables(gname, name, roi, quality)
        else:
            # just take the first name. This is foobar
            names, rois, qualities = zip(*gname2_chips[gname])
            add_to_hs_tables(gname, names[0], rois[0], qualities[0])
    # Images without any ground truth become whole-image unknown chips;
    # images that cannot be opened are reported and skipped.
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    # cx2_quality is collected above but is not passed to the chip table
    internal_dir      = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
Exemplo n.º 16
0
def convert_from_oxford_style(db_dir):
    """Convert an Oxford-style groundtruth database into hotspotter tables.

    Reads the groundtruth files found in ``<db_dir>/oxford_style_gt`` and the
    image files found under ``<db_dir>/images``, builds chip, name and image
    tables from them and writes those tables into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    Chips are added in three passes: query chips first, then one chip per
    image that has non-query groundtruth, then one whole-image chip (named
    ``'____'``) for every image without groundtruth.

    Args:
        db_dir: root directory of the database. It is expected to contain the
            ``oxford_style_gt`` and ``images`` sub-directories (both are
            passed to ``helpers.assertpath``).

    Raises:
        AssertionError: if a query image has no groundtruth entry or the same
            image is used by more than one query.
        ValueError: if a groundtruth chip refers to a name or an image that
            is not present in the name / image lists (from ``list.index``).
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset).
    # The optional file lists one image name per line; those are ignored.
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set()
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_set = set(f.read().splitlines())

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Normalise to forward slashes and strip the './' that relpath yields for
    # files located directly inside img_dpath.
    gname_list_ = [
        join(relpath(root, img_dpath), fname).replace('\\',
                                                      '/').replace('./', '')
        for (root, dlist, flist) in os.walk(img_dpath) for fname in flist
    ]
    gname_list = [
        gname for gname in gname_list_
        if gname not in corrupted_gname_set and helpers.matches_image(gname)
    ]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set()
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # Backspace over the previous counter to update progress in place
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath,
                                                      corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in oxsty_chip_info_sublist:
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in oxsty_chip_info_sublist:
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Entries are compared through their repr (presumably because the roi may
    # be an unhashable list -- TODO confirm). First-seen order is preserved so
    # that "the first name" picked below does not depend on set ordering.
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, val in gname2_chips_raw.items():
        seen_reprs = set()
        for chip_info in val:
            chip_repr = repr(chip_info)
            if chip_repr not in seen_reprs:
                seen_reprs.add(chip_repr)
                gname2_chips[gname].append(chip_info)
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    # Materialise the keys: numpy set routines need a real sequence
    gname_with_groundtruth_list = list(gname2_chips)
    gname_without_groundtruth_list = np.setdiff1d(gname_list,
                                                  gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' %
          len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' %
          len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' %
          len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Indexes 0 and 1 both hold the unknown name '____' and share name id 1
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + list(range(2, len(name_set) + 2))
    gx2_gid = list(range(1, len(gx2_gname) + 1))

    cx2_cid = []
    cx2_theta = []
    cx2_quality = []  # collected, but not passed to any table writer below
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        """Append one chip to the chip columns and return its 1-based id."""
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        if nx == 0:
            # list.index finds the first '____'; use index 1 instead
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname, chip_infos in gname2_chips.items():
        # just take the first name. This is foobar
        (name, roi, quality) = chip_infos[0]
        add_to_hs_tables(gname, name, roi, quality)
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            # Images without groundtruth get a single whole-image chip
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            # Best effort: report the unreadable image and keep going
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    # Translate per-chip name/image indexes into name/image ids
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
Exemplo n.º 17
0
def wildid_to_tables(db_dir, img_dpath, column_labels, column_list):
    """Build and write hotspotter tables from column-oriented WildID data.

    Every row names one animal and ``chips_per_name`` (2) images of it. Each
    readable image becomes one whole-image chip; images in ``img_dpath`` that
    no row refers to are added afterwards as chips of the unknown name
    ``'____'``. The chip, name and image tables are written into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.

    Args:
        db_dir: database root directory that receives the tables.
        img_dpath: directory containing the image files named in the rows.
        column_labels: column header labels, parallel to ``column_list``.
        column_list: list of columns; every column holds one value per row.

    Raises:
        AssertionError: if the columns differ in length, if an image ends up
            in more than one chip, or if chip ids are not unique.
        Exception: if neither an animal-id column nor the image columns can
            be found in ``column_labels``.
    """
    row_lengths = [len(col) for col in column_list]
    num_rows = row_lengths[0]
    assert all([num_rows == rowlen for rowlen in row_lengths]), 'number of rows in xlsx file must be consistent'
    #header = 'Converted from: '+repr(xlsx_fpath)
    #csv_string = ld2.make_csv_table(column_labels, column_list, header)
    # Get Image set
    print('[convert] Building image table')
    gx2_gid, gx2_gname = imagetables_from_img_dpath(img_dpath)
    # Get name set
    print('[convert] Building name table')

    def get_lbl_pos(column_labels, valid_labels):
        """Return the column index of the first label in valid_labels found."""
        for lbl in valid_labels:
            index = helpers.listfind(column_labels, lbl)
            if index is not None:
                return index
        raise Exception('There is no valid label')
    name_colx = get_lbl_pos(column_labels, ['ANIMAL_ID', 'AnimalID'])
    name_set = set(column_list[name_colx])
    nx2_name, nx2_nid = nametables_from_nameset(name_set)
    # Get chip set
    print('[convert] build chip table')
    # ---------
    # This format has multiple images per row
    chips_per_name = 2  # this is apparently always 2

    def get_multiprop_colx_list(prefix):
        """Return the column indexes of prefix1 .. prefix<chips_per_name>."""
        colx_list = []
        for num in range(chips_per_name):
            lbl = prefix + str(num + 1)
            colx = get_lbl_pos(column_labels, [lbl])
            colx_list.append(colx)
        return colx_list
    # ---------
    # Essential properties
    #prop2_colx_list = {}
    try:
        image_colx_list = get_multiprop_colx_list('IMAGE_')
    except Exception:
        # Fall back to the alternative header spelling
        image_colx_list = get_multiprop_colx_list('Image')
    # ---------
    # Nonessential multi-properties
    try_multiprops = ['DATE_NO']
    multiprop2_colx = {}
    for key in try_multiprops:
        try:
            multiprop2_colx[key] = get_multiprop_colx_list(key)
        except Exception:
            # Optional column: simply absent from this sheet
            pass
    # ---------
    # Nonessential single-properties
    try_props = ['SEX']
    prop2_colx = {}
    for key in try_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            prop2_colx[key] = other_colx
        except Exception:
            # Optional column: simply absent from this sheet
            pass
    # ---------
    # Nonessential pairwise-properties
    try_match_props = ['matches', 'WildID_score']
    pairprop2_colx = {}
    for key in try_match_props:
        try:
            other_colx = get_lbl_pos(column_labels, [key])
            pairprop2_colx[key] = other_colx
        except Exception:
            # Optional column: simply absent from this sheet
            pass
    # ---------
    # Build tables
    cx2_cid     = []
    cx2_theta   = []
    cx2_roi     = []
    cx2_nx      = []
    cx2_gx      = []
    prop_dict       = {}
    pairwise_dict   = {}
    gnameroi_to_cid = {}
    for key in prop2_colx.keys():
        prop_dict[key] = []
    for key in multiprop2_colx.keys():
        prop_dict[key] = []
    # Last chip id touched while parsing rows (only used for the debug print)
    cid = 1

    def wildid_add_to_hs_tables(gname, name, roi, theta=0, **kwargs):
        """Append one chip to the chip columns and return its 1-based id."""
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        for key, val in kwargs.items():
            prop_dict[key].append(val)
        cx2_theta.append(theta)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid
    # ---------
    # Wildid parsing
    bad_rows = 0  # counts unreadable images (one per image, not per row)
    for rowx in range(num_rows):
        name        = column_list[name_colx][rowx]
        tbl_kwargs2 = {key: column_list[val][rowx] for key, val in prop2_colx.items()}
        pairwise_vals = [column_list[colx][rowx] for colx in pairprop2_colx.values()]
        cid_tup = []
        for num in range(chips_per_name):  # TODO: This is always just pairwise
            img_colx = image_colx_list[num]
            gname = column_list[img_colx][rowx]
            img_fpath = join(img_dpath, gname)
            # Per-image properties first, then per-row properties
            tbl_kwargs = {key: column_list[val[num]][rowx] for key, val in multiprop2_colx.items()}
            tbl_kwargs.update(tbl_kwargs2)
            roi = roi_from_imgsize(img_fpath, silent=True)
            if roi is None:
                bad_rows += 1
                if not exists(img_fpath):
                    print('nonexistant image: %r' % gname)
                else:
                    print('corrupted image: %r' % gname)
                continue
            gnameroi = (gname, tuple(roi))
            if gnameroi in gnameroi_to_cid:
                # Same image and roi seen in an earlier row: reuse its chip
                cid = gnameroi_to_cid[gnameroi]
                cid_tup.append(cid)
                continue
            cid = wildid_add_to_hs_tables(gname, name, roi, **tbl_kwargs)
            gnameroi_to_cid[gnameroi] = cid
            cid_tup.append(cid)
        # Only rows in which every image produced a chip describe a pair;
        # a truncated (possibly empty) key would be meaningless.
        if len(cid_tup) == chips_per_name:
            pairwise_dict[tuple(cid_tup)] = pairwise_vals

    print('bad_rows = %r ' % bad_rows)
    print('num_rows = %r ' % num_rows)
    print('chips_per_name = %r ' % chips_per_name)
    print('cid = %r ' % cid)

    print('num pairwise properties: %r' % len(pairwise_dict))
    print('implementation of pairwise properties does not yet exist')

    num_known_chips = len(cx2_cid)
    print('[convert] Added %r known chips.' % num_known_chips)
    # Add the rest of the nongroundtruthed chips
    print('[convert] Adding unknown images to table')

    # Check that images were unique
    unique_gx = np.unique(np.array(cx2_gx))
    print('len(cx2_gx)=%r'    % len(cx2_gx))
    print('len(unique_gx)=%r' % len(unique_gx))
    assert len(cx2_gx) == len(unique_gx), \
        'There are images specified twice'

    # Check that cids were unique
    cx2_cid_arr = np.array(cx2_cid)
    valid_cids  = cx2_cid_arr[np.where(cx2_cid_arr > 0)[0]]
    unique_cids = np.unique(valid_cids)
    print('len(cx2_cid)     = %r' % len(cx2_cid))
    print('len(valid_cids)  = %r' % len(valid_cids))
    print('len(unique_cids) = %r' % len(unique_cids))
    assert len(valid_cids) == len(unique_cids), \
        'There are chipids specified twice'

    # Unknown chips carry 'NA' for every property column; the mapping is the
    # same for all of them, so build it once.
    unknown_kwargs = {key: 'NA' for key in multiprop2_colx}
    unknown_kwargs.update({key: 'NA' for key in prop2_colx})
    known_gx_set = set(cx2_gx)
    for gx, gname in enumerate(gx2_gname):
        if gx in known_gx_set:
            continue
        roi = roi_from_imgsize(join(img_dpath, gname), silent=False)
        if roi is not None:
            wildid_add_to_hs_tables(gname, '____', roi, **unknown_kwargs)
    num_unknown_chips = len(cx2_cid) - num_known_chips
    print('[convert] Added %r more unknown chips.' % num_unknown_chips)
    # Translate per-chip name/image indexes into name/image ids
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    # Report the real number of chips rather than a value derived from the
    # last chip id that happened to be touched.
    print('[convert] There are %d chips' % len(cx2_cid))
    #
    # Write tables
    internal_dir      = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
    print('[convert] finished conversion')