Exemplo n.º 1
0
def is_imgdir(path):
    """Return True if ``path`` is a directory that appears to hold images.

    A directory qualifies when either:

    * it contains an entry named ``images``, or
    * at least one of its direct entries is accepted by
      ``helpers.matches_image``.

    Anything that is not a directory yields False.  Sub-directories are not
    searched recursively.
    """
    if not isdir(path):
        return False
    # An ``images`` entry marks the directory without inspecting its files.
    if exists(join(path, 'images')):
        return True
    # Stop at the first matching entry; nothing else about the entries is
    # needed, so no per-entry isdir/isfile checks are performed.
    return any(
        helpers.matches_image(join(path, name)) for name in os.listdir(path))
Exemplo n.º 2
0
def convert_from_oxford_style(db_dir):
    """Convert an Oxford-style dataset under ``db_dir`` into HotSpotter tables.

    Expected layout:

    * ``<db_dir>/images``          -- image files, walked recursively.
    * ``<db_dir>/oxford_style_gt`` -- one groundtruth file per
      (name, num, quality) triple, plus an optional ``corrupted_files.txt``
      listing image names (one per line) to ignore.

    The chip, name and image tables are written into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.  Nothing is returned.

    Raises:
        AssertionError: if a query image has no non-query groundtruth entry
            or if the same image is listed as a query more than once.
        ValueError: if a groundtruth entry refers to an image that is not in
            the collected image list.
        Both input directories are checked with ``helpers.assertpath``
        (presumably raising when missing -- confirm against helpers).
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set()
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_set = set(f.read().splitlines())

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Normalise to forward slashes and drop the './' that relpath yields for
    # files sitting directly in img_dpath.
    gname_list_ = [
        join(relpath(root, img_dpath), fname).replace('\\',
                                                      '/').replace('./', '')
        for (root, _dlist, flist) in os.walk(img_dpath) for fname in flist
    ]
    gname_list = [
        gname for gname in gname_list_
        if gname not in corrupted_gname_set and helpers.matches_image(gname)
    ]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set()
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # Backspace over the previous 'NNNN/NNNN' counter and rewrite it.
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        if gtx % 10 == 1:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        # Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath,
                                                      corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in oxsty_chip_info_sublist:
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in oxsty_chip_info_sublist:
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))

    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Entries are compared by repr (the roi may be unhashable) and kept in
    # first-seen order so that "the first name" below is well defined.
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, raw_chips in gname2_chips_raw.items():
        seen_reprs = set()
        for chip in raw_chips:
            chip_repr = repr(chip)
            if chip_repr not in seen_reprs:
                seen_reprs.add(chip_repr)
                gname2_chips[gname].append(chip)
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)

    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    # Materialise the keys: numpy needs a real sequence, not a dict view.
    gname_with_groundtruth_list = list(gname2_chips)
    gname_without_groundtruth_list = np.setdiff1d(gname_list,
                                                  gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' %
          len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' %
          len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' %
          len(multinamed_gname_list))
    # make sure all queries have ground truth and there are no duplicate
    # queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list)), (
            'every query image must also have non-query groundtruth')
    assert len(query_gname_list) == len(set(query_gname_list)), (
        'duplicate query images')

    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Index 0 and 1 are both the unknown name '____' (name id 1).
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + list(range(2, len(name_set) + 2))
    gx2_gid = list(range(1, len(gx2_gname) + 1))

    # First-occurrence lookup tables; same result as list.index without a
    # linear scan for every chip.
    name2_nx = {}
    for nx_, name_ in enumerate(nx2_name):
        name2_nx.setdefault(name_, nx_)
    gname2_gx = {}
    for gx_, gname_ in enumerate(gx2_gname):
        gname2_gx.setdefault(gname_, gx_)

    cx2_cid = []
    cx2_theta = []
    # NOTE(review): quality is collected per chip but is not passed to
    # write_chip_table below.
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        """Append one chip row to the cx2_* lists and return its chip id."""
        cid = len(cx2_cid) + 1
        try:
            nx = name2_nx[name]
            gx = gname2_gx[gname]
        except KeyError as ex:
            # Keep the exception type that list.index used to raise.
            raise ValueError('%r is not in list' % (ex.args[0],))
        if nx == 0:
            nx = 1
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips:
        # just take the first name. This is foobar
        (name, roi, quality) = gname2_chips[gname][0]
        add_to_hs_tables(gname, name, roi, quality)
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        # Best effort: an unreadable image is reported and skipped.
        try:
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    # Translate per-chip table indexes into name ids / image ids.
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
Exemplo n.º 3
0
def convert_from_oxford_style(db_dir):
    """Convert an Oxford-style dataset under ``db_dir`` into HotSpotter tables.

    Expected layout:

    * ``<db_dir>/images``          -- image files, walked recursively.
    * ``<db_dir>/oxford_style_gt`` -- one groundtruth file per
      (name, num, quality) triple, plus an optional ``corrupted_files.txt``
      listing image names (one per line) to ignore.

    The chip, name and image tables are written into
    ``join(db_dir, ld2.RDIR_INTERNAL2)``.  Nothing is returned.

    Raises:
        AssertionError: if a query image has no non-query groundtruth entry
            or if the same image is listed as a query more than once.
        ValueError: if a groundtruth entry refers to an image that is not in
            the collected image list.
        Both input directories are checked with ``helpers.assertpath``
        (presumably raising when missing -- confirm against helpers).
    """
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set()
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_set = set(f.read().splitlines())

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    # Normalise to forward slashes and drop the './' that relpath yields for
    # files sitting directly in img_dpath.
    gname_list_ = [join(relpath(root, img_dpath), fname).replace('\\', '/').replace('./', '')
                   for (root, _dlist, flist) in os.walk(img_dpath)
                   for fname in flist]
    gname_list = [gname for gname in gname_list_
                  if gname not in corrupted_gname_set and helpers.matches_image(gname)]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set()
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        # Backspace over the previous 'NNNN/NNNN' counter and rewrite it.
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        if gtx % 10 == 1:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        # Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name,
                                                      quality, img_dpath,
                                                      corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in oxsty_chip_info_sublist:
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in oxsty_chip_info_sublist:
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))

    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    # Entries are compared by repr (the roi may be unhashable) and kept in
    # first-seen order so that "the first name" below is well defined.
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, raw_chips in gname2_chips_raw.items():
        seen_reprs = set()
        for chip in raw_chips:
            chip_repr = repr(chip)
            if chip_repr not in seen_reprs:
                seen_reprs.add(chip_repr)
                gname2_chips[gname].append(chip)
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)

    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    # Materialise the keys: numpy needs a real sequence, not a dict view.
    gname_with_groundtruth_list = list(gname2_chips)
    gname_without_groundtruth_list = np.setdiff1d(gname_list, gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    # make sure all queries have ground truth and there are no duplicate
    # queries
    assert len(query_gname_list) == len(np.intersect1d(query_gname_list, gname_with_groundtruth_list)), (
        'every query image must also have non-query groundtruth')
    assert len(query_gname_list) == len(set(query_gname_list)), (
        'duplicate query images')

    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    # Index 0 and 1 are both the unknown name '____' (name id 1).
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + list(range(2, len(name_set) + 2))
    gx2_gid = list(range(1, len(gx2_gname) + 1))

    # First-occurrence lookup tables; same result as list.index without a
    # linear scan for every chip.
    name2_nx = {}
    for nx_, name_ in enumerate(nx2_name):
        name2_nx.setdefault(name_, nx_)
    gname2_gx = {}
    for gx_, gname_ in enumerate(gx2_gname):
        gname2_gx.setdefault(gname_, gx_)

    cx2_cid = []
    cx2_theta = []
    # NOTE(review): quality is collected per chip but is not passed to
    # write_chip_table below.
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        """Append one chip row to the cx2_* lists and return its chip id."""
        cid = len(cx2_cid) + 1
        try:
            nx = name2_nx[name]
            gx = gname2_gx[gname]
        except KeyError as ex:
            # Keep the exception type that list.index used to raise.
            raise ValueError('%r is not in list' % (ex.args[0],))
        if nx == 0:
            nx = 1
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips:
        # just take the first name. This is foobar
        (name, roi, quality) = gname2_chips[gname][0]
        add_to_hs_tables(gname, name, roi, quality)
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        # Best effort: an unreadable image is reported and skipped.
        try:
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    # Translate per-chip table indexes into name ids / image ids.
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi, cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)