def merge_dates(in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(os.listdir(ins_root))  # Sort to make sure we always get the same order
    first = True
    all_info = dict()
    for date in all_dates:
        split_file = os.path.join(in_root, '{}.csv'.format(date))
        if not os.path.exists(split_file):
            print('Missing {}.'.format(split_file))
            continue
        date_info = load_csv(split_file)
        # Add a date column
        num_entries = len(date_info['easting'])
        date_info['date'] = [date] * num_entries
        if first:
            all_info = date_info
            first = False
        else:
            for key in all_info.keys():
                all_info[key] = all_info[key] + date_info[key]
    out_file = os.path.join(out_root, 'merged.csv')
    save_csv(all_info, out_file)
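# `load_csv` and `save_csv` are not defined in this module. Throughout, they
# behave as column-oriented helpers: a CSV file maps to a dict of equal-length
# lists keyed by the header row. A minimal sketch under that assumption (the
# original helpers may differ, e.g. in type handling); np comes from the
# module-level numpy import:
import csv

def load_csv(path, has_header=True, delimiter=',', keys=None):
    # Read a CSV into a dict of columns; `keys` names the columns when the
    # file has no header row (see the stereo.timestamps call site below).
    with open(path, newline='') as f:
        rows = list(csv.reader(f, delimiter=delimiter))
    if has_header:
        keys, rows = rows[0], rows[1:]
    return {k: [row[i] for row in rows] for i, k in enumerate(keys)}

def save_csv(data, path):
    # Write a dict of columns back to CSV; scalar values (e.g. the stats
    # dicts below) are wrapped into single-row columns.
    cols = [v if isinstance(v, (list, tuple, np.ndarray)) else [v] for v in data.values()]
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(list(data.keys()))
        writer.writerows(zip(*cols))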
def sample_anchors(shuffled_root, cluster_root, out_root, s, mode, r, epoch):
    train_meta = load_csv(os.path.join(shuffled_root, '{}_{}_{:03d}.csv'.format(s, mode, epoch)))
    train_xy = get_xy(train_meta)
    out_file = os.path.join(out_root, '{}_{}_{}_{:03d}.csv'.format(s, mode, r, epoch))
    if not os.path.exists(out_file):
        ref_meta = load_csv(os.path.join(cluster_root, '{}_{}_{}.csv'.format(s, mode, r)))
        ref_xy = get_xy(ref_meta)
        # Sample anchors: one random training image within 1 m of each reference location
        ref_tree = KDTree(train_xy)
        ref_neighbors = ref_tree.query_radius(ref_xy, r=1, return_distance=False)
        anchors = [np.random.choice(potential_anchors) for potential_anchors in ref_neighbors]
        np.random.shuffle(anchors)
        anchor_indices = {'idx': anchors}
        save_csv(anchor_indices, out_file)
    else:
        anchor_indices = load_csv(out_file)
    anchor_xy = np.array([train_xy[int(i), :] for i in anchor_indices['idx']])
    out_img = os.path.join(out_root, '{}_{}_{}_{}.png'.format(s, mode, r, epoch))
    plt.clf()
    f, ax1 = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(anchor_xy[:, 0], anchor_xy[:, 1], c=np.arange(len(anchor_xy)))
    plt.savefig(out_img)
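# `get_xy` is assumed to stack the easting/northing columns of such a metadata
# dict into an N x 2 float array (a sketch, not necessarily the original helper):
def get_xy(meta):
    return np.array([meta['easting'], meta['northing']], dtype=float).T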
def find_and_remove_errors(mode, out_root, ref_bin_xy, ref_data, s):
    true_ref_xy = np.array([[e, n] for e, n in zip(ref_data['easting'], ref_data['northing'])])
    binned_ref_xy = np.array([ref_bin_xy[math.floor(l)] for l in ref_data['l']])
    ref_errors = np.linalg.norm(true_ref_xy - binned_ref_xy, axis=1)
    ref_hist_path = os.path.join(out_root, '{}_{}_bin_errors.png'.format(s, mode))
    if not os.path.exists(ref_hist_path):
        plt.clf()
        plt.hist(ref_errors, bins=1000, histtype='step')
        plt.savefig(ref_hist_path)
    # Drop all entries whose measured position is more than 5 m from the
    # median position of their l bin
    for key in ref_data.keys():
        ref_data[key] = [el for el, er in zip(ref_data[key], ref_errors) if er < 5.0]
    save_csv(ref_data, os.path.join(out_root, '{}_{}.csv'.format(s, mode)))
    stats = dict()
    stats['raw_mean_error'] = np.mean(ref_errors)
    stats['raw_median_error'] = np.median(ref_errors)
    stats['raw_max_error'] = np.max(ref_errors)
    stats['raw_min_error'] = np.min(ref_errors)
    stats['raw_error_std'] = np.std(ref_errors)
    clean_errors = [er for er in ref_errors if er < 5.0]
    stats['clean_mean_error'] = np.mean(clean_errors)
    stats['clean_median_error'] = np.median(clean_errors)
    stats['clean_max_error'] = np.max(clean_errors)
    stats['clean_min_error'] = np.min(clean_errors)
    stats['clean_error_std'] = np.std(clean_errors)
    save_csv(stats, os.path.join(out_root, '{}_{}_errors.csv'.format(s, mode)))
    return len(ref_data['t']), ref_data
def create_reference(s):
    date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
    out_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, date))
    if not os.path.exists(out_file):
        data = load_csv(os.path.join(in_root, 'clean_{}.csv'.format(s)))
        ref_data = dict()
        for key in data.keys():
            ref_data[key] = [e for e, d in zip(data[key], data['date']) if d == date]
        ref_xy = [(float(x), float(y)) for x, y in zip(ref_data['easting'], ref_data['northing'])]
        # Step lengths between consecutive positions...
        ref_d = [0] + [
            math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)
            for p, q in zip(ref_xy[1:], ref_xy[:-1])
        ]
        # ...and their cumulative sum: the geodesic parameter l
        ref_l = np.cumsum(ref_d).tolist()
        vmin = min(ref_l)
        vmax = max(ref_l)
        ref_data['l'] = ref_l
        ref_yaw = np.array(ref_data['yaw'], dtype=float)
        plot_results(ref_xy, ref_yaw, ref_l, date, ref_data, s, vmin, vmax)
        save_csv(ref_data, out_file)
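# Worked example of the geodesic parameter: for reference positions
# (0, 0), (3, 4), (3, 10) the step lengths are ref_d = [0, 5, 6], so
# l = cumsum(ref_d) = [0, 5, 11], i.e. the along-route distance in meters
# from the start of the reference traversal.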
def get_l_based_fixed_localization_reference(in_root, out_root, s, r):
    out_txt = os.path.join(out_root, '{}_ref_l_{}.txt'.format(s, int(r)))
    out_csv = os.path.join(out_root, '{}_ref_l_{}.csv'.format(s, int(r)))
    if not os.path.exists(out_csv):
        meta = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))  # Not using query locations for this
        l = np.array(meta['l'], dtype=float).reshape(-1, 1)
        ll = np.arange(math.floor(l[-1, 0]), step=r).reshape(-1, 1)
        # For each target l (every r meters along the route), pick the image
        # whose l value is closest
        l_tree = KDTree(l)
        i_l = l_tree.query(ll, return_distance=False, k=1)
        i_l = np.squeeze(i_l)
        save_txt('\n'.join(['{}'.format(i) for i in i_l]), out_txt)
        selected_meta = dict()
        for key in meta.keys():
            selected_meta[key] = [meta[key][i] for i in i_l]
        save_csv(selected_meta, out_csv)
    else:
        selected_meta = load_csv(out_csv)
    out_folder = os.path.join(out_root, '{}_ref_l_{}'.format(s, int(r)))
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    for i, (d, f, t) in tqdm(enumerate(zip(selected_meta['date'], selected_meta['folder'], selected_meta['t']))):
        f = int(f)
        img = load_img(img_path((d, f, t)))
        save_img(img, os.path.join(out_folder, '{:04d}_{}_{:02d}_{}.png'.format(i, d, f, t)))
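# `save_txt` is not defined here; both call sites (above and in
# `downsize_images` below) are consistent with a thin write wrapper like this
# sketch (name and signature assumed):
def save_txt(txt, out_file, mode='w'):
    with open(out_file, mode) as f:
        f.write(txt)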
def downsize_images(task_id, max_side, img_root, ins_root, tar_root, out_img_root, out_root, cams):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(os.listdir(ins_root))  # Sort to make sure we always get the same order
    date = all_dates[int(task_id) - 1]
    print(date)
    out_file = os.path.join(out_root, 'img_info_{}'.format(max_side), '{}.csv'.format(date))
    if os.path.exists(out_file):
        print('Output already exists.')
        return
    imgs = load_csv(os.path.join(img_root, date, 'stereo.timestamps'),
                    has_header=False, delimiter=' ', keys=['t', 'folder'])
    cam = oxford_camera.CameraModel(cams, '/stereo/centre/')
    exposures = [0] * len(imgs['t'])
    max_folder = max(np.array(imgs['folder'], dtype=int))
    if date == '2015-09-02-10-37-32':
        max_folder = 4  # Folders 5 and 6 are missing from the website
    imgs['t'] = [t for f, t in zip(imgs['folder'], imgs['t']) if int(f) <= max_folder]
    imgs['folder'] = [f for f in imgs['folder'] if int(f) <= max_folder]
    for folder in range(1, max_folder + 1):
        filename = os.path.join(tar_root, '{}_stereo_centre_{:02d}.tar'.format(date, folder))
        print(filename)
        if not os.path.exists(filename):
            print('MISSING!!')
            save_txt(txt=filename, mode='a', out_file=os.path.join(out_root, 'missing.txt'))
            continue  # Skip missing archives instead of crashing on tarfile.open
        with tarfile.open(filename) as archive:
            print(archive)
            for entry in archive.getmembers():
                img_name = os.path.basename(entry.name)
                if '.png' not in img_name:
                    continue
                ts = img_name.split('.')[0]
                img_path = entry.name
                with archive.extractfile(archive.getmember(img_path)) as file:
                    timer = time.time()
                    index = imgs['t'].index(ts)  # Linear search: timestamps are not necessarily ordered
                    try:
                        img = oxford_image.load_image(file, cam)  # One file has an unloadable image...
                        img = resize_img(img, max_side)
                        exposures[index] = sum(np.array(img).flatten())
                        out_img_folder = os.path.join(out_img_root, '{}_stereo_centre_{:02d}'.format(date, folder))
                        if not os.path.exists(out_img_folder):
                            os.makedirs(out_img_folder)
                        out_img_path = os.path.join(out_img_folder, img_name)
                        save_img(img, out_img_path)
                        print('Processed {} in {}s.'.format(ts, time.time() - timer))
                    except Exception:
                        # ...so drop its entry instead of aborting the whole date
                        del exposures[index]
                        del imgs['t'][index]
                        del imgs['folder'][index]
    imgs['exposure'] = exposures
    save_csv(imgs, out_file)
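# `resize_img` is not defined in this module. A minimal sketch of what it
# presumably does (scale so the longest side equals max_side, preserving the
# aspect ratio), assuming PIL-style images; name and behavior are assumptions:
from PIL import Image

def resize_img(img, max_side):
    scale = max_side / max(img.size)
    new_size = (round(img.size[0] * scale), round(img.size[1] * scale))
    return img.resize(new_size, Image.BILINEAR)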
def get_splits(task_id, grids, in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(os.listdir(ins_root))  # Sort to make sure we always get the same order
    date = all_dates[int(task_id) - 1]
    print(date)
    out_file = os.path.join(out_root, '{}.csv'.format(date))
    if os.path.exists(out_file):
        print('Already calculated {}.'.format(out_file))
        return
    xy_file = os.path.join(in_root, '{}.csv'.format(date))
    if not os.path.exists(xy_file):
        print('Missing {}.'.format(xy_file))
        return
    xy = load_csv(xy_file)
    # Convert UTM easting/northing to integer grid pixel coordinates
    X = [0 if math.isnan(float(e)) else int(float(e) - 619500.0) for e in xy['easting']]
    Y = [0 if math.isnan(float(n)) else int(5736480.0 - float(n)) for n in xy['northing']]
    out_img_grid = os.path.join(out_root, '{}_grid.png'.format(date))
    draw_grid(X, Y, out_img_grid)
    out_img_scatter = os.path.join(out_root, '{}_scatter.png'.format(date))
    plt.clf()
    plt.scatter(np.array(xy['easting'], dtype=float),
                np.array(xy['northing'], dtype=float),
                c=np.array(xy['yaw'], dtype=float))
    plt.savefig(out_img_scatter)
    for grid_name in grids.keys():
        grid = cv2.imread(grids[grid_name])
        grid = np.asarray(grid, dtype=np.uint8)  # Fix for failing img loading
        in_fold = list()
        for x, y in zip(X, Y):
            if x < 0 or y < 0 or x >= grid.shape[1] or y >= grid.shape[0]:
                in_fold.append(0)
            elif grid[y, x, 0] > 0:  # All color channels are the same
                in_fold.append(1)
            else:
                in_fold.append(0)
        xy[grid_name] = in_fold
    # Each image may belong to at most one of the train/test/val grids
    max_assigned = [a1 + a2 + a3 for a1, a2, a3 in zip(xy['train'], xy['test'], xy['val'])]
    assert max(max_assigned) <= 1, 'Please increase in_fold grid threshold.'
    for grid_name in grids.keys():
        X_g = [x for x, in_fold in zip(X, xy[grid_name]) if in_fold == 1]
        Y_g = [y for y, in_fold in zip(Y, xy[grid_name]) if in_fold == 1]
        print('Found {} imgs in {} for {}.'.format(len(X_g), grid_name, date))
        out_img_file = os.path.join(out_root, '{}_{}.png'.format(date, grid_name))
        draw_grid(X_g, Y_g, out_img_file)
    save_csv(xy, out_file)
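# `draw_grid` is only called in this file; a plausible minimal implementation
# (an assumption, reusing the module's np/cv2 imports) that marks each (x, y)
# location on a black raster:
def draw_grid(X, Y, out_file):
    h, w = max(Y) + 1, max(X) + 1
    grid = np.zeros((h, w), dtype=np.uint8)
    for x, y in zip(X, Y):
        if 0 <= x < w and 0 <= y < h:
            grid[y, x] = 255
    cv2.imwrite(out_file, grid)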
def cluster(in_root, out_root, s, mode, r):
    out_file = os.path.join(out_root, '{}_{}_{}.pickle'.format(s, mode, r))
    meta_file = os.path.join(in_root, '{}_{}_000.csv'.format(s, mode))
    meta = load_csv(meta_file)
    if not os.path.exists(out_file):
        date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
        temp_meta = dict()
        for key in meta.keys():
            temp_meta[key] = [e for e, d in zip(meta[key], meta['date']) if d in date]
        # Order by timestamp so the greedy subsampling follows the route
        t_idx = np.argsort(temp_meta['t'])
        date_meta = dict()
        for key in meta.keys():
            date_meta[key] = [temp_meta[key][i] for i in t_idx]
        print(len(date_meta['t']))
        xy = get_xy(date_meta)
        # Greedy subsampling: keep a point whenever it is more than r away
        # from the last kept point
        ref_xy = [xy[0, :]]
        ref_idx = [0]
        for i in tqdm(range(len(date_meta['t']))):
            if sum((xy[i, :] - ref_xy[-1]) ** 2) > r ** 2:
                ref_xy.append(xy[i, :])
                ref_idx.append(i)
        ref_xy = np.array(ref_xy)
        save_pickle([ref_xy, date_meta, ref_idx], out_file)
    else:
        ref_xy, date_meta, ref_idx = load_pickle(out_file)
    print('{}: {}'.format(s, len(ref_idx)))
    out_img = os.path.join(out_root, '{}_{}_{}.png'.format(s, mode, r))
    plt.clf()
    f, ax1 = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(ref_xy[:, 0], ref_xy[:, 1], c=np.arange(len(ref_xy)))
    plt.savefig(out_img)
    out_meta = dict()
    for key in meta.keys():
        out_meta[key] = [date_meta[key][i] for i in ref_idx]
    out_file = os.path.join(out_root, '{}_{}_{}.csv'.format(s, mode, r))
    save_csv(out_meta, out_file)
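# `save_pickle`/`load_pickle` are assumed to be thin pickle wrappers, along
# the lines of this sketch:
import pickle

def save_pickle(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_pickle(path):
    with open(path, 'rb') as f:
        return pickle.load(f)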
def shuffle(in_root, out_root, s, mode, num_epochs):
    meta = load_csv(os.path.join(in_root, '{}_{}.csv'.format(s, mode)))  # Not using query locations for this
    for e in range(num_epochs):
        out_file = os.path.join(out_root, '{}_{}_{:03d}.csv'.format(s, mode, e))
        if os.path.exists(out_file):
            print('{} exists. Not recalculating.'.format(out_file))
        else:
            print('Shuffling {}.'.format(out_file))
            shuffled_indices = np.random.permutation(len(meta['t']))
            shuffled_meta = dict()
            for key in meta.keys():
                shuffled_meta[key] = [meta[key][i] for i in shuffled_indices]
            save_csv(shuffled_meta, out_file)
def merge_parametrized(in_root, folds, cols_to_keep, out_root):
    files = os.listdir(in_root)
    meta_info = dict()
    full_data = dict()
    for c in cols_to_keep:
        full_data[c] = []
    for fold in folds:
        data = dict()
        date_count = dict()
        for c in cols_to_keep:
            data[c] = []
        fold_files = [f for f in files if f.split('_')[0] == fold]
        for file in fold_files:
            if '.csv' in file:
                date_data = load_csv(os.path.join(in_root, file))
                if len(date_data['t']) < 100:
                    # Dates with very few frames indicate bad l alignment or bad INS estimates
                    continue
                for c in cols_to_keep:
                    data[c].extend(date_data[c])
                    full_data[c].extend(date_data[c])
                date_count[file.split('_')[1]] = len(date_data['t'])
        out_file = os.path.join(out_root, '{}.csv'.format(fold))
        save_csv(data, out_file)
        meta_info[fold] = len(data['t'])
        save_csv(date_count, os.path.join(out_root, '{}_date_count.csv'.format(fold)))
    out_file = os.path.join(out_root, 'full.csv')
    save_csv(full_data, out_file)
    meta_info['full'] = len(full_data['t'])
    save_csv(meta_info, os.path.join(out_root, 'meta.csv'))
def set_aside_queries(in_root, folds, query_dates):
    num_per_fold = dict()
    for fold in folds:
        clean_file = os.path.join(in_root, '{}.csv'.format(fold))
        data = load_csv(clean_file)
        query_out = clean_file.replace(fold, '{}_query'.format(fold))
        ref_out = clean_file.replace(fold, '{}_ref'.format(fold))
        query_data = dict()
        ref_data = dict()
        for key in data.keys():
            query_data[key] = [el for el, date in zip(data[key], data['date']) if date in query_dates]
            ref_data[key] = [el for el, date in zip(data[key], data['date']) if date not in query_dates]
        num_per_fold['{}_query'.format(fold)] = len(query_data['t'])
        num_per_fold['{}_ref'.format(fold)] = len(ref_data['t'])
        save_csv(query_data, query_out)
        save_csv(ref_data, ref_out)
    save_csv(num_per_fold, os.path.join(in_root, 'num_per_fold.csv'))
def plot_statistics(in_root, out_root, folds, tag_root):
    date_tags, all_tags = get_tags(tag_root)
    for fold in folds:
        print('Plotting {} statistics.'.format(fold))
        clean_file = os.path.join(in_root, '{}.csv'.format(fold))
        data = load_csv(clean_file)
        # Images per date
        images_per_date = Counter(data['date'])
        save_csv(images_per_date, os.path.join(out_root, 'images_per_date_{}.csv'.format(fold)))
        dict_to_bar(images_per_date, os.path.join(out_root, 'images_per_date_{}.pdf'.format(fold)))
        # Images/dates per tag, month and hour
        images_per_tag = dict.fromkeys(all_tags, 0)
        images_per_month = dict.fromkeys(range(1, 13), 0)
        images_per_hour = dict.fromkeys(range(0, 24), 0)
        dates_per_tag = dict.fromkeys(all_tags, 0)
        dates_per_month = dict.fromkeys(range(1, 13), 0)
        dates_per_hour = dict.fromkeys(range(0, 24), 0)
        for date in images_per_date.keys():
            month = int(date[5:7])
            hour = int(date[11:13])
            images_per_month[month] += images_per_date[date]
            images_per_hour[hour] += images_per_date[date]
            dates_per_month[month] += 1
            dates_per_hour[hour] += 1
            for tag in date_tags[date]:
                images_per_tag[tag] += images_per_date[date]
                dates_per_tag[tag] += 1
        save_csv(images_per_tag, os.path.join(out_root, 'images_per_tag_{}.csv'.format(fold)))
        dict_to_bar(images_per_tag, os.path.join(out_root, 'images_per_tag_{}.pdf'.format(fold)))
        save_csv(images_per_month, os.path.join(out_root, 'images_per_month_{}.csv'.format(fold)))
        dict_to_bar(images_per_month, os.path.join(out_root, 'images_per_month_{}.pdf'.format(fold)))
        new_months = OrderedDict()
        months = ['January', 'February', 'March', 'April', 'May', 'June',
                  'July', 'August', 'September', 'October', 'November', 'December']
        for i in range(12):
            new_months[months[i]] = images_per_month[i + 1]
        save_csv(new_months, os.path.join(out_root, 'images_per_month_pretty_{}.csv'.format(fold)))
        dict_to_bar(new_months, os.path.join(out_root, 'images_per_month_pretty_{}.pdf'.format(fold)))
        save_csv(images_per_hour, os.path.join(out_root, 'images_per_hour_{}.csv'.format(fold)))
        dict_to_bar(images_per_hour, os.path.join(out_root, 'images_per_hour_{}.pdf'.format(fold)))
        new_hours = OrderedDict()
        for i in range(6, 22):
            new_hours['{:02d}:00'.format(i)] = images_per_hour[i]
        save_csv(new_hours, os.path.join(out_root, 'images_per_pretty_hour_{}.csv'.format(fold)))
        dict_to_bar(new_hours, os.path.join(out_root, 'images_per_pretty_hour_{}.pdf'.format(fold)))
        save_csv(dates_per_tag, os.path.join(out_root, 'dates_per_tag_{}.csv'.format(fold)))
        dict_to_bar(dates_per_tag, os.path.join(out_root, 'dates_per_tag_{}.pdf'.format(fold)))
        save_csv(dates_per_month, os.path.join(out_root, 'dates_per_month_{}.csv'.format(fold)))
        dict_to_bar(dates_per_month, os.path.join(out_root, 'dates_per_month_{}.pdf'.format(fold)))
        save_csv(dates_per_hour, os.path.join(out_root, 'dates_per_hour_{}.csv'.format(fold)))
        dict_to_bar(dates_per_hour, os.path.join(out_root, 'dates_per_hour_{}.pdf'.format(fold)))
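# `dict_to_bar` is assumed to render a dict as a labeled bar chart, roughly
# like this sketch (reusing the module's matplotlib import):
def dict_to_bar(d, out_file):
    plt.clf()
    labels = [str(k) for k in d.keys()]
    plt.bar(range(len(labels)), list(d.values()))
    plt.xticks(range(len(labels)), labels, rotation=90)
    plt.tight_layout()
    plt.savefig(out_file)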
            for j in tuple_info['positives'][i]:
                if j < i:
                    f_dist = np.sum((features[i] - features[j]) ** 2)
                    f_dists.append(f_dist)
                    e_dist = np.sum((xy[i, :] - xy[j, :]) ** 2)
                    e_dists.append(e_dist)
        save_pickle([e_dists, f_dists], out_file)
    else:
        e_dists, f_dists = load_pickle(out_file)
    full_info = dict()
    full_info['f_mean'] = np.mean(f_dists)
    full_info['e_mean'] = np.mean(e_dists)
    full_info['f_med'] = np.median(f_dists)
    full_info['e_med'] = np.median(e_dists)
    full_info['f_max'] = np.max(f_dists)
    full_info['e_max'] = np.max(e_dists)
    save_csv(full_info, out_file_meta)
    plt.clf()
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    f.set_figheight(10)
    f.set_figwidth(20)
    ax1.hist(f_dists, bins=10000, histtype='step')
    ax1.title.set_text('F dists')
    ax2.hist(e_dists, bins=10000, histtype='step')
    ax2.title.set_text('E dists')
    plt.savefig(out_file_hist)
def clean_parametrization(in_root, folds, cols_to_keep, out_root):
    full_data = dict()
    full_ref_data = dict()
    full_query_data = dict()
    for key in cols_to_keep:
        full_data[key] = []
        full_ref_data[key] = []
        full_query_data[key] = []
    meta = dict()
    for s in folds:
        ref_data = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))
        query_data = load_csv(os.path.join(in_root, '{}_query.csv'.format(s)))  # Not used to detect ref outliers
        for key in ['l', 'northing', 'easting']:
            ref_data[key] = np.array(ref_data[key], dtype=float)
            query_data[key] = np.array(query_data[key], dtype=float)
        l_max = max(ref_data['l'])
        num_bins = math.ceil(l_max)
        ref_member_path = os.path.join(out_root, '{}_ref_bin_raw_members.pickle'.format(s))
        if not os.path.exists(ref_member_path):
            # Group reference images into 1 m bins along l
            bin_members = [[i for i in range(len(ref_data['t'])) if math.floor(ref_data['l'][i]) == j]
                           for j in tqdm(range(num_bins))]
            save_pickle(bin_members, ref_member_path)
        else:
            bin_members = load_pickle(ref_member_path)
        ref_bin_xy_path = os.path.join(out_root, '{}_ref_bin_raw_xy.pickle'.format(s))
        if not os.path.exists(ref_bin_xy_path):
            # Median position of each bin; (-1, -1) marks empty bins
            ref_bin_xy = [np.median(np.array([[ref_data['easting'][i], ref_data['northing'][i]]
                                              for i in bin_members[j]]), axis=0)
                          if len(bin_members[j]) else np.array([-1, -1])
                          for j in tqdm(range(num_bins))]
            save_pickle(ref_bin_xy, ref_bin_xy_path)
        else:
            ref_bin_xy = load_pickle(ref_bin_xy_path)
        meta['{}_ref'.format(s)], clean_ref_data = find_and_remove_errors('ref', out_root, ref_bin_xy, ref_data, s)
        # Cleaning query files to allow for more efficient testing, should not influence performance
        # (other than possibly excluding faulty gps/ins 'ground truth', which we don't want anyways)
        meta['{}_query'.format(s)], clean_query_data = find_and_remove_errors('query', out_root, ref_bin_xy, query_data, s)
        fold_clean_data = dict()
        for key in clean_ref_data.keys():
            fold_clean_data[key] = []
            fold_clean_data[key].extend(clean_ref_data[key])
            fold_clean_data[key].extend(clean_query_data[key])
            full_data[key].extend(clean_ref_data[key])
            full_data[key].extend(clean_query_data[key])
            full_ref_data[key].extend(clean_ref_data[key])
            full_query_data[key].extend(clean_query_data[key])
        save_csv(fold_clean_data, os.path.join(out_root, '{}.csv'.format(s)))
    save_csv(full_data, os.path.join(out_root, 'full.csv'))
    save_csv(full_ref_data, os.path.join(out_root, 'full_ref.csv'))
    save_csv(full_query_data, os.path.join(out_root, 'full_query.csv'))
    save_csv(meta, os.path.join(out_root, 'meta.csv'))
def compile_table(l, d):
    mkdir(OUT_ROOT)
    top_n_root = os.path.join(fs_root(), 'top_n')
    queries = [
        'oxford_night', 'oxford_overcast', 'oxford_snow', 'oxford_sunny',
        'freiburg_cloudy', 'freiburg_sunny', 'pittsburgh_query'
    ]
    checkpoints = [
        # Trained on cold
        'triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'quadruplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_triplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_quadruplet_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'sum_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'h_sum_5e-6_all_conditions_angle_1-4_cu_LRD0.9-5_noPCA_lam0.5_me0',
        # Trained on small oxford
        'triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'quadruplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_triplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'lazy_quadruplet_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        'h_sum_5e-6_full-10-25_cu_LRD0.9-5_noPCA_lam0.5_me0',
        # Trained on large oxford
        'ha0_lotriplet_vl64',
        'ha0_loquadruplet_vl64',
        'ha0_lolazy_triplet_vl64',
        'ha0_lolazy_quadruplet_vl64',
        'ha0_lodistance_triplet_vl64',
        'ha0_lohuber_distance_triplet_vl64',
        'ha6_loevil_triplet_muTrue_vl64',
        'ha6_loevil_quadruplet_muTrue_vl64',
        'ha6_loresidual_det_muTrue_vl64',
        'ha0_lotriplet_vl0',
        'ha0_loquadruplet_vl0',
        'ha6_loevil_quadruplet_muTrue_vl0',
        'ha6_loresidual_det_muTrue_vl0',
        'ha0_lotriplet_muTrue_vl64',
        'ha0_lotriplet_muFalse_vl64',
        'ha6_lotriplet_muTrue_vl64',
        'ha6_lotriplet_muFalse_vl64',
        # Trained on Pittsburgh
        'pittsnetvlad',
        # ImageNet
        'offtheshelf'
    ]
    losses = [
        'GT',
        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',
        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',
        'Triplet + distance \\cite{thoma2020geometrically}',
        'Triplet + Huber dist.~\\cite{thoma2020geometrically}',
        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',
        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',
        'Triplet + Huber dist.~\\cite{thoma2020geometrically}',
        'Triplet \\cite{arandjelovic2016netvlad}',
        'Quadruplet \\cite{chen2017beyond}',
        'Lazy triplet \\cite{angelina2018pointnetvlad}',
        'Lazy quadruplet \\cite{angelina2018pointnetvlad}',
        'Triplet + distance \\cite{thoma2020geometrically}',
        'Triplet + Huber dist.~\\cite{thoma2020geometrically}',
        '\\textit{Triplet + HP}',
        '\\textit{Quadruplet + HP}',
        '\\textit{Volume}',
        '$\\mathit{Volume}^*$',
        'Triplet \\cite{arandjelovic2016netvlad}',
        'Off-the-shelf \\cite{deng2009imagenet}'
    ]
    setting = 'l{}_dim{}'.format(l, d)
    print(setting)
    table = defaultdict(list)
    table['Loss'] = losses
    for_mean = defaultdict(list)
    for i, query in enumerate(queries):
        print(query)
        print_gt = True
        if query.startswith('freiburg'):
            T = [0.5, 1.0, 1.5]
        else:
            T = [5.0, 10.0, 15.0]
        for j, checkpoint in enumerate(checkpoints):
            cp_name = checkpoint
            t_n_file = os.path.join(top_n_root, setting, '{}_{}.pickle'.format(query, cp_name))
            if not os.path.exists(t_n_file):
                print('Missing: {}'.format(t_n_file))
                table[query].append('-')
                for_mean[query].append([-1, -1, -1])
                continue
            print(t_n_file)
            [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx] = load_pickle(t_n_file)
            top_g_dists = np.array(top_g_dists)
            if print_gt:
                print_gt = False
                Y = [float(sum(gt_g_dist < x)) / float(len(gt_g_dist)) * 100 for x in T]
                table[query].append(['{:.1f}'.format(y) for y in Y])
                for_mean[query].append(Y)
            t_1_d = np.array([td[0] for td in top_g_dists])
            Y = [float(sum(t_1_d < x)) / float(len(t_1_d)) * 100 for x in T]
            table[query].append(['{:.1f}'.format(y) for y in Y])
            for_mean[query].append(Y)
        # Highlight best values:
        b = np.argmax(np.array(for_mean[query])[1:], axis=0)
        b = b + 1
        for ii, ib in enumerate(b):
            table[query][ib][ii] = '\\textbf{' + table[query][ib][ii] + '}'
        for ii in range(len(losses)):
            table[query][ii] = '/'.join(table[query][ii])
    for i in range(len(losses)):
        vals = np.array([for_mean[query][i] for query in queries if for_mean[query][i][0] > -1])
        Y = np.mean(vals, axis=0)
        table['mean'].append(['{:.1f}'.format(y) for y in Y])
        for_mean['mean'].append(Y)
    # Highlight best values:
    b = np.argmax(np.array(for_mean['mean'])[1:], axis=0)
    b = b + 1
    for ii, ib in enumerate(b):
        table['mean'][ib][ii] = '\\textbf{' + table['mean'][ib][ii] + '}'
    for ii in range(len(losses)):
        table['mean'][ii] = '/'.join(table['mean'][ii])
    out_name = os.path.join(OUT_ROOT, 'accuracy_table.csv')
    save_csv(table, out_name)
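# `mkdir` and `fs_root` are assumed utilities; plausible sketches (the real
# `fs_root` presumably returns the project's filesystem root, here taken from
# an environment variable purely as an assumption):
def mkdir(path):
    os.makedirs(path, exist_ok=True)

def fs_root():
    return os.environ.get('FS_ROOT', '.')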
def img_path(info):
    date = info[0]
    folder = info[1]
    t = info[2]
    return os.path.join('datasets/oxford_512',
                        '{}_stereo_centre_{:02d}'.format(date, int(folder)),
                        '{}.png'.format(t))

# Preselected reference
preselected_ref = os.path.join(fs_root(), 'data/learnlarge/shuffled/train_ref_000.csv')
p_meta = load_csv(preselected_ref)
p_meta['path'] = [img_path((d, f, t)) for d, f, t in zip(p_meta['date'], p_meta['folder'], p_meta['t'])]
idxs_to_keep = np.linspace(0, len(p_meta['path']), num=N_SAMPLES, endpoint=False, dtype=int)
for key in p_meta.keys():
    p_meta[key] = [p_meta[key][i] for i in idxs_to_keep]
save_csv(p_meta, os.path.join(out_root, '{}_pca.csv'.format(place)))

# Cold
place = 'cold'

def parse_cold_folder(path, pattern):
    all_files = get_recursive_file_list(path, pattern)
    all_files, TXYA = parse_file_list(all_files)
    if len(all_files) > N_SAMPLES:
        idxs_to_keep = np.linspace(0, len(all_files), num=N_SAMPLES, endpoint=False, dtype=int)
    else:
        idxs_to_keep = np.arange(len(all_files))
    meta = dict()
ax.legend([
    '{} reference images'.format(len(remaining_ref_i)),
    '{} query images'.format(len(remaining_query_i))
], markerscale=5)
ax.set_xlabel('Easting [m]')
ax.set_ylabel('Northing [m]')

ref_meta = dict()
ref_meta['path'] = [
    os.path.join('datasets/pittsburgh_used/ref', ref_paths[i])
    for i in remaining_ref_i
]
ref_meta['easting'] = [ref_xy[i, 0] for i in remaining_ref_i]
ref_meta['northing'] = [ref_xy[i, 1] for i in remaining_ref_i]
save_csv(ref_meta, os.path.join(list_out_root, '{}_ref.csv'.format(place)))

query_meta = dict()
query_meta['path'] = [
    os.path.join('datasets/pittsburgh_used/query', query_paths[i])
    for i in remaining_query_i
]
query_meta['easting'] = [query_xy[i, 0] for i in remaining_query_i]
query_meta['northing'] = [query_xy[i, 1] for i in remaining_query_i]
save_csv(query_meta, os.path.join(list_out_root, '{}_query.csv'.format(place)))

# ------------------------------------- Oxford -------------------------------------
place = 'oxford'

def img_path(info):
def interpolate_xy(task_id, in_root, ins_root, out_root):
    # Find all dates with INS data (not all images have INS, but all INS should have images)
    all_dates = sorted(os.listdir(ins_root))  # Sort to make sure we always get the same order
    date = all_dates[int(task_id) - 1]
    out_file = os.path.join(out_root, '{}.csv'.format(date))
    if os.path.exists(out_file):
        # print('Already calculated {}.'.format(out_file))
        return
    imgs_file = os.path.join(in_root, '{}.csv'.format(date))
    if not os.path.exists(imgs_file):
        print('Missing {}: {}.'.format(task_id, imgs_file))
        return
    imgs = load_csv(imgs_file)
    ins = load_csv(os.path.join(ins_root, date, 'gps', 'ins.csv'))
    ins_ts = np.array(ins['timestamp'], dtype=int).reshape((-1, 1))  # num_samples x num_features
    img_ts = np.array(imgs['t'], dtype=int).reshape((-1, 1))
    northing = np.array(ins['northing'], dtype=float)
    easting = np.array(ins['easting'], dtype=float)
    yaw = np.array(ins['yaw'], dtype=float)  # Yaw range: 0-2pi
    status = ins['ins_status']
    # INS measurements are roughly 3 times more frequent than images
    mean_td_img = np.mean([img_ts[i, 0] - img_ts[i - 1, 0] for i in range(1, img_ts.shape[0])])
    mean_td_ins = np.mean([ins_ts[i, 0] - ins_ts[i - 1, 0] for i in range(1, ins_ts.shape[0])])
    print('Found {} times more ins measures than images.'.format(mean_td_img / mean_td_ins))
    print('The mean time between ins measures is {}.'.format(mean_td_ins))
    print('The mean time between img measures is {}.'.format(mean_td_img))
    # For each image timestamp, interpolate between the two closest INS samples
    ins_ts_tree = KDTree(ins_ts)
    d_closest, i_closest = ins_ts_tree.query(img_ts, 2)
    img_northing = [
        lin_ip(northing[i_c[0]], northing[i_c[1]], d_c[0], d_c[1])
        for d_c, i_c in zip(d_closest, i_closest)
    ]
    img_easting = [
        lin_ip(easting[i_c[0]], easting[i_c[1]], d_c[0], d_c[1])
        for d_c, i_c in zip(d_closest, i_closest)
    ]
    img_yaw = [
        lin_ip(yaw[i_c[0]], yaw[i_c[1]], d_c[0], d_c[1]) % (2 * pi)
        for d_c, i_c in zip(d_closest, i_closest)
    ]  # Yaw range: 0-2pi
    # Remove interpolations of unclean INS states
    ins_good = [0] * len(img_easting)
    for j, i_c in enumerate(i_closest):
        if status[i_c[0]] == 'INS_SOLUTION_GOOD' and status[i_c[1]] == 'INS_SOLUTION_GOOD':
            ins_good[j] = 1
    imgs['northing'] = img_northing
    imgs['easting'] = img_easting
    imgs['ins_good'] = ins_good
    imgs['yaw'] = img_yaw
    ic1 = [i_c[0] for i_c in i_closest]
    ic2 = [i_c[1] for i_c in i_closest]
    tn1 = [ins_ts[i, 0] for i in ic1]
    tn2 = [ins_ts[i, 0] for i in ic2]
    imgs['ic1'] = ic1  # Index of closest ins point
    imgs['ic2'] = ic2
    imgs['tn1'] = tn1  # Timestamp of closest ins point
    imgs['tn2'] = tn2
    save_csv(imgs, out_file)
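# `lin_ip` is not defined in this module. Both call sites are consistent with
# inverse-distance-weighted linear interpolation between the two closest
# samples; a sketch under that assumption:
def lin_ip(v1, v2, d1, d2):
    # Weight each value by the distance to the *other* sample, so the closer
    # sample dominates; if both distances are 0 the samples coincide.
    if d1 + d2 == 0:
        return v1
    return (v1 * d2 + v2 * d1) / (d1 + d2)
# Note: for yaw this interpolates naively across the 0/2pi wrap; the caller
# only reduces the result modulo 2pi afterwards.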
def clean(in_root, out_root, folds, cols_to_keep):
    merged_file = os.path.join(in_root, 'merged.csv')
    meta_file = os.path.join(out_root, 'meta.csv')
    meta_info = dict()
    merged = load_csv(merged_file)
    # Original number of imgs
    meta_info['total_imgs'] = len(merged['exposure'])
    # Valid ins
    valid_ins = np.array(merged['ins_good'], dtype=int)
    meta_info['valid_ins'] = sum(valid_ins)
    # Valid location on grid
    valid_grid = np.array(merged['full'], dtype=int)
    meta_info['valid_grid'] = sum(valid_grid)
    # Analyse and clean exposure
    # Visual inspection shows that images below 50'000'000 are very dark and above 110'000'000 very light
    exposures = np.array(merged['exposure'], dtype=float)
    low_exposure = np.percentile(exposures, 1)
    high_exposure = np.percentile(exposures, 99)
    print('Lo: {} \nHi: {}'.format(low_exposure, high_exposure))
    plt.clf()
    plt.hist(exposures, bins=10000, histtype='step')
    plt.xticks(rotation=90)
    plt.savefig(os.path.join(out_root, 'exposures.pdf'))
    valid_exposure = [1 if low_exposure < e < high_exposure else 0 for e in exposures]
    meta_info['valid_exposures'] = sum(valid_exposure)
    # Manual cleaning
    valid_date = [1 if d not in bad_dates else 0 for d in merged['date']]
    meta_info['valid_date'] = sum(valid_date)
    # Get fully valid
    fully_valid = np.array(valid_exposure) * np.array(valid_grid) * np.array(valid_ins) * np.array(valid_date)
    meta_info['fully_valid'] = sum(fully_valid)
    # Save for different folds
    for fold in folds:
        fold_valid = np.array(fully_valid) * np.array(merged[fold], dtype=int)
        meta_info['valid_{}'.format(fold)] = sum(fold_valid)
        out_data = dict()
        for col in cols_to_keep:
            out_data[col] = [e for e, v in zip(merged[col], fold_valid) if v == 1]
        clean_file = os.path.join(out_root, 'clean_{}.csv'.format(fold))
        save_csv(out_data, clean_file)
        # Plot fold exposure:
        fold_exposure = [e for e, v in zip(exposures, fold_valid) if v == 1]
        plt.clf()
        plt.hist(fold_exposure, bins=10000, histtype='step')
        plt.xticks(rotation=90)
        plt.savefig(os.path.join(out_root, 'exposures_{}.pdf'.format(fold)))
    save_csv(meta_info, meta_file)
    dict_to_bar(meta_info, os.path.join(out_root, 'meta_info.pdf'))
def parametrize(s, date):
    ref_date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
    ref_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, ref_date))
    data = load_csv(os.path.join(in_root, 'clean_{}.csv'.format(s)))
    ref_data = load_csv(ref_file)
    ref_xy = [(float(x), float(y)) for x, y in zip(ref_data['easting'], ref_data['northing'])]
    ref_l = np.array(ref_data['l'], dtype=float)
    ref_yaw = np.array(ref_data['yaw'], dtype=float)
    ref_tree = KDTree(np.array(ref_xy))
    vmin = min(ref_l)
    vmax = max(ref_l)
    date_data = dict()
    for key in data.keys():
        date_data[key] = [e for e, d in zip(data[key], data['date']) if d == date]
    date_xy = [(float(x), float(y)) for x, y in zip(date_data['easting'], date_data['northing'])]
    date_d = [0] + [
        math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)
        for p, q in zip(date_xy[1:], date_xy[:-1])
    ]
    date_l = np.cumsum(date_d).tolist()
    date_yaw = np.array(date_data['yaw'], dtype=float)
    matched_l = np.zeros(len(date_yaw))
    matchable = []
    r = 20
    if s == 'val':
        r = 100
    date_ni, date_nd = ref_tree.query_radius(np.array(date_xy), r=r, return_distance=True, sort_results=True)
    current_l = 0
    latest_valid = 0
    for j, (yaw, ni, nd) in enumerate(zip(date_yaw, date_ni, date_nd)):
        if len(ni) < 2:
            continue
        # Keep only neighbors facing roughly the same direction (within pi/3,
        # accounting for the 0/2pi wrap-around)
        angle_neighbors = [
            i for i in range(len(ni))
            if min(abs(yaw - ref_yaw[ni[i]]) % (2 * math.pi),
                   2 * math.pi - abs(yaw - ref_yaw[ni[i]]) % (2 * math.pi)) < math.pi / 3
        ]
        ni = [ni[i] for i in angle_neighbors]
        nd = [nd[i] for i in angle_neighbors]
        if len(ni) < 2:
            continue
        potential_l = np.array([ref_l[i] for i in ni])
        if j == 0:
            threshold = 40
            if s == 'val':
                threshold = 5
            # Disambiguate route self-intersections: if the neighbors fall into
            # two distant l clusters, keep the cluster closest to the current l
            km = KMeans(n_clusters=2, random_state=0).fit(potential_l.reshape(-1, 1))
            if abs(km.cluster_centers_[0] - km.cluster_centers_[1]) > threshold:
                closest_center = km.predict(np.array(current_l).reshape(-1, 1))[0]
                assignments = km.labels_
                l_neighbors = [i for i, a in zip(range(len(ni)), assignments) if a == closest_center]
            else:
                l_neighbors = range(len(ni))
        else:
            l_neighbors = [
                i for i, l in enumerate(potential_l)
                if abs(current_l - date_l[latest_valid] + date_l[j] - l) < 500
            ]
        ni = [ni[i] for i in l_neighbors]
        nd = [nd[i] for i in l_neighbors]
        if len(ni) < 2:
            continue
        interp_l = lin_ip(ref_l[ni[0]], ref_l[ni[1]], nd[0], nd[1])
        current_l = interp_l
        latest_valid = j
        matched_l[j] = interp_l
        matchable.append(j)
    if len(matchable) > 0:
        date_data['l'] = matched_l
        for key in ref_data.keys():
            date_data[key] = [date_data[key][i] for i in matchable]
        plot_results(date_xy, date_yaw, date_l, date, date_data, s, vmin, vmax)
        out_file = os.path.join(out_root, '{}_{}_geodesic.csv'.format(s, date))
        save_csv(date_data, out_file)