def prepare_data(dataset: DatasetEnum, index: Index):
    """Build a descriptor-space neighbour dataset for every document of ``dataset``.

    For each query document, ``index.search`` is asked for 70 neighbours of the
    query descriptor. The returned ids are looked up among the
    ``DatasetEnum.DATABASE`` documents so the geographic distance between the
    query and each neighbour can be recorded alongside the descriptor distance
    reported by the index.

    Args:
        dataset: Dataset whose documents are used as queries.
        index: Object whose ``search(descriptors, k)`` returns a
            ``(distances, ids)`` pair indexable as ``[0, :]`` for a single
            query row.

    Returns:
        A ``(new_dataset, dataset_info)`` tuple:

        * ``new_dataset`` -- list of ``{"query": photo_id, "neighbours": [ids]}``.
        * ``dataset_info`` -- dict keyed by query ``photo_id`` with
          ``"neighbours"`` (list of ``{"id", "geo_dist", "desc_dist"}``) and
          ``"num_neighbours"``.

    Raises:
        LookupError: If the index returns an id that has no matching document
            in the ``DATABASE`` dataset.
    """
    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")
    new_dataset = []
    dataset_info = {}
    for query in pbar:  # type: MongoDescriptor
        dists, ids = index.search(np.expand_dims(query.descriptor, axis=0), 70)
        # Convert to plain ints once: they are used in the DB query and
        # stored in the returned structures.
        neighbour_ids = [int(neighbour_id) for neighbour_id in ids[0, :]]

        # Fetch all neighbour documents in one query instead of one round
        # trip per neighbour. First match wins, mirroring ``.first()``.
        docs_by_id = {}
        for doc in MongoDescriptor.objects(dataset=DatasetEnum.DATABASE,
                                           photo_id__in=neighbour_ids):
            docs_by_id.setdefault(doc.photo_id, doc)

        neighbour_info = []
        for neighbour_id, dist in zip(neighbour_ids, dists[0, :]):
            neighbour_doc = docs_by_id.get(neighbour_id)
            if neighbour_doc is None:
                # NOTE(review): some ANN libraries pad missing results with
                # -1; confirm whether such ids should be skipped instead.
                raise LookupError(
                    f"No DATABASE descriptor found for photo_id "
                    f"{neighbour_id} (neighbour of query {query.photo_id})")
            geo_dist = compute_geo_distance(
                np.array([query.coordinates]),
                np.array([neighbour_doc.coordinates]))
            neighbour_info.append({
                "id": neighbour_id,
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(dist)
            })

        new_dataset.append({
            "query": query.photo_id,
            "neighbours": neighbour_ids
        })
        # NOTE(review): "num_neighbours" is never updated in this function;
        # kept at 0 as before -- confirm whether a later stage fills it in.
        dataset_info[query.photo_id] = {
            "neighbours": neighbour_info,
            "num_neighbours": 0
        }
    return new_dataset, dataset_info
def prepare_data(dataset: DatasetEnum):
    """Build a coordinate-space neighbour dataset for every document of ``dataset``.

    Each query document is paired with the first 50 ``DatasetEnum.DATABASE``
    documents returned by a ``coords__near`` query on the query coordinates.
    For every such neighbour the geographic distance and the Euclidean
    distance between descriptors are recorded.

    Args:
        dataset: Dataset whose documents are used as queries.

    Returns:
        A ``(new_dataset, dataset_info)`` tuple: a list of
        ``{"query", "neighbours"}`` records and a dict keyed by query
        ``photo_id`` holding per-neighbour ``{"id", "geo_dist", "desc_dist"}``
        entries plus a ``"num_neighbours"`` field (left at 0 here).
    """
    queries = MongoDescriptor.objects(dataset=dataset)
    progress = tqdm.tqdm(queries,
                         total=queries.count(),
                         desc=f"Processing {dataset} dataset")
    records = []
    info_by_query = {}
    for query in progress:  # type: MongoDescriptor
        query_id = query.photo_id
        nearest_docs = MongoDescriptor.objects(
            dataset=DatasetEnum.DATABASE,
            coords__near=query.coordinates)[:50]

        neighbour_ids = []
        neighbour_details = []
        for doc in nearest_docs:  # type: MongoDescriptor
            neighbour_ids.append(doc.photo_id)
            geo = compute_geo_distance(np.array([query.coordinates]),
                                       np.array([doc.coordinates]))
            descriptor_gap = np.linalg.norm(query.descriptor - doc.descriptor)
            neighbour_details.append({
                "id": doc.photo_id,
                "geo_dist": float(geo[0]),
                "desc_dist": float(descriptor_gap)
            })

        info_by_query[query_id] = {
            "neighbours": neighbour_details,
            "num_neighbours": 0
        }
        records.append({"query": query_id, "neighbours": neighbour_ids})
    return records, info_by_query
def _get_benchmark_results(pred_locations,
                           true_locations,
                           image_ids: np.ndarray = None) -> BenchmarkResult:
    """Collect localization metrics for a set of predictions.

    The geographic distance between true and predicted locations is computed
    with ``metric.compute_geo_distance`` and fed to the accuracy, average-error
    and distance-distribution helpers of ``metric``.

    Args:
        pred_locations: Predicted locations, one per sample.
        true_locations: Ground-truth locations, aligned with ``pred_locations``.
        image_ids: Optional ids aligned with the locations. When given, the
            per-image distance error and predicted coordinates are stored
            under ``int(image_id)``.

    Returns:
        A populated ``BenchmarkResult``.
    """
    summary = BenchmarkResult()
    errors = metric.compute_geo_distance(true_locations, pred_locations)

    summary.accuracy = metric.localization_accuracy(errors)
    summary.errors = metric.avg_errors(errors)
    summary.predictions_by_dist = \
        metric.distribution_of_predictions_by_distance(errors)

    # Without ids there is nothing to key the per-image details on.
    if image_ids is None:
        return summary

    for raw_id, error, predicted in zip(image_ids, errors, pred_locations):
        key = int(raw_id)
        summary.img_dist_error[key] = error
        summary.img_predicted_coords[key] = predicted.tolist()
    return summary
desc="Processing train query") for i, train_query in enumerate(pbar): neighbours = MongoDescriptor.objects( dataset=DatasetEnum.DATABASE, coords__near=train_query.coordinates)[:25] dists, ids = index.search(np.expand_dims(train_query.descriptor, axis=0), 25) neighbour_ids = set() train_object = {"query": train_query.photo_id, "neighbours": []} train_info[train_query.photo_id] = {"neighbours": [], "num_neighbours": 0} for neighbour in neighbours: train_object['neighbours'].append(neighbour.photo_id) neighbour_ids.add(neighbour.photo_id) geo_dist = compute_geo_distance(np.array([train_query.coordinates]), np.array([neighbour.coordinates])) desc_dist = np.linalg.norm(train_query.descriptor - neighbour.descriptor) train_info[train_query.photo_id]['neighbours'].append({ "id": neighbour.photo_id, "geo_dist": float(geo_dist[0]), "desc_dist": float(desc_dist) }) count = 0 for n_id, desc_dist in zip(ids[0, :], dists[0, :]): if n_id not in neighbour_ids:
_DESC_NEIGHBOURS = 100   # k requested from the descriptor index per query
_COORD_NEIGHBOURS = 500  # coordinate-nearest documents loaded per query
_MAX_CANDIDATES = 100    # intended size of each query's candidate list


def _take_excluding(ids, excluded, limit):
    """Return the first ``limit`` entries of ``ids`` that are not in ``excluded``."""
    picked = []
    for candidate_id in ids:
        # Check the quota *before* appending so a quota of 0 yields nothing.
        if len(picked) >= limit:
            break
        if candidate_id not in excluded:
            picked.append(candidate_id)
    return picked


def _select_candidates(query, d_ids, c_neighbours):
    """Choose the target neighbour id and the candidate ids for one query.

    ``d_ids`` are the descriptor-space neighbour ids and ``c_neighbours`` the
    coordinate-space neighbour documents, both in the order their respective
    searches returned them. Returns ``(target_id, candidates)`` with the
    target as the first candidate.
    """
    c_ids = [doc.photo_id for doc in c_neighbours]
    overlap = set(d_ids).intersection(c_ids)

    if not overlap:
        # No neighbour is shared by both searches: the target is the
        # coordinate neighbour that is closest in descriptor space.
        n_descriptors = np.array([doc.descriptor for doc in c_neighbours])
        desc_dists = np.linalg.norm(query.descriptor - n_descriptors, axis=1)
        target_id = c_ids[int(np.argmin(desc_dists))]

        half = _MAX_CANDIDATES // 2
        candidates = [target_id]
        candidates.extend(d_ids[:half])
        candidates.extend(
            _take_excluding(c_ids, {target_id}, _MAX_CANDIDATES - 1 - half))
        return target_id, candidates

    # Shared neighbours in coordinate-query order; the first one is the
    # target. (The original emitted them in arbitrary set order.)
    shared = list(dict.fromkeys(pid for pid in c_ids if pid in overlap))
    target_id = shared[0]

    # Fill the remaining slots half from each search, skipping shared ids.
    remaining = _MAX_CANDIDATES - len(shared)
    d_quota = round(remaining / 2)
    c_quota = remaining - d_quota
    candidates = shared
    candidates.extend(_take_excluding(d_ids, overlap, d_quota))
    candidates.extend(_take_excluding(c_ids, overlap, c_quota))
    return target_id, candidates


def prepare_data(dataset: DatasetEnum, index: Index):
    """Build a mixed descriptor/coordinate candidate dataset with a target per query.

    For every document of ``dataset`` two neighbour sets are gathered from the
    ``DatasetEnum.DATABASE`` documents: 100 ids from ``index.search`` on the
    query descriptor and the first 500 documents of a ``coords__near`` query
    on the query coordinates (presumably nearest-first -- confirm against the
    field's geo index).

    * If the two sets share no id, the target is the coordinate neighbour with
      the smallest descriptor distance; candidates are the target, the first
      50 descriptor neighbours and the first 49 other coordinate neighbours.
    * Otherwise the target is the first shared id in coordinate-query order;
      candidates are all shared ids followed by non-shared descriptor and
      coordinate neighbours, splitting the remaining slots between them.

    Changes from the previous implementation:

    * A quota of 0 now adds no ids. Before, the limit was tested only after
      appending, so with 99-100 shared ids extra neighbours were appended
      without bound.
    * ``dataset_info[...]["target_id"]`` is now filled in, matching
      ``dataset_object["target_id"]``.
    * The trailing ``print(count)`` was dropped, because ``count`` was
      overwritten by the inner loop counters.
    * Shared ids are emitted in coordinate-query order rather than set order.
    * Candidate documents already loaded as coordinate neighbours are reused
      and the rest are fetched in one query.

    Args:
        dataset: Dataset whose documents are used as queries.
        index: Object whose ``search(descriptors, k)`` returns a
            ``(distances, ids)`` pair with one row per query descriptor.

    Returns:
        A ``(new_dataset, dataset_info)`` tuple: a list of
        ``{"query", "neighbours", "target_id"}`` records and a dict keyed by
        query ``photo_id`` with per-neighbour ``{"id", "geo_dist",
        "desc_dist"}`` entries, ``"num_neighbours"`` and ``"target_id"``.

    Raises:
        LookupError: If a candidate id has no document in the ``DATABASE``
            dataset.
    """
    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")
    new_dataset = []
    dataset_info = {}
    for query in pbar:  # type: MongoDescriptor
        _, d_neighbours_ids = index.search(
            np.expand_dims(query.descriptor, axis=0), _DESC_NEIGHBOURS)
        d_ids = [int(d_id) for d_id in d_neighbours_ids[0]]
        c_neighbours = list(
            MongoDescriptor.objects(
                dataset=DatasetEnum.DATABASE,
                coords__near=query.coordinates)[:_COORD_NEIGHBOURS])

        target_id, candidates = _select_candidates(query, d_ids, c_neighbours)

        # Reuse documents that are already in memory and fetch the others
        # (descriptor-only neighbours) in a single query.
        docs_by_id = {}
        for doc in c_neighbours:
            docs_by_id.setdefault(doc.photo_id, doc)
        missing_ids = [cid for cid in candidates if cid not in docs_by_id]
        if missing_ids:
            for doc in MongoDescriptor.objects(dataset=DatasetEnum.DATABASE,
                                               photo_id__in=missing_ids):
                docs_by_id.setdefault(doc.photo_id, doc)

        neighbour_ids = []
        neighbour_info = []
        for neighbour_id in candidates:
            neighbour_doc = docs_by_id.get(neighbour_id)
            if neighbour_doc is None:
                raise LookupError(
                    f"No DATABASE descriptor found for photo_id "
                    f"{neighbour_id} (candidate of query {query.photo_id})")
            geo_dist = compute_geo_distance(
                np.array([query.coordinates]),
                np.array([neighbour_doc.coordinates]))
            desc_dist = np.linalg.norm(query.descriptor -
                                       neighbour_doc.descriptor)
            neighbour_ids.append(int(neighbour_id))
            neighbour_info.append({
                "id": int(neighbour_id),
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(desc_dist)
            })

        new_dataset.append({
            "query": query.photo_id,
            "neighbours": neighbour_ids,
            "target_id": target_id
        })
        # NOTE(review): "num_neighbours" was never updated by the original
        # code either; kept at 0 -- confirm whether a later stage sets it.
        dataset_info[query.photo_id] = {
            "neighbours": neighbour_info,
            "num_neighbours": 0,
            "target_id": target_id
        }
    return new_dataset, dataset_info