Пример #1
0
def prepare_data(dataset: DatasetEnum, index: Index, k: int = 70):
    """Collect the ``k`` nearest index neighbours of every query descriptor.

    Each ``MongoDescriptor`` document of ``dataset`` is used as a query: its
    descriptor is searched in ``index`` and every returned id is resolved to
    the matching document of the ``DatasetEnum.DATABASE`` dataset in order to
    compute the geographic distance between query and neighbour.

    Args:
        dataset: Dataset whose documents are used as queries.
        index: Search index; ``index.search(batch, k)`` must return a
            ``(distances, ids)`` pair of arrays with one row per query.
        k: Number of neighbours requested from the index per query.
            Defaults to 70, the value that used to be hard-coded.

    Returns:
        A ``(new_dataset, dataset_info)`` tuple. ``new_dataset`` is a list of
        ``{"query": photo_id, "neighbours": [neighbour ids]}`` dicts, one per
        query. ``dataset_info`` maps each query ``photo_id`` to a dict whose
        ``"neighbours"`` list holds one ``{"id", "geo_dist", "desc_dist"}``
        entry per neighbour.
    """
    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")

    new_dataset = []
    dataset_info = {}
    for query in pbar:  # type: MongoDescriptor
        dists, ids = index.search(np.expand_dims(query.descriptor, axis=0), k)

        neighbour_ids = []
        neighbour_info = []
        for raw_id, dist in zip(ids[0, :], dists[0, :]):
            # Convert the numpy integer once; the plain int is used both for
            # the database lookup and for the returned structures.
            neighbour_id = int(raw_id)

            neighbour_doc: MongoDescriptor = MongoDescriptor.objects(
                dataset=DatasetEnum.DATABASE, photo_id=neighbour_id).first()
            if neighbour_doc is None:
                # No database document for this id, so there are no
                # coordinates to compare against; previously this crashed
                # with an AttributeError on ``None``.
                # NOTE(review): presumably ``index`` pads short result lists
                # with -1 ids (FAISS-style) - confirm against the index used.
                continue

            geo_dist = compute_geo_distance(
                np.array([query.coordinates]),
                np.array([neighbour_doc.coordinates]))

            neighbour_ids.append(neighbour_id)
            neighbour_info.append({
                "id": neighbour_id,
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(dist),
            })

        # NOTE(review): "num_neighbours" is never updated in this function and
        # is kept at 0 exactly as before - confirm whether a later step is
        # expected to fill it in.
        dataset_info[query.photo_id] = {
            "neighbours": neighbour_info,
            "num_neighbours": 0,
        }
        new_dataset.append({
            "query": query.photo_id,
            "neighbours": neighbour_ids,
        })

    return new_dataset, dataset_info
Пример #2
0
def prepare_data(dataset: DatasetEnum):
    """Pair every query of ``dataset`` with database documents near it.

    For each ``MongoDescriptor`` document of ``dataset`` the first 50 results
    of a ``coords__near`` query on the ``DatasetEnum.DATABASE`` dataset are
    taken as neighbours. For every neighbour the geographic distance
    (``compute_geo_distance``) and the norm of the descriptor difference are
    recorded.

    Returns:
        A ``(new_dataset, dataset_info)`` tuple. ``new_dataset`` is a list of
        ``{"query": photo_id, "neighbours": [photo ids]}`` dicts;
        ``dataset_info`` maps each query ``photo_id`` to a dict with the
        per-neighbour ``"id"``, ``"geo_dist"`` and ``"desc_dist"`` values and
        a ``"num_neighbours"`` entry that this function leaves at 0.
    """
    queries = MongoDescriptor.objects(dataset=dataset)
    progress = tqdm.tqdm(queries,
                         total=queries.count(),
                         desc=f"Processing {dataset} dataset")

    records = []
    info_by_query = {}
    for query in progress:  # type: MongoDescriptor
        query_id = query.photo_id
        found_ids = []
        found_details = []
        info_by_query[query_id] = {
            "neighbours": found_details,
            "num_neighbours": 0,
        }

        nearby_docs = MongoDescriptor.objects(
            dataset=DatasetEnum.DATABASE, coords__near=query.coordinates)[:50]
        for doc in nearby_docs:  # type: MongoDescriptor
            found_ids.append(doc.photo_id)

            geo = compute_geo_distance(np.array([query.coordinates]),
                                       np.array([doc.coordinates]))
            descriptor_gap = np.linalg.norm(query.descriptor - doc.descriptor)

            found_details.append({
                "id": doc.photo_id,
                "geo_dist": float(geo[0]),
                "desc_dist": float(descriptor_gap),
            })

        records.append({"query": query_id, "neighbours": found_ids})

    return records, info_by_query
Пример #3
0
def _get_benchmark_results(pred_locations,
                           true_locations,
                           image_ids: np.ndarray = None) -> BenchmarkResult:
    """Fill a ``BenchmarkResult`` from predicted and true locations.

    The geographic distance between ``true_locations`` and ``pred_locations``
    is computed once with ``metric.compute_geo_distance`` and fed to the
    accuracy, average-error and distribution helpers of ``metric``.

    Args:
        pred_locations: Predicted locations; iterated row by row, each row
            must provide ``tolist()`` when ``image_ids`` is given.
        true_locations: Ground-truth locations matching ``pred_locations``.
        image_ids: Optional ids aligned with the locations. When given, the
            per-image distance error and predicted coordinates are stored
            under ``int(image_id)``.

    Returns:
        The populated ``BenchmarkResult``.
    """
    report = BenchmarkResult()
    geo_errors = metric.compute_geo_distance(true_locations, pred_locations)

    report.accuracy = metric.localization_accuracy(geo_errors)
    report.errors = metric.avg_errors(geo_errors)
    report.predictions_by_dist = (
        metric.distribution_of_predictions_by_distance(geo_errors))

    # Per-image details are only recorded when ids were supplied.
    if image_ids is None:
        return report

    for raw_id, error, location in zip(image_ids, geo_errors, pred_locations):
        key = int(raw_id)
        report.img_dist_error[key] = error
        report.img_predicted_coords[key] = location.tolist()
    return report
Пример #4
0
                 desc="Processing train query")
for i, train_query in enumerate(pbar):
    neighbours = MongoDescriptor.objects(
        dataset=DatasetEnum.DATABASE,
        coords__near=train_query.coordinates)[:25]

    dists, ids = index.search(np.expand_dims(train_query.descriptor, axis=0),
                              25)
    neighbour_ids = set()
    train_object = {"query": train_query.photo_id, "neighbours": []}

    train_info[train_query.photo_id] = {"neighbours": [], "num_neighbours": 0}
    for neighbour in neighbours:
        train_object['neighbours'].append(neighbour.photo_id)
        neighbour_ids.add(neighbour.photo_id)
        geo_dist = compute_geo_distance(np.array([train_query.coordinates]),
                                        np.array([neighbour.coordinates]))
        desc_dist = np.linalg.norm(train_query.descriptor -
                                   neighbour.descriptor)

        train_info[train_query.photo_id]['neighbours'].append({
            "id":
            neighbour.photo_id,
            "geo_dist":
            float(geo_dist[0]),
            "desc_dist":
            float(desc_dist)
        })

    count = 0
    for n_id, desc_dist in zip(ids[0, :], dists[0, :]):
        if n_id not in neighbour_ids:
Пример #5
0
def prepare_data(dataset: DatasetEnum, index: Index):
    """Build a mixed descriptor-/coordinate-space candidate list per query.

    For every ``MongoDescriptor`` document of ``dataset`` two neighbour pools
    are gathered from the ``DatasetEnum.DATABASE`` dataset:

    * d-space: the 100 ids returned by ``index.search`` for the descriptor;
    * c-space: the first 500 documents of a ``coords__near`` query on the
      query coordinates.

    At most 100 candidates are then selected:

    * If the pools share no id, the c-space document whose descriptor is
      closest to the query descriptor becomes the target and is listed first,
      followed by the first 50 d-space ids and the first 49 other c-space
      ids.
    * Otherwise every shared id is kept, the target is the first shared
      document in the order returned by the ``coords__near`` query, and the
      remaining slots are split between d-space-only and c-space-only ids.

    Each candidate is finally resolved to its database document to record the
    geographic and descriptor distance to the query. The number of queries
    whose two pools shared no id is printed at the end.

    Args:
        dataset: Dataset whose documents are used as queries.
        index: Search index; ``index.search(batch, k)`` must return a
            ``(distances, ids)`` pair of arrays with one row per query.

    Returns:
        A ``(new_dataset, dataset_info)`` tuple. ``new_dataset`` is a list of
        ``{"query", "neighbours", "target_id"}`` dicts. ``dataset_info`` maps
        each query ``photo_id`` to a dict with the same ``"target_id"``, a
        ``"neighbours"`` list of ``{"id", "geo_dist", "desc_dist"}`` entries
        and a ``"num_neighbours"`` entry that this function leaves at 0.
        ``"target_id"`` is -1 only when no c-space document was found.
    """
    max_candidates = 100  # candidates kept per query; also the search size
    geo_pool_size = 500  # c-space documents fetched per query
    half = max_candidates // 2

    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")

    new_dataset = []
    dataset_info = {}
    # The original reused one ``count`` variable for this statistic and for
    # the inner loop limits, so the printed value was meaningless.
    # NOTE(review): counting disjoint-pool queries is the presumed intent of
    # the final print - confirm.
    disjoint_queries = 0
    for query in pbar:  # type: MongoDescriptor
        _, d_neighbours_ids = index.search(
            np.expand_dims(query.descriptor, axis=0), max_candidates)
        d_ids = d_neighbours_ids[0].tolist()

        c_neighbours = list(
            MongoDescriptor.objects(
                dataset=DatasetEnum.DATABASE,
                coords__near=query.coordinates)[:geo_pool_size])
        c_ids = [neighbour.photo_id for neighbour in c_neighbours]

        intersection = set(d_ids).intersection(set(c_ids))

        target_neighbour = -1
        candidates = []
        if not intersection:
            disjoint_queries += 1
            # Find the closest in d-space among the c-space neighbours. The
            # distances are only needed in this branch, and an empty c-space
            # pool would make both the norm and the argmin fail.
            if c_neighbours:
                n_descriptors = np.array(
                    [desc_doc.descriptor for desc_doc in c_neighbours])
                desc_dists = np.linalg.norm(query.descriptor - n_descriptors,
                                            axis=1)
                target_neighbour = c_ids[int(np.argmin(desc_dists))]
                candidates.append(target_neighbour)

            candidates.extend(d_ids[:half])
            other_c_ids = [
                photo_id for photo_id in c_ids if photo_id != target_neighbour
            ]
            candidates.extend(other_c_ids[:max_candidates - 1 - half])
        else:
            candidates.extend(list(intersection))
            free_slots = max_candidates - len(intersection)
            max_d_neighbours = round(free_slots / 2)
            max_c_neighbours = free_slots - max_d_neighbours

            # Slicing honours a limit of 0; the previous "append, then compare
            # the counter" loops never stopped in that case and could return
            # more than ``max_candidates`` entries.
            d_only = [d_id for d_id in d_ids if d_id not in intersection]
            candidates.extend(d_only[:max_d_neighbours])

            # Closest in c-space among the intersection: first shared id in
            # the order returned by the coords__near query.
            target_neighbour = next(
                (photo_id for photo_id in c_ids if photo_id in intersection),
                -1)

            c_only = [
                photo_id for photo_id in c_ids if photo_id not in intersection
            ]
            candidates.extend(c_only[:max_c_neighbours])

        dataset_object = {
            "query": query.photo_id,
            "neighbours": [],
            "target_id": target_neighbour
        }
        # "target_id" used to stay at -1 here although the same key of
        # ``dataset_object`` was updated; both now carry the selected target.
        # NOTE(review): "num_neighbours" is never updated in this function and
        # is kept at 0 as before - confirm whether a later step fills it in.
        query_info = {
            "neighbours": [],
            "num_neighbours": 0,
            "target_id": target_neighbour
        }
        dataset_info[query.photo_id] = query_info

        for neighbour_id in candidates:
            neighbour_doc: MongoDescriptor = MongoDescriptor.objects(
                dataset=DatasetEnum.DATABASE, photo_id=neighbour_id).first()
            if neighbour_doc is None:
                # No database document for this id (previously an
                # AttributeError on ``None``).
                # NOTE(review): presumably ``index`` pads short result lists
                # with -1 ids (FAISS-style) - confirm against the index used.
                continue

            geo_dist = compute_geo_distance(
                np.array([query.coordinates]),
                np.array([neighbour_doc.coordinates]))
            desc_dist = np.linalg.norm(query.descriptor -
                                       neighbour_doc.descriptor)

            dataset_object["neighbours"].append(int(neighbour_id))
            query_info["neighbours"].append({
                "id": int(neighbour_id),
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(desc_dist)
            })

        new_dataset.append(dataset_object)

    print(disjoint_queries)
    return new_dataset, dataset_info