Example #1
def prepare_data(dataset: DatasetEnum, index: Index):
    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")

    new_dataset = []
    dataset_info = {}
    for query in pbar:  # type: MongoDescriptor
        dists, ids = index.search(np.expand_dims(query.descriptor, axis=0), 70)
        dataset_object = {"query": query.photo_id, "neighbours": []}
        dataset_info[query.photo_id] = {"neighbours": [], "num_neighbours": 0}

        for neighbour_id, dist in zip(ids[0, :], dists[0, :]):
            dataset_object['neighbours'].append(int(neighbour_id))

            neighbour_doc: MongoDescriptor = MongoDescriptor.objects(
                dataset=DatasetEnum.DATABASE, photo_id=int(neighbour_id)).first()

            geo_dist = compute_geo_distance(
                np.array([query.coordinates]),
                np.array([neighbour_doc.coordinates]))

            dataset_info[query.photo_id]["neighbours"].append({
                "id": int(neighbour_id),
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(dist)
            })
        dataset_info[query.photo_id]["num_neighbours"] = len(
            dataset_object["neighbours"])
        new_dataset.append(dataset_object)

    return new_dataset, dataset_info
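
For context, a minimal usage sketch, assuming a running MongoDB instance and a prebuilt descriptor index as in Examples #6 and #10 (the connection settings and index path below are placeholders):

from mongoengine import connect

from im2gps.data.descriptors import DatasetEnum
from im2gps.core.index import IndexConfig
from im2gps.services.index import get_index

connect(db="im2gps", host="localhost", port=27017)  # placeholder settings

# Load (or build) the index, then pair each query with its 70 nearest neighbours.
index = get_index(IndexConfig.from_path("/path/to/index"))  # placeholder path
dataset, info = prepare_data(DatasetEnum.VALIDATION_QUERY, index)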
Example #2
def prepare_data(dataset: DatasetEnum):
    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")

    new_dataset = []
    dataset_info = {}
    for query in pbar:  # type: MongoDescriptor

        dataset_object = {"query": query.photo_id, "neighbours": []}
        dataset_info[query.photo_id] = {"neighbours": [], "num_neighbours": 0}

        neighbours = MongoDescriptor.objects(
            dataset=DatasetEnum.DATABASE, coords__near=query.coordinates)[:50]
        for neighbour in neighbours:  # type: MongoDescriptor
            dataset_object['neighbours'].append(neighbour.photo_id)
            geo_dist = compute_geo_distance(np.array([query.coordinates]),
                                            np.array([neighbour.coordinates]))
            dist = np.linalg.norm(query.descriptor - neighbour.descriptor)

            dataset_info[query.photo_id]["neighbours"].append({
                "id": neighbour.photo_id,
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(dist)
            })
        dataset_info[query.photo_id]["num_neighbours"] = len(
            dataset_object["neighbours"])
        new_dataset.append(dataset_object)

    return new_dataset, dataset_info
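
Unlike Example #1, this variant selects neighbours by geographic proximity (the coords__near query) rather than by searching a descriptor index, and computes the descriptor distance explicitly with np.linalg.norm; Example #11 combines both orderings.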
Example #3
def perform_localisation_benchmark(
        model_params: ModelParameters, index_config: IndexConfig,
        benchmark_params: BenchmarkParameters) -> BenchmarkResult:
    model = LocalisationModel(model_params.localisation_type, index_config,
                              model_params.sigma, model_params.m,
                              model_params.k, model_params.num_workers)
    log.info(f"Localisation model: {repr(model)}")

    if index_config.index_dir is None:
        log.info("Getting training data")
        ids, coordinates, descriptors = MongoDescriptor.get_data_as_arrays(
            DatasetEnum.DATABASE)
        log.info("Finished getting training data")
    else:
        log.info("Getting ids and coords for training data")
        ids, coordinates = MongoDescriptor.get_ids_and_coords(
            DatasetEnum.DATABASE)
        descriptors = None
        log.info("Finished getting training data")

    log.debug(f"Current memory usage: {utils.get_memory_usage():.2f}GB")

    log.info(f"Fitting model...")
    model.fit(ids, coordinates, descriptors)
    del ids, coordinates, descriptors
    log.debug(f"Current memory usage: {utils.get_memory_usage():.2f}GB")
    log.info(f"Model is trained.")

    log.info(f"Getting query data")
    q_ids, q_coordinates, q_descriptors = MongoDescriptor.get_data_as_arrays(
        dataset=benchmark_params.query_dataset)
    log.info("Finished getting query data")
    log.debug(f"Current memory usage: {utils.get_memory_usage():.2f}GB")

    log.info("Getting predictions...")
    predicted_locations = model.predict(q_descriptors)
    log.info("Finished getting predictions")

    if benchmark_params.extended_results:
        img_ids = q_ids
    else:
        img_ids = None

    result = _get_benchmark_results(predicted_locations, q_coordinates,
                                    img_ids)

    if benchmark_params.save_result:
        log.info("Saving test results")
        with open(benchmark_params.save_path, 'w') as f:
            json.dump(asdict(result), f)

    return result
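
The index_dir branch mirrors get_index in Example #6: with no prebuilt index directory, the raw descriptors must be loaded so the index can be built from scratch; with one, only ids and coordinates are fetched, which keeps memory usage down.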
Example #4
def localisation_tuning(parameters: TuningParameters):
    log.info("Getting ids and coords for training data")
    ids, coordinates = MongoDescriptor.get_ids_and_coords(DatasetEnum.DATABASE)
    log.info("Finished getting training data")
    log.debug(f"Current memory usage: {utils.get_memory_usage():.2f}GB")

    log.info(f"Getting query data")
    q_ids, q_coordinates, q_descriptors = MongoDescriptor.get_data_as_arrays(
        dataset=parameters.query_dataset)
    log.info("Finished getting query data")
    log.debug(f"Current memory usage: {utils.get_memory_usage():.2f}GB")

    coord_map = LocalisationModel.compute_coordinate_map(ids, coordinates)
    grid_tuples = _param_grid(parameters.grid)
    parameters_name = parameters.grid.keys()

    records = []
    for index_config in parameters.index_configs:
        for i, tup in enumerate(grid_tuples):
            start = time.time()
            experiment_parameters = _tuple_to_dict(parameters_name, tup)
            log.info(
                f"Tuning experiment: {i + 1}/{len(grid_tuples)}, index: {index_config.index_type.name}, "
                f"parameters: {experiment_parameters}")
            model_params = _params_from_tuple(parameters_name, tup,
                                              parameters.default_parameters)
            model = LocalisationModel(model_params.localisation_type,
                                      index_config, model_params.sigma,
                                      model_params.m, model_params.k,
                                      model_params.num_workers)
            model.fit_from_coord_map(coord_map)
            predicted_locations = model.predict(q_descriptors)

            result = _get_benchmark_results(predicted_locations, q_coordinates)
            tuning_record = {
                "index_type": index_config.index_type.value,
                "parameters": experiment_parameters,
                "accuracy": result.accuracy,
                "errors": result.errors,
                "predictions_by_dist": result.predictions_by_dist
            }
            records.append(tuning_record)
            end = time.time()
            log.info(f"Current iteration time: {end - start:.3f}s")
            if (i + 1) % parameters.save_every == 0 or (i + 1) == len(grid_tuples):
                log.info(
                    f"Saving tuning results. Step: {i + 1}/{len(grid_tuples)}, "
                    f"index: {index_config.index_type.name}, parameters: {experiment_parameters}"
                )
                _save_tuning_results(records, parameters.save_path)
                log.info(f"Results saved to: {parameters.save_path}")
Example #5
    def __getitem__(self, index) -> T_co:
        photo_id = self.__ids[index]
        doc: MongoDescriptor = MongoDescriptor.objects(photo_id=photo_id).first()
        descriptor = torch.tensor(doc.descriptor, dtype=torch.float32)
        coordinates = torch.tensor(doc.coordinates, dtype=torch.float32)

        return descriptor, photo_id, coordinates
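
A minimal usage sketch, assuming this __getitem__ belongs to a torch.utils.data.Dataset subclass (the class name DescriptorDataset and its constructor are hypothetical):

from torch.utils.data import DataLoader

dataset = DescriptorDataset(photo_ids)  # hypothetical constructor
loader = DataLoader(dataset, batch_size=32, num_workers=4)
for descriptors, ids, coordinates in loader:
    ...  # descriptors: (32, D) float32, coordinates: (32, 2) float32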
Example #6
def get_index(index_config: IndexConfig) -> Index:
    if index_config.index_dir is None:
        log.info("Getting training data")
        ids, coordinates, descriptors = MongoDescriptor.get_data_as_arrays(
            DatasetEnum.DATABASE)
        log.info("Finished getting training data")
        index = IndexBuilder(index_config, descriptors, ids).build()
    else:
        index = IndexBuilder(index_config).build()

    return index
Example #7
    def __getitem__(self, index) -> T_co:
        query_id = self.dataset[index]['query']
        neighbours_ids = self.dataset[index]['neighbours']
        query_doc = MongoDescriptor.objects(photo_id=query_id).first()
        query = query_doc.descriptor
        q_coords = query_doc.coordinates

        # NOTE: photo_id__in does not guarantee result order, so index the
        # documents by id and read them back in the order of neighbours_ids.
        docs_by_id = {
            doc.photo_id: doc
            for doc in MongoDescriptor.objects(photo_id__in=neighbours_ids)
        }
        neighbours = [docs_by_id[n_id].descriptor for n_id in neighbours_ids]
        n_coords = [docs_by_id[n_id].coordinates for n_id in neighbours_ids]

        query = torch.tensor(query, dtype=torch.float32)
        neighbours = torch.tensor(neighbours, dtype=torch.float32)
        q_coords = torch.tensor(q_coords, dtype=torch.float32)
        n_coords = torch.tensor(n_coords, dtype=torch.float32)
        query_id = torch.tensor(query_id)
        neighbours_ids = torch.tensor(neighbours_ids)
        return query, neighbours, q_coords, n_coords, query_id, neighbours_ids
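
Because different queries can have different numbers of neighbours, batching these samples through a DataLoader needs a custom collate function. A minimal sketch that pads the neighbour tensors to a common length (the batch size and zero padding are arbitrary choices):

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def collate_neighbours(batch):
    # Each sample: (query, neighbours, q_coords, n_coords, query_id, n_ids).
    queries, neighbours, q_coords, n_coords, query_ids, n_ids = zip(*batch)
    return (torch.stack(queries),
            pad_sequence(neighbours, batch_first=True),  # (B, max_k, D)
            torch.stack(q_coords),
            pad_sequence(n_coords, batch_first=True),    # (B, max_k, 2)
            torch.stack(query_ids),
            pad_sequence(n_ids, batch_first=True))       # (B, max_k)

loader = DataLoader(dataset, batch_size=8, collate_fn=collate_neighbours)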
Example #8
def migrate():
    tables_pbar = tqdm(path_to_descriptors.items(), desc="Processing ")
    for dataset, path in tables_pbar:
        tables_pbar.set_description(f"Processing {dataset.value} dataset")
        with DescriptorsTable(path, 2048) as table:
            for descriptor in tqdm(table.iterrows(), leave=False, total=len(table)):
                desc = MongoDescriptor(photo_id=descriptor.photo_id, dataset=dataset)
                desc.coordinates = [descriptor.lon, descriptor.lat]
                desc.descriptor = descriptor.descriptor
                desc.save()
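
Saving one document at a time issues a round trip to MongoDB per descriptor; for large migrations, bulk inserts are usually much faster. A hedged variant of the inner loop using QuerySet.insert (the batch size of 1000 is an arbitrary choice):

batch = []
for descriptor in tqdm(table.iterrows(), leave=False, total=len(table)):
    desc = MongoDescriptor(photo_id=descriptor.photo_id, dataset=dataset)
    desc.coordinates = [descriptor.lon, descriptor.lat]
    desc.descriptor = descriptor.descriptor
    batch.append(desc)
    if len(batch) >= 1000:
        MongoDescriptor.objects.insert(batch)  # one bulk write per 1000 docs
        batch.clear()
if batch:
    MongoDescriptor.objects.insert(batch)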
Example #9
def create_and_save_index(index_config: IndexConfig, save_path):
    log.info("Reading data from db...")
    ids, coordinates, descriptors = MongoDescriptor.get_data_as_arrays(
        DatasetEnum.DATABASE)
    log.info("Finished reading data from db")

    log.info("Building index...")
    index = IndexBuilder(index_config, descriptors, ids).build()
    log.info(f"Index built: {repr(index)}")

    class_path = os.path.join(save_path, INDEX_CLASS_FILE)
    log.info(f"Saving class file to {class_path}")
    with open(class_path, "wb") as f:
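        # Point the config at the save directory before pickling so a reloaded
        # config can locate the index file, then reset it afterwards.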
        index_config.index_dir = save_path
        pickle.dump(index_config, f)
        index_config.index_dir = None

    index_path = os.path.join(save_path, INDEX_FILE_NAME)
    log.info(f"Saving index file to {index_path}")
    index.write_index(index_path)
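
Reloading later follows the pattern of Examples #6 and #10:

index_config = IndexConfig.from_path(save_path)
index = get_index(index_config)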
Example #10
import json
import numpy as np
import tqdm
from mongoengine import connect

from im2gps.data.descriptors import MongoDescriptor, DatasetEnum
from im2gps.core.index import IndexConfig
from im2gps.services.index import get_index
from im2gps.core.metric import compute_geo_distance

PATH_TO_DS = "/home/andrew/Documents/study/thesis/thesis-src/experiments/datasets/V1_0__Simple_dataset"

connect(db="im2gps", host="localhost", port=51998)

print("Getting queries")
train_cursor = MongoDescriptor.objects(dataset=DatasetEnum.VALIDATION_QUERY)
train_count = train_cursor.count()

print("Building index")
index_config = IndexConfig.from_path(
    "/home/andrew/Documents/study/thesis/indices/l2_512_index")
index = get_index(index_config)

train_dataset = []
train_info = {}

pbar = tqdm.tqdm(train_cursor,
                 total=train_count,
                 desc="Processing train query")
for i, train_query in enumerate(pbar):
    # Assumed completion: the source snippet is truncated here; the query
    # mirrors the geo-neighbour lookup from Example #2.
    neighbours = MongoDescriptor.objects(
        dataset=DatasetEnum.DATABASE,
        coords__near=train_query.coordinates)[:50]
Example #11
def prepare_data(dataset: DatasetEnum, index: Index):
    cursor = MongoDescriptor.objects(dataset=dataset)
    pbar = tqdm.tqdm(cursor,
                     total=cursor.count(),
                     desc=f"Processing {dataset} dataset")

    new_dataset = []
    dataset_info = {}
    for query in pbar:  # type: MongoDescriptor
        dataset_object = {
            "query": query.photo_id,
            "neighbours": [],
            "target_id": -1
        }
        dataset_info[query.photo_id] = {
            "neighbours": [],
            "num_neighbours": 0,
            "target_id": -1
        }

        d_dists, d_neighbours_ids = index.search(
            np.expand_dims(query.descriptor, axis=0), 100)
        c_neighbours = list(
            MongoDescriptor.objects(dataset=DatasetEnum.DATABASE,
                                    coords__near=query.coordinates)[:500])

        n_descriptors = np.array(
            [desc_doc.descriptor for desc_doc in c_neighbours])

        c_neighbours_ids_set = {neighbour.photo_id for neighbour in c_neighbours}
        d_neighbours_ids_set = set(d_neighbours_ids[0])

        intersection = d_neighbours_ids_set.intersection(c_neighbours_ids_set)

        desc_dists = np.linalg.norm(query.descriptor - n_descriptors, axis=1)

        candidates = []

        if len(intersection) == 0:
            # find closest in d-space among c_neighbours
            target_neighbour_idx = np.argmin(desc_dists)
            target_neighbour = c_neighbours[target_neighbour_idx].photo_id
            candidates.append(target_neighbour)
            candidates.extend(d_neighbours_ids[0][:50].tolist())

            count = 0
            for neighbour in c_neighbours:
                if neighbour.photo_id == target_neighbour:
                    continue
                candidates.append(neighbour.photo_id)
                count += 1
                if count == 49:
                    break
        else:
            # find closest in c-space among intersection
            candidates.extend(list(intersection))
            intersection_length = len(intersection)
            max_neighbours = 100 - intersection_length
            max_d_neighbours = round(max_neighbours / 2)
            max_c_neighbours = max_neighbours - max_d_neighbours

            count = 0
            for d_id in d_neighbours_ids[0].tolist():
                if d_id in intersection:
                    continue
                candidates.append(d_id)
                count += 1
                if count == max_d_neighbours:
                    break

            target_neighbour = -1
            for n in c_neighbours:
                if n.photo_id in intersection:
                    target_neighbour = n.photo_id
                    break
            count = 0
            for neighbour in c_neighbours:
                if neighbour.photo_id in intersection:
                    continue
                candidates.append(neighbour.photo_id)
                count += 1
                if count == max_c_neighbours:
                    break
        dataset_object['target_id'] = int(target_neighbour)
        dataset_info[query.photo_id]['target_id'] = int(target_neighbour)
        for neighbour_id in candidates:
            dataset_object['neighbours'].append(int(neighbour_id))

            # add info
            neighbour_doc: MongoDescriptor = MongoDescriptor.objects(
                dataset=DatasetEnum.DATABASE, photo_id=int(neighbour_id)).first()
            geo_dist = compute_geo_distance(
                np.array([query.coordinates]),
                np.array([neighbour_doc.coordinates]))
            desc_dist = np.linalg.norm(query.descriptor -
                                       neighbour_doc.descriptor)
            dataset_info[query.photo_id]["neighbours"].append({
                "id": int(neighbour_id),
                "geo_dist": float(geo_dist[0]),
                "desc_dist": float(desc_dist)
            })

        dataset_info[query.photo_id]["num_neighbours"] = len(
            dataset_object["neighbours"])
        new_dataset.append(dataset_object)
    return new_dataset, dataset_info
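
In short, each query's candidate list mixes two neighbourhoods: the 100 nearest neighbours in descriptor space (d) and the 500 nearest in geographic space (c). If the two sets are disjoint, the target is the descriptor-closest geo neighbour and the remaining slots take 50 d-neighbours and 49 c-neighbours; if they intersect, the target is the geo-closest member of the intersection and the leftover slots are split evenly between the two spaces.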