Example #1
 def test_polygon_exceed_max_points(self) -> None:
     with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
         wkt = file.read()
         shape = wktreader.loads(wkt)
         geom = shape.geoms[0]
         max_points = 20
         with self.assertRaises(Exception):
             vectorize_wkt(geom.wkt, max_points)
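When a geometry carries more points than max_points and simplification is not requested, vectorize_wkt raises, as this test asserts. Example #3 below shows the escape hatch; a minimal sketch of the fallback pattern, assuming the import style used in the shapefile script further down:

from deep_geometry import vectorizer as gv

wkt = 'POLYGON((0 0, 1 0, 2 0, 3 0, 3 1, 3 2, 3 3, 0 3, 0 0))'
max_points = 4

try:
    vectorized = gv.vectorize_wkt(wkt, max_points)
except Exception:
    # the geometry exceeds the point budget: simplify it down instead
    vectorized = gv.vectorize_wkt(wkt, max_points, simplify=True)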
Example #2
 def test_vectorize_big_multipolygon(self) -> None:
     with open('test_files/big_multipolygon_wkt.txt', 'r') as file:
         wkt = file.read()
         max_pts = get_max_points([wkt])
         vectorized = vectorize_wkt(wkt, max_pts)
         self.assertEqual((144, GEO_VECTOR_LEN), vectorized.shape)
         self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit
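get_max_points scans a list of WKT strings and returns the point budget of the largest one; combined with simplify and fixed_size (Example #4) it produces uniformly shaped arrays. A minimal sketch, assuming get_max_points is importable alongside vectorize_wkt:

import numpy as np
from deep_geometry.vectorizer import vectorize_wkt, get_max_points  # assumed import path

wkts = [
    'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))',
    'POINT(12 14)',
]
max_pts = get_max_points(wkts)
vectors = [vectorize_wkt(w, max_pts, simplify=True, fixed_size=True) for w in wkts]
print(np.array(vectors).shape)  # every geometry padded to the same number of rows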
Example #3
 def test_simplify_multipolygon_gt_max_points(self) -> None:
     with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
         wkt = file.read()
         max_points = 20
         vectorized = vectorize_wkt(wkt, max_points, simplify=True)
         self.assertEqual((20, GEO_VECTOR_LEN), vectorized.shape)
         self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit
Example #4
 def test_fixed_size(self) -> None:
     max_points = 20
     input_set = np.array(target_wkt)
     vectorized = [vectorize_wkt(wkt, max_points, simplify=True, fixed_size=True) for wkt in input_set]
     self.assertEqual(np.array(vectorized).shape, (input_set.size, 20, GEO_VECTOR_LEN))
     for vector in vectorized:
         self.assertEqual(vector[-1, FULL_STOP_INDEX], 1)
Example #5
 def test_multipolygon_with_hole(self):
     with open('test_files/multipolygon_with_hole.txt', 'r') as file:
         wkt = file.read()
         vectorized = vectorize_wkt(wkt)
         self.assertEqual((683, GEO_VECTOR_LEN), vectorized.shape)
         self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit
Example #6
 def test_polygon_with_hole(self) -> None:
     polygon_with_hole = "POLYGON((0 0, 3 0, 3 3, 0 3, 0 0), (1 1, 2 1, 2 2, 1 2, 1 1))"
     vectorized = vectorize_wkt(polygon_with_hole)
     for is_inner_bit in vectorized[:5, IS_INNER_INDEX]:
         self.assertEqual(is_inner_bit, 1)
     for is_outer_bit in vectorized[5:, IS_OUTER_INDEX]:
         self.assertEqual(is_outer_bit, 1)
     self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)
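Taken together, the index constants in these tests imply the per-point layout: columns 0 and 1 hold the X and Y coordinates (the centroid and EFD tests below slice [:, :2]), and the remaining columns are one-hot flags such as IS_INNER_INDEX, IS_OUTER_INDEX and FULL_STOP_INDEX. A small inspection sketch under that assumption:

from deep_geometry import vectorizer as gv

vec = gv.vectorize_wkt('POLYGON((0 0, 3 0, 3 3, 0 3, 0 0), (1 1, 2 1, 2 2, 1 2, 1 1))')
print(vec.shape)   # (number_of_points, GEO_VECTOR_LEN)
print(vec[:, :2])  # per-point X and Y coordinates
print(vec[-1])     # the last row carries the full-stop bit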
Example #7
 def test_vectorize_one_wkt(self) -> None:
     max_points = 20
     input_set = target_wkt
     vectorized = []
     for index in range(len(input_set)):
         vectorized.append(vectorize_wkt(input_set[index], max_points, simplify=True))
     self.assertEqual(len(input_set), len(brt_wkt))
     self.assertEqual(vectorized[0].shape, (19, GEO_VECTOR_LEN))
     self.assertEqual(vectorized[1].shape, (1, GEO_VECTOR_LEN))
Example #8
    def test_centroid(self):
        geom1 = 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'
        geom1 = gv.vectorize_wkt(geom1)

        with self.subTest('It does not accept a numpy ndarray'):
            with self.assertRaises(AssertionError):
                centroid(geom1)

        with self.subTest('It rejects 3D geometries'):
            with self.assertRaises(AssertionError):
                centroid(torch.rand((10, 3)))

        with self.subTest('Our stand-in centroid function does the same as pyefd'):
            geom2 = 'POLYGON((1 1, 0 1, 0 0, 1 0, 1 1))'
            geom2 = gv.vectorize_wkt(geom2)

            coords_batch = geom2[:, :2]
            coords_batch = coords_batch.reshape(1, geom2.shape[0], 2)
            polygon2_tensor = torch.from_numpy(coords_batch)

            pyefd_centroid = pyefd.calculate_dc_coefficients(coords_batch[0])
            pytorch_centroid = centroid(polygon2_tensor)

            np.testing.assert_array_almost_equal(pyefd_centroid, pytorch_centroid[0])

        with self.subTest('It correctly calculates centroids for batches'):
            geom2 = 'POLYGON((1 1, 0 1, 0 0, 1 0, 1 1))'
            geom2 = gv.vectorize_wkt(geom2)

            coords_batch = geom2[:, :2]
            coords_batch = coords_batch.reshape(1, geom2.shape[0], 2)
            polygon2_tensor = torch.from_numpy(coords_batch)
            batch_size = 6
            batch = polygon2_tensor.repeat(batch_size, 1, 1)
            multiply_range = torch.arange(1., batch_size + 1, dtype=batch.dtype).reshape((batch_size, 1, 1))
            batch = batch * multiply_range

            reference_centroids = np.arange(1, batch_size + 1)
            reference_centroids = reference_centroids.reshape(batch_size, 1) * 0.5
            reference_centroids = reference_centroids.repeat(2, axis=1)

            batch_centroids = centroid(batch)

            np.testing.assert_array_almost_equal(reference_centroids, batch_centroids.numpy())
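The reference values in the batch subtest follow from a scaling argument: multiplying every coordinate of the unit square by k moves its centroid from (0.5, 0.5) to (0.5k, 0.5k), which is exactly what reference_centroids encodes. A standalone check with shapely, already a dependency of these examples:

from shapely.affinity import scale
from shapely.geometry import Polygon

unit_square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
for k in range(1, 7):
    scaled = scale(unit_square, xfact=k, yfact=k, origin=(0, 0))
    print(k, scaled.centroid.x, scaled.centroid.y)  # both equal 0.5 * k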
Example #9
    def test_loss_function(self):
        geom1 = 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'
        geom2 = 'POLYGON((1 0, 1 1, 0 1, 0 0, 1 0))'

        diamond = 'POLYGON((1 0, 2 1, 1 2, 0 1, 1 0))'

        test_square = gv.vectorize_wkt(geom1)
        test_square = torch.from_numpy(test_square).unsqueeze(0)

        output_square = gv.vectorize_wkt(geom2)
        output_square = torch.from_numpy(output_square).unsqueeze(0)

        test_diamond = gv.vectorize_wkt(diamond)
        test_diamond = torch.from_numpy(test_diamond).unsqueeze(0) + 1  # offset by 1

        with self.subTest('It rejects tensors of length other than 7 on the last axis'):
            loss_function = EFDloss(order=10)
            with self.assertRaises(AssertionError):
                loss_function(torch.rand((1, 1, 5)), output_square)

        with self.subTest('It returns a tensor'):
            loss = loss_function(test_square, output_square)
            self.assertEqual(type(loss).__name__, 'Tensor')

        with self.subTest('It returns a loss of 0 for geometries that are really identical'):
            loss = loss_function(test_square, test_square)
            loss = loss.numpy()
            self.assertEqual(loss, 0.)

        with self.subTest('It returns a loss of 0 for geometries that are almost identical'):
            loss_function = EFDloss(order=50)
            loss = loss_function(test_square, output_square)
            loss = loss.numpy()
            self.assertAlmostEqual(loss, 0.0, places=1)

        with self.subTest('It returns a non-zero tensor for non-identical geometries'):
            loss_function = EFDloss(order=50)
            loss = loss_function(test_square, test_diamond)
            loss = loss.numpy()
            self.assertGreater(loss, 1.)
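Since EFDloss is built on the differentiable efd (see test_efd further down), it can in principle drive an optimizer. A hypothetical sketch under that assumption, with EFDloss imported from the module under test:

import torch
from deep_geometry import vectorizer as gv

target = gv.vectorize_wkt('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')
target = torch.from_numpy(target).unsqueeze(0)

# start from a noisy copy and let the gradients pull it back onto the target
prediction = (target + 0.1 * torch.randn_like(target)).requires_grad_()

loss_function = EFDloss(order=10)
optimizer = torch.optim.SGD([prediction], lr=0.01)
for _ in range(100):
    optimizer.zero_grad()
    loss = loss_function(prediction, target)
    loss.backward()
    optimizer.step()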
Example #10
def generate_relationwise_features(nodes_map, node_predicate_map, config,
                                   time_dim):
    """ Stack vectors row-wise per relation and column stack relations
    """
    n = len(nodes_map)
    m = dict()
    node_idx = dict()
    data = dict()
    vec_length_map = dict()
    for node, i in nodes_map.items():
        if not isinstance(node, Literal):
            continue
        if node.datatype is None or node.datatype != _OGC_NAMESPACE.wktLiteral:
            continue

        try:
            value = str(node)
            vec = gv.vectorize_wkt(value)[:_MAX_POINTS, :]
        except Exception:  # skip malformed or unparseable WKT literals
            continue

        vec_length = vec.shape[0]
        if vec_length <= 0:
            continue

        # add means of X,Y to vector
        mean_x = np.mean(vec[:, 0])
        mean_y = np.mean(vec[:, 1])
        vec = np.hstack([np.vstack([[mean_x, mean_y]] * vec_length), vec])

        sp_rows, sp_cols = np.where(vec > 0.0)
        if time_dim == 0:
            a = sp.csr_matrix(
                (vec[(sp_rows, sp_cols)], (sp_rows, sp_cols)),
                shape=(vec_length, _GEOVECTORIZER_VEC_LENGTH + 2),
                dtype=np.float64)
        else:  # time_dim == 1
            a = sp.csr_matrix(
                (vec[(sp_rows, sp_cols)], (sp_cols, sp_rows)),
                shape=(_GEOVECTORIZER_VEC_LENGTH + 2, vec_length),
                dtype=np.float64)

        for p in node_predicate_map[node]:
            if p not in data.keys():
                data[p] = list()
                node_idx[p] = np.empty(shape=(n), dtype=np.int32)
                vec_length_map[p] = np.empty(shape=(n), dtype=np.int32)
                m[p] = 0

            data[p].append(a)

            idx = m[p]
            vec_length_map[p][idx] = vec_length
            node_idx[p][idx] = i
            m[p] = idx + 1

    msum = sum(m.values())
    logger.debug("Generated {} unique wktLiteral features".format(msum))

    if msum <= 0:
        return None

    # normalization
    for p, pdata in data.items():
        sc = GeomScalerSparse(time_dim)
        means = sc.fit(pdata)
        data[p] = sc.transform(pdata, means)

    return [[data[p], node_idx[p][:m[p]], vec_length_map[p][:m[p]]]
            for p in data.keys()]
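The hstack/vstack step above prepends two constant columns carrying the geometry's mean X and Y before the vector is sparsified. That step in isolation, as a self-contained sketch:

import numpy as np
from deep_geometry import vectorizer as gv

vec = gv.vectorize_wkt('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')
vec_length = vec.shape[0]

mean_x = np.mean(vec[:, 0])
mean_y = np.mean(vec[:, 1])
vec = np.hstack([np.vstack([[mean_x, mean_y]] * vec_length), vec])
print(vec.shape)  # two columns wider than the raw geometry vector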
Example #11
 def test_no_max_points_fixed_size(self) -> None:
     input_set = np.array(target_wkt)
     with self.assertRaises(AssertionError):
         vectorized = [vectorize_wkt(wkt, fixed_size=True) for wkt in input_set]
Example #12
def get_data_from_db(cursor):
    """
    Get data from the database given a query-instantiated cursor
    :param cursor: query-instantiated database cursor
    :return: tuple of labels and training data
    """
    training_data, labels = [], []
    cols = [desc[0] for desc in cursor.description]

    for record in tqdm(cursor, total=cursor.rowcount):
        record = dict(record)
        record['purposes'] = [purpose_to_english[p] for p in record['purposes']]

        # just duplicate for house_number and year of construction
        record['house_number_vec'] = record['house_number']
        record['year_of_construction_vec'] = record['year_of_construction']

        # one-hot encoding for house number addition
        if record['house_number_addition']:
            hna = np.zeros(shape=(len(record['house_number_addition']), len(VOCABULARY)))
            for idx, char in enumerate(record['house_number_addition']):
                hna[idx, VOCABULARY.index(char.lower())] = 1.
        else:
            hna = np.zeros(shape=(1, len(VOCABULARY)))
        record['house_number_addition_vec'] = hna

        # 'multi-hot' encoding for building purposes
        purposes = np.zeros(shape=(len(PURPOSES),))
        for purpose in record['purposes']:
            purposes[PURPOSES.index(purpose)] = 1.
        record['purposes_vec'] = purposes

        # character-level vectorization of postal code
        pc = np.zeros((len(record['postal_code']), len(VOCABULARY)))
        for idx, char in enumerate(record['postal_code']):
            pc[idx, VOCABULARY.index(char.lower())] = 1.
        record['postal_code_vec'] = pc

        # building geometry vectorization
        geom = record['geometry_crs84']
        geom = vectorize_wkt(geom)
        record['geometry_vec'] = geom
        record['centroid_vec'] = vectorize_wkt(record['centroid_crs84'])[0, :2]

        # vectorization of neighbouring buildings
        neighbours = record['neighbouring_buildings_crs84']
        neighbours = vectorize_wkt(neighbours)
        record['neighbouring_buildings_vec'] = neighbours

        rd = record['recorded_date']
        record['recorded_date_vec'] = [rd.year, rd.month, rd.day, rd.weekday()]
        rgd = record['registration_date']
        record['registration_date_vec'] = [rgd.year, rgd.month, rgd.day, rgd.weekday()]

        training_data.append(record)
        labels.append({
            'energy_performance_index': record['energy_performance_index'],
            'energy_performance_label': record['energy_performance_label'],
            'energy_performance_vec': ENERGY_CLASSES.index(record['energy_performance_label'])
        })

    return training_data, labels
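get_data_from_db expects a cursor whose rows behave like dicts and whose query already selects every column referenced above. A hypothetical invocation, assuming psycopg2 with a DictCursor; the connection string and table name are placeholders:

import psycopg2
import psycopg2.extras

connection = psycopg2.connect('dbname=buildings host=localhost')  # placeholder DSN
cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
cursor.execute('SELECT * FROM building_energy_labels')  # placeholder query

training_data, labels = get_data_from_db(cursor)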
Example #13
from datetime import datetime

import numpy as np
import shapefile
from shapely.geometry import shape
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from deep_geometry import vectorizer as gv

TODAY = datetime.today().strftime('%Y-%m-%d')
TRAIN_SET_FILE_NAME = 'train_data_{}.npz'.format(TODAY)
TEST_SET_FILE_NAME = 'test_data_{}.npz'.format(TODAY)

# Load the shapes from the shapefile
shapes = shapefile.Reader('Uitvoer_shape/buurt_2017')
shapes = shapes.shapes()
shapes = [shape(s) for s in shapes]

# convert the shapes to machine learning vectors
vectors = [gv.vectorize_wkt(s.wkt) for s in tqdm(shapes)]
dummy_labels = [0 for p in vectors]
train_data, test_data, _, _ = train_test_split(vectors, dummy_labels, test_size=0.15, random_state=42)

print('Saving training data...')
np.savez_compressed(file=TRAIN_SET_FILE_NAME, data=train_data)
print('Saving test data...')
np.savez_compressed(file=TEST_SET_FILE_NAME, data=test_data)

print('Done!')
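Because the saved geometries have ragged lengths, numpy stores them as an object array; reading them back therefore needs allow_pickle. A short sketch, reusing the file name constant from above:

import numpy as np

train_data = np.load(TRAIN_SET_FILE_NAME, allow_pickle=True)['data']
print(len(train_data), train_data[0].shape)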
Example #14
    def test_efd(self):
        geom1 = 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'
        geom1 = gv.vectorize_wkt(geom1)

        geom2 = 'POLYGON((1 1, 0 1, 0 0, 1 0, 1 1))'
        geom2 = gv.vectorize_wkt(geom2)

        coords_batch = geom2[:, :2]
        coords_batch = coords_batch.reshape(1, geom2.shape[0], 2)
        pyefd_descriptors = pyefd.elliptic_fourier_descriptors(coords_batch[0], order=10)
        numpy_vectorized_descriptors = numpy_vectorized_efd(coords_batch[0], order=10)

        polygon2_tensor = torch.from_numpy(coords_batch)
        polygon2_tensor.requires_grad = True
        pytorch_descriptors = efd(polygon2_tensor, order=10)

        with self.subTest('It does not accept a numpy ndarray'):
            with self.assertRaises(AssertionError):
                efd(geom1)

        with self.subTest('It rejects 3D geometries'):
            with self.assertRaises(AssertionError):
                efd(torch.rand((10, 3)))

        with self.subTest('Our stand-in efd function does the same as pyefd'):
            np.testing.assert_array_equal(pyefd_descriptors, numpy_vectorized_descriptors)

        with self.subTest('The pytorch efd does the same as the numpy vectorized function'):
            np.testing.assert_array_almost_equal(pytorch_descriptors[0].detach().numpy(), numpy_vectorized_descriptors)

        with self.subTest('It creates an elliptic fourier descriptor of a geometry, the same as pyefd creates'):
            # polygon1_tensor = geom1[:, :2]
            # polygon1_tensor = torch.from_numpy(polygon1_tensor)
            # polygon1_efd = efd(polygon1_tensor, order=10).numpy()

            np.testing.assert_array_almost_equal(pyefd_descriptors, pytorch_descriptors[0].detach().numpy())

        with self.subTest('It handles inputs of zeros without nans'):
            zero_coordinates = torch.zeros((1, 10, 2), dtype=torch.double)
            coeffs = efd(zero_coordinates)
            coeffs = coeffs.detach().numpy()

            for element in coeffs.flatten():
                self.assertFalse(np.isnan(element))

        with self.subTest('It creates equal coefficients for replication-padded coordinate sequences'):
            torch.manual_seed(42)
            random_coordinates = torch.rand((1, 4, 2), dtype=torch.double)
            last_point = random_coordinates[:, -1]
            replication_padding = last_point.repeat(1, 4, 1)
            padded_random_coords = torch.cat((random_coordinates, replication_padding), dim=1)

            non_zero_coeffs = efd(random_coordinates).detach().numpy()
            padded_coeffs = efd(padded_random_coords).detach().numpy()

            np.testing.assert_array_almost_equal(non_zero_coeffs, padded_coeffs)

        with self.subTest('It creates descriptors for a batch of size 2 same as the pyefd implementation'):
            size_two_batch = torch.cat((polygon2_tensor, polygon2_tensor * 2), dim=0)
            resized_descriptors = pyefd.elliptic_fourier_descriptors(polygon2_tensor[0].detach().numpy() * 2, order=10)
            size_two_descriptors = efd(size_two_batch)

            size_two_descriptors = size_two_descriptors.detach().numpy()
            np.testing.assert_array_almost_equal(pyefd_descriptors, size_two_descriptors[0])
            np.testing.assert_array_almost_equal(resized_descriptors, size_two_descriptors[1])

        with self.subTest('It creates a differentiable function, returning gradients'):
            random_coordinates = torch.randn((1, 4, 2), dtype=torch.double, requires_grad=True)
            descriptors = efd(random_coordinates)
            scalar = torch.mean(descriptors)
            scalar.backward()
            gradients = random_coordinates.grad
            self.assertEqual(gradients.shape, random_coordinates.shape)
Example #15
 def test_multipolygon_exceed_max_points(self) -> None:
     with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
         wkt = file.read()
         max_points = 20
         with self.assertRaises(Exception):
             vectorize_wkt(wkt, max_points)
Example #16
 def test_simplify_without_max_points(self) -> None:
     with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
         wkt = file.read()
         with self.assertRaises(AssertionError):
             vectorize_wkt(wkt, simplify=True)
Example #17
 def test_non_empty_geom_coll(self) -> None:
     with self.assertRaises(ValueError):
         vectorize_wkt(non_empty_geom_collection, 100)
Example #18
 def test_point_with_max_points(self) -> None:
     vectorized = vectorize_wkt('POINT(12 14)', 5)
     self.assertEqual(vectorized.shape, (1, GEO_VECTOR_LEN))
     self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit
Example #19
 def test_unsupported_geom(self) -> None:
     with self.assertRaises(Exception):
         vectorize_wkt(
             'THIS_SHOULD_THROW_AN_EXCEPTION ((10 10, 20 20, 10 40),(40 40, 30 30, 40 20, 30 10))', 16)