def test_polygon_exceed_max_points(self) -> None:
    with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
        wkt = file.read()

    shape = wktreader.loads(wkt)
    geom = shape.geoms[0]
    max_points = 20

    with self.assertRaises(Exception):
        vectorize_wkt(geom.wkt, max_points)

def test_vectorize_big_multipolygon(self) -> None:
    with open('test_files/big_multipolygon_wkt.txt', 'r') as file:
        wkt = file.read()

    max_pts = get_max_points([wkt])
    vectorized = vectorize_wkt(wkt, max_pts)
    self.assertEqual((144, GEO_VECTOR_LEN), vectorized.shape)
    self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit

def test_simplify_multipolygon_gt_max_points(self) -> None:
    with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
        wkt = file.read()

    max_points = 20
    vectorized = vectorize_wkt(wkt, max_points, simplify=True)
    self.assertEqual((20, GEO_VECTOR_LEN), vectorized.shape)
    self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit

def test_fixed_size(self) -> None:
    max_points = 20
    input_set = np.array(target_wkt)
    vectorized = [vectorize_wkt(wkt, max_points, simplify=True, fixed_size=True) for wkt in input_set]
    self.assertEqual(np.array(vectorized).shape, (input_set.size, 20, GEO_VECTOR_LEN))

    for vector in vectorized:
        self.assertEqual(vector[-1, FULL_STOP_INDEX], 1)

def test_multipolygon_with_hole(self):
    with open('test_files/multipolygon_with_hole.txt', 'r') as file:
        wkt = file.read()

    vectorized = vectorize_wkt(wkt)
    self.assertEqual((683, GEO_VECTOR_LEN), vectorized.shape)
    self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit

def test_polygon_with_hole(self) -> None:
    polygon_with_hole = "POLYGON((0 0, 3 0, 3 3, 0 3, 0 0), (1 1, 2 1, 2 2, 1 2, 1 1))"
    vectorized = vectorize_wkt(polygon_with_hole)

    for is_inner_bit in vectorized[:5, IS_INNER_INDEX]:
        self.assertEqual(is_inner_bit, 1)

    for is_outer_bit in vectorized[5:, IS_OUTER_INDEX]:
        self.assertEqual(is_outer_bit, 1)

    self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)

def test_vectorize_one_wkt(self) -> None:
    max_points = 20
    input_set = target_wkt
    vectorized = []

    for wkt in input_set:
        vectorized.append(vectorize_wkt(wkt, max_points, simplify=True))

    self.assertEqual(len(input_set), len(brt_wkt))
    self.assertEqual(vectorized[0].shape, (19, GEO_VECTOR_LEN))
    self.assertEqual(vectorized[1].shape, (1, GEO_VECTOR_LEN))

def test_centroid(self):
    geom1 = 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'
    geom1 = gv.vectorize_wkt(geom1)

    with self.subTest('It does not accept a numpy ndarray'):
        with self.assertRaises(AssertionError):
            centroid(geom1)

    with self.subTest('It rejects 3D geometries'):
        with self.assertRaises(AssertionError):
            centroid(torch.rand((10, 3)))

    with self.subTest('Our stand-in centroid function does the same as pyefd'):
        geom2 = 'POLYGON((1 1, 0 1, 0 0, 1 0, 1 1))'
        geom2 = gv.vectorize_wkt(geom2)
        coords_batch = geom2[:, :2]
        coords_batch = coords_batch.reshape(1, geom2.shape[0], 2)
        polygon2_tensor = torch.from_numpy(coords_batch)

        pyefd_centroid = pyefd.calculate_dc_coefficients(coords_batch[0])
        pytorch_centroid = centroid(polygon2_tensor)
        np.testing.assert_array_almost_equal(pyefd_centroid, pytorch_centroid[0])

    with self.subTest('It correctly calculates centroids for batches'):
        geom2 = 'POLYGON((1 1, 0 1, 0 0, 1 0, 1 1))'
        geom2 = gv.vectorize_wkt(geom2)
        coords_batch = geom2[:, :2]
        coords_batch = coords_batch.reshape(1, geom2.shape[0], 2)
        polygon2_tensor = torch.from_numpy(coords_batch)

        batch_size = 6
        batch = polygon2_tensor.repeat(batch_size, 1, 1)
        # torch.arange replaces the deprecated torch.range: [1., 2., ..., batch_size]
        multiply_range = torch.arange(1., batch_size + 1., dtype=batch.dtype).reshape((batch_size, 1, 1))
        batch = batch * multiply_range

        reference_centroids = np.arange(1, batch_size + 1)
        reference_centroids = reference_centroids.reshape(batch_size, 1) * 0.5
        reference_centroids = reference_centroids.repeat(2, axis=1)

        batch_centroids = centroid(batch)
        np.testing.assert_array_almost_equal(reference_centroids, batch_centroids.numpy())

def test_loss_function(self):
    geom1 = 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'
    geom2 = 'POLYGON((1 0, 1 1, 0 1, 0 0, 1 0))'
    diamond = 'POLYGON((1 0, 2 1, 1 2, 0 1, 1 0))'

    test_square = gv.vectorize_wkt(geom1)
    test_square = torch.from_numpy(test_square).unsqueeze(0)

    output_square = gv.vectorize_wkt(geom2)
    output_square = torch.from_numpy(output_square).unsqueeze(0)

    test_diamond = gv.vectorize_wkt(diamond)
    test_diamond = torch.from_numpy(test_diamond).unsqueeze(0) + 1  # offset by 1

    with self.subTest('It rejects tensors of length other than 7 on the second axis'):
        loss_function = EFDloss(order=10)
        with self.assertRaises(AssertionError):
            loss_function(torch.rand((1, 1, 5)), output_square)

    with self.subTest('It returns a tensor'):
        loss = loss_function(test_square, output_square)
        self.assertEqual(type(loss).__name__, 'Tensor')

    with self.subTest('It returns a loss of 0 for geometries that are really identical'):
        loss = loss_function(test_square, test_square)
        loss = loss.numpy()
        self.assertEqual(loss, 0.)

    with self.subTest('It returns a loss of 0 for geometries that are almost identical'):
        loss_function = EFDloss(order=50)
        loss = loss_function(test_square, output_square)
        loss = loss.numpy()
        self.assertAlmostEqual(loss, 0.0, places=1)

    with self.subTest('It returns a non-zero tensor for non-identical geometries'):
        loss_function = EFDloss(order=50)
        loss = loss_function(test_square, test_diamond)
        loss = loss.numpy()
        self.assertGreater(loss, 1.)

def generate_relationwise_features(nodes_map, node_predicate_map, config, time_dim):
    """ Stack vectors row-wise per relation and column stack relations """
    n = len(nodes_map)
    m = dict()
    node_idx = dict()
    data = dict()
    vec_length_map = dict()

    for node, i in nodes_map.items():
        if not isinstance(node, Literal):
            continue
        if node.datatype is None or node.datatype.neq(_OGC_NAMESPACE.wktLiteral):
            continue

        try:
            value = str(node)
            vec = gv.vectorize_wkt(value)[:_MAX_POINTS, :]
        except Exception:
            continue

        vec_length = vec.shape[0]
        if vec_length <= 0:
            continue

        # add means of X,Y to vector
        mean_x = np.mean(vec[:, 0])
        mean_y = np.mean(vec[:, 1])
        vec = np.hstack([np.vstack([[mean_x, mean_y]] * vec_length), vec])

        sp_rows, sp_cols = np.where(vec > 0.0)
        if time_dim == 0:
            a = sp.csr_matrix((vec[(sp_rows, sp_cols)], (sp_rows, sp_cols)),
                              shape=(vec_length, _GEOVECTORIZER_VEC_LENGTH + 2),
                              dtype=np.float64)
        else:  # time_dim == 1
            a = sp.csr_matrix((vec[(sp_rows, sp_cols)], (sp_cols, sp_rows)),
                              shape=(_GEOVECTORIZER_VEC_LENGTH + 2, vec_length),
                              dtype=np.float64)

        for p in node_predicate_map[node]:
            if p not in data.keys():
                data[p] = list()
                node_idx[p] = np.empty(shape=(n,), dtype=np.int32)
                vec_length_map[p] = np.empty(shape=(n,), dtype=np.int32)
                m[p] = 0

            data[p].append(a)

            idx = m[p]
            vec_length_map[p][idx] = vec_length
            node_idx[p][idx] = i
            m[p] = idx + 1

    msum = sum(m.values())
    logger.debug("Generated {} unique wktLiteral features".format(msum))

    if msum <= 0:
        return None

    # normalization
    for p, pdata in data.items():
        sc = GeomScalerSparse(time_dim)
        means = sc.fit(pdata)
        data[p] = sc.transform(pdata, means)

    return [[data[p], node_idx[p][:m[p]], vec_length_map[p][:m[p]]]
            for p in data.keys()]

def test_no_max_points_fixed_size(self) -> None:
    input_set = np.array(target_wkt)
    with self.assertRaises(AssertionError):
        vectorized = [vectorize_wkt(wkt, fixed_size=True) for wkt in input_set]

def get_data_from_db(cursor):
    """
    Get data from the database given a query-instantiated cursor
    :param cursor: query-instantiated database cursor
    :return: tuple of training data and labels
    """
    training_data, labels = [], []
    cols = [desc[0] for desc in cursor.description]

    for record in tqdm(cursor, total=cursor.rowcount):
        record = dict(record)
        record['purposes'] = [purpose_to_english[p] for p in record['purposes']]

        # just duplicate for house_number and year of construction
        record['house_number_vec'] = record['house_number']
        record['year_of_construction_vec'] = record['year_of_construction']

        # one-hot encoding for house number addition
        if record['house_number_addition']:
            hna = np.zeros(shape=(len(record['house_number_addition']), len(VOCABULARY)))
            for idx, char in enumerate(record['house_number_addition']):
                hna[idx, VOCABULARY.index(char.lower())] = 1.
        else:
            hna = np.zeros(shape=(1, len(VOCABULARY)))
        record['house_number_addition_vec'] = hna

        # 'multi-hot' encoding for building purposes
        purposes = np.zeros(shape=(len(PURPOSES),))
        for purpose in record['purposes']:
            purposes[PURPOSES.index(purpose)] = 1.
        record['purposes_vec'] = purposes

        # character-level vectorization of postal code
        pc = np.zeros((len(record['postal_code']), len(VOCABULARY)))
        for idx, char in enumerate(record['postal_code']):
            pc[idx, VOCABULARY.index(char.lower())] = 1.
        record['postal_code_vec'] = pc

        # building geometry vectorization
        geom = record['geometry_crs84']
        geom = vectorize_wkt(geom)
        record['geometry_vec'] = geom
        record['centroid_vec'] = vectorize_wkt(record['centroid_crs84'])[0, :2]

        # vectorization of neighbouring buildings
        neighbours = record['neighbouring_buildings_crs84']
        neighbours = vectorize_wkt(neighbours)
        record['neighbouring_buildings_vec'] = neighbours

        rd = record['recorded_date']
        record['recorded_date_vec'] = [rd.year, rd.month, rd.day, rd.weekday()]
        rgd = record['registration_date']
        record['registration_date_vec'] = [rgd.year, rgd.month, rgd.day, rgd.weekday()]

        training_data.append(record)
        labels.append({
            'energy_performance_index': record['energy_performance_index'],
            'energy_performance_label': record['energy_performance_label'],
            'energy_performance_vec': ENERGY_CLASSES.index(record['energy_performance_label'])
        })

    return training_data, labels

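# Hedged usage sketch, not part of the original module: get_data_from_db iterates a cursor
# whose rows convert to dicts and exposes cursor.description/rowcount, which matches a
# psycopg2 dict-style cursor. The connection string, table and query below are assumptions
# for illustration only.
#
# import psycopg2
# import psycopg2.extras
#
# with psycopg2.connect('dbname=buildings') as conn:
#     with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
#         cursor.execute('SELECT * FROM energy_labels')  # hypothetical table
#         training_data, labels = get_data_from_db(cursor)
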
from datetime import datetime

import numpy as np
import shapefile
from deep_geometry import vectorizer as gv
from shapely.geometry import shape
from sklearn.model_selection import train_test_split
from tqdm import tqdm

TODAY = datetime.today().strftime('%Y-%m-%d')
TRAIN_SET_FILE_NAME = 'train_data_{}.npz'.format(TODAY)
TEST_SET_FILE_NAME = 'test_data_{}.npz'.format(TODAY)

# Load the shapes from the shapefile
shapes = shapefile.Reader('Uitvoer_shape/buurt_2017')
shapes = shapes.shapes()
shapes = [shape(s) for s in shapes]

# convert the shapes to machine learning vectors
vectors = [gv.vectorize_wkt(s.wkt) for s in tqdm(shapes)]
dummy_labels = [0 for p in vectors]

train_data, test_data, _, _ = train_test_split(vectors, dummy_labels, test_size=0.15, random_state=42)

print('Saving training data...')
np.savez_compressed(file=TRAIN_SET_FILE_NAME, data=train_data)

print('Saving test data...')
np.savez_compressed(file=TEST_SET_FILE_NAME, data=test_data)

print('Done!')

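# Hedged sanity check, an addition rather than part of the original script: the vectorized
# geometries have varying numbers of points, so numpy stores them as a pickled object array,
# and reading the archive back therefore needs allow_pickle=True.
reloaded = np.load(TRAIN_SET_FILE_NAME, allow_pickle=True)['data']
assert len(reloaded) == len(train_data), 'round trip should preserve the number of geometries'
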
def test_efd(self):
    geom1 = 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'
    geom1 = gv.vectorize_wkt(geom1)

    geom2 = 'POLYGON((1 1, 0 1, 0 0, 1 0, 1 1))'
    geom2 = gv.vectorize_wkt(geom2)
    coords_batch = geom2[:, :2]
    coords_batch = coords_batch.reshape(1, geom2.shape[0], 2)

    pyefd_descriptors = pyefd.elliptic_fourier_descriptors(coords_batch[0], order=10)
    numpy_vectorized_descriptors = numpy_vectorized_efd(coords_batch[0], order=10)

    polygon2_tensor = torch.from_numpy(coords_batch)
    polygon2_tensor.requires_grad = True
    pytorch_descriptors = efd(polygon2_tensor, order=10)

    with self.subTest('It does not accept a numpy ndarray'):
        with self.assertRaises(AssertionError):
            efd(geom1)

    with self.subTest('It rejects 3D geometries'):
        with self.assertRaises(AssertionError):
            efd(torch.rand((10, 3)))

    with self.subTest('Our stand-in efd function does the same as pyefd'):
        np.testing.assert_array_equal(pyefd_descriptors, numpy_vectorized_descriptors)

    with self.subTest('The pytorch efd does the same as the numpy vectorized function'):
        np.testing.assert_array_almost_equal(pytorch_descriptors[0].detach().numpy(),
                                             numpy_vectorized_descriptors)

    with self.subTest('It creates an elliptic fourier descriptor of a geometry, the same as pyefd creates'):
        # polygon1_tensor = geom1[:, :2]
        # polygon1_tensor = torch.from_numpy(polygon1_tensor)
        # polygon1_efd = efd(polygon1_tensor, order=10).numpy()
        np.testing.assert_array_almost_equal(pyefd_descriptors, pytorch_descriptors[0].detach().numpy())

    with self.subTest('It handles inputs of zeros without nans'):
        zero_coordinates = torch.zeros((1, 10, 2), dtype=torch.double)
        coeffs = efd(zero_coordinates)
        coeffs = coeffs.detach().numpy()
        for element in coeffs.flatten():
            self.assertFalse(np.isnan(element))

    with self.subTest('It creates equal coefficients for replication-padded coordinate sequences'):
        torch.manual_seed(42)
        random_coordinates = torch.rand((1, 4, 2), dtype=torch.double)
        last_point = random_coordinates[:, -1]
        replication_padding = last_point.repeat(1, 4, 1)
        padded_random_coords = torch.cat((random_coordinates, replication_padding), dim=1)
        non_zero_coeffs = efd(random_coordinates).detach().numpy()
        padded_coeffs = efd(padded_random_coords).detach().numpy()
        np.testing.assert_array_almost_equal(non_zero_coeffs, padded_coeffs)

    with self.subTest('It creates descriptors for a batch of size 2 same as the pyefd implementation'):
        size_two_batch = torch.cat((polygon2_tensor, polygon2_tensor * 2), dim=0)
        resized_descriptors = pyefd.elliptic_fourier_descriptors(polygon2_tensor[0].detach().numpy() * 2, order=10)
        size_two_descriptors = efd(size_two_batch)
        size_two_descriptors = size_two_descriptors.detach().numpy()
        np.testing.assert_array_almost_equal(pyefd_descriptors, size_two_descriptors[0])
        np.testing.assert_array_almost_equal(resized_descriptors, size_two_descriptors[1])

    with self.subTest('It creates a differentiable function, returning gradients'):
        random_coordinates = torch.randn((1, 4, 2), dtype=torch.double, requires_grad=True)
        descriptors = efd(random_coordinates)
        scalar = torch.mean(descriptors)
        scalar.backward()
        gradients = random_coordinates.grad
        self.assertEqual(gradients.shape, random_coordinates.shape)

def test_multipolygon_exceed_max_points(self) -> None:
    with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
        wkt = file.read()

    max_points = 20
    with self.assertRaises(Exception):
        vectorize_wkt(wkt, max_points)

def test_simplify_without_max_points(self) -> None:
    with open('test_files/multipart_multipolygon_wkt.txt', 'r') as file:
        wkt = file.read()

    with self.assertRaises(AssertionError):
        vectorize_wkt(wkt, simplify=True)

def test_non_empty_geom_coll(self) -> None:
    with self.assertRaises(ValueError):
        vectorize_wkt(non_empty_geom_collection, 100)

def test_point_with_max_points(self) -> None:
    vectorized = vectorize_wkt('POINT(12 14)', 5)
    self.assertEqual(vectorized.shape, (1, GEO_VECTOR_LEN))
    self.assertEqual(vectorized[-1, FULL_STOP_INDEX], 1)  # Test full stop bit

def test_unsupported_geom(self) -> None:
    with self.assertRaises(Exception):
        vectorize_wkt(
            'THIS_SHOULD_THROW_AN_EXCEPTION ((10 10, 20 20, 10 40),(40 40, 30 30, 40 20, 30 10))',
            16)