def test_enrich_previously_assigned(self):
    """
    Records that were given an autoid in an earlier run keep that id.

    Within one import batch some records may already have an autoid from a
    previous run. The test expects such a record to get its stored id back,
    while the remaining records continue numbering after the last value
    reported by storage.
    """
    message = self.mock_msg
    message["header"]["enrich"]["id"]["template"] = "0123X"
    message["contents"] = [{"id": None, "code": str(n)} for n in range(3)]

    # Stored record that matches the first entity in the message
    Record = namedtuple('Record', ['id', 'code'])
    stored = Record(id="01232", code="0")

    storage = self.mock_storage
    # Only the first lookup finds a previously assigned autoid
    storage.get_column_values_for_key_value.side_effect = [[stored], None, None]
    # Storage reports a last value that is higher than the stored autoid
    storage.get_last_column_value.return_value = "1234"

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    expected_ids = ("01232", "01235", "01236")
    for index, expected in enumerate(expected_ids):
        self.assertEqual(message["contents"][index]["id"], expected)
def test_enrich_reuse_value(self):
    """Expect entities that share a code to receive the same generated id."""
    message = self.mock_msg
    message["contents"] = [{"id": None, "code": code} for code in "ABAB"]

    storage = self.mock_storage
    # The storage mock returns neither a current value nor a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = None

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    # Code A -> 01230, code B -> 01231; the repeated codes get the same ids
    expected_ids = ("01230", "01231", "01230", "01231")
    for index, expected in enumerate(expected_ids):
        self.assertEqual(message["contents"][index]["id"], expected)
def test_enrich_existing_contents(self):
    """Expect an entity that already has a "geo" value to stay as it is,
    without any query being sent to storage."""
    message = self.mock_msg
    message["contents"] = [{"geo": "aap"}]

    enricher = Enricher(self.mock_storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    # No query may have been issued and the existing value is untouched
    self.mock_storage.get_query_value.assert_not_called()
    first = message["contents"][0]
    self.assertEqual(first["geo"], "aap")
def test_enrich_empty_contents(self):
    """Expect empty contents to stay empty without any query being sent
    to storage."""
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later definition replaces
    # this one and this test never runs - confirm.
    message = self.mock_msg
    message["contents"] = []

    enricher = Enricher(self.mock_storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    self.mock_storage.get_query_value.assert_not_called()
    self.assertEqual(message["contents"], [])
def test_enrich_id_already_filled(self):
    """Expect an entity whose id is already filled to keep that id."""
    message = self.mock_msg
    message["contents"] = [{"id": "123", "code": "A"}]

    storage = self.mock_storage
    # The storage mock returns neither a current value nor a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = None

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    first = message["contents"][0]
    self.assertEqual(first["id"], "123")
def test_enrich_empty_contents(self):
    """Expect empty contents to stay empty when nothing is stored."""
    # NOTE(review): another method with this exact name is visible earlier
    # in this file; if both live in the same class this definition replaces
    # the earlier one - confirm.
    message = self.mock_msg
    message["contents"] = []

    storage = self.mock_storage
    # The storage mock returns neither a current value nor a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = None

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    self.assertEqual(message["contents"], [])
def test_enrich_with_last_value(self):
    """Expect the new id to follow the last value reported by storage."""
    message = self.mock_msg
    message["contents"] = [{"id": None, "code": "A"}]

    storage = self.mock_storage
    # No current value for the entity, but storage does report a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = "123"

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    # The last value is incremented (123 => 124) and the result is
    # zero-padded to the expected length
    first = message["contents"][0]
    self.assertEqual(first["id"], "00124")
def test_enrich_max_contents(self):
    """Expect ten entities with distinct codes to all receive an id; the
    first and the last id are checked."""
    message = self.mock_msg
    # Ten entities with codes "0" through "9"
    message["contents"] = [{"id": None, "code": str(n)} for n in range(10)]

    storage = self.mock_storage
    # The storage mock returns neither a current value nor a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = None

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    for index, expected in ((0, "01230"), (9, "01239")):
        self.assertEqual(message["contents"][index]["id"], expected)
def test_enrich_with_mulitple_current_values(self):
    """Expect an AssertionError when storage returns more than one record
    for the entity being enriched."""
    message = self.mock_msg
    message["contents"] = [{"id": None, "code": "A"}]

    Record = namedtuple('Record', ['id', 'code'])
    # Two stored records with the same code but different ids
    duplicates = [Record(id=value, code="A") for value in ("123", "456")]

    storage = self.mock_storage
    storage.get_column_values_for_key_value.return_value = duplicates
    storage.get_last_column_value.return_value = None

    with self.assertRaises(AssertionError):
        enricher = Enricher(storage, message)
        for entity in message["contents"]:
            enricher.enrich(entity)
def test_enrich_with_current_value(self):
    """Expect the id of the single record returned by storage to be used
    for the entity."""
    message = self.mock_msg
    message["contents"] = [{"id": None, "code": "A"}]

    Record = namedtuple('Record', ['id', 'code'])
    current = Record(id="123", code="A")

    storage = self.mock_storage
    storage.get_column_values_for_key_value.return_value = [current]
    storage.get_last_column_value.return_value = None

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)

    # The stored id is taken over unchanged: no padding, no increment
    first = message["contents"][0]
    self.assertEqual(first["id"], "123")
def test_enrich_dry_run(self):
    """Expect the id to remain None when dry_run is switched on in the
    header's id enrichment settings."""
    message = self.mock_msg
    message["contents"] = [{"id": None, "code": "0"}]
    message["header"]["enrich"]["id"]["dry_run"] = True

    storage = self.mock_storage
    # The storage mock returns neither a current value nor a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = None

    enricher = Enricher(storage, message)
    for entity in message["contents"]:
        enricher.enrich(entity)
        self.assertIsNone(entity["id"])
def test_enrich_simple_contents(self):
    """
    Expect the values in "x" to be looked up with one union query and the
    result to be stored as "geo".

    The storage mock answers the query with "POINT (1 2)"; the test expects
    the entity to end up with "POINT (1.000 2.000)".
    """
    self.mock_storage.get_query_value.return_value = "POINT (1 2)"
    msg = self.mock_msg
    msg["contents"] = [{"x": [1, 2]}]
    enricher = Enricher(self.mock_storage, msg)
    for content in msg["contents"]:
        enricher.enrich(content)
    # assert_called_with compares the query text for equality, so whitespace
    # inside the literal is significant.
    # NOTE(review): the table and field names in the query (cat_col, fld)
    # presumably come from the enrich settings in self.mock_msg - confirm.
    self.mock_storage.get_query_value.assert_called_with(""" SELECT ST_AsText( ST_Union(geometrie) ) FROM cat_col WHERE fld in ('1', '2') AND eind_geldigheid IS NULL """)
    self.assertEqual(msg["contents"][0]["geo"], "POINT (1.000 2.000)")
def test_enrich_overflow_contents(self):
    """Expect an AssertionError when eleven entities with distinct codes
    need a generated id."""
    message = self.mock_msg
    # Codes "0" through "9" followed by "A": eleven distinct codes
    # NOTE(review): presumably one more than the id template can number -
    # confirm against Enricher.
    message["contents"] = [{"id": None, "code": code} for code in "0123456789A"]

    storage = self.mock_storage
    # The storage mock returns neither a current value nor a last value
    storage.get_column_values_for_key_value.return_value = None
    storage.get_last_column_value.return_value = None

    with self.assertRaises(AssertionError):
        enricher = Enricher(storage, message)
        for entity in message["contents"]:
            enricher.enrich(entity)
def compare(msg):
    """Compare new data in msg (contents) with the current data

    Every entity in msg["contents"] is counted in the statistics, enriched,
    populated and handed to a collector. When storage holds no entity yet,
    the entities are written directly as ADD events; otherwise they are
    collected in a temporary table that is then compared with the current
    data.

    :param msg: The new data, including header and summary
    :return: result message with the keys "header", "summary",
        "contents_ref" and "confirms"; when the dependencies are not met a
        message with "header", "summary" and "contents" (None) is returned
        instead
    """
    logger.configure(msg, "COMPARE")
    header = msg.get('header', {})
    # The mode falls back to FULL_UPLOAD when the header does not specify one
    mode = header.get('mode', FULL_UPLOAD)
    logger.info(
        f"Compare (mode = {mode}) to GOB Database {GOBStorageHandler.user_name} started"
    )

    # Parse the message header
    message = ImportMessage(msg)
    metadata = message.metadata

    # Get the model for the collection to be compared
    gob_model = GOBModel()
    entity_model = gob_model.get_collection(metadata.catalogue, metadata.entity)

    # Initialize a storage handler for the collection
    storage = GOBStorageHandler(metadata)
    # Human-readable name of the collection, used in the log messages below
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Compare {model}")

    stats = CompareStatistics()

    # Only set when entities are collected in a temporary table
    tmp_table_name = None
    with storage.get_session():
        with ProgressTicker("Collect compare events", 10000) as progress:
            # Check any dependencies; stop before any collector or writer is created
            if not meets_dependencies(storage, msg):
                return {
                    "header": msg["header"],
                    "summary": logger.get_summary(),
                    "contents": None
                }

            enricher = Enricher(storage, msg)
            populator = Populator(entity_model, msg)

            # If there are no records in the database all data are ADD events
            initial_add = not storage.has_any_entity()
            if initial_add:
                logger.info("Initial load of new collection detected")
                # Write ADD events directly, without using a temporary table
                contents_writer = ContentsWriter()
                contents_writer.open()
                # Pass a None confirms_writer because only ADD events are written
                collector = EventCollector(contents_writer, confirms_writer=None, version=entity_model['version'])
                collect = collector.collect_initial_add
            else:
                # Collect entities in a temporary table
                collector = EntityCollector(storage)
                collect = collector.collect
                tmp_table_name = collector.tmp_table_name

            # Each entity is counted, enriched and populated before it is collected
            for entity in msg["contents"]:
                progress.tick()
                stats.collect(entity)
                enricher.enrich(entity)
                populator.populate(entity)
                collect(entity)

            collector.close()

    if initial_add:
        # NOTE(review): the writer is closed only on this path; an exception
        # raised while collecting would skip contents_writer.close() - confirm
        # whether that needs handling.
        filename = contents_writer.filename
        confirms = None
        contents_writer.close()
    else:
        # Compare entities from temporary table
        with storage.get_session():
            diff = storage.compare_temporary_data(tmp_table_name, mode)
            filename, confirms = _process_compare_results(
                storage, entity_model, diff, stats)

    # Build result message
    results = stats.results()
    logger.info(f"Compare {model} completed", {'data': results})
    # The logger summary is merged into the statistics to form the message summary
    results.update(logger.get_summary())

    message = {
        "header": msg["header"],
        "summary": results,
        "contents_ref": filename,
        "confirms": confirms
    }

    return message