def test_meeting_start_change():
    """Meetings without an oparl id must be hard deleted on a start change.

    As there are meetings without an associated id, we can't use oparl_id as
    unique_id. But since name+start are unique in the db and the start of a
    meeting can be updated to the actual start after the meeting happened, we
    need to hard delete old meetings or the import will crash with a failed
    unique constraint.
    """
    councils = [Organization("City Council", 1, True)]
    first_meetings = [
        # No original id: can only be matched by name+start
        Meeting(
            "City Council",
            "City Council Meeting 1",
            None,
            None,
            None,
            start=datetime.fromisoformat("2020-01-01T09:00:00+01:00"),
        ),
        # Has an original id, so it can be updated in place
        Meeting(
            "City Council",
            "City Council Meeting 2",
            None,
            None,
            2,
            start=datetime.fromisoformat("2020-02-01T09:00:00+01:00"),
        ),
    ]
    second_meetings = [
        Meeting(
            "City Council",
            "City Council Meeting 1",
            None,
            None,
            None,
            start=datetime.fromisoformat("2020-01-01T09:00:10+01:00"),
        ),
        Meeting(
            "City Council",
            "City Council Meeting 2",
            None,
            None,
            2,
            start=datetime.fromisoformat("2020-02-01T09:00:05+01:00"),
        ),
    ]
    first = RisData(sample_city, None, [], councils, [], [], first_meetings, [], [], 2)
    second = RisData(sample_city, None, [], councils, [], [], second_meetings, [], [], 2)
    body = Body(name=first.meta.name, short_name=first.meta.name, ags=first.meta.ags)
    body.save()

    import_data(body, first)
    import_data(body, second)

    assert models.Meeting.objects.count() == 2
    # The old meeting without id should have been deleted
    assert models.Meeting.objects_with_deleted.count() == 3
def test_body(self):
    """The body converter fills center and ags; body_related links the terms."""
    body = Body()
    data = self.api_data["https://oparl.example.org/body/1"]

    self.converter.body(data, body)
    # The fixture's location is a point, so it becomes the center
    # and no outline is set
    self.assertIsNone(body.outline)
    self.assertIsNotNone(body.center)
    self.assertEqual(body.ags, "05315000")

    # body_related needs a saved instance for the m2m relation
    body.save()
    self.converter.body_related(data, body)
    self.assertEqual(body.legislative_terms.count(), 1)
def test_manual_deletion(pytestconfig):
    """Check that after a file has been manually deleted, it can't get
    re-imported and it's gone from minio"""
    url = "https://example.org/file/1"
    file_id = 1
    sample_file = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    data = RisData(sample_city, None, [], [], [sample_paper], [sample_file], [], [], [], 2)
    body = Body(name=data.meta.name, short_name=data.meta.name, ags=data.meta.ags)
    body.save()
    import_data(body, data)
    # Download the file contents through a mocked HTTP layer so the
    # importer stores the payload in minio
    with responses.RequestsMock() as requests_mock:
        requests_mock.add(
            responses.GET,
            url,
            body=Path(pytestconfig.rootdir).joinpath(
                "testdata/media/file.txt").read_bytes(),
            status=200,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 1 and failed == 0
    # Ensure that the file is there
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()
    # This is what we test
    models.File.objects.get(pk=file_id).manually_delete()
    # The stored object must be gone from minio immediately
    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))
    # Another import, to ensure that manually delete is respected
    import_data(body, data)
    assert not models.File.objects.filter(pk=file_id).first()
    # No mocked responses registered: the importer must not even try to
    # re-download the manually deleted file
    with responses.RequestsMock():
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 0 and failed == 0
    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))
def test_agenda_item_with_id_name_changed():
    """An agenda item with an original id is updated in place on rename,
    instead of being deleted and recreated."""
    councils = [Organization("City Council", 1, True)]
    meetings = [
        Meeting(
            "City Council",
            "City Council Meeting 1",
            None,
            None,
            1,
            start=datetime.fromisoformat("2020-01-01T09:00:00+01:00"),
        )
    ]
    items_before = [
        AgendaItem(
            key="1",
            position=0,
            name="Old name",
            meeting_id=1,
            paper_reference=None,
            paper_original_id=None,
            original_id=1,
            result=None,
            voting=None,
            note=None,
        )
    ]
    items_after = [
        AgendaItem(
            key="1",
            position=0,
            name="New name",
            meeting_id=1,
            paper_reference=None,
            paper_original_id=None,
            original_id=1,
            result=None,
            voting=None,
            note=None,
        )
    ]
    first = RisData(sample_city, None, [], councils, [], [], meetings, [], items_before, 2)
    second = RisData(sample_city, None, [], councils, [], [], meetings, [], items_after, 2)
    body = Body(name=first.meta.name, short_name=first.meta.name, ags=first.meta.ags)
    body.save()

    import_data(body, first)
    import_data(body, second)

    # Exactly one item overall, i.e. nothing was soft deleted by the rename
    assert models.AgendaItem.objects_with_deleted.count() == 1
    assert models.AgendaItem.objects.count() == 1
def body(self, lib_object: JSON, body: Body) -> Body:
    """Fills a Body instance from an oparl body object and returns it.

    Normalizes the short name, cleans up the Amtlicher Gemeindeschlüssel (ags)
    and imports the body's location as center or outline depending on its
    geometry type.

    :raises RuntimeError: if the ags is longer than 8 characters and the
        excess digits are not just zero padding
    """
    body.short_name = self.utils.normalize_body_name(body.short_name)
    body.ags = lib_object.get("ags")
    if body.ags:
        body.ags = body.ags.replace(" ", "")
    if len(body.ags or "") > 8:
        # Special case for https://ris.krefeld.de/webservice/oparl/v1/body/1:
        # strip pure zero padding beyond the regular 8 digits
        if body.ags[8:] == "0" * len(body.ags[8:]):
            body.ags = body.ags[:8]
        else:
            raise RuntimeError(
                "The Amtliche Gemeindeschlüssel of {} is longer than 8 characters: '{}'".format(
                    body, body.ags
                )
            )

    # We don't really need the location because we have our own outline
    # importing logic and don't need the city, but we import it for comprehensiveness
    location = self.retrieve(Location, lib_object.get("location"), body.oparl_id)
    if location and location.geometry:
        if location.geometry["type"] == "Point":
            body.center = location
            body.outline = None
        elif location.geometry["type"] == "Polygon":
            logger.warning("Overriding outline of Body with api version")
            body.center = None
            body.outline = location
        else:
            # Fixed: the two implicitly concatenated literals previously
            # produced "…'Polygon'.Skipping…" without a separating space
            logger.warning(
                "Location object is of type {}, which is neither 'Point' nor 'Polygon'. "
                "Skipping this location.".format(location.geometry["type"])
            )
    return body
def make_sample_file(file_id, url):
    """Imports a dataset containing a single file attached to the sample paper.

    Returns the saved body and the imported RisData so tests can re-import it.
    """
    file_entry = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    ris_data = RisData(
        sample_city, None, [], [], [sample_paper], [file_entry], [], [], [], 2
    )
    body = Body(
        name=ris_data.meta.name, short_name=ris_data.meta.name, ags=ris_data.meta.ags
    )
    body.save()
    import_data(body, ris_data)
    return body, ris_data
def organization(self, libobject: OParl.Organization):
    """Imports a single oparl organization including its memberships.

    Returns the saved Organization, or None if the object is missing or
    check_existing decided it should be skipped.
    """
    # Guard must come before any attribute access: the original logged
    # libobject.get_id() first, which crashed when libobject was None
    if not libobject:
        return
    logging.info("Processing Organization {}".format(libobject.get_id()))

    organization = self.check_existing(libobject, Organization)
    if not organization:
        return

    # Map the oparl organization type to a configured type id if known,
    # otherwise fall back to creating a type named after the raw value
    type_id = self.organization_classification.get(
        libobject.get_organization_type())
    if type_id:
        orgtype = OrganizationType.objects.get(id=type_id)
    else:
        orgtype, _ = OrganizationType.objects.get_or_create(
            name=libobject.get_organization_type())
    organization.organization_type = orgtype
    organization.body = Body.by_oparl_id(libobject.get_body().get_id())
    organization.start = self.glib_datetime_or_date_to_python(
        libobject.get_start_date())
    organization.end = self.glib_datetime_or_date_to_python(
        libobject.get_end_date())
    # Save before importing memberships so they have a valid foreign key target
    organization.save()

    for membership in libobject.get_membership():
        self.membership(organization, membership)

    organization.save()

    return organization
def organization(self, libobject: JSON, organization: Organization) -> Organization:
    """Maps an oparl organization onto the model instance and returns it."""
    type_name = libobject.get("organizationType")
    # E.g. Leipzig sets organizationType: "Gremium" and classification: "Fraktion" for factions,
    # so we give priority to classification
    if libobject.get("classification") in self.utils.organization_classification:
        type_name = libobject["classification"]

    type_id = self.utils.organization_classification.get(type_name)
    if type_id:
        organization.organization_type = OrganizationType.objects.get(id=type_id)
    else:
        organization.organization_type, _ = OrganizationType.objects.get_or_create(
            name=libobject.get("organizationType")
        )

    if libobject.get("body"):
        # If we really have a case with an extra body then this should error because then we need some extra handling
        organization.body = Body.by_oparl_id(libobject["body"])
    else:
        organization.body = self.default_body

    organization.start = self.utils.parse_date(libobject.get("startDate"))
    organization.end = self.utils.parse_date(libobject.get("endDate"))
    organization.location = self.retrieve(Location, libobject.get("location"))

    # Strip a redundant type from the short name, e.g. "Fraktion XYZ" -> "XYZ"
    if organization.name == organization.short_name and type_name:
        organization.short_name = re.sub(
            "[- ]?" + re.escape(type_name) + "[ ]?",
            "",
            organization.short_name,
            flags=re.I,
        )

    return organization
def test_incremental_agenda_items():
    """Agenda items dropped by an incremental import are soft deleted."""
    old = load_ris_data("importer/test-data/amtzell_old.json")
    new = load_ris_data("importer/test-data/amtzell_new.json")
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    models.AgendaItem.objects_with_deleted.all().delete()

    # We don't have original ids for all agenda items (yet?),
    # so we just assume meeting x paper is unique
    consultation_map = {
        (meeting_id, paper_id): consultation_id
        for meeting_id, paper_id, consultation_id in models.Consultation.objects.values_list(
            "meeting_id", "paper_id", "id"
        )
    }

    meeting_id_map = make_id_map(models.Meeting.objects.filter(oparl_id__isnull=False))
    paper_id_map = make_id_map(models.Paper.objects)

    def convert(item):
        return convert_agenda_item(item, consultation_map, meeting_id_map, paper_id_map)

    def sorted_oparl_ids(manager):
        return sorted(manager.values_list("oparl_id", flat=True))

    incremental_import(models.AgendaItem, [convert(i) for i in old.agenda_items])

    assert sorted_oparl_ids(models.AgendaItem.objects) == ["1302", "1880"]
    assert sorted_oparl_ids(models.AgendaItem.objects_with_deleted) == ["1302", "1880"]

    incremental_import(models.AgendaItem, [convert(i) for i in new.agenda_items])

    # 1880 disappeared from the new data, so it remains only as soft deleted
    assert sorted_oparl_ids(models.AgendaItem.objects) == ["1267", "1302"]
    assert sorted_oparl_ids(models.AgendaItem.objects_with_deleted) == [
        "1267",
        "1302",
        "1880",
    ]
def test_import_json():
    """Importing the old and then the new dataset must match the db snapshots."""
    old = load_ris_data("importer/test-data/amtzell_old.json")
    new = load_ris_data("importer/test-data/amtzell_new.json")
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    snapshot = make_db_snapshot()
    expected = json.loads(Path("importer/test-data/amtzell_old_db.json").read_text())
    assert expected == snapshot

    import_data(body, new)
    snapshot = make_db_snapshot()
    expected = json.loads(Path("importer/test-data/amtzell_new_db.json").read_text())
    assert expected == snapshot
def test_undelete():
    """A paper gets created, (spuriously?) deleted, and then undeleted"""
    with_paper = RisData(sample_city, None, [], [], [sample_paper], [], [], [], [], 2)
    without_paper = RisData(sample_city, None, [], [], [], [], [], [], [], 2)
    body = Body(
        name=with_paper.meta.name,
        short_name=with_paper.meta.name,
        ags=with_paper.meta.ags,
    )
    body.save()

    # create -> delete -> undelete
    for dataset in (with_paper, without_paper, with_paper):
        import_data(body, dataset)

    [paper] = models.Paper.objects_with_deleted.all()
    assert not paper.deleted
def test_duplicate_meetings_with_id(fixture, target_number, target_number_with_deleted):
    """
    There are two meetings with the same name/start, and
    a) different ids, b) with and without id, c) without ids.
    Inspired by https://ris.wuppertal.de/si0057.php?__ksinr=18329
    and https://ris.wuppertal.de/si0057.php?__ksinr=18837
    """
    # Preload the conflicting meetings from the serialized fixture
    for meeting in serializers.deserialize("json", Path(fixture).read_text()):
        meeting.save()

    new_meeting = converter.structure(
        {
            "organization_name": "BV Uellendahl-Katernberg",
            "name": "BV Uellendahl-Katernberg",
            "location": "Rathaus Barmen, Ratssaal, Johannes-Rau-Platz 1, 42275 Wuppertal",
            "note": None,
            "original_id": 18329,
            "start": "2020-04-23T18:30:00+02:00",
            "end": "2020-04-23T19:20:00+02:00",
            "cancelled": False,
        },
        Meeting,
    )
    ris_data = RisData(sample_city, None, [], [], [], [], [new_meeting], [], [], 2)
    body = Body(
        name=ris_data.meta.name,
        short_name=ris_data.meta.name,
        ags=ris_data.meta.ags,
    )
    body.save()

    import_data(body, ris_data)

    # On failure, show which meetings actually ended up in the db
    assert models.Meeting.objects.count() == target_number, list(
        models.Meeting.objects.values_list("oparl_id", "name", "start")
    )
    assert (
        models.Meeting.objects_with_deleted.count() == target_number_with_deleted
    )
def import_outline(body: Body, gemeindeschluessel: str):
    """Fetches the outline for the given Gemeindeschlüssel from the overpass
    api and stores it as the body's outline location."""
    logger.info("Importing outline from {}".format(gemeindeschluessel))

    # Reuse the existing outline location if the body already has one
    if body.outline:
        outline = body.outline
    else:
        outline = Location()
        outline.name = "Outline of " + body.name
        outline.short_name = body.short_name
        outline.is_official = False

    query = query_template_outline.format(gemeindeschluessel)
    response = requests.post(overpass_api, data={"data": query})
    response.raise_for_status()

    outline.geometry = convert_to_geojson(response.text)
    outline.save()

    body.outline = outline
    body.save()
def test_import_outline(pytestconfig, ags):
    """This test exists mostly for the handling of the AGS with 5 vs. 8 digits"""
    # This currently assumes that we don't want to do any transformations
    # with the ags before assigning it to the body
    body = Body(name=f"Place with AGS {ags}", short_name=f"AGS{ags}", ags=ags)

    fixture_path = pytestconfig.rootpath.joinpath(
        f"testdata/outline_query_responses/{ags}.json"
    )
    fixture = json.loads(fixture_path.read_text())
    with responses.RequestsMock() as requests_mock:
        requests_mock.add(
            method=responses.POST, url=fixture["url"], body=fixture["response"]
        )
        import_outline(body, ags)
def test_index_deletion():
    """Check that deleted persons get deleted from the elasticsearch index"""
    # Recreate all indices so the test starts from a clean slate
    for index in registry.get_indices(registry.get_models()):
        index.delete(ignore=404)
    for index in registry.get_indices(registry.get_models()):
        index.create()

    two_persons = [
        Person(name="Frank Underwood", party="Democrats"),
        Person(name="Claire Underwood", party="Democrats"),
    ]
    one_person = [Person(name="Claire Underwood", party="Democrats")]
    first = RisData(sample_city, None, two_persons, [], [], [], [], [], [], 2)
    second = RisData(sample_city, None, one_person, [], [], [], [], [], [], 2)
    body = Body(name=first.meta.name, short_name=first.meta.name, ags=first.meta.ags)
    body.save()

    import_data(body, first)
    assert len(MainappSearch({"query": "Underwood"}).execute().hits) == 2

    # Frank is gone from the data, so he must also leave the index
    import_data(body, second)
    assert len(MainappSearch({"query": "Underwood"}).execute().hits) == 1
def setUpClass(cls):
    """Loads all json fixtures into a MockLoader and builds the converter."""
    super().setUpClass()
    cls.api_data = {}
    cls.loader = MockLoader()
    cls.loader.api_data = cls.api_data

    for filename in os.listdir(cls.dummy_data):
        if not filename.endswith(".json"):
            continue
        with open(os.path.join(cls.dummy_data, filename)) as fp:
            data = json.load(fp)
        cls.api_data[data["id"]] = data
        # Embedded objects are also served under their own id,
        # but never overwrite an entry that is already present
        for entry in externalize(data):
            cls.api_data.setdefault(entry.data["id"], entry.data)

    # Used by test_location_default_body
    body = Body()
    body.short_name = "München"
    cls.converter = JsonToDb(cls.loader, default_body=body)
    cls.converter.warn_missing = False
    cls.utils = Utils()
def import_outline(body: Body, ags: Optional[str] = None):
    """Fetches the outline (administrative boundary) from the overpass api
    and stores it as the body's outline location.

    :param body: the body whose outline should be imported
    :param ags: Amtlicher Gemeindeschlüssel; defaults to the body's own ags
    :raises ValueError: if neither the parameter nor the body provides an ags
    """
    ags = ags or body.ags
    # Raise instead of assert so the check survives `python -O`
    if ags is None:
        raise ValueError(f"No ags given and body {body} has none either")

    logger.info("Importing outline from {}".format(ags))

    # Reuse the existing outline location if the body already has one
    if not body.outline:
        outline = Location()
        outline.name = "Outline of " + body.name
        outline.short_name = body.short_name
        outline.is_official = False
    else:
        outline = body.outline

    query = format_template(query_template_outline, ags)
    response = requests.post(overpass_api, data={"data": query})
    response.raise_for_status()
    geojson = osm2geojson.json2geojson(response.text)
    outline.geometry = geojson
    outline.save()

    body.outline = outline
    body.save()
def test_json_to_db_empty_object(caplog):
    """An empty json object can still be imported once the type is given."""
    url = "https://lahr.ratsinfomanagement.net/webservice/oparl/v1.1/body/1/consultation/5999"
    converter = JsonToDb(
        MockLoader(api_data={url: {}}),
        default_body=Body(),
        ensure_organization_type=False,
    )

    # Without a type hint the importer has no way to know what to create
    with pytest.raises(
        RuntimeError,
        match=f"The object {url} has not type field and object_type wasn't given",
    ):
        converter.import_anything(url)

    # With an explicit type the object gets imported and warnings are logged
    converter.import_anything(url, Consultation)
    assert Consultation.objects.filter(oparl_id=url).count() == 1
    assert caplog.messages == [
        f"Object loaded from {url} has no type field, inferred to https://schema.oparl.org/1.0/Consultation",
        f"Object loaded from {url} has no id field, setting id to url",
    ]
def organization_core(self, libobject, organization):
    """Fills the shared organization fields from the oparl object and runs
    the sanitize hook."""
    self.logger.info("Processing Organization {}".format(
        libobject.get_id()))

    # Map the oparl organization type to a configured type id if known,
    # otherwise create a type named after the raw value
    raw_type = libobject.get_organization_type()
    known_type_id = self.organization_classification.get(raw_type)
    if known_type_id:
        organization.organization_type = OrganizationType.objects.get(
            id=known_type_id)
    else:
        organization.organization_type, _ = OrganizationType.objects.get_or_create(
            name=raw_type)

    organization.body = Body.by_oparl_id(libobject.get_body().get_id())
    organization.start = self.glib_datetime_or_date_to_python(
        libobject.get_start_date())
    organization.end = self.glib_datetime_or_date_to_python(
        libobject.get_end_date())

    self.call_custom_hook("sanitize_organization", organization)
def test_json_to_db_missing_object(caplog):
    """A missing (null) object is replaced by a dummy so the import continues."""
    url = "https://lahr.ratsinfomanagement.net/webservice/oparl/v1.1/body/1/consultation/5999"
    converter = JsonToDb(
        MockLoader(api_data={url: None}),
        default_body=Body(),
        ensure_organization_type=False,
    )

    # Without a type the missing object cannot be reconstructed
    with pytest.raises(
        RuntimeError,
        match=rf"The object {url} is missing and the object type was not specified",
    ):
        converter.import_anything(url)

    # With an explicit type a dummy object is imported instead
    converter.import_anything(url, Consultation)
    assert Consultation.objects.filter(oparl_id=url).count() == 1

    dummy_warning = (
        f"JSON loaded from {url} is not a dict/object. Using a dummy instead. THIS IS BAD"
    )
    # The warning fires once per import_anything call
    assert caplog.messages == [dummy_warning, dummy_warning]
def test_import_json(send_mail_function): """This test runs with elasticsearch if available and otherwise uses saved responses""" # Create the base state old = load_ris_data("importer/test-data/amtzell_old.json") body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags) body.save() import_data(body, old) actual = make_db_snapshot() expected = json.loads( Path("importer/test-data/amtzell_old_db.json").read_text()) assert expected == actual last_notification = timezone.now() # Create notification user = User(username="******", email="*****@*****.**") user.save() UserProfile.objects.create(user=user) user_alert = UserAlert( user=user, search_string="Digitalisierungsstrategie", created=datetime.fromisoformat("2008-01-01T12:00:00+01:00"), ) user_alert.save() # Import the new data new = load_ris_data("importer/test-data/amtzell_new.json") import_data(body, new) actual = make_db_snapshot() expected = json.loads( Path("importer/test-data/amtzell_new_db.json").read_text()) assert expected == actual # Check that the notification was sent elasticsearch_mock = ElasticsearchMock({ "importer/test-data/notification_request.json": "importer/test-data/notification_response.json" }) if is_es_online(): context = contextlib.nullcontext() else: context = mock.patch( "elasticsearch_dsl.search.get_connection", new=lambda _alias: elasticsearch_mock, ) with context: if is_es_online(): notifier = NotifyUsers(last_notification) else: notifier = NotifyUsers( datetime.fromisoformat("2020-05-17T12:07:37.887853+00:00")) notifier.notify_all() assert send_mail_function.call_count == 1 assert send_mail_function.call_args[0][0] == "*****@*****.**" assert "Digitalisierungsstrategie" in send_mail_function.call_args[0][ 2] assert "Digitalisierungsstrategie" in send_mail_function.call_args[0][ 3]