Пример #1
0
    def test_uniq_resources(self):
        """If a resource list contains two entries with identical URLs, only
           the first occurrence should survive deduplication."""

        originals = [
            {
                'url': GETCAPABILITIES_URL_1,
                'name': 'WFS Service',
                'format': 'WFS'
            },
            {
                'url': 'https://fbinter.stadt-berlin.de/fb?loginkey=alphaDataStart&alphaDataId=s01_11_07naehr2015@senstadt',
                'name': 'Serviceseite im FIS-Broker',
                'format': 'HTML'
            },
            {
                'url': 'https://www.stadtentwicklung.berlin.de/umwelt/umweltatlas/dd11107.htm',
                'name': 'Inhaltliche Beschreibung',
                'format': 'HTML'
            },
            {
                'url': 'https://fbinter.stadt-berlin.de/fb_daten/beschreibung/umweltatlas/datenformatbeschreibung/Datenformatbeschreibung_kriterien_zur_bewertung_der_bodenfunktionen2015.pdf',
                'name': 'Technische Beschreibung',
                'format': 'PDF'
            },
        ]

        # expected to collide with the GETCAPABILITIES_URL_1 entry above
        duplicate = {
            'url': GETCAPABILITIES_URL_2,
            'name': 'WFS Service',
            'format': 'WFS'
        }

        with_duplicate = copy.deepcopy(originals) + [duplicate]
        _assert_equal(uniq_resources_by_url(with_duplicate), originals)
Пример #2
0
 def test_filter_tags(self):
     '''Every tag listed in `to_remove` must disappear from the output tag
        list. Duplicate tags are removed in full (all occurrences, not just
        the first), and tags absent from the input are simply ignored.'''
     removals = [
         'open data', # that's a duplicate; both occurrences should be removed
         'Berlin',
         'Hamburg', # that's not in the original tag list, shouldn't change anything
         'Boden',
         u'N\xe4hrstoffversorgung',
     ]
     data_dict = self._csw_resource_data_dict('wfs-open-data.xml')
     plain_tags = data_dict['iso_values']['tags']
     dict_tags = data_dict['package_dict']['tags']
     expected = [{'name': tag} for tag in [
         'inspireidentifiziert',
         'opendata',
         'Sachdaten',
         'Umweltatlas',
         'Bodengesellschaft',
         'Ausgangsmaterial',
         'Oberboden',
         'Unterboden',
         'KAKeff',
         'pH-Wert',
         'Bodenart',
         u'Basens\xe4ttigung',
         u'B\xf6den',
         'infoFeatureAccessService',
     ]]
     filter_tags(removals, plain_tags, dict_tags)
     _assert_equal(dict_tags, expected)
Пример #3
0
    def test_timedelta_config_returned_as_int(self):
        '''get_timedelta() must return an int whenever the `timedelta`
           config option is set, even if it was given as a string.'''

        FisbrokerPlugin().source_config = {'timedelta': '1'}
        _assert_equal(FisbrokerPlugin().get_timedelta(), 1)
Пример #4
0
    def test_successful_reimport(self):
        '''If all is good and the FIS-Broker service returns a record,
           return an HTTP 200.'''

        fb_dataset_dict, source, job = self._harvester_setup(
            FISBROKER_HARVESTER_CONFIG)
        job.status = u'Finished'
        job.save()
        package_update(self.context, fb_dataset_dict)
        package_id = fb_dataset_dict['id']
        package = Package.get(package_id)
        title_before = package.title

        response = self.app.get(
            url="/api/harvest/reimport?id={}".format(package_id),
            headers={'Accept': 'application/json'},
            extra_environ={
                'REMOTE_USER': self.context['user'].encode('ascii')
            })

        # the HTTP response must signal success ...
        _assert_equal(response.status_int, 200)
        # ... and so must the response JSON:
        assert json.loads(response.body)['success']
        # the title must have changed to the harvested value, i.e. the
        # reimport has actually happened:
        _assert_equal(
            package.title,
            u"Nährstoffversorgung des Oberbodens 2015 (Umweltatlas) - [WFS]")
        _assert_not_equal(package.title, title_before)
Пример #5
0
    def test_import_since_regular_value_returned_unchanged(self):
        '''Any `import_since` value other than 'big_bang' or 'last_changed'
           must be returned unchanged by get_import_since_date().'''

        FisbrokerPlugin().source_config = {'import_since': "2020-03-01"}
        _assert_equal(FisbrokerPlugin().get_import_since_date(None), "2020-03-01")
Пример #6
0
    def test_timeout_config_returned_as_int(self):
        '''get_timeout() must return an int whenever the `timeout` config
           option is set, even if it was given as a string.'''

        FisbrokerPlugin().source_config = {'timeout': '100'}
        _assert_equal(FisbrokerPlugin().get_timeout(), 100)
Пример #7
0
    def test_no_preview_graphic_no_image(self):
        '''For a dataset that doesn't have any graphics, no preview image
           markup should be generated.'''

        data_dict = self._csw_resource_data_dict('wfs-no-preview_2.xml')
        _assert_equal(extract_preview_markup(data_dict), None)
Пример #8
0
    def test_import_since_big_bang_means_none(self):
        '''An `import_since` config value of 'big_bang' means no date
           restriction: get_import_since_date() returns None.'''

        FisbrokerPlugin().source_config = {'import_since': "big_bang"}
        _assert_equal(FisbrokerPlugin().get_import_since_date(None), None)
Пример #9
0
    def test_sort_resources_by_weight(self):
        '''sort_resources() must return resource dicts ordered ascending
           by their 'weight' member.'''

        names = ['foo', 'bar', 'daz', 'dingo', 'baz']
        weights = [20, 5, 10, 15, 25]
        resources = [{'name': name, 'weight': weight}
                     for name, weight in zip(names, weights)]

        annotator = FISBrokerResourceAnnotator()
        result = annotator.sort_resources(resources)
        _assert_equal([5, 10, 15, 20, 25], [res['weight'] for res in result])
Пример #10
0
    def test_revision_interpreted_as_updated_creation_as_released(self):
        '''A reference date of type `revision` maps to `date_updated`, one
           of type `creation` to `date_released`. `publication` must be
           ignored when `creation` is already present.'''

        creation, publication, revision = '1974-06-07', '1994-05-03', '2000-01-01'
        data_dict = {
            'iso_values': {
                'dataset-reference-date': [
                    {'type': 'creation', 'value': creation},
                    {'type': 'publication', 'value': publication},
                    {'type': 'revision', 'value': revision},
                ]
            }
        }

        reference_dates = extract_reference_dates(data_dict)
        _assert_equal(reference_dates['date_released'], creation)
        _assert_equal(reference_dates['date_updated'], revision)
Пример #11
0
    def test_build_preview_graphic_markup(self):
        '''For a dataset that has an MD_BrowseGraphic named 'Vorschaugrafik',
           the correct image markdown must be generated.'''

        data_dict = self._csw_resource_data_dict('wfs-open-data.xml')
        expected = u"![Vorschaugrafik zu Datensatz 'Nährstoffversorgung des Oberbodens 2015 (Umweltatlas)'](https://fbinter.stadt-berlin.de/fb_daten/vorschau/sachdaten/svor_default.gif)"
        _assert_equal(extract_preview_markup(data_dict), expected)
Пример #12
0
    def test_no_web_interface_or_api_means_no_url(self):
        '''If neither a `web_interface` nor an `api` resource is present,
           no url should be picked.'''

        resources = self._resource_list()
        # strip out both resource kinds that extract_url() can pick:
        for function in ('web_interface', 'api'):
            resources = [res for res in resources
                         if res.get('internal_function') != function]
        _assert_equal(extract_url(resources), None)
Пример #13
0
    def test_web_interface_resource_picked_as_url(self):
        '''The resource marked as `web_interface` must be picked as the
           dataset's `url` metadatum.'''

        expected = u'https://fbinter.stadt-berlin.de/fb?loginkey=alphaDataStart&alphaDataId=s01_11_07naehr2015@senstadt'
        _assert_equal(extract_url(self._resource_list()), expected)
Пример #14
0
    def test_skip_on_missing_release_date(self):
        '''Test if get_package_dict() returns 'skip' for a service resource
           without a release date.'''

        data_dict = self._csw_resource_data_dict('wfs-no-release-date.xml')
        _assert_equal(FisbrokerPlugin().get_package_dict(self.context, data_dict), 'skip')
Пример #15
0
    def test_api_resource_as_fallback_for_url(self):
        '''If no `web_interface` resource is present, the one marked as
           `api` must be picked as the dataset's `url` metadatum.'''

        resources = [res for res in self._resource_list()
                     if res.get('internal_function') != 'web_interface']
        expected = u'https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s01_11_07naehr2015?request=getcapabilities&service=wfs&version=2.0.0'
        _assert_equal(extract_url(resources), expected)
Пример #16
0
    def test_import_since_date_is_none_if_no_jobs(self):
        '''With `import_since` set to `last_error_free` but no jobs having
           run successfully (or at all), get_import_since_date() must
           return None.'''

        source, job = self._create_source_and_job()
        FisbrokerPlugin().source_config['import_since'] = "last_error_free"
        _assert_equal(FisbrokerPlugin().get_import_since_date(job), None)
Пример #17
0
 def test_reimport_api_unknown_package_id_fails(self):
     '''An API-triggered reimport of a package id that does not exist must
        yield an HTTP 404 response with content type application/json.'''
     response = self.app.get(
         '/api/harvest/reimport?id=dunk',
         headers={'Accept': 'application/json'},
         expect_errors=True)
     _assert_equal(response.content_type, "application/json")
     _assert_equal(response.status_int, 404)
Пример #18
0
 def test_known_error_code_returns_dict(self):
     '''For a known error code, get_error_dict() must return a dict with
        correctly populated 'message' and 'code' members.'''
     code = controller.ERROR_NOT_FOUND_IN_CKAN
     error_dict = get_error_dict(code)
     for key in ('message', 'code'):
         assert key in error_dict
     _assert_equal(error_dict['message'], ERROR_MESSAGES[code])
     _assert_equal(error_dict['code'], code)
Пример #19
0
    def test_arbitrary_url_without_description_is_ignored(self):
        '''A documentation resource without a description is dropped:
           annotate_resource() returns None for it.'''
        resource = {
            'url': 'https://fbinter.stadt-berlin.de/fb_daten/beschreibung/umweltatlas/datenformatbeschreibung/Datenformatbeschreibung_kriterien_zur_bewertung_der_bodenfunktionen2015.pdf',
            'name': 'Technische Beschreibung',
            'format': FORMAT_PDF,
        }
        annotated = FISBrokerResourceAnnotator().annotate_resource(resource)
        _assert_equal(annotated, None)
Пример #20
0
 def test_can_only_reimport_harvested_packages(self):
     '''Reimporting an existing package that was not generated by a
        harvester must yield an HTTP 422 with internal error code 5.'''
     package_id = ckan_factories.Dataset()['id']
     response = self.app.get(
         "/api/harvest/reimport?id={}".format(package_id),
         headers={'Accept': 'application/json'},
         expect_errors=True)
     _assert_equal(response.status_int, 422)
     error = json.loads(response.body)['error']
     _assert_equal(error['code'], controller.ERROR_NOT_HARVESTED)
Пример #21
0
    def test_fix_bad_dl_de_id(self):
        '''The malformed DL-DE-BY license id "dl-de-by-2-0" must be
           corrected to "dl-de-by-2.0" during extraction.'''

        license_json = '{ "id": "dl-de-by-2-0" , "name": " Datenlizenz Deutschland - Namensnennung - Version 2.0 ", "url": "https://www.govdata.de/dl-de/by-2-0", "quelle": "Umweltatlas Berlin / [Titel des Datensatzes]" }'
        data_dict = {
            'iso_values': {
                'limitations-on-public-access': [license_json]
            }
        }
        result = extract_license_and_attribution(data_dict)
        _assert_equal(result['license_id'], "dl-de-by-2.0")
Пример #22
0
    def test_annotate_atom_feed(self):
        '''An Atom feed URL must be annotated as a main API-endpoint
           resource with format Atom.'''

        feed_url = 'https://fbinter.stadt-berlin.de/fb/feed/senstadt/a_SU_LOR'
        annotated = FISBrokerResourceAnnotator().annotate_resource({'url': feed_url})

        _assert_equal(annotated['url'], feed_url)
        _assert_equal(annotated['name'], "Atom Feed")
        _assert_equal(annotated['description'], "Atom Feed")
        _assert_equal(annotated['format'], FORMAT_ATOM)
        _assert_equal(annotated['internal_function'], FUNCTION_API_ENDPOINT)
        assert annotated['main']
Пример #23
0
    def test_reimport_browser_triggers_redirect(self):
        '''A browser-triggered reimport (HTML requested) must answer with a
           302 redirect to the dataset page, both for failing and for
           successful requests.'''

        def reimport_via_browser(dataset_id):
            # simulate a logged-in browser request to the reimport route
            return self.app.get(
                url='/dataset/{}/reimport'.format(dataset_id),
                headers={'Accept': 'text/html'},
                expect_errors=True,
                extra_environ={
                    'REMOTE_USER': self.context['user'].encode('ascii')
                })

        # unsuccessful request, /dataset/dunk does not exist:
        response = reimport_via_browser('dunk')
        _assert_equal(response.status_int, 302)
        _assert_equal(urlparse(response.location).path, "/dataset/dunk")

        # successful request:
        fb_dataset_dict, source, job = self._harvester_setup(
            FISBROKER_HARVESTER_CONFIG)
        job.status = u'Finished'
        job.save()
        package_update(self.context, fb_dataset_dict)
        package_id = fb_dataset_dict['id']
        response = reimport_via_browser(package_id)
        _assert_equal(response.status_int, 302)
        _assert_equal(urlparse(response.location).path,
                      "/dataset/{}".format(package_id))
Пример #24
0
    def test_last_error_free_returns_correct_job(self):
        '''After a successful job A, last_error_free() must return A, and
           both import_since and the query constraints must be derived
           from A's gather start time.'''

        source, finished_job = self._create_source_and_job()
        for object_id in gather_stage(FisbrokerPlugin(), finished_job):
            fetch_and_import_stages(FisbrokerPlugin(), HarvestObject.get(object_id))
        finished_job.status = u'Finished'
        finished_job.save()

        next_job = self._create_job(source.id)
        _assert_equal(FisbrokerPlugin().last_error_free_job(next_job), finished_job)

        # the import_since date should be the time the finished job started
        # gathering, shifted by the configured timedelta:
        FisbrokerPlugin().source_config['import_since'] = "last_error_free"
        import_since = FisbrokerPlugin().get_import_since_date(next_job)
        expected = (finished_job.gather_started +
                    timedelta(hours=FisbrokerPlugin().get_timedelta()))
        _assert_equal(import_since, expected.strftime("%Y-%m-%dT%H:%M:%S%z"))

        # the query constraints should reflect the import_since date:
        reference = PropertyIsGreaterThanOrEqualTo('modified', import_since)
        constraint = FisbrokerPlugin().get_constraints(next_job)[0]
        _assert_equal(constraint.literal, reference.literal)
        _assert_equal(constraint.propertyname, reference.propertyname)
Пример #25
0
    def test_last_error_free_does_not_return_reimport_job(self):
        '''Reimport jobs must be ignored when determining the last
           error-free harvest job.'''

        def run_harvest_job(job):
            # run the full gather/fetch/import cycle, then mark the job done
            for object_id in gather_stage(FisbrokerPlugin(), job):
                fetch_and_import_stages(FisbrokerPlugin(), HarvestObject.get(object_id))
            job.status = u'Finished'
            job.save()

        # do a successful job
        source, job_a = self._create_source_and_job()
        run_harvest_job(job_a)
        LOG.debug("successful job done ...")

        # do an unsuccessful job
        # This harvest job should fail, because the mock FIS-broker will look
        # for a different file on the second harvest run, will not find it
        # and return a "no_record_found" error.
        job_b = self._create_job(source.id)
        run_harvest_job(job_b)
        LOG.debug("unsuccessful job done ...")

        # reset the mock server's counter
        reset_mock_server(1)

        # do a reimport job
        package_id = "3d-gebaudemodelle-im-level-of-detail-2-lod-2-wms-f2a8a483"
        self._get_test_app().get(
            url="/api/harvest/reimport?id={}".format(package_id),
            headers={'Accept': 'application/json'},
            extra_environ={'REMOTE_USER': self.context['user'].encode('ascii')}
        )
        LOG.debug("reimport job done ...")

        # job_a should still be the last error free job:
        new_job = self._create_job(source.id)
        last_error_free_job = FisbrokerPlugin().last_error_free_job(new_job)
        _assert_equal(last_error_free_job.id, job_a.id)
Пример #26
0
    def test_can_only_reimport_with_guid(self):
        '''If no FIS-Broker guid can be determined for the package we try to
           reimport, return an HTTP 500 with internal error code 7.'''

        # datasets created this way have no extras set, so also no 'guid'
        fb_dataset_dict, source, job = self._harvester_setup(
            FISBROKER_HARVESTER_CONFIG, fb_guid=None)
        package_id = fb_dataset_dict['id']
        response = self.app.get(
            "/api/harvest/reimport?id={}".format(package_id),
            headers={'Accept': 'application/json'},
            expect_errors=True,
            extra_environ={
                'REMOTE_USER': self.context['user'].encode('ascii')
            })
        _assert_equal(response.status_int, 500)
        error = json.loads(response.body)['error']
        _assert_equal(error['code'], controller.ERROR_NO_GUID)
Пример #27
0
    def test_date_updated_as_fallback_for_date_released(self):
        '''When no `date_released` can be extracted, the value of
           `date_updated` must be used as a fallback for it.'''

        revision = '2000-01-01'
        data_dict = {
            'iso_values': {
                'dataset-reference-date': [
                    {'type': 'revision', 'value': revision},
                ]
            }
        }

        dates = extract_reference_dates(data_dict)
        _assert_equal(dates['date_released'], revision)
        _assert_equal(dates['date_updated'], revision)
Пример #28
0
    def test_annotate_arbitrary_url_with_description(self):
        '''A documentation resource that does have a description must be
           kept and annotated as a non-main documentation resource.'''
        pdf_url = 'https://fbinter.stadt-berlin.de/fb_daten/beschreibung/umweltatlas/datenformatbeschreibung/Datenformatbeschreibung_kriterien_zur_bewertung_der_bodenfunktionen2015.pdf'
        text = 'Technische Beschreibung'
        annotated = FISBrokerResourceAnnotator().annotate_resource({
            'url': pdf_url,
            'name': text,
            'description': text,
            'format': FORMAT_PDF,
        })

        _assert_equal(annotated['name'], text)
        _assert_equal(annotated['description'], text)
        _assert_equal(annotated['format'], FORMAT_PDF)
        _assert_equal(annotated['internal_function'], FUNCTION_DOCUMENTATION)
        _assert_equal(annotated['url'], pdf_url)
        assert not annotated['main']
Пример #29
0
    def test_handle_not_found_fisbroker(self):
        '''If the FIS-Broker service replies that no record with the given
           guid exists, return an HTTP 404 with internal error code 9.'''

        fb_dataset_dict, source, job = self._harvester_setup(
            FISBROKER_HARVESTER_CONFIG, fb_guid='invalid_guid')
        package_update(self.context, fb_dataset_dict)
        package_id = fb_dataset_dict['id']
        response = self.app.get(
            "/api/harvest/reimport?id={}".format(package_id),
            headers={'Accept': 'application/json'},
            expect_errors=True,
            extra_environ={
                'REMOTE_USER': self.context['user'].encode('ascii')
            })
        _assert_equal(response.status_int, 404)
        error = json.loads(response.body)['error']
        _assert_equal(error['code'], controller.ERROR_NOT_FOUND_IN_FISBROKER)
Пример #30
0
 def test_can_only_reimport_fisbroker_packages(self):
     '''Reimporting an existing package generated by a harvester other than
        ckanext-fisbroker must yield an HTTP 422 with internal error code 6.'''
     dummy_config = {
         'title': 'Dummy Harvester',
         'name': 'dummy-harvester',
         'source_type': 'dummyharvest',
         'url': "http://test.org/csw"
     }
     dataset_dict, source, job = self._harvester_setup(dummy_config)
     response = self.app.get(
         "/api/harvest/reimport?id={}".format(dataset_dict['id']),
         headers={'Accept': 'application/json'},
         expect_errors=True)
     _assert_equal(response.status_int, 422)
     error = json.loads(response.body)['error']
     _assert_equal(error['code'], controller.ERROR_NOT_HARVESTED_BY_FISBROKER)