def test_tokenizer(self):
    load_license(self.g)
    Session.flush()

    tokens = License.get_as_tokens()
    self.assertTrue(len(tokens.keys()) > 0)

    from_token, default = License.find_by_token('cc-by-sa')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('ccbysa' in from_token.uri.lower())

    from_token, default = License.find_by_token('cc-zero')  # http://opendefinition.org/licenses/cc-zero/
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('PublicDomain' in from_token.license_type)

    from_token, default = License.find_by_token('Creative Commons Attribuzione')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('Attribution' in from_token.license_type)

    odbl = """["Open Data Commons Open Database License / OSM (ODbL/OSM): You are free to copy, distribute, transmit and adapt our data, as long as you credit OpenStreetMap and its contributors\nIf you alter or build upon our data, you may distribute the result only under the same licence. (http://www.openstreetmap.org/copyright)"]"""
    from_token, default = License.find_by_token(odbl, 'other')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('odbl' in from_token.default_name.lower())
def map_ckan_license(harvest_object=None, pkg_dict=None):
    """
    Map resource licenses to the controlled vocabulary.

    For each resource's `license_type` extra:
     * if it exists, perform simple validation; when it is not valid,
       replace it with the unknown license type;
     * if it does not exist, try to map the dataset's license to a license
       in the controlled vocabulary, falling back to the unknown license type.

    :param harvest_object: harvest object whose content is a dictized dataset
    :param pkg_dict: dictized dataset
    :type harvest_object: HarvestObject model
    :type pkg_dict: dict

    :return: the dataset's dict with licenses mapped to vocabulary URIs
    :rtype: dict
    """
    if not (harvest_object or pkg_dict) or (harvest_object and pkg_dict):
        raise ValueError(
            "You should provide either harvest_object or pkg_dict")

    if harvest_object:
        data = json.loads(harvest_object.content)
    else:
        data = pkg_dict

    dataset_license = get_license_from_package(data)
    for res in data.get('resources') or []:
        if res.get('license_type'):
            l, _ = License.find_by_token(res['license_type'])
            res['license_type'] = l.uri
        else:
            res['license_type'] = dataset_license.uri
    return data
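# Hypothetical usage sketch for map_ckan_license(), not part of the extension:
# it assumes the licenses vocabulary has already been loaded (e.g. via
# load_license()/load_from_graph()); the resource ids and license tokens below
# are illustrative only.
def _example_map_ckan_license():
    pkg = {
        'license_title': 'cc-by',
        'resources': [
            {'id': 'res-1', 'license_type': 'not-a-real-token'},  # invalid: replaced with the unknown license URI
            {'id': 'res-2'},  # missing: inherits the dataset-level license URI
        ],
    }
    mapped = map_ckan_license(pkg_dict=pkg)
    for res in mapped['resources']:
        # every resource now carries a URI from the controlled vocabulary
        print(res['id'], res['license_type'])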
def test_licenses(self):
    load_license(self.g)
    Session.flush()

    all_licenses = License.q()
    count = all_licenses.count()
    self.assertTrue(count > 0)
    self.assertTrue(count == len(list(self.g.subjects(None, SKOS.Concept))))

    all_localized = LocalizedLicenseName.q()
    self.assertTrue(all_localized.count() > 0)

    for_select = License.for_select('it')

    # check license type
    self.assertTrue(all([s[0] for s in for_select]))
def before_index(self, dataset_dict):
    '''
    Insert `dcat_theme` into solr
    '''
    extra_theme = dataset_dict.get("extras_theme", None) or ''
    themes = helpers.dump_dcatapit_subthemes(extra_theme)

    search_terms = [t['theme'] for t in themes]
    if search_terms:
        dataset_dict['dcat_theme'] = search_terms

    search_subthemes = []
    for t in themes:
        search_subthemes.extend(t.get('subthemes') or [])

    if search_subthemes:
        dataset_dict['dcat_subtheme'] = search_subthemes

    localized_subthemes = interfaces.get_localized_subthemes(search_subthemes)
    for lang, subthemes in localized_subthemes.items():
        dataset_dict['dcat_subtheme_{}'.format(lang)] = subthemes

    ddict = json.loads(dataset_dict['data_dict'])
    resources = ddict.get('resources') or []
    _licenses = list(set([r.get('license_type') for r in resources if r.get('license_type')]))
    for l in _licenses:
        lic = License.get(l)
        if lic:
            for loclic in lic.get_names():
                lname = loclic['name']
                lang = loclic['lang']
                if lname:
                    dataset_dict['resource_license_{}'.format(lang)] = lname
        else:
            log.warning('Bad license: license not found: %r ', l)
    dataset_dict['resource_license'] = _licenses

    org_id = dataset_dict['owner_org']
    organization_show = plugins.toolkit.get_action('organization_show')
    if org_id:
        org = organization_show(DEFAULT_ORG_CTX, {'id': org_id})
    else:
        org = {}

    if org.get('region'):
        # multilang!
        region_base = org['region']
        tags = interfaces.get_all_localized_tag_labels(region_base)
        for lang, region in tags.items():
            dataset_dict['organization_region_{}'.format(lang)] = region

    self._update_pkg_rights_holder(dataset_dict, org=org)
    return dataset_dict
def get_license_from_package(pkg_dict):
    """
    Return the License matching the package's `license_title`,
    falling back to the unknown license when no match is found.
    """
    for_license = pkg_dict.get('license_title')
    license, fallback = License.find_by_token(for_license or 'Unknown')
    if fallback:
        log.warning("Got fallback license for %s", for_license)
    return license
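# Hypothetical usage sketch for get_license_from_package(), not part of the
# extension: it assumes the licenses vocabulary is loaded; the titles below
# are illustrative only.
def _example_get_license_from_package():
    known = get_license_from_package({'license_title': 'cc-by-sa'})
    print(known.uri)  # URI of the matching vocabulary entry

    # an unmatched title resolves to the fallback/unknown license and logs a warning
    fallback = get_license_from_package({'license_title': 'no such license'})
    print(fallback.uri)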
def test_ckan_harvester_license(self):
    dataset = {
        'title': 'some title',
        'id': 'sometitle',
        'resources': [
            {
                'id': 'resource/1111',
                'url': 'http://resource/1111',
                'license_type': 'invalid',
            },
            {
                'id': 'resource/2222',
                'url': 'http://resource/2222',
                'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13',
            },
        ],
    }

    data = json.dumps(dataset)
    harvest_dict = self._create_harvest_obj('http://mock/source/', name='testpkg')
    harvest_obj = HarvestObject.get(harvest_dict['id'])
    harvest_obj.content = data
    h = CKANMappingHarvester()
    h.import_stage(harvest_obj)
    Session.flush()

    pkg_dict = helpers.call_action('package_show', context={}, name_or_id='sometitle')
    self.assertTrue(len(pkg_dict['resources']) == 2)

    resources = pkg_dict['resources']
    r = dataset['resources']
    for res in resources:
        if res['id'] == r[0]['id']:
            self.assertEqual(res['license_type'], License.get(License.DEFAULT_LICENSE).uri)
        else:
            self.assertEqual(res['license_type'], r[1]['license_type'])
def test_license(self):
    def get_path(fname):
        return os.path.join(os.path.dirname(__file__), '..', '..', '..', 'examples', fname)

    licenses = get_path('licenses.rdf')
    load_from_graph(path=licenses)
    Session.flush()

    dataset = {
        'title': 'some title',
        'id': 'sometitle',
        'resources': [
            {
                'id': 'resource/1111',
                'uri': 'http://resource/1111',
                'license_type': 'invalid',
            },
            {
                'id': 'resource/2222',
                'uri': 'http://resource/2222',
                'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13',
            },
        ],
    }

    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    s = RDFSerializer()

    dataset_ref = s.graph_from_dataset(dataset)
    g = s.g

    r1 = URIRef(dataset['resources'][0]['uri'])
    r2 = URIRef(dataset['resources'][1]['uri'])

    unknown = License.get(License.DEFAULT_LICENSE)

    license_ref = g.value(r1, DCT.license)
    assert license_ref is not None
    assert str(license_ref) == unknown.uri, \
        "got license {}, instead of {}".format(license_ref, unknown.uri)

    gpl = License.get(dataset['resources'][1]['license_type'])
    assert gpl is not None

    license_ref = g.value(r2, DCT.license)
    license_type = g.value(license_ref, DCT.type)
    assert license_ref is not None
    assert str(license_ref) == gpl.document_uri
    assert str(license_type) == gpl.license_type

    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    new_dataset = datasets[0]
    resources = new_dataset['resources']

    def _find_res(res_uri):
        for res in resources:
            if res_uri == res['uri']:
                return res
        raise ValueError("No resource for {}".format(res_uri))

    new_res_unknown = _find_res(str(r1))
    new_res_gpl = _find_res(str(r2))

    assert new_res_unknown['license_type'] == unknown.uri, (new_res_unknown['license_type'], unknown.uri,)
    assert new_res_gpl['license_type'] == dataset['resources'][1]['license_type']
def before_index(self, dataset_dict):
    '''
    Insert `dcat_theme` into solr
    '''
    extra_theme = dataset_dict.get(f'extras_{FIELD_THEMES_AGGREGATE}', None) or ''
    aggr_themes = helpers.dcatapit_string_to_aggregated_themes(extra_theme)

    search_terms = [t['theme'] for t in aggr_themes]
    if search_terms:
        dataset_dict['dcat_theme'] = search_terms

    search_subthemes = []
    for t in aggr_themes:
        search_subthemes.extend(t.get('subthemes') or [])

    if search_subthemes:
        dataset_dict['dcat_subtheme'] = search_subthemes

    localized_subthemes = interfaces.get_localized_subthemes(search_subthemes)
    for lang, subthemes in localized_subthemes.items():
        dataset_dict['dcat_subtheme_{}'.format(lang)] = subthemes

    ddict = json.loads(dataset_dict['data_dict'])
    resources = ddict.get('resources') or []
    _licenses = list(set([r.get('license_type') for r in resources if r.get('license_type')]))
    for l in _licenses:
        lic = License.get(l)
        if lic:
            for loclic in lic.get_names():
                lname = loclic['name']
                lang = loclic['lang']
                if lname:
                    dataset_dict['resource_license_{}'.format(lang)] = lname
        else:
            log.warning('Bad license: license not found: %r ', l)
    dataset_dict['resource_license'] = _licenses

    org_id = dataset_dict['owner_org']
    organization_show = plugins.toolkit.get_action('organization_show')
    if org_id:
        org = organization_show(get_org_context(),
                                {'id': org_id,
                                 'include_tags': False,
                                 'include_users': False,
                                 'include_groups': False,
                                 'include_extras': True,
                                 'include_followers': False,
                                 'include_datasets': False,
                                 })
    else:
        org = {}

    if org.get('region'):
        # multilang values
        # note: region can be in {val1,val2} notation for multiple values
        region_base = org['region']
        if not isinstance(region_base, (list, tuple,)):
            region_base = region_base.strip('{}').split(',')
        tags = {}
        for region_name in region_base:
            ltags = interfaces.get_all_localized_tag_labels(region_name)
            for tlang, tvalue in ltags.items():
                try:
                    tags[tlang].append(tvalue)
                except KeyError:
                    tags[tlang] = [tvalue]

        for lang, region in tags.items():
            dataset_dict['organization_region_{}'.format(lang)] = region

    self._update_pkg_rights_holder(dataset_dict, org=org)
    return dataset_dict
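# Hypothetical sketch of the region handling above, not part of the plugin:
# it only shows how an organization 'region' value in '{val1,val2}' notation
# is split into individual tag names before per-language localization; the
# tag names used here are illustrative only.
def _example_region_split(region_value='{ITA_LOM,ITA_PIE}'):
    if not isinstance(region_value, (list, tuple)):
        region_value = region_value.strip('{}').split(',')
    # e.g. ['ITA_LOM', 'ITA_PIE']; each entry is then localized per language
    return region_value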