class SubthemeTestCase(unittest.TestCase):
    """Check that subthemes loaded from the EuroVoc mapping end up in the model."""

    MAPPING_FILE = 'eurovoc_mapping.rdf'
    EUROVOC_FILE = 'eurovoc.rdf'

    def setUp(self):
        self._load_mapping()

    def _load_mapping(self):
        """Resolve the paths of the mapping and EuroVoc files used by the tests."""
        self.map_f = get_path(self.MAPPING_FILE)
        self.voc_f = get_path(self.EUROVOC_FILE)

    def test_subthemes(self):
        """
        Load subthemes and verify every narrowMatch reference of the mapping
        can be found, and that each theme has at least as many subthemes as
        the mapping lists for it.
        """
        clear_subthemes()
        g = Graph()
        g.parse(self.map_f)

        refs = list(g.objects(None, SKOS.narrowMatch))
        self.assertTrue(len(refs) > 0)

        load_subthemes(self.map_f, self.voc_f)
        all_subthemes = Subtheme.q()
        self.assertTrue(all_subthemes.count() > 0)

        for ref in refs:
            # Only the lookup is guarded: any error it raises is reported as
            # an explicit test failure naming the offending reference.
            try:
                subtheme = Subtheme.q().filter_by(uri=str(ref)).one()
            except Exception as err:
                self.fail("No results for {}: {}".format(ref, err))
            self.assertIsNotNone(subtheme)

        themes = g.subjects(RDF.type, SKOS.Concept)
        for theme in themes:
            theme_len = g.objects(theme, SKOS.narrowMatch)
            theme_name = Subtheme.normalize_theme(theme)
            q = Subtheme.for_theme(theme_name)
            self.assertTrue(q.count() >= len(list(theme_len)))
def load_themes():
    """
    Load the 'eu_themes' vocabulary through ``themes_loader`` and then reload
    the subthemes, asserting after each step that something was stored.
    """

    class Opts(object):
        # Plain options holder handed to themes_loader; url is always None.
        def __init__(self, filename, name, fmt):
            self.url = None
            self.filename, self.name, self.format = filename, name, fmt

    skos_path = _get_path('data-theme-skos.rdf', 'vocabularies')
    themes_loader.options = Opts(skos_path, 'eu_themes', None)
    themes_loader.load()

    econ_label = interfaces.get_localized_tag_name('ECON')
    Session.flush()
    assert econ_label
    assert Session.query(Vocabulary).filter_by(name='eu_themes').first()

    mapping_path = _get_path(MAPPING_FILE)
    eurovoc_path = _get_path(EUROVOC_FILE)
    clear_subthemes()
    load_subthemes(mapping_path, eurovoc_path)
    assert Subtheme.q().first()
def get_dcatapit_subthemes(lang):
    """
    Dump subthemes tree with localized labels for all themes.

    Returns a dict keyed by theme name; each value is a list of
    ``{'name': ..., 'value': ...}`` dicts where the name is the label
    prefixed by one dash per depth level and the value is the subtheme uri.
    """
    tree = {}
    for theme_name in Subtheme.get_theme_names():
        tree[theme_name] = [
            {
                'name': '{} {}'.format('-' * subtheme.depth, localized),
                'value': subtheme.uri,
            }
            for subtheme, localized in Subtheme.for_theme(theme_name, lang)
        ]
    return tree
def test_subthemes(self):
    """
    Load subthemes and verify every narrowMatch reference of the mapping
    graph can be found among the stored subthemes.
    """
    clear_subthemes()
    g = Graph()
    g.parse(self.map_f)

    refs = list(g.objects(None, SKOS.narrowMatch))
    self.assertTrue(len(refs) > 0)

    load_subthemes(self.map_f, self.voc_f)
    all_subthemes = Subtheme.q()
    self.assertTrue(all_subthemes.count() > 0)

    for ref in refs:
        # Only the lookup is guarded: any error it raises is reported as an
        # explicit test failure naming the offending reference.
        try:
            subtheme = Subtheme.q().filter_by(uri=str(ref)).one()
        except Exception as err:
            self.fail("No results for {}: {}".format(ref, err))
        self.assertIsNotNone(subtheme)
def test_subthemes(self):
    """
    Reload subthemes from the mapping file and check that every narrowMatch
    reference is stored, and that each theme holds at least as many
    subthemes as the mapping lists for it.
    """
    clear_subthemes()
    mapping = Graph()
    mapping.parse(self.map_f)

    refs = list(mapping.objects(None, SKOS.narrowMatch))
    self.assertTrue(len(refs) > 0)

    load_subthemes(self.map_f, self.voc_f)
    self.assertGreater(Subtheme.q().count(), 0)

    for ref in refs:
        try:
            self.assertIsNotNone(
                Subtheme.q().filter_by(uri=str(ref)).one())
        except Exception as err:
            self.fail(f'No results for {ref}: {err}')

    for concept in mapping.subjects(RDF.type, SKOS.Concept):
        narrower = mapping.objects(concept, SKOS.narrowMatch)
        stored = Subtheme.for_theme(Subtheme.normalize_theme(concept))
        self.assertGreaterEqual(stored.count(), len(list(narrower)))
def dcatapit_string_to_localized_aggregated_themes(value, lang):
    """
    Load json with subthemes and get localized subtheme names.

    Returns a list of ``{'theme': localized name, 'subthemes': [labels]}``
    dicts, one per aggregated theme parsed from ``value``.

    Used in template
    """
    localized = []
    for entry in dcatapit_string_to_aggregated_themes(value):
        theme_code = entry['theme']
        theme_label = interfaces.get_localized_tag_name(theme_code, lang=lang)
        # Keep only the labels of subthemes that the entry actually selects.
        selected_labels = [
            label
            for subtheme, label in Subtheme.for_theme(theme_code, lang)
            if subtheme.uri in entry['subthemes']
        ]
        localized.append({'theme': theme_label, 'subthemes': selected_labels})
    return localized
def load_themes():
    """
    Load the 'eu_themes' vocabulary from the SKOS theme file and then reload
    the subthemes, asserting after each step that something was stored.
    """
    theme_graph = load_graph(path=get_test_file(SKOS_THEME_FILE))
    do_load(theme_graph, 'eu_themes')

    econ_label = interfaces.get_localized_tag_name('ECON')
    Session.flush()
    assert econ_label
    assert Session.query(Vocabulary).filter_by(name='eu_themes').first()

    mapping_path = get_voc_file(MAPPING_FILE)
    eurovoc_path = get_test_file(EUROVOC_FILE)
    clear_subthemes()
    load_subthemes(mapping_path, eurovoc_path)
    assert Subtheme.q().first()
def _parse_themes(self, dataset, ref):
    """
    Build the aggregated theme rows for ``ref`` from the graph.

    Each DCAT theme of ``ref`` becomes a ``{'theme': name, 'subthemes': [...]}``
    row, where the theme name is the last path segment of the theme URI and
    the subthemes are those DCT subjects of ``ref`` that belong to the theme.
    The 'theme' entry is removed from the dataset extras first.
    """
    self._remove_from_extra(dataset, 'theme')
    themes = list(self.g.objects(ref, DCAT.theme))
    # Convert once instead of once per theme.
    subthemes = [str(s) for s in self.g.objects(ref, DCT.subject)]

    out = []
    for t in themes:
        theme_name = str(t).split('/')[-1]
        try:
            subthemes_for_theme = Subtheme.for_theme_values(theme_name)
        except ValueError:
            # Theme not accepted by the model: keep it, with no subthemes.
            subthemes_for_theme = []

        row = {'theme': theme_name,
               'subthemes': [s for s in subthemes if s in subthemes_for_theme]}
        out.append(row)
    # NOTE(review): `out` is neither stored nor returned in the visible
    # code - confirm where the aggregate is meant to be persisted.
def _add_subthemes(self, ref, subthemes):
    """
    Add each known subtheme to the graph as a SKOS concept with its labels
    and link it to ``ref`` as a DCT subject.

    subthemes is a list of eurovoc hrefs.
    """
    for href in subthemes:
        concept = URIRef(href)
        record = Subtheme.get(href)
        if record:
            names = record.get_names_dict()
            self.g.add((concept, RDF.type, SKOS.Concept))
            offered = ((code, text) for code, text in names.items()
                       if code in OFFERED_LANGS)
            for code, text in offered:
                self.g.add((concept, SKOS.prefLabel, Literal(text, lang=code)))
            self.g.add((ref, DCT.subject, concept))
        else:
            # Unknown subthemes are reported and skipped.
            print("No subtheme for {}".format(href))
def dcatapit_subthemes(key, flattened_data, errors, context):
    """
    Validate aggregate_theme; expected format is
    [
      {
        'theme': THEME_CODE,
        'subthemes': ['subtheme uri', 'subtheme uri']
      }, ...
    ]

    If the aggregate theme does not exist, try and parse the extra theme value.

    The validated (or derived) value is written back to
    ``flattened_data[key]`` as a JSON string; nothing is returned.

    :param key: flattened key of the field being validated
    :param flattened_data: flattened data dict, updated in place
    :param errors: not used by this validator
    :param context: when it is a non-empty dict, subthemes are checked through
        ``Subtheme.for_theme`` only if its ``dcatapit_subthemes_check_in_db``
        entry is truthy; when it is empty or None the check always runs
    :raises Invalid: if the value is not a non-empty list of well-formed,
        non-duplicated theme items
    """

    def _get_flattened_theme():
        # Look for an extra whose key is 'theme' and return its value.
        for tkey in flattened_data:
            if len(tkey) == 3:
                x, idx, k = tkey
                if x == 'extras' and k == 'key' and flattened_data[tkey] == 'theme':
                    return flattened_data[('extras', idx, 'value')]

        # Not found in expected fields, look into the discarded fields
        discarded = flattened_data.get(('__extras',), None)
        if discarded and 'theme' in discarded:
            return discarded['theme']
        return None

    def _do_return(value):
        flattened_data[key] = value

    value = flattened_data.get(key)
    if not value or value == '[]':  # a little shortcut here
        # A missing aggregate is not an error: derive it from the extra
        # theme value, or fall back to the undefined theme.
        theme = _get_flattened_theme()
        if theme and theme != '[]':  # other shortcut
            log.warning(
                'Aggregate theme is missing, trying setting values from extra theme key')
            theme_list = themes_parse_to_uris(theme)
            _do_return(themes_to_aggr_json(theme_list))
        else:
            log.warning('Aggregate theme is missing, setting undefined value')
            _do_return(themes_to_aggr_json(['OP_DATPRO']))
        return

    # Messages are translated first and formatted afterwards, so that the
    # lookup is done on the message template and not on the filled-in text.
    try:
        aggr_list = json.loads(value)
    except (TypeError, ValueError):
        # handle old '{THEME1,THEME2}' notation
        if isinstance(value, str):
            _v = value.rstrip('}').lstrip('{').split(',')
            aggr_list = [{'theme': v, 'subthemes': []} for v in _v]
        elif isinstance(value, (list, tuple)):
            aggr_list = [{'theme': v, 'subthemes': []} for v in value]
        else:
            raise Invalid(
                _('Theme data is not valid, expected json, got {}').format(
                    type(value)))
    if not isinstance(aggr_list, list):
        raise Invalid(
            _('Theme data should be a list, got {}').format(type(aggr_list)))

    allowed_keys = {'theme': str, 'subthemes': list}
    allowed_keys_set = set(allowed_keys)
    check_with_db = context.get(
        'dcatapit_subthemes_check_in_db') if context else True

    if not aggr_list:
        raise Invalid(_('Theme data should not be empty'))

    for aggr in aggr_list:
        if not isinstance(aggr, dict):
            raise Invalid(
                _('Invalid theme aggr item, should be a dict, got {}').format(
                    type(aggr)))
        unknown_keys = set(aggr.keys()) - allowed_keys_set
        if unknown_keys:
            raise Invalid(
                _('Theme aggr contains invalid keys: {}').format(unknown_keys))

        if not aggr.get('theme'):
            raise Invalid(_('Theme data should not be empty'))

        for k, v in aggr.items():
            allowed_type = allowed_keys[k]
            if not isinstance(v, allowed_type):
                raise Invalid(
                    _('Theme item {} value: {} should be {}, got {}').format(
                        k, v, allowed_type, type(v)))
            if k == 'subthemes':
                for subtheme in v:
                    if not isinstance(subtheme, str):
                        raise Invalid(
                            _('Subtheme {} value should be string').format(
                                subtheme))

        if not check_with_db:
            continue

        theme_name = aggr['theme']
        subthemes = aggr.get('subthemes') or []
        try:
            known_uris = {s.uri for s in Subtheme.for_theme(theme_name)}
        except ValueError:
            raise Invalid(_('Invalid theme {}').format(theme_name))

        for s in subthemes:
            if s not in known_uris:
                raise Invalid(_('Invalid subtheme: {}').format(s))

    reduced_themes = {s.get('theme') for s in aggr_list if s.get('theme')}
    if len(aggr_list) != len(reduced_themes):
        raise Invalid(
            _('There are duplicate themes. Expected {} items, got {}').format(
                len(aggr_list), len(reduced_themes)))

    _do_return(json.dumps(aggr_list))
def dcatapit_subthemes(value, context):
    """
    Validate theme data and return it serialized as a JSON string.

    Expects [{'theme': THEME_CODE, 'subthemes': ['subtheme uri', 'subtheme uri']}, .. ]

    The old '{THEME1,THEME2}' string notation and plain lists/tuples of theme
    codes are accepted too and converted to the expected structure.

    :param value: JSON string, legacy string notation, or list/tuple of codes
    :param context: when it is a non-empty dict, subthemes are checked through
        ``Subtheme.for_theme`` only if its ``dcatapit_subthemes_check_in_db``
        entry is truthy; when it is empty or None the check always runs
    :returns: the validated data dumped with ``json.dumps``
    :raises Invalid: if the value is empty or not a list of well-formed,
        non-duplicated theme items
    """
    # `unicode` only exists on Python 2; fall back to `str` alone elsewhere
    # so the validator does not die with a NameError.
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    if not value:
        raise Invalid(_("Theme data should not be empty"))

    # Messages are translated first and formatted afterwards, so that the
    # lookup is done on the message template and not on the filled-in text.
    try:
        data = json.loads(value)
    except (TypeError, ValueError):
        # handle old '{THEME1,THEME2}' notation
        if isinstance(value, string_types):
            _v = value.rstrip('}').lstrip('{').split(',')
            data = [{'theme': v, 'subthemes': []} for v in _v]
        elif isinstance(value, (list, tuple)):
            data = [{'theme': v, 'subthemes': []} for v in value]
        else:
            raise Invalid(
                _("Theme data is not valid, expected json, got {}").format(
                    type(value)))
    if not isinstance(data, list):
        raise Invalid(
            _("Theme data should be a list, got {}").format(type(data)))

    allowed_keys = {'theme': string_types, 'subthemes': list}
    allowed_keys_set = set(allowed_keys)
    check_with_db = context.get(
        'dcatapit_subthemes_check_in_db') if context else True

    for item in data:
        if not isinstance(item, dict):
            raise Invalid(
                _("Invalid theme item, should be a dict, got {}").format(
                    type(item)))
        unknown_keys = set(item.keys()) - allowed_keys_set
        if unknown_keys:
            raise Invalid(
                _("Theme item contains invalid keys: {}").format(unknown_keys))

        # Without a theme the lookups below would fail with a KeyError
        # instead of a validation error.
        if not item.get('theme'):
            raise Invalid(_("Theme data should not be empty"))

        for k, v in item.items():
            allowed_type = allowed_keys[k]
            if not isinstance(v, allowed_type):
                raise Invalid(
                    _("Theme item {} value: {} should be {}, got {}").format(
                        k, v, allowed_type, type(v)))
            if k == 'subthemes':
                for subtheme in v:
                    if not isinstance(subtheme, string_types):
                        raise Invalid(
                            _("Subtheme {} value should be string").format(
                                subtheme))

        if not check_with_db:
            continue

        theme_name = item['theme']
        subthemes = item.get('subthemes') or []
        try:
            known_uris = set(s.uri for s in Subtheme.for_theme(theme_name))
        except ValueError:
            raise Invalid(_("Invalid theme {}").format(theme_name))

        for s in subthemes:
            if s not in known_uris:
                raise Invalid(_("Invalid subtheme: {}").format(s))

    reduced_themes = set(s['theme'] for s in data)
    if len(data) != len(reduced_themes):
        raise Invalid(
            _("There are duplicate themes. Expected {} items, got {}").format(
                len(data), len(reduced_themes)))

    return json.dumps(data)