def load_thesaurus(self, input_file, name, store):
    """Import a SKOS thesaurus from an RDF file parsed with rdflib.

    Reads the single ``skos:ConceptScheme`` in the file, builds a
    ``Thesaurus`` with one ``ThesaurusLabel`` per language-tagged title, then
    builds a ``ThesaurusKeyword`` (plus its ``ThesaurusKeywordLabel`` rows)
    for every ``skos:Concept``. Progress is reported on ``self.stderr``.

    Args:
        input_file: Path to the RDF file, or an ``UploadedFile`` instance.
        name: Identifier assigned to the created thesaurus.
        store: When truthy the created objects are saved; otherwise the
            file is only parsed and reported (dry run).

    Raises:
        CommandError: If no ``skos:ConceptScheme`` is found in the file.
    """
    g = Graph()

    # If input_file is an UploadedFile object rather than a file path, the
    # Graph.parse() method may not have enough info to correctly guess the
    # type; in this case supply the name, which should include the
    # extension, to guess_format manually.
    rdf_format = None
    if isinstance(input_file, UploadedFile):
        self.stderr.write(self.style.WARNING(f"Guessing RDF format from {input_file.name}..."))
        rdf_format = guess_format(input_file.name)

    g.parse(input_file, format=rdf_format)

    # An error will be thrown here if there is more than one scheme in the file.
    scheme = g.value(None, RDF.type, SKOS.ConceptScheme, any=False)
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")

    default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)

    available_titles = [t for t in g.objects(scheme, DC.title) if isinstance(t, Literal)]
    thesaurus_title = value_for_language(available_titles, default_lang)
    # The title doubles as the description when the scheme has none.
    description = g.value(scheme, DC.description, None, default=thesaurus_title)
    date_issued = g.value(scheme, DCTERMS.issued, None, default="")

    self.stderr.write(self.style.SUCCESS(f'Thesaurus "{thesaurus_title}", desc: {description} issued at {date_issued}'))

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.description = description
    thesaurus.title = thesaurus_title
    thesaurus.about = str(scheme)
    thesaurus.date = date_issued

    if store:
        thesaurus.save()

    # One label per title that carries a language tag.
    for title in available_titles:
        if title.language is None:
            continue
        thesaurus_label = ThesaurusLabel()
        thesaurus_label.lang = title.language
        thesaurus_label.label = title.value
        thesaurus_label.thesaurus = thesaurus

        if store:
            thesaurus_label.save()

    for concept in g.subjects(RDF.type, SKOS.Concept):
        about = str(concept)

        alt_label = g.value(concept, SKOS.altLabel, object=None, default=None)
        if alt_label is not None:
            alt_label = str(alt_label)
        else:
            # No explicit altLabel: derive one from the prefLabel literals.
            available_labels = [t for t in g.objects(concept, SKOS.prefLabel) if isinstance(t, Literal)]
            alt_label = value_for_language(available_labels, default_lang)

        # preferredLabel() returns an empty list when the concept has no
        # label in the default language; indexing it blindly would abort
        # the whole import just to build a log line, so fall back to the
        # alt label in that case.
        default_lang_labels = g.preferredLabel(concept, default_lang)
        pref = default_lang_labels[0][1] if default_lang_labels else alt_label

        self.stderr.write(self.style.SUCCESS(f'Concept {str(pref)}: {alt_label} ({about})'))

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label

        if store:
            tk.save()

        for _, pref_label in g.preferredLabel(concept):
            lang = pref_label.language
            label = str(pref_label)
            self.stderr.write(self.style.SUCCESS(f' Label {lang}: {label}'))

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label

            if store:
                tkl.save()
def load_thesaurus(self, input_file, name, store):
    """Import a SKOS thesaurus from an RDF/XML file parsed as an XML tree.

    Reads the ``skos:ConceptScheme`` element under the document root, builds
    a ``Thesaurus`` with one ``ThesaurusLabel`` per language-tagged title,
    then builds a ``ThesaurusKeyword`` (plus its ``ThesaurusKeywordLabel``
    rows) for every ``skos:Concept`` element. Progress is printed to stdout.

    Args:
        input_file: Source handed to ``dlxml.parse``.
        name: Identifier assigned to the created thesaurus.
        store: When truthy the created objects are saved; otherwise the
            file is only parsed and reported (dry run).

    Raises:
        CommandError: If no ``skos:ConceptScheme`` element is found.
    """
    RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    XML_URI = 'http://www.w3.org/XML/1998/namespace'

    ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
    LANG_ATTRIB = f"{{{XML_URI}}}lang"

    ns = {
        'rdf': RDF_URI,
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
        'skos': 'http://www.w3.org/2004/02/skos/core#',
    }

    tfile = dlxml.parse(input_file)
    root = tfile.getroot()

    # Compare against None explicitly: an element's truth value reflects
    # whether it has children, not whether it was found.
    scheme = root.find('skos:ConceptScheme', ns)
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")

    titles = scheme.findall('dc:title', ns)

    default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
    available_lang = get_all_lang_available_with_title(titles, LANG_ATTRIB)
    thesaurus_title = determinate_value(available_lang, default_lang)

    # dc:description normally holds only text (no child elements), so it
    # must be tested with "is not None"; a truthiness test would always
    # discard it. The title is used when no usable description exists.
    descr_el = scheme.find('dc:description', ns)
    if descr_el is not None and descr_el.text:
        descr = descr_el.text
    else:
        descr = thesaurus_title

    # dcterms:issued is optional; default to an empty string when absent.
    issued_el = scheme.find('dcterms:issued', ns)
    date_issued = issued_el.text if issued_el is not None else ""

    about = scheme.attrib.get(ABOUT_ATTRIB)

    print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = thesaurus_title
    thesaurus.description = descr
    thesaurus.about = about
    thesaurus.date = date_issued

    if store:
        thesaurus.save()

    # Each entry is indexed as (language, title); skip untagged titles.
    for lang in available_lang:
        if lang[0] is None:
            continue
        thesaurus_label = ThesaurusLabel()
        thesaurus_label.lang = lang[0]
        thesaurus_label.label = lang[1]
        thesaurus_label.thesaurus = thesaurus
        # Honour the dry-run flag: the thesaurus itself is only persisted
        # when store is set, so its labels must not be saved otherwise.
        if store:
            thesaurus_label.save()

    for concept in root.findall('skos:Concept', ns):
        about = concept.attrib.get(ABOUT_ATTRIB)
        alt_label = concept.find('skos:altLabel', ns)
        if alt_label is not None:
            alt_label = alt_label.text
        else:
            # No explicit altLabel: derive one from the prefLabel elements.
            pref_label_elements = concept.findall('skos:prefLabel', ns)
            concept_langs = get_all_lang_available_with_title(
                pref_label_elements, LANG_ATTRIB)
            alt_label = determinate_value(concept_langs, default_lang)

        print(f'Concept {alt_label} ({about})')

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label

        if store:
            tk.save()

        for pref_label in concept.findall('skos:prefLabel', ns):
            lang = pref_label.attrib.get(LANG_ATTRIB)
            label = pref_label.text

            print(f' Label {lang}: {label}')

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label

            if store:
                tkl.save()