示例#1
0
    def load_thesaurus(self, input_file, name, store):
        """Parse an RDF/SKOS file and build the thesaurus objects it describes.

        A ``Thesaurus`` is created from the file's concept scheme, one
        ``ThesaurusLabel`` per language-tagged scheme title, one
        ``ThesaurusKeyword`` per ``skos:Concept`` and one
        ``ThesaurusKeywordLabel`` per preferred label of each concept.
        Progress is reported on ``self.stderr``.

        Args:
            input_file: Source passed to ``Graph.parse()``. When it is an
                ``UploadedFile`` its ``name`` is used to guess the RDF format.
            name: Value assigned to ``Thesaurus.identifier``.
            store: When truthy, every created object is saved; otherwise the
                objects are only built and reported.

        Raises:
            CommandError: If the file contains no ``skos:ConceptScheme``.
        """
        g = Graph()

        # If input_file is an UploadedFile object rather than a file path, Graph.parse()
        # may not have enough info to correctly guess the type; in this case supply the
        # name, which should include the extension, to guess_format manually.
        rdf_format = None
        if isinstance(input_file, UploadedFile):
            self.stderr.write(self.style.WARNING(f"Guessing RDF format from {input_file.name}..."))
            rdf_format = guess_format(input_file.name)

        g.parse(input_file, format=rdf_format)

        # An error will be thrown here if there is more than one scheme in the file
        scheme = g.value(None, RDF.type, SKOS.ConceptScheme, any=False)
        if scheme is None:
            raise CommandError("ConceptScheme not found in file")

        default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)

        # Only literal titles are usable as labels; other node types are skipped.
        available_titles = [t for t in g.objects(scheme, DC.title) if isinstance(t, Literal)]
        thesaurus_title = value_for_language(available_titles, default_lang)
        # The description falls back to the title when the scheme has none.
        description = g.value(scheme, DC.description, None, default=thesaurus_title)
        date_issued = g.value(scheme, DCTERMS.issued, None, default="")

        self.stderr.write(self.style.SUCCESS(f'Thesaurus "{thesaurus_title}", desc: {description} issued at {date_issued}'))

        thesaurus = Thesaurus()
        thesaurus.identifier = name
        thesaurus.description = description
        thesaurus.title = thesaurus_title
        thesaurus.about = str(scheme)
        thesaurus.date = date_issued

        if store:
            thesaurus.save()

        # One ThesaurusLabel per title that carries a language tag.
        for title in available_titles:
            if title.language is None:
                continue

            thesaurus_label = ThesaurusLabel()
            thesaurus_label.lang = title.language
            thesaurus_label.label = title.value
            thesaurus_label.thesaurus = thesaurus

            if store:
                thesaurus_label.save()

        for concept in g.subjects(RDF.type, SKOS.Concept):
            # The preferred label is only used for the progress message, so a
            # concept lacking one in the default language must not abort the
            # whole import with an IndexError.
            default_lang_labels = g.preferredLabel(concept, default_lang)
            pref = default_lang_labels[0][1] if default_lang_labels else None

            about = str(concept)
            alt_label = g.value(concept, SKOS.altLabel, object=None, default=None)
            if alt_label is not None:
                alt_label = str(alt_label)
            else:
                # No altLabel: derive one from the concept's literal prefLabels.
                available_labels = [t for t in g.objects(concept, SKOS.prefLabel) if isinstance(t, Literal)]
                alt_label = value_for_language(available_labels, default_lang)

            self.stderr.write(self.style.SUCCESS(f'Concept {str(pref)}: {alt_label} ({about})'))

            tk = ThesaurusKeyword()
            tk.thesaurus = thesaurus
            tk.about = about
            tk.alt_label = alt_label

            if store:
                tk.save()

            # One ThesaurusKeywordLabel per preferred label, in any language.
            for _, pref_label in g.preferredLabel(concept):
                lang = pref_label.language
                label = str(pref_label)
                self.stderr.write(self.style.SUCCESS(f'    Label {lang}: {label}'))

                tkl = ThesaurusKeywordLabel()
                tkl.keyword = tk
                tkl.lang = lang
                tkl.label = label

                if store:
                    tkl.save()
示例#2
0
    def load_thesaurus(self, input_file, name, store):
        """Parse a SKOS RDF/XML file and build the thesaurus objects it describes.

        A ``Thesaurus`` is created from the ``skos:ConceptScheme`` element, one
        ``ThesaurusLabel`` per language-tagged scheme title, one
        ``ThesaurusKeyword`` per ``skos:Concept`` and one
        ``ThesaurusKeywordLabel`` per ``skos:prefLabel`` of each concept.
        Progress is printed to standard output.

        Args:
            input_file: Source passed to ``dlxml.parse()``.
            name: Value assigned to ``Thesaurus.identifier``.
            store: When truthy, every created object is saved; otherwise the
                objects are only built and reported.

        Raises:
            CommandError: If the root has no ``skos:ConceptScheme`` child.
        """

        RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        XML_URI = 'http://www.w3.org/XML/1998/namespace'

        # Attribute names in "{namespace}local" (Clark) notation.
        ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
        LANG_ATTRIB = f"{{{XML_URI}}}lang"

        ns = {
            'rdf': RDF_URI,
            'foaf': 'http://xmlns.com/foaf/0.1/',
            'dc': 'http://purl.org/dc/elements/1.1/',
            'dcterms': 'http://purl.org/dc/terms/',
            'skos': 'http://www.w3.org/2004/02/skos/core#'
        }

        tfile = dlxml.parse(input_file)
        root = tfile.getroot()

        # Compare against None explicitly: the truth value of an element
        # reflects whether it has children, not whether it was found.
        scheme = root.find('skos:ConceptScheme', ns)
        if scheme is None:
            raise CommandError("ConceptScheme not found in file")

        titles = scheme.findall('dc:title', ns)

        default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
        # NOTE(review): the helper appears to return (lang, title) pairs, given
        # how the entries are indexed below - confirm against its definition.
        available_titles = get_all_lang_available_with_title(titles, LANG_ATTRIB)
        thesaurus_title = determinate_value(available_titles, default_lang)

        # The description falls back to the title when the element is absent.
        # A text-only element has no children and is therefore falsy, so the
        # check must be "is not None" or the description is always discarded.
        descr_el = scheme.find('dc:description', ns)
        descr = descr_el.text if descr_el is not None else thesaurus_title

        # A missing dcterms:issued yields an empty date instead of an
        # AttributeError on None.
        issued_el = scheme.find('dcterms:issued', ns)
        date_issued = issued_el.text if issued_el is not None else ""

        about = scheme.attrib.get(ABOUT_ATTRIB)

        print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')

        thesaurus = Thesaurus()
        thesaurus.identifier = name

        thesaurus.title = thesaurus_title
        thesaurus.description = descr
        thesaurus.about = about
        thesaurus.date = date_issued

        if store:
            thesaurus.save()

        # One ThesaurusLabel per title that carries a language.
        for title_entry in available_titles:
            if title_entry[0] is None:
                continue

            thesaurus_label = ThesaurusLabel()
            thesaurus_label.lang = title_entry[0]
            thesaurus_label.label = title_entry[1]
            thesaurus_label.thesaurus = thesaurus

            # Honour the store flag here too: the thesaurus itself is only
            # saved when store is truthy.
            if store:
                thesaurus_label.save()

        for concept in root.findall('skos:Concept', ns):
            about = concept.attrib.get(ABOUT_ATTRIB)
            pref_labels = concept.findall('skos:prefLabel', ns)

            alt_label = concept.find('skos:altLabel', ns)
            if alt_label is not None:
                alt_label = alt_label.text
            else:
                # No altLabel: derive one from the concept's prefLabels.
                available_labels = get_all_lang_available_with_title(
                    pref_labels, LANG_ATTRIB)
                alt_label = determinate_value(available_labels, default_lang)

            print(f'Concept {alt_label} ({about})')

            tk = ThesaurusKeyword()
            tk.thesaurus = thesaurus
            tk.about = about
            tk.alt_label = alt_label

            if store:
                tk.save()

            for pref_label in pref_labels:
                lang = pref_label.attrib.get(LANG_ATTRIB)
                label = pref_label.text

                print(f'    Label {lang}: {label}')

                tkl = ThesaurusKeywordLabel()
                tkl.keyword = tk
                tkl.lang = lang
                tkl.label = label

                if store:
                    tkl.save()