示例#1
0
    def get_concepts(self, uris):
        """
        Get a list of concepts given a comma separated string of AAT uris
        like http://vocab.getty.edu/aat/300380087

        One SPARQL query is issued per uri.  It fetches the skos:prefLabel
        values and the rdf:value of the skos:scopeNote entries of that uri,
        restricted to the languages in ``self.allowed_languages``.  Every
        result row is added to a new ``Concept`` as a value, followed by one
        dcterms identifier value that holds the uri itself.

        Arguments:
        uris -- comma separated string of uris; whitespace around a uri and
                empty entries (eg: a trailing comma) are ignored

        Returns a list with one Concept per uri, in the order given.

        Raises an Exception carrying an HTML formatted message (including
        the escaped query) as soon as the query for a uri returns no rows.

        """

        default_lang = settings.LANGUAGE_CODE
        dcterms_identifier_type = DValueType.objects.get(
            valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ''),
            namespace='dcterms')

        # the AAT expects language codes to be all lower case
        langs = ','.join(
            '"%s"' % lang.lower() for lang in self.allowed_languages)

        concepts = []
        for uri in uris.split(','):
            # tolerate "uri1, uri2" style input and stray commas
            uri = uri.strip()
            if not uri:
                continue

            # NOTE(review): the uri is written into the query unescaped;
            # confirm that callers only pass trusted uris.
            query = """
                SELECT ?value ?type WHERE {
                  {
                    <%s> skos:prefLabel ?value .
                    BIND('prefLabel' AS ?type)
                  }
                  UNION
                  {
                    <%s> skos:scopeNote [rdf:value ?value] .
                    BIND('scopeNote' AS ?type)
                  }
                  FILTER (lang(?value) in (%s)) 
                }""" % (uri, uri, langs)
            bindings = self.perform_sparql_query(query)["results"]["bindings"]

            if not bindings:
                # '&' has to be escaped first, otherwise the entities that
                # are written for '<' and '>' would be escaped a second time
                escaped_query = (query.replace('&', '&amp;')
                                 .replace('<', '&lt;')
                                 .replace('>', '&gt;'))
                raise Exception(
                    _("<strong>Error in SPARQL query:</strong><br>Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at <a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a><i><pre>%s</pre></i>Query returned 0 results, please check the query for errors.  You may need to add the appropriate languages into the database for this query to work<br><br>"
                      ) % (escaped_query))

            concept = Concept()
            concept.nodetype = 'Concept'
            for binding in bindings:
                concept.addvalue({
                    'type': binding["type"]["value"],
                    'value': binding["value"]["value"],
                    'language': binding["value"]["xml:lang"]
                })
            # keep the uri the concept came from as its dcterms identifier
            concept.addvalue({
                'value': uri,
                'language': default_lang,
                'type': dcterms_identifier_type.valuetype,
                'category': dcterms_identifier_type.category
            })
            concepts.append(concept)

        return concepts
示例#2
0
    def get_concepts(self, uris):
        """
        Get a list of concepts given a comma separated string of AAT uris
        like http://vocab.getty.edu/aat/300380087

        One SPARQL query is issued per uri.  It fetches the skos:prefLabel
        values and the rdf:value of the skos:scopeNote entries of that uri,
        restricted to the languages in ``self.allowed_languages``.  Every
        result row is added to a new ``Concept`` as a value.

        Arguments:
        uris -- comma separated string of uris; whitespace around a uri and
                empty entries (eg: a trailing comma) are ignored

        Returns a list with one Concept per uri, in the order given.

        Raises an Exception carrying an HTML formatted message (including
        the escaped query) as soon as the query for a uri returns no rows.

        """

        # The FILTER compares the language tags as plain strings, so the
        # codes are lower-cased before they are put into the query.
        # NOTE(review): assumes the endpoint stores its language tags in
        # lower case -- confirm against the Getty data.
        langs = ','.join(
            '"%s"' % lang.lower() for lang in self.allowed_languages)

        concepts = []
        for uri in uris.split(','):
            # tolerate "uri1, uri2" style input and stray commas
            uri = uri.strip()
            if not uri:
                continue

            # NOTE(review): the uri is written into the query unescaped;
            # confirm that callers only pass trusted uris.
            query = """
                SELECT ?value ?type WHERE {
                  {
                    <%s> skos:prefLabel ?value .
                    BIND('prefLabel' AS ?type)
                  }
                  UNION
                  {
                    <%s> skos:scopeNote [rdf:value ?value] .
                    BIND('scopeNote' AS ?type)
                  }
                  FILTER (lang(?value) in (%s)) 
                }""" % (uri, uri, langs)
            bindings = self.perform_sparql_query(query)["results"]["bindings"]

            if not bindings:
                # '&' has to be escaped first, otherwise the entities that
                # are written for '<' and '>' would be escaped a second time
                escaped_query = (query.replace('&', '&amp;')
                                 .replace('<', '&lt;')
                                 .replace('>', '&gt;'))
                raise Exception(_("<strong>Error in SPARQL query:</strong><br>Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at <a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a><i><pre>%s</pre></i>Query returned 0 results, please check the query for errors.  You may need to add the appropriate languages into the database for this query to work<br><br>") % (escaped_query))

            concept = Concept()
            concept.nodetype = 'Concept'
            for binding in bindings:
                concept.addvalue({
                    'type': binding["type"]["value"],
                    'value': binding["value"]["value"],
                    'language': binding["value"]["xml:lang"]
                })
            concepts.append(concept)

        return concepts
示例#3
0
    def save_concepts_from_skos(self, graph):
        """
        given an RDF graph, tries to save the concepts to the system

        Every skos:ConceptScheme found in the graph becomes a 'ConceptScheme'
        node; its dcterms:title and dcterms:description are stored as
        prefLabel and scopeNote values.  Every subject that is skos:inScheme
        of such a scheme becomes a 'Concept' node whose skos values are
        copied over.  skos:hasTopConcept, skos:broader, skos:narrower and
        skos:related are collected in self.relations (broader is stored as a
        'narrower' relation pointing the other way).  Languages seen on the
        values that are not in the DLanguage table yet are added to it.
        At the end all nodes and relations are saved inside one transaction
        and the nodes are indexed.

        Arguments:
        graph -- an rdflib.graph.Graph instance

        Returns self.

        Raises an Exception if graph is not an rdflib Graph, or if no nodes
        were collected because the graph holds no ConceptScheme.

        """

        if not isinstance(graph, Graph):
            raise Exception('graph argument should be of type rdflib.graph.Graph')

        baseuuid = uuid.uuid4()
        allowed_languages = set(models.DLanguage.objects.values_list('pk', flat=True))

        # read the value types once instead of querying them for every triple
        value_types = models.DValueType.objects.all()
        skos_value_types = dict((vt.valuetype, vt) for vt in value_types.filter(namespace='skos'))
        dcterms_value_types = dict((vt.valuetype, vt) for vt in value_types.filter(namespace='dcterms'))

        def register_language(value):
            # add the language of a value to the DLanguage table the first time it is seen
            if hasattr(value, 'language') and value.language not in allowed_languages:
                newlang = models.DLanguage()
                newlang.pk = value.language
                newlang.languagename = value.language
                newlang.isdefault = False
                newlang.save()
                allowed_languages.add(value.language)

        # Search for ConceptSchemes first
        for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
            scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
            concept_scheme = Concept({
                'id': scheme_id,
                'legacyoid': str(scheme),
                'nodetype': 'ConceptScheme'
            })

            for predicate, obj in graph.predicate_objects(subject=scheme):
                if str(DCTERMS) in predicate and predicate.replace(DCTERMS, '') in dcterms_value_types:
                    register_language(obj)

                    try:
                        # predicate.replace(DCTERMS, '') yields something like 'title' or 'description'
                        value_type = dcterms_value_types[predicate.replace(DCTERMS, '')]
                        if predicate == DCTERMS.title:
                            concept_scheme.addvalue({'value': obj, 'language': obj.language, 'type': 'prefLabel', 'category': value_type.category})
                            print('Casting dcterms:title to skos:prefLabel')
                        if predicate == DCTERMS.description:
                            concept_scheme.addvalue({'value': obj, 'language': obj.language, 'type': 'scopeNote', 'category': value_type.category})
                            print('Casting dcterms:description to skos:scopeNote')
                    except Exception:
                        # best effort: a value that can't be attached to the scheme is skipped
                        pass

                if predicate == SKOS.hasTopConcept:
                    self.relations.append({'source': scheme_id, 'type': 'hasTopConcept', 'target': self.generate_uuid_from_subject(baseuuid, obj)})

            self.nodes.append(concept_scheme)

            # Search for Concepts
            for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
                concept = Concept({
                    'id': self.generate_uuid_from_subject(baseuuid, s),
                    'legacyoid': str(s),
                    'nodetype': 'Concept'
                })

                # loop through all the elements within a <skos:Concept> element
                for predicate, obj in graph.predicate_objects(subject=s):
                    if str(SKOS) not in predicate:
                        continue

                    register_language(obj)

                    # this is essentially the skos element type within a <skos:Concept> element (eg: prefLabel, broader, etc...)
                    relation_or_value_type = predicate.replace(SKOS, '')

                    if relation_or_value_type in skos_value_types:
                        value_type = skos_value_types[relation_or_value_type]
                        concept.addvalue({'value': obj, 'language': obj.language, 'type': value_type.valuetype, 'category': value_type.category})
                    elif predicate == SKOS.broader:
                        # stored from the parent's point of view: parent --narrower--> this concept
                        self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, obj), 'type': 'narrower', 'target': self.generate_uuid_from_subject(baseuuid, s)})
                    elif predicate == SKOS.narrower or predicate == SKOS.related:
                        self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, s), 'type': relation_or_value_type, 'target': self.generate_uuid_from_subject(baseuuid, obj)})

                self.nodes.append(concept)

        # this has to be checked after the scan: inside the loop it could never
        # be true because a scheme node has just been appended
        if len(self.nodes) == 0:
            raise Exception('No ConceptScheme found in file.')

        # insert and index the concepts
        with transaction.atomic():
            for node in self.nodes:
                node.save()

            # insert the concept relations
            for relation in self.relations:
                newrelation = models.Relation()
                newrelation.relationid = str(uuid.uuid4())
                newrelation.conceptfrom_id = relation['source']
                newrelation.conceptto_id = relation['target']
                newrelation.relationtype_id = relation['type']
                newrelation.save()

            # need to index after the concepts and relations have been entered into the db
            # so that the proper context gets indexed with the concept
            for node in self.nodes:
                node.index()

        return self
示例#4
0
def load_authority_file(cursor, path_to_authority_files, filename, auth_file_to_entity_concept_mapping):
    """
    Load one authority document csv file, and its optional ".values.csv"
    companion file, into the concept schema.

    A top concept is created for the file (the cross-reference relationship
    types file reuses the existing concept
    00000000-0000-0000-0000-000000000005 instead).  Every row of the csv
    file becomes a Concept with a prefLabel, optional altLabels and a
    'narrower' relationship from its parent.  Rows of the values file add
    further values to the concepts loaded before, creating unknown value
    types on the fly.  Finally the concepts are saved and indexed and the
    relationships are inserted through ``cursor``.

    Arguments:
    cursor -- database cursor used to insert the concept relations
    path_to_authority_files -- directory that holds the csv files
    filename -- name of the authority document csv file
    auth_file_to_entity_concept_mapping -- dict keyed by authority file name
        holding lists of dicts with an 'ENTITYTYPE_CONCEPTID' entry

    Returns a list of error message strings (empty if nothing went wrong).
    The errors of each step are preceded by an 'ERRORS IN FILE' line naming
    the file that was processed and followed by a blank spacer entry.

    """

    print(filename.upper())

    filepath = os.path.join(path_to_authority_files, filename)
    values_filepath = filepath.replace('.csv', '.values.csv')
    unicodecsv.field_size_limit(sys.maxint)
    errors = []
    lookups = Lookups()

    def report(title, found):
        # wrap the errors of one step in a header line and a spacer
        if len(found) > 0:
            errors.append('ERRORS IN FILE: %s\n' % (title))
            errors.extend(found)
            errors.append('\n\n\n\n')

    #create nodes for each authority document file and relate them to the authority document node in the concept schema
    auth_doc_file_name = str(filename)
    display_file_name = string.capwords(auth_doc_file_name.replace('_',' ').replace('AUTHORITY DOCUMENT.csv', '').strip())
    if auth_doc_file_name.upper() != 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.CSV':
        top_concept = Concept()
        top_concept.id = str(uuid.uuid4())
        top_concept.nodetype = 'Concept'
        top_concept.legacyoid = auth_doc_file_name
        top_concept.addvalue({'value':display_file_name, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
        lookups.add_relationship(source='00000000-0000-0000-0000-000000000001', type='hasTopConcept', target=top_concept.id)
    else:
        top_concept = Concept().get(id = '00000000-0000-0000-0000-000000000005')
        top_concept.legacyoid = 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.csv'

    lookups.add_lookup(concept=top_concept, rownum=0)

    # ---- step 1: the concepts file ----
    concept_errors = []
    try:
        with open(filepath, 'rU') as f:
            rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','PREFLABEL','ALTLABELS','PARENTCONCEPTID','CONCEPTTYPE','PROVIDER'],
                encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
            next(rows) # skip header row
            for row in rows:
                try:
                    # the reader fills the columns a short row lacks with the
                    # restval marker, so the marker shows up as a value, not as a key
                    missing_fields = [field for field, value in row.items() if value == 'MISSING']
                    if missing_fields:
                        raise Exception('The row wasn\'t parsed properly. Missing %s' % (', '.join(missing_fields)))

                    # validate before anything is recorded so that a bad row
                    # can't leave relationships to a concept that is never added
                    concept_type = row[u'CONCEPTTYPE'].upper()
                    if row[u'PARENTCONCEPTID'] == '' or (concept_type != 'INDEX' and concept_type != 'COLLECTOR'):
                        raise Exception('The row has invalid values.')

                    legacyoid = row[u'CONCEPTID']
                    concept = Concept()
                    concept.id = legacyoid if is_uuid(legacyoid) == True else str(uuid.uuid4())
                    concept.nodetype = 'Concept'
                    concept.legacyoid = legacyoid
                    concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
                    if concept_type == 'COLLECTOR':
                        concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'collector', 'category': 'label'})
                    if row[u'ALTLABELS'] != '':
                        for altlabel in row[u'ALTLABELS'].split(';'):
                            concept.addvalue({'value':altlabel, 'language': settings.LANGUAGE_CODE, 'type': 'altLabel', 'category': 'label'})

                    parent_concept_id = lookups.get_lookup(legacyoid=row[u'PARENTCONCEPTID']).id
                    lookups.add_relationship(source=parent_concept_id, type='narrower', target=concept.id, rownum=rows.line_num)
                    # don't add a member relationship between a top concept and it's children
                    if parent_concept_id != top_concept.id:
                        lookups.add_relationship(source=parent_concept_id, type='member', target=concept.id, rownum=rows.line_num)

                    # add the member relationship from the E55 type (typically) to their top members
                    if auth_doc_file_name in auth_file_to_entity_concept_mapping and row[u'PARENTCONCEPTID'] == auth_doc_file_name:
                        for entitytype_info in auth_file_to_entity_concept_mapping[auth_doc_file_name]:
                            lookups.add_relationship(source=entitytype_info['ENTITYTYPE_CONCEPTID'], type='member', target=concept.id, rownum=rows.line_num)

                    lookups.add_lookup(concept=concept, rownum=rows.line_num)

                except Exception as e:
                    concept_errors.append('ERROR in row %s: %s' % (rows.line_num, str(e)))

    except UnicodeDecodeError as e:
        concept_errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        concept_errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    report(filename, concept_errors)

    # ---- step 2: the optional values file ----
    value_errors = []
    try:
        # try and open the values file if it exists
        if exists(values_filepath):
            # valuetype -> category, read once and kept up to date below so
            # that the rows don't each trigger their own queries
            categories = dict(models.ValueTypes.objects.values_list('valuetype', 'category'))
            with open(values_filepath, 'rU') as f:
                rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','VALUE','VALUETYPE','PROVIDER'],
                    encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
                next(rows) # skip header row
                for row in rows:
                    try:
                        if 'ADDITIONAL' in row:
                            raise Exception('The row wasn\'t parsed properly. Additional fields found %s.  Add quotes to values that have commas in them.' % (row['ADDITIONAL']))

                        row_valuetype = row[u'VALUETYPE'].strip()
                        if row_valuetype not in categories:
                            valuetype = models.ValueTypes()
                            valuetype.valuetype = row_valuetype
                            valuetype.category = 'undefined'
                            valuetype.namespace = 'arches'
                            valuetype.save()
                            categories[row_valuetype] = valuetype.category

                        concept = lookups.get_lookup(legacyoid=row[u'CONCEPTID'])
                        # store the value under the same (stripped) type name its category was looked up with
                        concept.addvalue({'value':row[u'VALUE'], 'type': row_valuetype, 'category': categories[row_valuetype]})

                    except Exception as e:
                        value_errors.append('ERROR in row %s (%s): %s' % (rows.line_num, str(e), row))

    except UnicodeDecodeError as e:
        value_errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        value_errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    report(filename.replace('.csv', '.values.csv'), value_errors)

    # ---- step 3: write to the database ----
    save_errors = []

    # insert and index the concepts
    for key in lookups.lookup:
        entry = lookups.lookup[key]
        try:
            entry['concept'].save()
        except Exception as e:
            save_errors.append('ERROR in row %s (%s):\n%s\n' % (entry['rownum'], str(e), traceback.format_exc()))

        # NOTE(review): the concept is indexed even if saving it failed -- confirm this is intended
        entry['concept'].index(scheme=top_concept)

    # insert the concept relations
    # The values are handed to the driver as parameters instead of being
    # pasted into the statement.
    # NOTE(review): assumes cursor is a DB-API cursor using the '%s'
    # paramstyle (as psycopg2 and Django cursors do) -- confirm.
    insert_relation_sql = """
        INSERT INTO concepts.relations(conceptidfrom, conceptidto, relationtype)
        VALUES (%s, %s, %s);
    """
    for relation in lookups.concept_relationships:
        # render each value as text, just like the string-built statement did
        params = ['%s' % relation['source'], '%s' % relation['target'], '%s' % relation['type']]
        try:
            cursor.execute(insert_relation_sql, params)
        except Exception as e:
            save_errors.append('ERROR in row %s (%s):\n%s\n' % (relation['rownum'], str(e), traceback.format_exc()))

    report(filename, save_errors)

    return errors
示例#5
0
    def save_concepts_from_skos(self,
                                graph,
                                overwrite_options="overwrite",
                                staging_options="keep",
                                bulk_load=False,
                                path=""):
        """
        given an RDF graph, tries to save the concpets to the system

        Keyword arguments:
        overwrite_options -- 'overwrite', 'ignore'
        staging_options -- 'stage', 'keep'

        """

        baseuuid = uuid.uuid4()
        allowed_languages = models.DLanguage.objects.values_list("pk",
                                                                 flat=True)
        default_lang = settings.LANGUAGE_CODE
        if bulk_load is True:
            self.logger.setLevel(logging.ERROR)

        value_types = models.DValueType.objects.all()
        skos_value_types = value_types.filter(
            Q(namespace="skos") | Q(namespace="arches"))
        skos_value_types_list = list(
            skos_value_types.values_list("valuetype", flat=True))
        skos_value_types = {
            valuetype.valuetype: valuetype
            for valuetype in skos_value_types
        }
        dcterms_value_types = value_types.filter(namespace="dcterms")
        dcterms_identifier_type = dcterms_value_types.get(
            valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ""))

        # if the graph is of the type rdflib.graph.Graph
        if isinstance(graph, Graph):
            values = []

            # Search for ConceptSchemes first
            for scheme, v, o in graph.triples(
                (None, RDF.type, SKOS.ConceptScheme)):
                identifier = self.unwrapJsonLiteral(str(scheme))
                scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
                if bulk_load is True:
                    concept_scheme = models.Concept(
                        pk=scheme_id,
                        legacyoid=str(scheme),
                        nodetype_id="ConceptScheme")
                else:
                    concept_scheme = Concept({
                        'id': scheme_id,
                        'legacyoid': str(scheme),
                        'nodetype': 'ConceptScheme'
                    })

                for predicate, object in graph.predicate_objects(
                        subject=scheme):
                    if str(DCTERMS) in predicate and predicate.replace(
                            DCTERMS, "") in dcterms_value_types.values_list(
                                "valuetype", flat=True):
                        if not self.language_exists(object, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list(
                                "pk", flat=True)

                        try:
                            # first try and get any values associated with the concept_scheme
                            # predicate.replace(SKOS, '') should yield something like 'prefLabel' or 'scopeNote', etc..
                            value_type = dcterms_value_types.get(
                                valuetype=predicate.replace(DCTERMS, ""))
                            val = self.unwrapJsonLiteral(object)
                            if predicate == DCTERMS.title:
                                if bulk_load is True:
                                    values.append(
                                        models.Value(
                                            pk=val["value_id"] if
                                            (val["value_id"] != ""
                                             and val["value_id"] is not None)
                                            else str(uuid.uuid4()),
                                            concept_id=concept_scheme.pk,
                                            value=val["value"],
                                            language_id=object.language
                                            or default_lang,
                                            valuetype_id="prefLabel",
                                        ))
                                else:
                                    concept_scheme.addvalue({
                                        'id':
                                        val['value_id'],
                                        'value':
                                        val['value'],
                                        'language':
                                        object.language or default_lang,
                                        'type':
                                        'prefLabel',
                                        'category':
                                        value_type.category
                                    })
                                # print('Casting dcterms:title to skos:prefLabel')
                            elif predicate == DCTERMS.description:
                                if bulk_load is True:
                                    values.append(
                                        models.Value(
                                            pk=val["value_id"] if
                                            (val["value_id"] != ""
                                             and val["value_id"] is not None)
                                            else str(uuid.uuid4()),
                                            concept_id=concept_scheme.pk,
                                            value=val["value"],
                                            language_id=object.language
                                            or default_lang,
                                            valuetype_id="scopeNote",
                                        ))
                                else:
                                    concept_scheme.addvalue({
                                        'id':
                                        val['value_id'],
                                        'value':
                                        val['value'],
                                        'language':
                                        object.language or default_lang,
                                        'type':
                                        'scopeNote',
                                        'category':
                                        value_type.category
                                    })
                                # print('Casting dcterms:description to skos:scopeNote')
                            elif predicate == DCTERMS.identifier:
                                identifier = self.unwrapJsonLiteral(
                                    str(object))
                        except:
                            pass

                    if str(SKOS) in predicate:
                        # print predicate
                        if predicate == SKOS.hasTopConcept:
                            top_concept_id = self.generate_uuid_from_subject(
                                baseuuid, object)
                            self.relations.append({
                                "source": scheme_id,
                                "type": "hasTopConcept",
                                "target": top_concept_id,
                            })

                if bulk_load is True:
                    values.append(
                        models.Value(
                            pk=identifier["value_id"] if
                            (identifier["value_id"] != ""
                             and identifier["value_id"] is not None) else str(
                                 uuid.uuid4()),
                            concept_id=concept_scheme.pk,
                            value=identifier["value"],
                            language_id=default_lang,
                            valuetype_id=dcterms_identifier_type.valuetype,
                        ))
                else:
                    concept_scheme.addvalue({
                        'id':
                        identifier['value_id'],
                        'value':
                        identifier['value'],
                        'language':
                        default_lang,
                        'type':
                        dcterms_identifier_type.valuetype,
                        'category':
                        dcterms_identifier_type.category
                    })
                self.nodes.append(concept_scheme)

                # Search for Concepts
                for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
                    identifier = self.unwrapJsonLiteral(str(s))
                    if bulk_load is True:
                        concept = models.Concept(
                            pk=self.generate_uuid_from_subject(baseuuid, s),
                            legacyoid=str(s),
                            nodetype_id="Concept",
                        )
                    else:
                        concept = Concept({
                            'id':
                            self.generate_uuid_from_subject(baseuuid, s),
                            'legacyoid':
                            str(s),
                            'nodetype':
                            'Concept'
                        })

                    # loop through all the elements within a <skos:Concept> element
                    for predicate, object in graph.predicate_objects(
                            subject=s):
                        if str(SKOS) in predicate or str(ARCHES) in predicate:
                            if not self.language_exists(
                                    object, allowed_languages):
                                allowed_languages = models.DLanguage.objects.values_list(
                                    "pk", flat=True)

                            # this is essentially the skos element type within a <skos:Concept>
                            # element (eg: prefLabel, broader, etc...)
                            relation_or_value_type = predicate.replace(
                                SKOS, "").replace(ARCHES, "")

                            if relation_or_value_type in skos_value_types_list:
                                value_type = skos_value_types[
                                    relation_or_value_type]
                                val = self.unwrapJsonLiteral(object)
                                if bulk_load is True:
                                    values.append(
                                        models.Value(
                                            pk=val["value_id"] if
                                            (val["value_id"] != ""
                                             and val["value_id"] is not None)
                                            else str(uuid.uuid4()),
                                            concept_id=concept.pk,
                                            value=val["value"],
                                            language_id=object.language
                                            or default_lang,
                                            valuetype_id=value_type.valuetype,
                                        ))
                                else:
                                    concept.addvalue({
                                        'id':
                                        val['value_id'],
                                        'value':
                                        val['value'],
                                        'language':
                                        object.language or default_lang,
                                        'type':
                                        value_type.valuetype,
                                        'category':
                                        value_type.category
                                    })
                            elif predicate == SKOS.broader:
                                self.relations.append({
                                    "source":
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                    "type":
                                    "narrower",
                                    "target":
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                })
                            elif predicate == SKOS.narrower:
                                self.relations.append({
                                    "source":
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                    "type":
                                    relation_or_value_type,
                                    "target":
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                })
                            elif predicate == SKOS.related:
                                self.relations.append({
                                    "source":
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                    "type":
                                    relation_or_value_type,
                                    "target":
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                })

                        elif predicate == DCTERMS.identifier:
                            identifier = self.unwrapJsonLiteral(str(object))

                    if bulk_load is True:
                        values.append(
                            models.Value(
                                pk=identifier["value_id"] if
                                (identifier["value_id"] != ""
                                 and identifier["value_id"] is not None) else
                                str(uuid.uuid4()),
                                concept_id=concept.pk,
                                value=identifier["value"],
                                language_id=default_lang,
                                valuetype_id=dcterms_identifier_type.valuetype,
                            ))
                    else:
                        concept.addvalue({
                            'id':
                            identifier['value_id'],
                            'value':
                            identifier['value'],
                            'language':
                            default_lang,
                            'type':
                            dcterms_identifier_type.valuetype,
                            'category':
                            dcterms_identifier_type.category
                        })
                    self.nodes.append(concept)

            # Search for SKOS.Collections
            for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
                # print "%s %s %s " % (s,v,o)
                if bulk_load is True:
                    concept = models.Concept(
                        pk=self.generate_uuid_from_subject(baseuuid, s),
                        legacyoid=str(s),
                        nodetype_id="Collection",
                    )
                else:
                    concept = Concept({
                        'id':
                        self.generate_uuid_from_subject(baseuuid, s),
                        'legacyoid':
                        str(s),
                        'nodetype':
                        'Collection'
                    })
                # loop through all the elements within a <skos:Concept> element
                for predicate, object in graph.predicate_objects(subject=s):
                    if str(SKOS) in predicate or str(ARCHES) in predicate:
                        if not self.language_exists(object, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list(
                                "pk", flat=True)

                        # this is essentially the skos element type within a <skos:Concept>
                        # element (eg: prefLabel, broader, etc...)
                        relation_or_value_type = predicate.replace(
                            SKOS, "").replace(ARCHES, "")

                        if relation_or_value_type in skos_value_types_list:
                            value_type = skos_value_types[
                                relation_or_value_type]
                            val = self.unwrapJsonLiteral(object)
                            if bulk_load is True:
                                values.append(
                                    models.Value(
                                        pk=val["value_id"],
                                        concept_id=concept.pk,
                                        value=val["value"],
                                        language_id=object.language
                                        or default_lang,
                                        valuetype_id=value_type.valuetype,
                                    ))
                            else:
                                concept.addvalue({
                                    'id':
                                    val['value_id'],
                                    'value':
                                    val['value'],
                                    'language':
                                    object.language or default_lang,
                                    'type':
                                    value_type.valuetype,
                                    'category':
                                    value_type.category
                                })

                self.nodes.append(concept)

            for s, v, o in graph.triples((None, SKOS.member, None)):
                # print "%s %s %s " % (s,v,o)
                self.member_relations.append({
                    "source":
                    self.generate_uuid_from_subject(baseuuid, s),
                    "type":
                    "member",
                    "target":
                    self.generate_uuid_from_subject(baseuuid, o),
                })

            # insert and index the concpets
            scheme_node = None
            orphaned_concepts = {}
            concepts = []
            # bulk_create() does NOT call the object's save() method, nor pre_save/post_save
            # TODO: figure out how to ensure functions get called with bulk_create()
            with transaction.atomic():
                if bulk_load is True:
                    models.Concept.objects.bulk_create(self.nodes,
                                                       ignore_conflicts=True)
                    models.Value.objects.bulk_create(values,
                                                     ignore_conflicts=True)
                    self.logger.info(
                        f"Bulk created: {len(self.nodes)} concepts and {len(values)} values from {path}"
                    )
                    for node in self.nodes:
                        if node.nodetype.nodetype == "ConceptScheme":
                            scheme_node = Concept({
                                "id": node.conceptid,
                                "legacyoid": str(scheme),
                                "nodetype": "ConceptScheme",
                            })
                        elif node.nodetype.nodetype == "Concept":
                            orphaned_concepts[str(node.conceptid)] = node
                        if staging_options == "stage":
                            try:
                                models.Concept.objects.get(pk=node.conceptid)
                            except:
                                # this is a new concept, so add a reference to it in the Candiates schema
                                if node.nodetype.nodetype != "ConceptScheme":
                                    self.relations.append({
                                        "source":
                                        "00000000-0000-0000-0000-000000000006",
                                        "type":
                                        "narrower",
                                        "target":
                                        node.conceptid,
                                    })

                        if overwrite_options == "overwrite":
                            node.save()
                            # concepts.append(node)
                        elif overwrite_options == "ignore":
                            try:
                                # don't do anything if the concept already exists
                                models.Concept.objects.get(pk=node.conceptid)
                            except:
                                # else save it
                                node.save()
                                # concepts.append(node)
                else:
                    for node in self.nodes:
                        if node.nodetype == 'ConceptScheme':
                            scheme_node = node
                        elif node.nodetype == 'Concept':
                            orphaned_concepts[str(node.id)] = node
                        if staging_options == 'stage':
                            try:
                                models.Concept.objects.get(pk=node.id)
                            except:
                                # this is a new concept, so add a reference to it in the Candiates schema
                                if node.nodetype != 'ConceptScheme':
                                    self.relations.append({
                                        'source':
                                        '00000000-0000-0000-0000-000000000006',
                                        'type': 'narrower',
                                        'target': node.id
                                    })

                        if overwrite_options == 'overwrite':
                            node.save()
                        elif overwrite_options == 'ignore':
                            try:
                                # don't do anything if the concept already exists
                                models.Concept.objects.get(pk=node.id)
                            except:
                                # else save it
                                node.save()

                # Concept().bulk_save(concepts, None)

                # insert the concept relations
                # TODO: make sure this still works with code commented out, then remove
                # relation_objs = []
                for relation in self.relations:
                    newrelation, created = models.Relation.objects.get_or_create(
                        conceptfrom_id=relation["source"],
                        conceptto_id=relation["target"],
                        relationtype_id=relation["type"],
                    )
                    # models.Relation.objects.bulk_create(relation_objs)
                    # check for orphaned concepts, every concept except the concept scheme should have an edge pointing to it
                    if (relation["type"] == "narrower" or relation["type"]
                            == "hasTopConcept") and orphaned_concepts.get(
                                relation["target"]) is not None:
                        orphaned_concepts.pop(str(relation["target"]))
                    # relation_objs.append(newrelation)

                if len(orphaned_concepts.keys()) > 0:
                    if scheme_node:
                        orphaned_scheme = Concept({
                            "id": uuid.uuid4(),
                            "legacyoid": uuid.uuid4(),
                            "nodetype": "ConceptScheme",
                        })
                        orphaned_scheme_value = None
                        for value in scheme_node.values:
                            if value.type == "prefLabel":
                                orphaned_scheme.addvalue({
                                    "id":
                                    uuid.uuid4(),
                                    "value":
                                    "ORPHANS - " + value.value,
                                    "language":
                                    value.language,
                                    "type":
                                    value.type,
                                    "category":
                                    value.category,
                                })
                        orphaned_scheme.save()
                        for (
                                orphaned_concept_id,
                                orphaned_concept,
                        ) in orphaned_concepts.items():
                            models.Relation.objects.create(
                                conceptfrom_id=str(orphaned_scheme.id),
                                conceptto_id=orphaned_concept_id,
                                relationtype_id="narrower",
                            )
                        self.logger.warning(
                            "The SKOS file appears to have orphaned concepts.")

                # need to index after the concepts and relations have been entered into the db
                # so that the proper context gets indexed with the concept
                if scheme_node:
                    scheme_node.bulk_index()

            # insert the concept collection relations
            # we do this outide a transaction so that we can load incomplete collections
            # relation_objs = []
            # TODO: debug bulk_create to speed up this section of skos
            for relation in self.member_relations:
                try:
                    # if bulk_load is True:
                    # newrelation = models.Relation(
                    #     conceptfrom_id=relation['source'],
                    #     conceptto_id=relation['target'],
                    #     relationtype_id=relation['type']
                    # )
                    # relation_objs.append(newrelation)
                    # else:
                    newrelation, created = models.Relation.objects.get_or_create(
                        conceptfrom_id=relation["source"],
                        conceptto_id=relation["target"],
                        relationtype_id=relation["type"],
                    )
                except IntegrityError as e:
                    self.logger.warning(e)
                    pass

            # if bulk_load is True:
            #   models.Relation.objects.bulk_create(relation_objs, ignore_conflicts=True)

            return scheme_node
        else:
            raise Exception(
                "graph argument should be of type rdflib.graph.Graph")
示例#6
0
文件: csvfile.py 项目: fargeo/arches
                def create_reference_data(new_concepts, create_collections):
                    """
                    Create the reference data (concepts and collections, or domain options)
                    required by the values being imported.

                    Arguments:
                    new_concepts -- mapping of node id to a mapping of {concept or domain id: label}
                      holding the values that have to exist under that node
                    create_collections -- True to create a new collection for each concept node,
                      any other value to add to the collection already assigned to the node
                      (creating and assigning one if the lookup fails)

                    Warnings are recorded on self.errors; nothing is returned.

                    """

                    def add_warning(message):
                        # Record each warning exactly once.  Re-adding a cumulative list of
                        # warnings on every call would duplicate all earlier entries.
                        self.errors += [{'type': 'WARNING', 'message': message}]

                    def build_labelled_concept(conceptid, legacyoid, nodetype, label):
                        # Build (without saving) a concept that carries a single prefLabel
                        # in the default language.
                        new_concept = Concept({
                            'id': conceptid,
                            'legacyoid': legacyoid,
                            'nodetype': nodetype
                        })
                        new_concept.addvalue({
                            'id': str(uuid.uuid4()),
                            'value': label,
                            'language': settings.LANGUAGE_CODE,
                            'type': 'prefLabel'
                        })
                        return new_concept

                    def create_collection_for_node(node, collectionid, legacyoid):
                        # Create a collection, assign it to the node and persist both.
                        new_collection = build_labelled_concept(
                            collectionid, legacyoid, 'Collection', node.name + '_import')
                        node.config['rdmCollection'] = collectionid
                        node.save()
                        new_collection.save()
                        return new_collection

                    candidates = Concept().get(id='00000000-0000-0000-0000-000000000006')
                    # .items() is available on both Python 2 and Python 3 mappings
                    for arches_nodeid, concepts in new_concepts.items():
                        collectionid = str(uuid.uuid4())
                        topconceptid = str(uuid.uuid4())
                        node = Node.objects.get(nodeid=arches_nodeid)

                        # if node.datatype is concept or concept-list create concepts and collections
                        if node.datatype in ['concept', 'concept-list']:
                            collection_legacyoid = node.name + '_' + str(node.graph_id) + '_import'

                            # Explicit comparison kept on purpose: truthy values that do not
                            # equal True keep taking the "add" branch, as they always have.
                            if create_collections == True:
                                # check to see that there is not already a collection for this node
                                if node.config['rdmCollection'] is not None:
                                    add_warning(
                                        'A collection already exists for the {0} node. Use the add option to add concepts to this collection.'.format(node.name))
                                    collection = None
                                else:
                                    # no collection is assigned to this node: reuse one with the same
                                    # legacyoid if the lookup succeeds, otherwise create and assign one
                                    try:
                                        collection = Concept().get(legacyoid=collection_legacyoid)
                                    except Exception:
                                        # the concrete lookup error type is not visible here, so any
                                        # ordinary failure is treated as "not found"
                                        collection = create_collection_for_node(
                                            node, collectionid, collection_legacyoid)
                                    else:
                                        add_warning(
                                            'A collection with the legacyid {0} already exists.'.format(collection_legacyoid))
                            else:
                                # "add" mode: use the collection associated with the node, creating
                                # and associating one if it cannot be retrieved
                                try:
                                    collection = Concept().get(id=node.config['rdmCollection'])
                                except Exception:
                                    collection = create_collection_for_node(
                                        node, collectionid, collection_legacyoid)

                            if collection is not None:
                                topconcept_legacyoid = node.name + '_' + str(node.graph_id)
                                # Check if top concept already exists, if not create it and add to candidates scheme
                                try:
                                    topconcept = Concept().get(legacyoid=topconcept_legacyoid)
                                except Exception:
                                    topconcept = build_labelled_concept(
                                        topconceptid, topconcept_legacyoid, 'Concept', node.name + '_import')
                                    topconcept.save()
                                candidates.add_relation(topconcept, 'narrower')

                                # create child concepts and relate to top concept and collection accordingly
                                for conceptid, value in concepts.items():
                                    concept_legacyoid = value + '_' + node.name + '_' + str(node.graph_id)
                                    # check if concept already exists, if not create and add to topconcept and collection
                                    try:
                                        # the children are re-read on every pass so that concepts
                                        # created earlier in this loop are found as well
                                        conceptid = [
                                            child for child in topconcept.get_child_concepts(topconcept.id)
                                            if child[1] == value
                                        ][0][0]
                                        concept = Concept().get(id=conceptid)
                                    except Exception:
                                        concept = build_labelled_concept(
                                            conceptid, concept_legacyoid, 'Concept', value)
                                        concept.save()
                                    collection.add_relation(concept, 'member')
                                    topconcept.add_relation(concept, 'narrower')

                        # if node.datatype is domain or domain-list create options array in node.config
                        elif node.datatype in ['domain-value', 'domain-value-list']:
                            for domainid, value in concepts.items():
                                # check if value already exists in domain
                                if value not in [option['text'] for option in node.config['options']]:
                                    node.config['options'].append({
                                        "text": value,
                                        "selected": False,
                                        "id": domainid
                                    })
                                    node.save()
示例#7
0
    def save_concepts_from_skos(self,
                                graph,
                                overwrite_options='overwrite',
                                staging_options='keep'):
        """
        given an RDF graph, tries to save the concpets to the system

        Keyword arguments:
        overwrite_options -- 'overwrite', 'ignore'
        staging_options -- 'stage', 'keep'

        """

        baseuuid = uuid.uuid4()
        allowed_languages = models.DLanguage.objects.values_list('pk',
                                                                 flat=True)
        default_lang = settings.LANGUAGE_CODE

        value_types = models.DValueType.objects.all()
        skos_value_types = value_types.filter(
            Q(namespace='skos') | Q(namespace='arches'))
        skos_value_types_list = list(
            skos_value_types.values_list('valuetype', flat=True))
        skos_value_types = {
            valuetype.valuetype: valuetype
            for valuetype in skos_value_types
        }
        dcterms_value_types = value_types.filter(namespace='dcterms')
        dcterms_identifier_type = dcterms_value_types.get(
            valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ''))

        # if the graph is of the type rdflib.graph.Graph
        if isinstance(graph, Graph):

            # Search for ConceptSchemes first
            for scheme, v, o in graph.triples(
                (None, RDF.type, SKOS.ConceptScheme)):
                identifier = self.unwrapJsonLiteral(str(scheme))
                scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
                concept_scheme = Concept({
                    'id': scheme_id,
                    'legacyoid': str(scheme),
                    'nodetype': 'ConceptScheme'
                })

                for predicate, object in graph.predicate_objects(
                        subject=scheme):
                    if str(DCTERMS) in predicate and predicate.replace(
                            DCTERMS, '') in dcterms_value_types.values_list(
                                'valuetype', flat=True):
                        if not self.language_exists(object, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list(
                                'pk', flat=True)

                        try:
                            # first try and get any values associated with the concept_scheme
                            # predicate.replace(SKOS, '') should yield something like 'prefLabel' or 'scopeNote', etc..
                            value_type = dcterms_value_types.get(
                                valuetype=predicate.replace(DCTERMS, ''))
                            val = self.unwrapJsonLiteral(object)
                            if predicate == DCTERMS.title:
                                concept_scheme.addvalue({
                                    'id':
                                    val['value_id'],
                                    'value':
                                    val['value'],
                                    'language':
                                    object.language or default_lang,
                                    'type':
                                    'prefLabel',
                                    'category':
                                    value_type.category
                                })
                                print 'Casting dcterms:title to skos:prefLabel'
                            elif predicate == DCTERMS.description:
                                concept_scheme.addvalue({
                                    'id':
                                    val['value_id'],
                                    'value':
                                    val['value'],
                                    'language':
                                    object.language or default_lang,
                                    'type':
                                    'scopeNote',
                                    'category':
                                    value_type.category
                                })
                                print 'Casting dcterms:description to skos:scopeNote'
                            elif predicate == DCTERMS.identifier:
                                identifier = self.unwrapJsonLiteral(
                                    str(object))
                        except:
                            pass

                    if str(SKOS) in predicate:
                        # print predicate
                        if predicate == SKOS.hasTopConcept:
                            top_concept_id = self.generate_uuid_from_subject(
                                baseuuid, object)
                            self.relations.append({
                                'source': scheme_id,
                                'type': 'hasTopConcept',
                                'target': top_concept_id
                            })

                concept_scheme.addvalue({
                    'id':
                    identifier['value_id'],
                    'value':
                    identifier['value'],
                    'language':
                    default_lang,
                    'type':
                    dcterms_identifier_type.valuetype,
                    'category':
                    dcterms_identifier_type.category
                })
                self.nodes.append(concept_scheme)

                # Search for Concepts
                for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
                    identifier = self.unwrapJsonLiteral(str(s))
                    concept = Concept({
                        'id':
                        self.generate_uuid_from_subject(baseuuid, s),
                        'legacyoid':
                        str(s),
                        'nodetype':
                        'Concept'
                    })

                    # loop through all the elements within a <skos:Concept> element
                    for predicate, object in graph.predicate_objects(
                            subject=s):
                        if str(SKOS) in predicate or str(ARCHES) in predicate:
                            if not self.language_exists(
                                    object, allowed_languages):
                                allowed_languages = models.DLanguage.objects.values_list(
                                    'pk', flat=True)

                            # this is essentially the skos element type within a <skos:Concept>
                            # element (eg: prefLabel, broader, etc...)
                            relation_or_value_type = predicate.replace(
                                SKOS, '').replace(ARCHES, '')

                            if relation_or_value_type in skos_value_types_list:
                                value_type = skos_value_types[
                                    relation_or_value_type]
                                val = self.unwrapJsonLiteral(object)
                                concept.addvalue({
                                    'id':
                                    val['value_id'],
                                    'value':
                                    val['value'],
                                    'language':
                                    object.language or default_lang,
                                    'type':
                                    value_type.valuetype,
                                    'category':
                                    value_type.category
                                })
                            elif predicate == SKOS.broader:
                                self.relations.append({
                                    'source':
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                    'type':
                                    'narrower',
                                    'target':
                                    self.generate_uuid_from_subject(
                                        baseuuid, s)
                                })
                            elif predicate == SKOS.narrower:
                                self.relations.append({
                                    'source':
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                    'type':
                                    relation_or_value_type,
                                    'target':
                                    self.generate_uuid_from_subject(
                                        baseuuid, object)
                                })
                            elif predicate == SKOS.related:
                                self.relations.append({
                                    'source':
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                    'type':
                                    relation_or_value_type,
                                    'target':
                                    self.generate_uuid_from_subject(
                                        baseuuid, object)
                                })

                        elif predicate == DCTERMS.identifier:
                            identifier = self.unwrapJsonLiteral(str(object))

                    concept.addvalue({
                        'id':
                        identifier['value_id'],
                        'value':
                        identifier['value'],
                        'language':
                        default_lang,
                        'type':
                        dcterms_identifier_type.valuetype,
                        'category':
                        dcterms_identifier_type.category
                    })
                    self.nodes.append(concept)

            # Search for SKOS.Collections
            for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
                # print "%s %s %s " % (s,v,o)
                concept = Concept({
                    'id':
                    self.generate_uuid_from_subject(baseuuid, s),
                    'legacyoid':
                    str(s),
                    'nodetype':
                    'Collection'
                })
                # loop through all the elements within a <skos:Concept> element
                for predicate, object in graph.predicate_objects(subject=s):
                    if str(SKOS) in predicate or str(ARCHES) in predicate:
                        if not self.language_exists(object, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list(
                                'pk', flat=True)

                        # this is essentially the skos element type within a <skos:Concept>
                        # element (eg: prefLabel, broader, etc...)
                        relation_or_value_type = predicate.replace(
                            SKOS, '').replace(ARCHES, '')

                        if relation_or_value_type in skos_value_types_list:
                            value_type = skos_value_types[
                                relation_or_value_type]
                            val = self.unwrapJsonLiteral(object)
                            concept.addvalue({
                                'id': val['value_id'],
                                'value': val['value'],
                                'language': object.language or default_lang,
                                'type': value_type.valuetype,
                                'category': value_type.category
                            })

                self.nodes.append(concept)

            for s, v, o in graph.triples((None, SKOS.member, None)):
                # print "%s %s %s " % (s,v,o)
                self.member_relations.append({
                    'source':
                    self.generate_uuid_from_subject(baseuuid, s),
                    'type':
                    'member',
                    'target':
                    self.generate_uuid_from_subject(baseuuid, o)
                })

            # insert and index the concpets
            scheme_node = None
            with transaction.atomic():
                for node in self.nodes:
                    if node.nodetype == 'ConceptScheme':
                        scheme_node = node
                    if staging_options == 'stage':
                        try:
                            models.Concept.objects.get(pk=node.id)
                        except:
                            # this is a new concept, so add a reference to it in the Candiates schema
                            if node.nodetype != 'ConceptScheme':
                                self.relations.append({
                                    'source':
                                    '00000000-0000-0000-0000-000000000006',
                                    'type': 'narrower',
                                    'target': node.id
                                })

                    if overwrite_options == 'overwrite':
                        node.save()
                    elif overwrite_options == 'ignore':
                        try:
                            # don't do anything if the concept already exists
                            models.Concept.objects.get(pk=node.id)
                        except:
                            # else save it
                            node.save()

                # insert the concept relations
                for relation in self.relations:
                    newrelation = models.Relation.objects.get_or_create(
                        conceptfrom_id=relation['source'],
                        conceptto_id=relation['target'],
                        relationtype_id=relation['type'])

                # need to index after the concepts and relations have been entered into the db
                # so that the proper context gets indexed with the concept
                if scheme_node:
                    scheme_node.bulk_index()

            # insert the concept collection relations
            # we do this outide a transaction so that we can load incomplete collections
            for relation in self.member_relations:
                try:
                    newrelation = models.Relation.objects.get_or_create(
                        conceptfrom_id=relation['source'],
                        conceptto_id=relation['target'],
                        relationtype_id=relation['type'])
                except IntegrityError as e:
                    self.logger.warning(e.message)

            return scheme_node
        else:
            raise Exception(
                'graph argument should be of type rdflib.graph.Graph')
示例#8
0
    def save_concepts_from_skos(self, graph):
        """
        Given an RDF graph, try to save the concepts it describes to the system.

        Every skos:ConceptScheme found in the graph becomes a 'ConceptScheme'
        node and every subject that is skos:inScheme of such a scheme becomes a
        'Concept' node.  Nodes are collected on self.nodes, relations on
        self.relations, and both are saved (and the nodes indexed) inside one
        transaction.atomic() block.

        Arguments:
        graph -- the rdflib.graph.Graph holding the SKOS data

        Returns self.

        Raises Exception when graph is not an rdflib.graph.Graph, or when no
        node could be collected because the graph holds no skos:ConceptScheme.

        """

        # fail fast, before any query is evaluated, when the input is unusable
        if not isinstance(graph, Graph):
            raise Exception(
                'graph argument should be of type rdflib.graph.Graph')

        baseuuid = uuid.uuid4()

        # load the lookup tables once instead of querying inside the loops
        allowed_languages = set(
            models.DLanguages.objects.values_list('pk', flat=True))
        value_types = models.ValueTypes.objects.all()
        skos_value_types = dict(
            (value_type.valuetype, value_type)
            for value_type in value_types.filter(namespace='skos'))
        dcterms_value_types = dict(
            (value_type.valuetype, value_type)
            for value_type in value_types.filter(namespace='dcterms'))

        def register_language(rdf_term):
            # Only terms carrying a real language tag may add a language:
            # a literal without a tag reports language=None, which must not
            # be turned into a DLanguages row.
            language = getattr(rdf_term, 'language', None)
            if language and language not in allowed_languages:
                newlang = models.DLanguages()
                newlang.pk = language
                newlang.languagename = language
                newlang.isdefault = False
                newlang.save()
                allowed_languages.add(language)

        # Search for ConceptSchemes first
        for scheme, v, o in graph.triples(
                (None, RDF.type, SKOS.ConceptScheme)):
            scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
            concept_scheme = Concept({
                'id': scheme_id,
                'legacyoid': str(scheme),
                'nodetype': 'ConceptScheme'
            })

            for predicate, obj in graph.predicate_objects(subject=scheme):
                # predicate.replace(DCTERMS, '') should yield something like
                # 'title' or 'description', etc..
                dcterms_type = predicate.replace(DCTERMS, '')
                if (str(DCTERMS) in predicate
                        and dcterms_type in dcterms_value_types):
                    register_language(obj)
                    value_type = dcterms_value_types[dcterms_type]

                    try:
                        # the scheme's title and description are stored as the
                        # skos label and note of the scheme node
                        if predicate == DCTERMS.title:
                            concept_scheme.addvalue({
                                'value': obj,
                                'language': obj.language,
                                'type': 'prefLabel',
                                'category': value_type.category
                            })
                            print('Casting dcterms:title to skos:prefLabel')
                        elif predicate == DCTERMS.description:
                            concept_scheme.addvalue({
                                'value': obj,
                                'language': obj.language,
                                'type': 'scopeNote',
                                'category': value_type.category
                            })
                            print('Casting dcterms:description to skos:scopeNote')
                    except Exception:
                        # best effort: a scheme value that cannot be stored
                        # must not abort the whole import
                        pass

                if predicate == SKOS.hasTopConcept:
                    self.relations.append({
                        'source': scheme_id,
                        'type': 'hasTopConcept',
                        'target': self.generate_uuid_from_subject(
                            baseuuid, obj)
                    })

            self.nodes.append(concept_scheme)

            # Search for Concepts
            for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
                subject_id = self.generate_uuid_from_subject(baseuuid, s)
                concept = Concept({
                    'id': subject_id,
                    'legacyoid': str(s),
                    'nodetype': 'Concept'
                })

                # loop through all the elements within a <skos:Concept> element
                for predicate, obj in graph.predicate_objects(subject=s):
                    if str(SKOS) not in predicate:
                        continue

                    register_language(obj)

                    # this is essentially the skos element type within a
                    # <skos:Concept> element (eg: prefLabel, broader, etc...)
                    relation_or_value_type = predicate.replace(SKOS, '')

                    if relation_or_value_type in skos_value_types:
                        value_type = skos_value_types[relation_or_value_type]
                        concept.addvalue({
                            'value': obj,
                            'language': obj.language,
                            'type': value_type.valuetype,
                            'category': value_type.category
                        })
                    elif predicate == SKOS.broader:
                        # "s broader obj" is stored as "obj narrower s"
                        self.relations.append({
                            'source': self.generate_uuid_from_subject(
                                baseuuid, obj),
                            'type': 'narrower',
                            'target': subject_id
                        })
                    elif predicate in (SKOS.narrower, SKOS.related):
                        self.relations.append({
                            'source': subject_id,
                            'type': relation_or_value_type,
                            'target': self.generate_uuid_from_subject(
                                baseuuid, obj)
                        })

                self.nodes.append(concept)

        # checked after the search so that it can actually fire: without a
        # ConceptScheme nothing was collected and nothing would be imported
        if not self.nodes:
            raise Exception('No ConceptScheme found in file.')

        # insert and index the concepts
        with transaction.atomic():
            for node in self.nodes:
                node.save()

            # insert the concept relations
            for relation in self.relations:
                newrelation = models.ConceptRelations()
                newrelation.relationid = str(uuid.uuid4())
                newrelation.conceptidfrom_id = relation['source']
                newrelation.conceptidto_id = relation['target']
                newrelation.relationtype_id = relation['type']
                newrelation.save()

            # need to index after the concepts and relations have been entered into the db
            # so that the proper context gets indexed with the concept
            for node in self.nodes:
                node.index()

        return self
示例#9
0
文件: skos.py 项目: azerbini/eamena
    def save_concepts_from_skos(self, graph, overwrite_options='overwrite', staging_options='keep'):
        """
        Given an RDF graph, try to save the concepts it describes to the system.

        skos:ConceptScheme subjects become 'ConceptScheme' nodes, subjects that
        are skos:inScheme of a scheme become 'Concept' nodes and skos:Collection
        subjects become 'Collection' nodes.  Nodes are collected on self.nodes
        and relations on self.relations; both are written, and the nodes
        indexed, inside one transaction.atomic() block.

        Arguments:
        graph -- the rdflib.graph.Graph holding the SKOS data

        Keyword arguments:
        overwrite_options -- 'overwrite' saves every node, 'ignore' only saves
                             nodes that are not in the database yet
        staging_options -- 'stage' additionally relates every new node that is
                           not a ConceptScheme to the concept with id
                           00000000-0000-0000-0000-000000000006 as 'narrower';
                           'keep' adds no such relation

        Returns self.  Raises Exception when graph is not an rdflib.graph.Graph.

        """

        # if the graph is not of the type rdflib.graph.Graph there is nothing to load
        if not isinstance(graph, Graph):
            raise Exception('graph argument should be of type rdflib.graph.Graph')

        baseuuid = uuid.uuid4()
        allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)
        default_lang = settings.LANGUAGE_CODE

        # load the value type lookups once; they are consulted for every predicate
        value_types = models.DValueType.objects.all()
        skos_value_types = {
            value_type.valuetype: value_type
            for value_type in value_types.filter(Q(namespace='skos') | Q(namespace='arches'))
        }
        dcterms_value_types = {
            value_type.valuetype: value_type
            for value_type in value_types.filter(namespace='dcterms')
        }

        def add_skos_value(concept, rdf_object, value_type):
            # store a literal on the concept, falling back to the default
            # language when the literal carries no language tag
            val = self.unwrapJsonLiteral(rdf_object)
            concept.addvalue({
                'id': val['value_id'],
                'value': val['value'],
                'language': rdf_object.language or default_lang,
                'type': value_type.valuetype,
                'category': value_type.category
            })

        # Search for ConceptSchemes first
        for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
            scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
            concept_scheme = Concept({
                'id': scheme_id,
                'legacyoid': str(scheme),
                'nodetype': 'ConceptScheme'
            })

            for predicate, obj in graph.predicate_objects(subject=scheme):
                # predicate.replace(DCTERMS, '') should yield something like 'title' or 'description', etc..
                dcterms_type = predicate.replace(DCTERMS, '')
                if str(DCTERMS) in predicate and dcterms_type in dcterms_value_types:
                    if not self.language_exists(obj, allowed_languages):
                        allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                    value_type = dcterms_value_types[dcterms_type]
                    try:
                        # first try and get any values associated with the concept_scheme
                        val = self.unwrapJsonLiteral(obj)
                        if predicate == DCTERMS.title:
                            concept_scheme.addvalue({'id': val['value_id'], 'value': val['value'], 'language': obj.language or default_lang, 'type': 'prefLabel', 'category': value_type.category})
                            print('Casting dcterms:title to skos:prefLabel')
                        elif predicate == DCTERMS.description:
                            concept_scheme.addvalue({'id': val['value_id'], 'value': val['value'], 'language': obj.language or default_lang, 'type': 'scopeNote', 'category': value_type.category})
                            print('Casting dcterms:description to skos:scopeNote')
                    except Exception:
                        # best effort: a scheme value that cannot be stored
                        # must not abort the whole import
                        pass

                if predicate == SKOS.hasTopConcept:
                    top_concept_id = self.generate_uuid_from_subject(baseuuid, obj)
                    self.relations.append({'source': scheme_id, 'type': 'hasTopConcept', 'target': top_concept_id})

            # NOTE: the original code tested len(self.nodes) == 0 right after
            # this append, which could never be true; the dead check is gone.
            # A graph without a ConceptScheme is still accepted because
            # collections are loaded independently of schemes below.
            self.nodes.append(concept_scheme)

            # Search for Concepts
            for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
                subject_id = self.generate_uuid_from_subject(baseuuid, s)
                concept = Concept({
                    'id': subject_id,
                    'legacyoid': str(s),
                    'nodetype': 'Concept'
                })

                # loop through all the elements within a <skos:Concept> element
                for predicate, obj in graph.predicate_objects(subject=s):
                    if not (str(SKOS) in predicate or str(ARCHES) in predicate):
                        continue

                    if not self.language_exists(obj, allowed_languages):
                        allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                    # this is essentially the skos element type within a <skos:Concept> element (eg: prefLabel, broader, etc...)
                    relation_or_value_type = predicate.replace(SKOS, '').replace(ARCHES, '')

                    if relation_or_value_type in skos_value_types:
                        add_skos_value(concept, obj, skos_value_types[relation_or_value_type])
                    elif predicate == SKOS.broader:
                        # "s broader obj" is stored as "obj narrower s"
                        self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, obj), 'type': 'narrower', 'target': subject_id})
                    elif predicate in (SKOS.narrower, SKOS.related):
                        self.relations.append({'source': subject_id, 'type': relation_or_value_type, 'target': self.generate_uuid_from_subject(baseuuid, obj)})

                self.nodes.append(concept)

        # Search for SKOS.Collections
        for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
            collection = Concept({
                'id': self.generate_uuid_from_subject(baseuuid, s),
                'legacyoid': str(s),
                'nodetype': 'Collection'
            })
            # loop through all the elements within a <skos:Collection> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if not (str(SKOS) in predicate or str(ARCHES) in predicate):
                    continue

                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                relation_or_value_type = predicate.replace(SKOS, '').replace(ARCHES, '')
                if relation_or_value_type in skos_value_types:
                    add_skos_value(collection, obj, skos_value_types[relation_or_value_type])

            self.nodes.append(collection)

        for s, v, o in graph.triples((None, SKOS.member, None)):
            self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, s), 'type': 'member', 'target': self.generate_uuid_from_subject(baseuuid, o)})

        # the database is only consulted when an option depends on the answer
        check_existing = staging_options == 'stage' or overwrite_options == 'ignore'

        # insert and index the concepts
        with transaction.atomic():
            for node in self.nodes:
                # one explicit existence query answers both the staging and the
                # overwrite question, and no longer hides unrelated errors
                is_new = check_existing and not models.Concept.objects.filter(pk=node.id).exists()

                if staging_options == 'stage' and is_new and node.nodetype != 'ConceptScheme':
                    # this is a new concept, so add a reference to it in the Candidates scheme
                    self.relations.append({'source': '00000000-0000-0000-0000-000000000006', 'type': 'narrower', 'target': node.id})

                # 'overwrite' always saves; 'ignore' leaves existing concepts untouched
                if overwrite_options == 'overwrite' or (overwrite_options == 'ignore' and is_new):
                    node.save()

            # insert the concept relations
            for relation in self.relations:
                models.Relation.objects.get_or_create(
                    conceptfrom_id=relation['source'],
                    conceptto_id=relation['target'],
                    relationtype_id=relation['type']
                )

            # need to index after the concepts and relations have been entered into the db
            # so that the proper context gets indexed with the concept
            for node in self.nodes:
                node.index()

        return self
示例#10
0
                def create_reference_data(new_concepts, create_collections):
                    """
                    Create the reference data required by the nodes listed in new_concepts.

                    For nodes of datatype 'concept' / 'concept-list' a collection, a top
                    concept (related to the candidates concept as 'narrower') and one child
                    concept per value are looked up or created.  For nodes of datatype
                    'domain-value' / 'domain-value-list' missing values are appended to
                    node.config['options'].

                    Arguments:
                    new_concepts -- dict mapping a node id to a dict of {id: value}
                    create_collections -- True to create a new collection for each node; any
                        other value adds to the collection already assigned to the node
                        (creating one when the assigned collection cannot be loaded)

                    Warnings are appended to self.errors, each one exactly once.
                    """

                    def new_import_collection(node, collectionid, legacyoid):
                        # build a collection labelled '<node name>_import', assign it to the node and save both
                        collection = Concept({
                            'id': collectionid,
                            'legacyoid': legacyoid,
                            'nodetype': 'Collection'
                        })
                        collection.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                        node.config['rdmCollection'] = collectionid
                        node.save()
                        collection.save()
                        return collection

                    candidates = Concept().get(id='00000000-0000-0000-0000-000000000006')
                    for arches_nodeid, concepts in new_concepts.items():
                        collectionid = str(uuid.uuid4())
                        topconceptid = str(uuid.uuid4())
                        node = Node.objects.get(nodeid=arches_nodeid)

                        # if node.datatype is concept or concept-list create concepts and collections
                        if node.datatype in ['concept', 'concept-list']:
                            collection_legacyoid = node.name + '_' + str(node.graph_id) + '_import'

                            # create collection if create_collections = create, otherwise append to collection already assigned to node
                            if create_collections == True:
                                # check to see that there is not already a collection for this node
                                if node.config['rdmCollection'] is not None:
                                    # report only this warning; re-adding an accumulated list duplicated earlier ones
                                    self.errors.append({'type': 'WARNING', 'message': 'A collection already exists for the {0} node. Use the add option to add concepts to this collection.'.format(node.name)})
                                    collection = None
                                else:
                                    # if there is no collection assigned to this node, create one and assign it to the node
                                    try:
                                        # check to see that a collection with this legacyid does not already exist
                                        collection = Concept().get(legacyoid=collection_legacyoid)
                                    except Exception:
                                        collection = new_import_collection(node, collectionid, collection_legacyoid)
                                    else:
                                        self.errors.append({'type': 'WARNING', 'message': 'A collection with the legacyid {0} already exists.'.format(collection_legacyoid)})
                            else:
                                # if create collection = add check that there is a collection associated with node, if no collection associated with node create a collection and associated with the node
                                try:
                                    collection = Concept().get(id=node.config['rdmCollection'])
                                except Exception:
                                    collection = new_import_collection(node, collectionid, collection_legacyoid)

                            if collection is not None:
                                topconcept_legacyoid = node.name + '_' + str(node.graph_id)
                                # Check if top concept already exists, if not create it and add to candidates scheme
                                try:
                                    topconcept = Concept().get(legacyoid=topconcept_legacyoid)
                                except Exception:
                                    topconcept = Concept({
                                        'id': topconceptid,
                                        'legacyoid': topconcept_legacyoid,
                                        'nodetype': 'Concept'
                                    })
                                    topconcept.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                    topconcept.save()
                                candidates.add_relation(topconcept, 'narrower')

                                # create child concepts and relate to top concept and collection accordingly
                                for conceptid, value in concepts.items():
                                    concept_legacyoid = value + '_' + node.name + '_' + str(node.graph_id)
                                    # check if concept already exists, if not create and add to topconcept and collection
                                    try:
                                        # an IndexError here means no child carries this value yet
                                        conceptid = [child for child in topconcept.get_child_concepts(topconcept.id) if child[1] == value][0][0]
                                        concept = Concept().get(id=conceptid)
                                    except Exception:
                                        concept = Concept({
                                            'id': conceptid,
                                            'legacyoid': concept_legacyoid,
                                            'nodetype': 'Concept'
                                        })
                                        concept.addvalue({'id': str(uuid.uuid4()), 'value': value, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                                        concept.save()
                                    collection.add_relation(concept, 'member')
                                    topconcept.add_relation(concept, 'narrower')

                        # if node.datatype is domain or domain-list create options array in node.config
                        elif node.datatype in ['domain-value', 'domain-value-list']:
                            options = node.config['options']
                            # texts already offered by the domain, extended as new options are added
                            known_texts = [option['text'] for option in options]
                            options_added = False
                            for domainid, value in concepts.items():
                                # check if value already exists in domain
                                if value not in known_texts:
                                    options.append({
                                        "text": value,
                                        "selected": False,
                                        "id": domainid
                                    })
                                    known_texts.append(value)
                                    options_added = True
                            # a single save persists every option added above
                            if options_added:
                                node.save()
示例#11
0
文件: skos.py 项目: legiongis/arches
    def save_concepts_from_skos(self, graph, overwrite_options="overwrite", staging_options="keep", prevent_indexing=False):
        """
        Given an RDF graph, try to save the concepts it describes to the system.

        The graph is scanned for skos:ConceptScheme subjects, the subjects tied
        to each scheme through skos:inScheme, skos:Collection subjects and
        skos:member triples.  The nodes and relations found are appended to
        self.nodes, self.relations and self.member_relations and are then
        written to the database.

        Keyword arguments:
        graph -- the rdflib.graph.Graph to import
        overwrite_options -- 'overwrite' saves every node, 'ignore' saves only nodes that are not in the database yet
        staging_options -- 'stage' relates every new non-scheme node to the candidates concept, 'keep' adds no such relation
        prevent_indexing -- True to prevent indexing of concepts

        Returns the last ConceptScheme node held in self.nodes, or None when there is none.

        Raises an Exception when graph is not an rdflib.graph.Graph.

        """

        if not isinstance(graph, Graph):
            raise Exception("graph argument should be of type rdflib.graph.Graph")

        baseuuid = uuid.uuid4()
        allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)
        default_lang = settings.LANGUAGE_CODE

        value_types = models.DValueType.objects.all()
        skos_value_types = {
            value_type.valuetype: value_type for value_type in value_types.filter(Q(namespace="skos") | Q(namespace="arches"))
        }
        dcterms_queryset = value_types.filter(namespace="dcterms")
        dcterms_identifier_type = dcterms_queryset.get(valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ""))
        # loaded once up front so the loops below don't hit the database for every predicate
        dcterms_value_types = {value_type.valuetype: value_type for value_type in dcterms_queryset}

        def identifier_value(identifier):
            # the dcterms:identifier value that every scheme and concept node receives
            return {
                "id": identifier["value_id"],
                "value": identifier["value"],
                "language": default_lang,
                "type": dcterms_identifier_type.valuetype,
                "category": dcterms_identifier_type.category,
            }

        def add_skos_value(node, literal, value_type):
            # attach a skos/arches value (eg: prefLabel, scopeNote, etc...) to the node
            unwrapped = self.unwrapJsonLiteral(literal)
            node.addvalue(
                {
                    "id": unwrapped["value_id"],
                    "value": unwrapped["value"],
                    "language": literal.language or default_lang,
                    "type": value_type.valuetype,
                    "category": value_type.category,
                }
            )

        def concept_exists(concept_id):
            # True when a concept with this primary key is already in the database
            return models.Concept.objects.filter(pk=concept_id).exists()

        # Search for ConceptSchemes first
        for scheme, _p, _o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
            identifier = self.unwrapJsonLiteral(str(scheme))
            scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
            concept_scheme = Concept({"id": scheme_id, "legacyoid": str(scheme), "nodetype": "ConceptScheme"})

            for predicate, obj in graph.predicate_objects(subject=scheme):
                value_type = None
                if str(DCTERMS) in predicate:
                    # predicate.replace(DCTERMS, '') should yield something like 'title' or 'description', etc..
                    value_type = dcterms_value_types.get(str(predicate.replace(DCTERMS, "")))

                if value_type is not None:
                    if not self.language_exists(obj, allowed_languages):
                        # refresh the language list after language_exists reports a miss
                        allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                    try:
                        val = self.unwrapJsonLiteral(obj)
                        if predicate == DCTERMS.title:
                            # dcterms:title is cast to skos:prefLabel
                            cast_type = "prefLabel"
                        elif predicate == DCTERMS.description:
                            # dcterms:description is cast to skos:scopeNote
                            cast_type = "scopeNote"
                        else:
                            cast_type = None

                        if cast_type is not None:
                            concept_scheme.addvalue(
                                {
                                    "id": val["value_id"],
                                    "value": val["value"],
                                    "language": obj.language or default_lang,
                                    "type": cast_type,
                                    "category": value_type.category,
                                }
                            )
                        elif predicate == DCTERMS.identifier:
                            identifier = self.unwrapJsonLiteral(str(obj))
                    except Exception as e:
                        # a value that can't be read is skipped (best effort), but no longer silently
                        self.logger.warning(f'Unable to import the "{predicate}" value of concept scheme "{scheme}": {e}')

                if predicate == SKOS.hasTopConcept:
                    top_concept_id = self.generate_uuid_from_subject(baseuuid, obj)
                    self.relations.append({"source": scheme_id, "type": "hasTopConcept", "target": top_concept_id})

            concept_scheme.addvalue(identifier_value(identifier))
            self.nodes.append(concept_scheme)

            # Search for Concepts
            for s, _p, _o in graph.triples((None, SKOS.inScheme, scheme)):
                identifier = self.unwrapJsonLiteral(str(s))
                concept_id = self.generate_uuid_from_subject(baseuuid, s)
                concept = Concept({"id": concept_id, "legacyoid": str(s), "nodetype": "Concept"})

                # loop through all the elements within a <skos:Concept> element
                for predicate, obj in graph.predicate_objects(subject=s):
                    if str(SKOS) in predicate or str(ARCHES) in predicate:
                        if not self.language_exists(obj, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                        # this is essentially the skos element type within a <skos:Concept>
                        # element (eg: prefLabel, broader, etc...)
                        relation_or_value_type = predicate.replace(SKOS, "").replace(ARCHES, "")

                        if relation_or_value_type in skos_value_types:
                            add_skos_value(concept, obj, skos_value_types[relation_or_value_type])
                        elif predicate == SKOS.broader:
                            # a broader link is stored reversed, as a narrower relation from the parent
                            self.relations.append(
                                {
                                    "source": self.generate_uuid_from_subject(baseuuid, obj),
                                    "type": "narrower",
                                    "target": concept_id,
                                }
                            )
                        elif predicate in (SKOS.narrower, SKOS.related):
                            self.relations.append(
                                {
                                    "source": concept_id,
                                    "type": relation_or_value_type,
                                    "target": self.generate_uuid_from_subject(baseuuid, obj),
                                }
                            )

                    elif predicate == DCTERMS.identifier:
                        identifier = self.unwrapJsonLiteral(str(obj))

                concept.addvalue(identifier_value(identifier))
                self.nodes.append(concept)

        # Search for SKOS.Collections
        for s, _p, _o in graph.triples((None, RDF.type, SKOS.Collection)):
            collection = Concept({"id": self.generate_uuid_from_subject(baseuuid, s), "legacyoid": str(s), "nodetype": "Collection"})
            # loop through all the elements within a <skos:Collection> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if str(SKOS) in predicate or str(ARCHES) in predicate:
                    if not self.language_exists(obj, allowed_languages):
                        allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                    # only values are read here (eg: prefLabel), relations of a collection come from skos:member
                    value_type_name = predicate.replace(SKOS, "").replace(ARCHES, "")
                    if value_type_name in skos_value_types:
                        add_skos_value(collection, obj, skos_value_types[value_type_name])

            self.nodes.append(collection)

        for s, _p, o in graph.triples((None, SKOS.member, None)):
            self.member_relations.append(
                {
                    "source": self.generate_uuid_from_subject(baseuuid, s),
                    "type": "member",
                    "target": self.generate_uuid_from_subject(baseuuid, o),
                }
            )

        # insert and index the concepts
        scheme_node = None
        orphaned_concepts = {}
        with transaction.atomic():
            for node in self.nodes:
                if node.nodetype == "ConceptScheme":
                    scheme_node = node
                elif node.nodetype == "Concept":
                    # every concept counts as orphaned until a relation pointing at it is found below
                    orphaned_concepts[str(node.id)] = node

                if staging_options == "stage" and node.nodetype != "ConceptScheme" and not concept_exists(node.id):
                    # this is a new concept, so add a reference to it in the Candidates scheme
                    self.relations.append({"source": "00000000-0000-0000-0000-000000000006", "type": "narrower", "target": node.id})

                if overwrite_options == "overwrite":
                    node.save()
                elif overwrite_options == "ignore" and not concept_exists(node.id):
                    # don't do anything if the concept already exists, else save it
                    node.save()

            # insert the concept relations
            for relation in self.relations:
                models.Relation.objects.get_or_create(
                    conceptfrom_id=relation["source"], conceptto_id=relation["target"], relationtype_id=relation["type"]
                )
                # check for orphaned concepts, every concept except the concept scheme should have an edge pointing to it
                if relation["type"] in ("narrower", "hasTopConcept"):
                    # the keys are str(node.id), so the target is normalized the same way before the lookup
                    orphaned_concepts.pop(str(relation["target"]), None)

            if orphaned_concepts and scheme_node:
                # collect the orphans under a new scheme named after the imported scheme
                orphaned_scheme = Concept({"id": uuid.uuid4(), "legacyoid": uuid.uuid4(), "nodetype": "ConceptScheme"})
                for value in scheme_node.values:
                    if value.type == "prefLabel":
                        orphaned_scheme.addvalue(
                            {
                                "id": uuid.uuid4(),
                                "value": "ORPHANS - " + value.value,
                                "language": value.language,
                                "type": value.type,
                                "category": value.category,
                            }
                        )
                orphaned_scheme.save()
                for orphaned_concept_id in orphaned_concepts:
                    models.Relation.objects.create(
                        conceptfrom_id=str(orphaned_scheme.id), conceptto_id=orphaned_concept_id, relationtype_id="narrower"
                    )
                self.logger.warning(f'\nThe SKOS file "{os.path.split(self.path_to_file)[1]}" appears to have orphaned concepts.')

            # need to index after the concepts and relations have been entered into the db
            # so that the proper context gets indexed with the concept
            if scheme_node and not prevent_indexing:
                scheme_node.bulk_index()

        # insert the concept collection relations
        # we do this outside a transaction so that we can load incomplete collections
        for relation in self.member_relations:
            try:
                models.Relation.objects.get_or_create(
                    conceptfrom_id=relation["source"], conceptto_id=relation["target"], relationtype_id=relation["type"]
                )
            except IntegrityError as e:
                self.logger.warning(e)

        return scheme_node